MXS-2342 Run MariaDBMonitor diagnostics concurrent with the monitor loop
This fixes some situations where MaxAdmin/MaxCtrl would block and wait until a monitor operation or tick is complete. This also fixes a deadlock caused by calling monitor diagnostics inside a monitor script. Concurrency is enabled by adding one mutex per server object to protect array-like fields from concurrent reading/writing.
This commit is contained in:
parent
b4c5500fa1
commit
040562f718
@ -253,34 +253,20 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params)
|
||||
void MariaDBMonitor::diagnostics(DCB* dcb) const
|
||||
{
|
||||
/* The problem with diagnostic printing is that some of the printed elements are array-like and their
|
||||
* length could change during a monitor loop. Thus, the variables should only be read by the monitor
|
||||
* thread and not the admin thread. Because the diagnostic must be printable even when the monitor is
|
||||
* not running, the printing must be done outside the normal loop. */
|
||||
* length could change during a monitor loop. Such variables are protected by mutexes. Locking is
|
||||
* only required when the monitor thread writes to such a variable and when the admin thread is
|
||||
* reading it. */
|
||||
|
||||
mxb_assert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN));
|
||||
/* The 'dcb' is owned by the admin thread (the thread executing this function), and probably
|
||||
* should not be written to by any other thread. To prevent this, have the monitor thread
|
||||
* print the diagnostics to a string. */
|
||||
string diag_str;
|
||||
|
||||
// 'execute' is not a const method, although the task we are sending is.
|
||||
MariaDBMonitor* mutable_ptr = const_cast<MariaDBMonitor*>(this);
|
||||
auto func = [this, &diag_str] {
|
||||
diag_str = diagnostics_to_string();
|
||||
};
|
||||
|
||||
if (!mutable_ptr->call(func, Worker::EXECUTE_AUTO))
|
||||
{
|
||||
diag_str = DIAG_ERROR;
|
||||
}
|
||||
|
||||
dcb_printf(dcb, "%s", diag_str.c_str());
|
||||
dcb_printf(dcb, "%s", diagnostics_to_string().c_str());
|
||||
}
|
||||
|
||||
string MariaDBMonitor::diagnostics_to_string() const
|
||||
{
|
||||
using maxscale::string_printf;
|
||||
string rval;
|
||||
rval.reserve(1000); // Enough for basic output.
|
||||
|
||||
rval += string_printf("Automatic failover: %s\n", m_auto_failover ? "Enabled" : "Disabled");
|
||||
rval += string_printf("Failcount: %d\n", m_failcount);
|
||||
rval += string_printf("Failover timeout: %u\n", m_failover_timeout);
|
||||
@ -306,24 +292,16 @@ string MariaDBMonitor::diagnostics_to_string() const
|
||||
json_t* MariaDBMonitor::diagnostics_json() const
|
||||
{
|
||||
mxb_assert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN));
|
||||
json_t* rval = NULL;
|
||||
MariaDBMonitor* mutable_ptr = const_cast<MariaDBMonitor*>(this);
|
||||
auto func = [this, &rval] {
|
||||
rval = to_json();
|
||||
};
|
||||
|
||||
if (!mutable_ptr->call(func, Worker::EXECUTE_AUTO))
|
||||
{
|
||||
rval = mxs_json_error_append(rval, "%s", DIAG_ERROR);
|
||||
}
|
||||
|
||||
return rval;
|
||||
return to_json();
|
||||
}
|
||||
|
||||
json_t* MariaDBMonitor::to_json() const
|
||||
{
|
||||
json_t* rval = MonitorInstance::diagnostics_json();
|
||||
json_object_set_new(rval, "master", m_master == NULL ? json_null() : json_string(m_master->name()));
|
||||
|
||||
// The m_master-pointer can be modified during a tick, but the pointed object cannot be deleted.
|
||||
auto master = mxb::atomic::load(&m_master, mxb::atomic::RELAXED);
|
||||
json_object_set_new(rval, "master", master == nullptr ? json_null() : json_string(master->name()));
|
||||
json_object_set_new(rval,
|
||||
"master_gtid_domain_id",
|
||||
m_master_gtid_domain == GTID_DOMAIN_UNKNOWN ? json_null() :
|
||||
@ -678,7 +656,7 @@ void MariaDBMonitor::log_master_changes()
|
||||
|
||||
void MariaDBMonitor::assign_new_master(MariaDBServer* new_master)
|
||||
{
|
||||
m_master = new_master;
|
||||
mxb::atomic::store(&m_master, new_master, mxb::atomic::RELAXED);
|
||||
update_master_cycle_info();
|
||||
m_warn_current_master_invalid = true;
|
||||
m_warn_have_better_master = true;
|
||||
@ -820,8 +798,6 @@ bool MariaDBMonitor::run_manual_reset_replication(SERVER* master_server, json_t*
|
||||
return send_ok && rval;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Command handler for 'switchover'
|
||||
*
|
||||
|
@ -25,6 +25,7 @@ using std::string;
|
||||
using maxscale::string_printf;
|
||||
using maxbase::Duration;
|
||||
using maxbase::StopWatch;
|
||||
using Guard = std::lock_guard<std::mutex>;
|
||||
|
||||
MariaDBServer::MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index,
|
||||
bool assume_unique_hostnames, bool query_events)
|
||||
@ -386,6 +387,7 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out)
|
||||
|
||||
// Always write to m_slave_status. Even if the new status is equal by topology,
|
||||
// gtid:s etc may have changed.
|
||||
Guard guard(m_arraylock);
|
||||
m_slave_status = std::move(slave_status_new);
|
||||
return true;
|
||||
}
|
||||
@ -400,6 +402,8 @@ bool MariaDBServer::update_gtids(string* errmsg_out)
|
||||
auto result = execute_query(query, errmsg_out);
|
||||
if (result.get() != NULL)
|
||||
{
|
||||
Guard guard(m_arraylock);
|
||||
|
||||
rval = true;
|
||||
if (result->next_row())
|
||||
{
|
||||
@ -655,9 +659,12 @@ string MariaDBServer::diagnostics() const
|
||||
const char fmt_int64[] = "%-23s %" PRIi64 "\n";
|
||||
|
||||
string rval;
|
||||
rval.reserve(300); // Enough for most common ouput.
|
||||
|
||||
rval += string_printf(fmt_string, "Server:", name());
|
||||
rval += string_printf(fmt_int64, "Server ID:", m_server_id);
|
||||
rval += string_printf(fmt_string, "Read only:", (m_read_only ? "Yes" : "No"));
|
||||
Guard guard(m_arraylock);
|
||||
if (!m_gtid_current_pos.empty())
|
||||
{
|
||||
rval += string_printf(fmt_string, "Gtid current position:", m_gtid_current_pos.to_string().c_str());
|
||||
@ -686,6 +693,7 @@ json_t* MariaDBServer::to_json() const
|
||||
json_object_set_new(result, "server_id", json_integer(m_server_id));
|
||||
json_object_set_new(result, "read_only", json_boolean(m_read_only));
|
||||
|
||||
Guard guard(m_arraylock);
|
||||
json_object_set_new(result,
|
||||
"gtid_current_pos",
|
||||
m_gtid_current_pos.empty() ? json_null() :
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <functional>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <maxscale/monitor.h>
|
||||
#include <maxbase/stopwatch.hh>
|
||||
#include "server_utils.hh"
|
||||
@ -114,6 +115,7 @@ public:
|
||||
bool log_slave_updates = false; /* Does the slave write replicated events to binlog? */
|
||||
};
|
||||
|
||||
|
||||
/* Monitored server base class/struct. MariaDBServer does not own the struct, it is not freed
|
||||
* (or connection closed) when a MariaDBServer is destroyed. */
|
||||
MXS_MONITORED_SERVER* m_server_base = NULL;
|
||||
@ -548,6 +550,12 @@ private:
|
||||
DISABLE
|
||||
};
|
||||
|
||||
/* Protects array-like fields from concurrent access. This is only required for fields which can be
|
||||
* read from another thread while the monitor is running. In practice, these are fields read during
|
||||
* diagnostics-methods. Reading inside monitor thread does not need to be mutexed, as outside threads
|
||||
* only read the values. */
|
||||
mutable std::mutex m_arraylock;
|
||||
|
||||
bool update_slave_status(std::string* errmsg_out = NULL);
|
||||
bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status);
|
||||
const SlaveStatus* sstatus_find_previous_row(const SlaveStatus& new_row, size_t guess);
|
||||
|
Loading…
x
Reference in New Issue
Block a user