From 862ae099b078edecf0e28240e94a0c05f01799e2 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Thu, 28 Jun 2018 16:08:52 +0300 Subject: [PATCH] Construct diagnostics results in the monitor thread MariaDBMonitor diagnostics printing is unsafe as some of the read fields are arrays. To be on the safe side, the fields are now read in the monitor worker thread. Since diagnostics must work even for stopped monitors, a worker task is used. In practice, it usually runs when the monitor is sleeping. --- include/maxscale/monitor.hh | 2 +- include/maxscale/utils.hh | 9 ++ server/core/utils.cc | 28 +++++ .../modules/monitor/mariadbmon/mariadbmon.cc | 109 +++++++++++++++--- .../modules/monitor/mariadbmon/mariadbmon.hh | 6 +- 5 files changed, 138 insertions(+), 16 deletions(-) diff --git a/include/maxscale/monitor.hh b/include/maxscale/monitor.hh index 9dfe71ee3..4afa15933 100644 --- a/include/maxscale/monitor.hh +++ b/include/maxscale/monitor.hh @@ -22,7 +22,7 @@ namespace maxscale { class MonitorInstance : public MXS_MONITOR_INSTANCE - , private maxscale::Worker + , protected maxscale::Worker { public: MonitorInstance(const MonitorInstance&) = delete; diff --git a/include/maxscale/utils.hh b/include/maxscale/utils.hh index 2faccc388..bb1ee613b 100644 --- a/include/maxscale/utils.hh +++ b/include/maxscale/utils.hh @@ -131,6 +131,15 @@ inline std::vector strtok(std::string str, const char* delim) return rval; } +/** + * Format parameters to a string. Uses printf-formatting. + * + * @param format Format string + * @param ... Items to convert according to format string + * @return The result string + */ +std::string string_printf(const char* format, ...) mxb_attribute((format (printf, 1, 2))); + /** * @class CloserTraits utils.hh * diff --git a/server/core/utils.cc b/server/core/utils.cc index 0907bdd62..9e66bcfa8 100644 --- a/server/core/utils.cc +++ b/server/core/utils.cc @@ -1170,4 +1170,32 @@ uint8_t* set_byteN(uint8_t* ptr, uint64_t value, int bytes) return ptr + bytes; } +std::string string_printf(const char* format, ...) +{ + /* Use 'vsnprintf' for the formatted printing. It outputs the optimal buffer length - 1. */ + va_list args; + va_start(args, format); + int characters = vsnprintf(NULL, 0, format, args); + va_end(args); + std::string rval; + if (characters < 0) + { + // Encoding (programmer) error. + ss_dassert(!true); + MXS_ERROR("Could not format the string %s.", format); + } + else if (characters > 0) + { + // 'characters' does not include the \0-byte. + int total_size = characters + 1; + rval.reserve(total_size); + rval.resize(characters); // The final "length" of the string + va_start(args, format); + // Write directly to the string internal array, avoiding any temporary arrays. + vsnprintf(&rval[0], total_size, format, args); + va_end(args); + } + return rval; +} + } diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index 0710c641c..130934f25 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -23,7 +23,9 @@ #include #include +#include #include +#include #include // TODO: For monitor_add_parameters #include "../../../core/internal/monitor.h" @@ -51,6 +53,9 @@ static const char CN_MASTER_FAILURE_TIMEOUT[] = "master_failure_timeout"; static const char CN_REPLICATION_USER[] = "replication_user"; static const char CN_REPLICATION_PASSWORD[] = "replication_password"; +static const char DIAG_ERROR[] = "Internal error, could not print diagnostics. " + "Check log for more information."; + MariaDBMonitor::MariaDBMonitor(MXS_MONITOR* monitor) : maxscale::MonitorInstance(monitor) , m_id(config_get_global_options()->id) @@ -222,32 +227,75 @@ bool MariaDBMonitor::configure(const MXS_CONFIG_PARAMETER* params) void MariaDBMonitor::diagnostics(DCB *dcb) const { - dcb_printf(dcb, "Automatic failover: %s\n", m_auto_failover ? "Enabled" : "Disabled"); - dcb_printf(dcb, "Failcount: %d\n", m_failcount); - dcb_printf(dcb, "Failover timeout: %u\n", m_failover_timeout); - dcb_printf(dcb, "Switchover timeout: %u\n", m_switchover_timeout); - dcb_printf(dcb, "Automatic rejoin: %s\n", m_auto_rejoin ? "Enabled" : "Disabled"); - dcb_printf(dcb, "Enforce read-only: %s\n", m_enforce_read_only_slaves ? + /* The problem with diagnostic printing is that some of the printed elements are array-like and their + * length could change during a monitor loop. Thus, the variables should only be read by the monitor + * thread and not the admin thread. Because the diagnostic must be printable even when the monitor is + * not running, the printing must be done outside the normal loop. */ + + /* The 'dcb' is owned by the admin thread (the thread executing this function), and probably + * should not be written to by any other thread. To prevent this, have the monitor thread + * print the diagnostics to a string. */ + string diag_str; + // 'execute_worker_task' is not a const method, although the task we are sending is. + MariaDBMonitor* mutable_ptr = const_cast(this); + bool func_ran = mutable_ptr->execute_worker_task([this, &diag_str] + { + diag_str = diagnostics_to_string(); + }); + + if (!func_ran) + { + diag_str = DIAG_ERROR; + } + dcb_printf(dcb, "%s", diag_str.c_str()); +} + +string MariaDBMonitor::diagnostics_to_string() const +{ + using maxscale::string_printf; + string rval; + rval += string_printf("Automatic failover: %s\n", m_auto_failover ? "Enabled" : "Disabled"); + rval += string_printf("Failcount: %d\n", m_failcount); + rval += string_printf("Failover timeout: %u\n", m_failover_timeout); + rval += string_printf("Switchover timeout: %u\n", m_switchover_timeout); + rval += string_printf("Automatic rejoin: %s\n", m_auto_rejoin ? "Enabled" : "Disabled"); + rval += string_printf("Enforce read-only: %s\n", m_enforce_read_only_slaves ? "Enabled" : "Disabled"); - dcb_printf(dcb, "MaxScale monitor ID: %lu\n", m_id); - dcb_printf(dcb, "Detect replication lag: %s\n", (m_detect_replication_lag) ? "Enabled" : "Disabled"); - dcb_printf(dcb, "Detect stale master: %s\n", (m_detect_stale_master == 1) ? + rval += string_printf("MaxScale monitor ID: %lu\n", m_id); + rval += string_printf("Detect replication lag: %s\n", (m_detect_replication_lag) ? "Enabled" : "Disabled"); + rval += string_printf("Detect stale master: %s\n", (m_detect_stale_master == 1) ? "Enabled" : "Disabled"); if (m_excluded_servers.size() > 0) { - dcb_printf(dcb, "Non-promotable servers (failover): "); - dcb_printf(dcb, "%s\n", monitored_servers_to_string(m_excluded_servers).c_str()); + rval += string_printf("Non-promotable servers (failover): "); + rval += string_printf("%s\n", monitored_servers_to_string(m_excluded_servers).c_str()); } - dcb_printf(dcb, "\nServer information:\n-------------------\n\n"); + rval += string_printf("\nServer information:\n-------------------\n\n"); for (auto iter = m_servers.begin(); iter != m_servers.end(); iter++) { - string server_info = (*iter)->diagnostics() + "\n"; - dcb_printf(dcb, "%s", server_info.c_str()); + rval += (*iter)->diagnostics() + "\n"; } + return rval; } json_t* MariaDBMonitor::diagnostics_json() const +{ + json_t* rval = NULL; + MariaDBMonitor* mutable_ptr = const_cast(this); + bool func_ran = mutable_ptr->execute_worker_task([this, &rval] + { + rval = diagnostics_to_json(); + }); + + if (!func_ran) + { + rval = mxs_json_error_append(rval, "%s", DIAG_ERROR); + } + return rval; +} + +json_t* MariaDBMonitor::diagnostics_to_json() const { json_t* rval = json_object(); json_object_set_new(rval, "monitor_id", json_integer(m_id)); @@ -285,6 +333,39 @@ json_t* MariaDBMonitor::diagnostics_json() const return rval; } +bool MariaDBMonitor::execute_worker_task(GenericFunction func) +{ + ss_dassert(mxs_rworker_get_current() == mxs_rworker_get(MXS_RWORKER_MAIN)); + /* The worker message system works on objects of class Task, each representing a different action. + * Let's use a function object inside a task to construct a generic action. */ + class CustomTask : public maxscale::Worker::Task + { + public: + CustomTask(GenericFunction func) + : m_func(func) + {} + + private: + GenericFunction m_func; + void execute(maxscale::Worker& worker) + { + m_func(); + } + }; + + CustomTask task(func); + maxscale::Semaphore done(0); + /* Although the current method is being ran in the admin thread, 'post' sends the task to the + * worker thread of "this". "task" is a stack parameter, so need to always wait for completion + * even if there were no results to process. */ + bool sent = post(&task, &done, Worker::EXECUTE_AUTO); + if (sent) + { + done.wait(); + } + return sent; +} + /** * Connect to and query/update a server. * diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index fb3f6db7a..636b4cfe2 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -104,6 +104,7 @@ protected: void process_state_changes(); private: + typedef std::function GenericFunction; struct CycleInfo { @@ -188,7 +189,10 @@ private: bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params); MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db); MariaDBServer* get_server(int64_t id); - bool execute_manual_command(std::function command, json_t** error_out); + bool execute_manual_command(GenericFunction command, json_t** error_out); + bool execute_worker_task(GenericFunction func); + std::string diagnostics_to_string() const; + json_t* diagnostics_to_json() const; // Cluster discovery and status assignment methods void update_server(MariaDBServer& server);