diff --git a/include/maxscale/monitor.hh b/include/maxscale/monitor.hh index bf08ac020..5d9a7df24 100644 --- a/include/maxscale/monitor.hh +++ b/include/maxscale/monitor.hh @@ -212,7 +212,7 @@ public: */ mxs_monitor_event_t get_event_type() const; - void log_state_change(); + void log_state_change(const std::string& reason); /** * Is this server ok to update disk space status. Only checks if the server knows of valid disk space @@ -516,6 +516,19 @@ protected: bool server_status_request_waiting() const; + /** + * Returns the human-readable reason why the server changed state + * + * @param server The server that changed state + * + * @return The human-readable reason why the state change occurred or + * an empty string if no information is available + */ + virtual std::string annotate_state_change(mxs::MonitorServer* server) + { + return ""; + } + /** * Contains monitor base class settings. Since monitors are stopped before a setting change, * the items cannot be modified while a monitor is running. No locking required. diff --git a/server/core/monitor.cc b/server/core/monitor.cc index 7427a084d..405e0038b 100644 --- a/server/core/monitor.cc +++ b/server/core/monitor.cc @@ -1238,14 +1238,14 @@ void MonitorServer::log_connect_error(ConnectResult rval) latest_error.c_str()); } -void MonitorServer::log_state_change() +void MonitorServer::log_state_change(const std::string& reason) { string prev = SERVER::status_to_string(mon_prev_status); string next = server->status_string(); - MXS_NOTICE("Server changed state: %s[%s:%u]: %s. [%s] -> [%s]", + MXS_NOTICE("Server changed state: %s[%s:%u]: %s. [%s] -> [%s]%s%s", server->name(), server->address, server->port, - get_event_name(), - prev.c_str(), next.c_str()); + get_event_name(), prev.c_str(), next.c_str(), + reason.empty() ? "" : ": ", reason.c_str()); } void Monitor::hangup_failed_servers() @@ -1332,7 +1332,7 @@ void Monitor::detect_handle_state_changes() mxs_monitor_event_t event = ptr->get_event_type(); ptr->server->last_event = event; ptr->server->triggered_at = mxs_clock(); - ptr->log_state_change(); + ptr->log_state_change(annotate_state_change(ptr)); if (event == MASTER_DOWN_EVENT) { diff --git a/server/modules/monitor/mariadbmon/mariadbmon.cc b/server/modules/monitor/mariadbmon/mariadbmon.cc index 8a393033c..e70baa50f 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.cc +++ b/server/modules/monitor/mariadbmon/mariadbmon.cc @@ -580,6 +580,19 @@ void MariaDBMonitor::process_state_changes() } } +std::string MariaDBMonitor::annotate_state_change(MonitorServer* server) +{ + std::string rval; + + if (server->get_event_type() == LOST_SLAVE_EVENT) + { + MariaDBServer* srv = get_server(server); + rval = srv->print_changed_slave_connections(); + } + + return rval; +} + /** * Save info on the master server's multimaster group, if any. This is required when checking for changes * in the topology. diff --git a/server/modules/monitor/mariadbmon/mariadbmon.hh b/server/modules/monitor/mariadbmon/mariadbmon.hh index e1a2fee0d..2797064ef 100644 --- a/server/modules/monitor/mariadbmon/mariadbmon.hh +++ b/server/modules/monitor/mariadbmon/mariadbmon.hh @@ -107,9 +107,10 @@ public: bool run_manual_reset_replication(SERVER* master_server, json_t** error_out); protected: - void pre_loop() override; - void tick() override; - void process_state_changes() override; + void pre_loop() override; + void tick() override; + void process_state_changes() override; + std::string annotate_state_change(mxs::MonitorServer* server) override final; private: // Some methods need a log on/off setting. diff --git a/server/modules/monitor/mariadbmon/mariadbserver.cc b/server/modules/monitor/mariadbmon/mariadbserver.cc index abe3bc3b2..83166c2f5 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.cc +++ b/server/modules/monitor/mariadbmon/mariadbserver.cc @@ -418,6 +418,7 @@ bool MariaDBServer::do_show_slave_status(string* errmsg_out) // Always write to m_slave_status. Even if the new status is equal by topology, // gtid:s etc may have changed. Guard guard(m_arraylock); + m_old_slave_status = std::move(m_slave_status); m_slave_status = std::move(slave_status_new); } @@ -724,6 +725,35 @@ string MariaDBServer::diagnostics() const return rval; } +std::string MariaDBServer::print_changed_slave_connections() +{ + std::stringstream ss; + const char* separator = ""; + + for (size_t i = 0; i < m_old_slave_status.size(); i++) + { + const auto& old_row = m_old_slave_status[i]; + const auto* new_row = sstatus_find_previous_row(old_row, i); + + if (new_row && !new_row->equal(old_row)) + { + ss << "Host: " << new_row->settings.master_endpoint.to_string() + << ", IO Running: " << SlaveStatus::slave_io_to_string(new_row->slave_io_running) + << ", SQL Running: " << (new_row->slave_sql_running ? "Yes" : "No"); + + if (!new_row->last_error.empty()) + { + ss << ", Error: " << new_row->last_error; + } + + ss << separator; + separator = "; "; + } + } + + return ss.str(); +} + json_t* MariaDBServer::to_json() const { json_t* result = json_object(); @@ -1015,13 +1045,8 @@ bool MariaDBServer::sstatus_array_topology_equal(const SlaveStatusArray& new_sla { const auto new_row = new_slave_status[i]; const auto old_row = old_slave_status[i]; - // Strictly speaking, the following should depend on the 'assume_unique_hostnames', - // but the situations it would make a difference are so rare they can be ignored. - if (new_row.slave_io_running != old_row.slave_io_running - || new_row.slave_sql_running != old_row.slave_sql_running - || new_row.settings.master_endpoint != old_row.settings.master_endpoint - || new_row.settings.name != old_row.settings.name - || new_row.master_server_id != old_row.master_server_id) + + if (!new_row.equal(old_row)) { rval = false; break; diff --git a/server/modules/monitor/mariadbmon/mariadbserver.hh b/server/modules/monitor/mariadbmon/mariadbserver.hh index 67042bb14..dbd4496b4 100644 --- a/server/modules/monitor/mariadbmon/mariadbserver.hh +++ b/server/modules/monitor/mariadbmon/mariadbserver.hh @@ -136,6 +136,7 @@ public: GtidList m_gtid_current_pos; /* Gtid of latest event. */ GtidList m_gtid_binlog_pos; /* Gtid of latest event written to binlog. */ SlaveStatusArray m_slave_status; /* Data returned from SHOW (ALL) SLAVE(S) STATUS */ + SlaveStatusArray m_old_slave_status; /* Data from the previous loop */ NodeData m_node; /* Replication topology data */ /* Replication lag of the server. Used during calculation so that the actual SERVER struct is @@ -169,6 +170,8 @@ public: */ std::string diagnostics() const; + std::string print_changed_slave_connections(); + void update_server(bool time_to_update_disk_space, const mxs::MonitorServer::ConnectionSettings& conn_settings); diff --git a/server/modules/monitor/mariadbmon/server_utils.cc b/server/modules/monitor/mariadbmon/server_utils.cc index ca008963e..955fa987e 100644 --- a/server/modules/monitor/mariadbmon/server_utils.cc +++ b/server/modules/monitor/mariadbmon/server_utils.cc @@ -87,6 +87,17 @@ json_t* SlaveStatus::to_json() const return result; } +bool SlaveStatus::equal(const SlaveStatus& rhs) const +{ + // Strictly speaking, the following should depend on the 'assume_unique_hostnames', + // but the situations it would make a difference are so rare they can be ignored. + return slave_io_running == rhs.slave_io_running + && slave_sql_running == rhs.slave_sql_running + && settings.master_endpoint == rhs.settings.master_endpoint + && settings.name == rhs.settings.name + && master_server_id == rhs.master_server_id; +} + SlaveStatus::slave_io_running_t SlaveStatus::slave_io_from_string(const std::string& str) { slave_io_running_t rval = SLAVE_IO_NO; diff --git a/server/modules/monitor/mariadbmon/server_utils.hh b/server/modules/monitor/mariadbmon/server_utils.hh index 48da9b483..509af131c 100644 --- a/server/modules/monitor/mariadbmon/server_utils.hh +++ b/server/modules/monitor/mariadbmon/server_utils.hh @@ -262,6 +262,8 @@ public: std::string to_string() const; json_t* to_json() const; + bool equal(const SlaveStatus& rhs) const; + static slave_io_running_t slave_io_from_string(const std::string& str); static std::string slave_io_to_string(slave_io_running_t slave_io); bool should_be_copied(std::string* ignore_reason_out) const;