
The monitor now continuously updates a list of enabled server events. When promoting a new master in failover/switchover, only events that were enabled on the previous master are enabled on the new. This avoids enabling events that may have been disabled on the master yet stayed in the SLAVESIDE_DISABLED- state on the slave. In the case of reset-replication command, events on the new master are only enabled if the monitor had a master when the command was launched. Otherwise all events remain disabled.
576 lines
20 KiB
C++
576 lines
20 KiB
C++
/*
|
|
* Copyright (c) 2018 MariaDB Corporation Ab
|
|
*
|
|
* Use of this software is governed by the Business Source License included
|
|
* in the LICENSE.TXT file and at www.mariadb.com/bsl11.
|
|
*
|
|
* Change Date: 2022-01-01
|
|
*
|
|
* On the date above, in accordance with the Business Source License, use
|
|
* of this software will be governed by version 2 or later of the General
|
|
* Public License.
|
|
*/
|
|
#pragma once
|
|
#include "mariadbmon_common.hh"
|
|
#include <functional>
|
|
#include <string>
|
|
#include <memory>
|
|
#include <maxscale/monitor.h>
|
|
#include <maxbase/stopwatch.hh>
|
|
#include "server_utils.hh"
|
|
|
|
class QueryResult;
|
|
class MariaDBServer;
|
|
// Server pointer array
|
|
typedef std::vector<MariaDBServer*> ServerArray;
|
|
|
|
/**
|
|
* Data required for checking replication topology cycles and other graph algorithms. This data is mostly
|
|
* used by the monitor object, as the data only makes sense in relation to other nodes.
|
|
*/
|
|
struct NodeData
|
|
{
|
|
// Default values for index parameters
|
|
static const int INDEX_NOT_VISITED = 0;
|
|
static const int INDEX_FIRST = 1;
|
|
// Default values for the cycle
|
|
static const int CYCLE_NONE = 0;
|
|
static const int CYCLE_FIRST = 1;
|
|
// Default value for reach
|
|
static const int REACH_UNKNOWN = -1;
|
|
|
|
// Bookkeeping for graph searches. May be overwritten by multiple algorithms.
|
|
int index; /* Marks the order in which this node was visited. */
|
|
int lowest_index; /* The lowest index node this node has in its subtree. */
|
|
bool in_stack; /* Is this node currently is the search stack. */
|
|
|
|
// Results from algorithm runs. Should only be overwritten when server data has been queried.
|
|
int cycle; /* Which cycle is this node part of, if any. */
|
|
int reach; /* How many servers replicate from this server or its children. */
|
|
ServerArray parents; /* Which nodes is this node replicating from. External masters
|
|
* excluded. */
|
|
ServerArray children; /* Which nodes are replicating from this node. */
|
|
std::vector<int64_t> external_masters; /* Server id:s of external masters. */
|
|
|
|
NodeData();
|
|
|
|
/**
|
|
* Reset result data to default values. Should be ran when starting an iteration.
|
|
*/
|
|
void reset_results();
|
|
|
|
/**
|
|
* Reset index data. Should be ran before an algorithm run.
|
|
*/
|
|
void reset_indexes();
|
|
};
|
|
|
|
/**
|
|
* Monitor specific information about a server. Eventually, this will be the primary data structure handled
|
|
* by the monitor. These are initialized in @c init_server_info.
|
|
*/
|
|
class MariaDBServer
|
|
{
|
|
public:
|
|
MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index,
|
|
bool assume_unique_hostnames, bool query_events);
|
|
|
|
class EventInfo
|
|
{
|
|
public:
|
|
std::string name; /**< Event name in <database.name> form */
|
|
std::string definer; /**< Definer of the event */
|
|
std::string status; /**< Status of the event */
|
|
};
|
|
|
|
enum class server_type
|
|
{
|
|
UNKNOWN, /* Totally unknown. Server has not been connected to yet. */
|
|
NORMAL, /* A normal MariaDB/MySQL server, possibly supported. */
|
|
BINLOG_ROUTER /* MaxScale binlog server. Requires special handling. */
|
|
};
|
|
|
|
enum class BinlogMode
|
|
{
|
|
BINLOG_ON,
|
|
BINLOG_OFF
|
|
};
|
|
|
|
// Class which encapsulates server capabilities depending on its version.
|
|
class Capabilities
|
|
{
|
|
public:
|
|
bool basic_support = false; // Is the server version supported by the monitor at all?
|
|
bool gtid = false; // Supports MariaDB gtid? Required for failover etc.
|
|
bool max_statement_time = false; // Supports max_statement_time?
|
|
};
|
|
|
|
// This class groups some miscellaneous replication related settings together.
|
|
class ReplicationSettings
|
|
{
|
|
public:
|
|
bool gtid_strict_mode = false; /* Enable additional checks for replication */
|
|
bool log_bin = false; /* Is binary logging enabled? */
|
|
bool log_slave_updates = false; /* Does the slave write replicated events to binlog? */
|
|
};
|
|
|
|
/* Monitored server base class/struct. MariaDBServer does not own the struct, it is not freed
|
|
* (or connection closed) when a MariaDBServer is destroyed. */
|
|
MXS_MONITORED_SERVER* m_server_base = NULL;
|
|
/* What position this server has in the monitor config? Used for tiebreaking between servers. */
|
|
int m_config_index = 0;
|
|
|
|
server_type m_srv_type = server_type::UNKNOWN; /* Server type. */
|
|
Capabilities m_capabilities; /* Server capabilities */
|
|
|
|
int64_t m_server_id = SERVER_ID_UNKNOWN; /* Value of @@server_id. Valid values are
|
|
* 32bit unsigned. */
|
|
int64_t m_gtid_domain_id = GTID_DOMAIN_UNKNOWN; /* The value of gtid_domain_id, the domain which is used
|
|
* for new non-replicated events. */
|
|
|
|
bool m_read_only = false; /* Value of @@read_only */
|
|
GtidList m_gtid_current_pos; /* Gtid of latest event. */
|
|
GtidList m_gtid_binlog_pos; /* Gtid of latest event written to binlog. */
|
|
SlaveStatusArray m_slave_status; /* Data returned from SHOW (ALL) SLAVE(S) STATUS */
|
|
NodeData m_node; /* Replication topology data */
|
|
|
|
/* Replication lag of the server. Used during calculation so that the actual SERVER struct is
|
|
* only written to once. */
|
|
int m_replication_lag = MXS_RLAG_UNDEFINED;
|
|
/* Copy of same field in monitor object. TODO: pass in struct when adding concurrent updating. */
|
|
bool m_assume_unique_hostnames = true;
|
|
/* Has anything that could affect replication topology changed this iteration?
|
|
* Causes: server id, slave connections, read-only. */
|
|
bool m_topology_changed = true;
|
|
/* Miscellaneous replication related settings. These are not normally queried from the server, call
|
|
* 'update_replication_settings' before use. */
|
|
ReplicationSettings m_rpl_settings;
|
|
|
|
bool m_query_events; /* Copy of monitor->m_handle_event_scheduler. TODO: move elsewhere */
|
|
EventNameSet m_enabled_events; /* Enabled scheduled events */
|
|
|
|
bool m_print_update_errormsg = true; /* Should an update error be printed? */
|
|
|
|
/**
|
|
* Print server information to a json object.
|
|
*
|
|
* @return Json diagnostics object
|
|
*/
|
|
json_t* to_json() const;
|
|
|
|
/**
|
|
* Print server information to a string.
|
|
*
|
|
* @return Diagnostics string
|
|
*/
|
|
std::string diagnostics() const;
|
|
|
|
/**
|
|
* Query this server.
|
|
*/
|
|
void monitor_server();
|
|
|
|
/**
|
|
* Update server version. This method should be called after (re)connecting to a
|
|
* backend. Calling this every monitoring loop is overkill.
|
|
*/
|
|
void update_server_version();
|
|
|
|
/**
|
|
* Checks monitor permissions on the server. Sets/clears the SERVER_AUTH_ERROR bit.
|
|
*/
|
|
void check_permissions();
|
|
|
|
/**
|
|
* Calculate how many events are left in the relay log of the slave connection.
|
|
*
|
|
* @param slave_conn The slave connection to calculate for
|
|
* @return Number of events in relay log. Always 0 or greater.
|
|
*/
|
|
uint64_t relay_log_events(const SlaveStatus& slave_conn);
|
|
|
|
/**
|
|
* Execute a query which returns data. The results are returned as a unique pointer to a QueryResult
|
|
* object. The column names of the results are assumed unique.
|
|
*
|
|
* @param query The query
|
|
* @param errmsg_out Where to store an error message if query fails. Can be null.
|
|
* @return Pointer to query results, or an empty pointer on failure
|
|
*/
|
|
std::unique_ptr<QueryResult> execute_query(const std::string& query, std::string* errmsg_out = NULL);
|
|
|
|
/**
|
|
* execute_cmd_ex with query retry ON.
|
|
*/
|
|
bool execute_cmd(const std::string& cmd, std::string* errmsg_out = NULL);
|
|
|
|
/**
|
|
* execute_cmd_ex with query retry OFF.
|
|
*/
|
|
bool execute_cmd_no_retry(const std::string& cmd,
|
|
std::string* errmsg_out = NULL, unsigned int* errno_out = NULL);
|
|
|
|
/**
|
|
* Update server slave connection information.
|
|
*
|
|
* @param gtid_domain Which gtid domain should be parsed.
|
|
* @param errmsg_out Where to store an error message if query fails. Can be null.
|
|
* @return True on success
|
|
*/
|
|
bool do_show_slave_status(std::string* errmsg_out = NULL);
|
|
|
|
/**
|
|
* Query gtid_current_pos and gtid_binlog_pos and save the values to the server.
|
|
*
|
|
* @param errmsg_out Where to store an error message if query fails. Can be null.
|
|
* @return True if query succeeded
|
|
*/
|
|
bool update_gtids(std::string* errmsg_out = NULL);
|
|
|
|
/**
|
|
* Query a few miscellaneous replication settings.
|
|
*
|
|
* @param error_out Query error output
|
|
* @return True on success
|
|
*/
|
|
bool update_replication_settings(std::string* error_out = NULL);
|
|
|
|
/**
|
|
* Query and save server_id, read_only and (if 10.X) gtid_domain_id.
|
|
*
|
|
* @param errmsg_out Where to store an error message if query fails. Can be null.
|
|
* @return True on success.
|
|
*/
|
|
bool read_server_variables(std::string* errmsg_out = NULL);
|
|
|
|
/**
|
|
* Print warnings if gtid_strict_mode or log_slave_updates is off. Does not query the server,
|
|
* so 'update_replication_settings' should have been called recently to update the values.
|
|
*/
|
|
void warn_replication_settings() const;
|
|
|
|
/**
|
|
* Wait until server catches up to demotion target. Only considers gtid domains common
|
|
* to this server and the target. The gtid compared to on the demotion target is 'gtid_binlog_pos'.
|
|
* It is not updated during this method.
|
|
*
|
|
* The gtid used for comparison on this server is 'gtid_binlog_pos' if this server has both 'log_bin'
|
|
* and 'log_slave_updates' on, and 'gtid_current_pos' otherwise. This server is updated during the
|
|
* method.
|
|
*
|
|
* @return True, if target server gtid was reached within allotted time
|
|
*/
|
|
bool catchup_to_master(GeneralOpData& op, const GtidList& target);
|
|
|
|
/**
|
|
* Find slave connection to the target server. If the IO thread is trying to connect
|
|
* ("Connecting"), the connection is only accepted if the 'Master_Server_Id' is known to be correct.
|
|
* If the IO or the SQL thread is stopped, the connection is not returned.
|
|
*
|
|
* @param target Immediate master or relay server
|
|
* @return The slave status info of the slave thread, or NULL if not found or not accepted
|
|
*/
|
|
const SlaveStatus* slave_connection_status(const MariaDBServer* target) const;
|
|
|
|
/**
|
|
* Find slave connection to the target server. Only considers host and port when selecting
|
|
* the connection. A matching connection is accepted even if its IO or SQL thread is stopped.
|
|
*
|
|
* @param target Immediate master or relay server
|
|
* @return The slave status info of the slave thread, or NULL if not found
|
|
*/
|
|
const SlaveStatus* slave_connection_status_host_port(const MariaDBServer* target) const;
|
|
|
|
/**
|
|
* Checks if this server can replicate from master. Only considers gtid:s and only detects obvious
|
|
* errors. The non-detected errors will mostly be detected once the slave tries to start replicating.
|
|
* Before calling this, update the gtid:s of the master so that the the gtid:s of the master are more
|
|
* recent than those of this server.
|
|
*
|
|
* @param master_info Master server
|
|
* @param reason_out Details the reason for a negative result
|
|
* @return True if slave can replicate from master
|
|
*/
|
|
bool can_replicate_from(MariaDBServer* master, std::string* reason_out);
|
|
|
|
/**
|
|
* Redirect one slave server to another master
|
|
*
|
|
* @param change_cmd Change master command, usually generated by generate_change_master_cmd()
|
|
* @return True if slave accepted all commands
|
|
*/
|
|
bool redirect_one_slave(const std::string& change_cmd);
|
|
|
|
/**
|
|
* Check if the server can be demoted by switchover.
|
|
*
|
|
* @param reason_out Output explaining why server cannot be demoted
|
|
* @return True if server can be demoted
|
|
*/
|
|
bool can_be_demoted_switchover(std::string* reason_out);
|
|
|
|
/**
|
|
* Check if the server can be demoted by failover.
|
|
*
|
|
* @param operation Switchover or failover
|
|
* @param reason_out Output explaining why server cannot be demoted
|
|
* @return True if server can be demoted
|
|
*/
|
|
bool can_be_demoted_failover(std::string* reason_out);
|
|
|
|
/**
|
|
* Check if the server can be promoted by switchover or failover.
|
|
*
|
|
* @param op Switchover or failover
|
|
* @param demotion_target The server this should be promoted to
|
|
* @param reason_out Output for the reason server cannot be promoted
|
|
* @return True, if suggested new master is a viable promotion candidate
|
|
*/
|
|
bool can_be_promoted(OperationType op, const MariaDBServer* demotion_target, std::string* reason_out);
|
|
|
|
/**
|
|
* Read the file contents and send them as sql queries to the server. Any data
|
|
* returned by the queries is discarded.
|
|
*
|
|
* @param server Server to send queries to
|
|
* @param path Text file path.
|
|
* @param error_out Error output
|
|
* @return True if file was read and all commands were completed successfully
|
|
*/
|
|
bool run_sql_from_file(const std::string& path, json_t** error_out);
|
|
|
|
/**
|
|
* Enable any "SLAVESIDE_DISABLED" or "DISABLED events. Event scheduler is not touched. Only events
|
|
* with names matching an element in the event_names set are enabled.
|
|
*
|
|
* @param event_names Names of events that should be enabled
|
|
* @param error_out Error output
|
|
* @return True if all SLAVESIDE_DISABLED events were enabled
|
|
*/
|
|
bool enable_events(const EventNameSet& event_names, json_t** error_out);
|
|
|
|
/**
|
|
* Disable any "ENABLED" events. Event scheduler is not touched.
|
|
*
|
|
* @param binlog_mode If OFF, binlog event creation is disabled for the session during method execution.
|
|
* @param error_out Error output
|
|
* @return True if all ENABLED events were disabled
|
|
*/
|
|
bool disable_events(BinlogMode binlog_mode, json_t** error_out);
|
|
|
|
/**
|
|
* Stop and delete all slave connections.
|
|
*
|
|
* @param error_out Error output
|
|
* @return True if successful. If false, some connections may have been successfully deleted.
|
|
*/
|
|
bool reset_all_slave_conns(json_t** error_out);
|
|
|
|
/**
|
|
* Promote this server to take role of demotion target. Remove slave connections from this server.
|
|
* If target is/was a master, set read-only to OFF. Copy slave connections from target.
|
|
*
|
|
* @param op Cluster operation descriptor
|
|
* @return True if successful
|
|
*/
|
|
bool promote(GeneralOpData& op, ServerOperation& promotion, OperationType type,
|
|
const MariaDBServer* demotion_target);
|
|
|
|
/**
|
|
* Demote this server. Removes all slave connections. If server was master, sets read_only.
|
|
*
|
|
* @param op Cluster operation descriptor
|
|
* @return True if successful
|
|
*/
|
|
bool demote(GeneralOpData& general, ServerOperation& op);
|
|
|
|
/**
|
|
* Redirect the slave connection going to old master to replicate from new master.
|
|
*
|
|
* @param op Operation descriptor
|
|
* @param old_conn The connection which is redirected
|
|
* @param new_master The new master for the redirected connection
|
|
* @return True on success
|
|
*/
|
|
bool redirect_existing_slave_conn(GeneralOpData& op, const SlaveStatus& old_conn,
|
|
const MariaDBServer* new_master);
|
|
|
|
/**
|
|
* Copy slave connections to this server. This is usually needed during switchover promotion and on
|
|
* the demoted server. It is assumed that all slave connections of this server have
|
|
* been stopped & removed so there will be no conflicts with existing connections.
|
|
* A special rule is applied to a connection which points to this server itself: that connection
|
|
* is directed towards the 'replacement'. This is required to properly handle connections between
|
|
* the promotion and demotion targets.
|
|
*
|
|
* @param op Operation descriptor
|
|
* @params conns_to_copy The connections to add to the server
|
|
* @params replacement Which server should rep
|
|
* @return True on success
|
|
*/
|
|
bool copy_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_copy,
|
|
const MariaDBServer* replacement);
|
|
|
|
/**
|
|
* Create a new slave connection on the server and start it.
|
|
*
|
|
* @param op Operation descriptor
|
|
* @param slave_conn Existing connection to emulate
|
|
* @return True on success
|
|
*/
|
|
bool create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn);
|
|
|
|
/**
|
|
* Is binary log on? 'update_replication_settings' should be ran before this function to query the data.
|
|
*
|
|
* @return True if server has binary log enabled
|
|
*/
|
|
bool binlog_on() const;
|
|
|
|
/**
|
|
* Check if server is a running master.
|
|
*
|
|
* @return True if server is a master
|
|
*/
|
|
bool is_master() const;
|
|
|
|
/**
|
|
* Check if server is a running slave.
|
|
*
|
|
* @return True if server is a slave
|
|
*/
|
|
bool is_slave() const;
|
|
|
|
/**
|
|
* Check if server is a slave of an external server.
|
|
*
|
|
* @return True if server is a slave of an external server
|
|
*/
|
|
bool is_slave_of_ext_master() const;
|
|
|
|
/**
|
|
* Check if server is running and not in maintenance.
|
|
*
|
|
* @return True if server is usable
|
|
*/
|
|
bool is_usable() const;
|
|
|
|
/**
|
|
* Check if server is running.
|
|
*
|
|
* @return True if server is running
|
|
*/
|
|
bool is_running() const;
|
|
|
|
/**
|
|
* Check if server is down.
|
|
*
|
|
* @return True if server is down
|
|
*/
|
|
bool is_down() const;
|
|
|
|
/**
|
|
* Check if server is in maintenance.
|
|
*/
|
|
bool is_in_maintenance() const;
|
|
|
|
/**
|
|
* Is the server a relay master.
|
|
*/
|
|
bool is_relay_master() const;
|
|
|
|
/**
|
|
* Is the server low on disk space?
|
|
*/
|
|
bool is_low_on_disk_space() const;
|
|
|
|
/**
|
|
* Check if server has the given bits on in 'pending_status'.
|
|
*
|
|
* @param bits Bits to check
|
|
* @return True if all given bits are on
|
|
*/
|
|
bool has_status(uint64_t bits) const;
|
|
|
|
/**
|
|
* Check if server has the given bits on in 'mon_prev_status'.
|
|
*
|
|
* @param bits Bits to check
|
|
* @return True if all given bits are on
|
|
*/
|
|
bool had_status(uint64_t bits) const;
|
|
|
|
/**
|
|
* Getter for m_read_only.
|
|
*
|
|
* @return True if server is in read_only mode
|
|
*/
|
|
bool is_read_only() const;
|
|
|
|
/**
|
|
* Returns the server name.
|
|
*
|
|
* @return Server unique name
|
|
*/
|
|
const char* name() const;
|
|
|
|
/**
|
|
* Clear server pending status flags.
|
|
*
|
|
* @param bits Which flags to clear
|
|
*/
|
|
void clear_status(uint64_t bits);
|
|
|
|
/**
|
|
* Set server pending status flags.
|
|
*
|
|
* @param bits Which flags to set
|
|
*/
|
|
void set_status(uint64_t bits);
|
|
|
|
private:
|
|
typedef std::function<void (const EventInfo&, json_t** error_out)> ManipulatorFunc;
|
|
|
|
enum class StopMode
|
|
{
|
|
STOP_ONLY,
|
|
RESET,
|
|
RESET_ALL
|
|
};
|
|
enum class QueryRetryMode
|
|
{
|
|
ENABLED,
|
|
DISABLED
|
|
};
|
|
enum class ReadOnlySetting
|
|
{
|
|
ENABLE,
|
|
DISABLE
|
|
};
|
|
|
|
bool update_slave_status(std::string* errmsg_out = NULL);
|
|
bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status);
|
|
const SlaveStatus* sstatus_find_previous_row(const SlaveStatus& new_row, size_t guess);
|
|
|
|
void warn_event_scheduler();
|
|
bool events_foreach(ManipulatorFunc& func, json_t** error_out);
|
|
bool alter_event(const EventInfo& event, const std::string& target_status,
|
|
json_t** error_out);
|
|
|
|
bool stop_slave_conn(const std::string& conn_name, StopMode mode, maxbase::Duration time_limit,
|
|
json_t** error_out);
|
|
|
|
bool remove_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_remove);
|
|
bool execute_cmd_ex(const std::string& cmd, QueryRetryMode mode,
|
|
std::string* errmsg_out = NULL, unsigned int* errno_out = NULL);
|
|
|
|
bool execute_cmd_time_limit(const std::string& cmd, maxbase::Duration time_limit,
|
|
std::string* errmsg_out);
|
|
|
|
bool set_read_only(ReadOnlySetting value, maxbase::Duration time_limit, json_t** error_out);
|
|
bool merge_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_merge);
|
|
std::string generate_change_master_cmd(GeneralOpData& op, const SlaveStatus& slave_conn);
|
|
|
|
bool update_enabled_events();
|
|
};
|