/* * Copyright (c) 2018 MariaDB Corporation Ab * * Use of this software is governed by the Business Source License included * in the LICENSE.TXT file and at www.mariadb.com/bsl11. * * Change Date: 2022-01-01 * * On the date above, in accordance with the Business Source License, use * of this software will be governed by version 2 or later of the General * Public License. */ #pragma once #include "mariadbmon_common.hh" #include #include #include #include #include #include #include "server_utils.hh" class QueryResult; class MariaDBServer; // Server pointer array typedef std::vector ServerArray; /** * Data required for checking replication topology cycles and other graph algorithms. This data is mostly * used by the monitor object, as the data only makes sense in relation to other nodes. */ struct NodeData { // Default values for index parameters static const int INDEX_NOT_VISITED = 0; static const int INDEX_FIRST = 1; // Default values for the cycle static const int CYCLE_NONE = 0; static const int CYCLE_FIRST = 1; // Default value for reach static const int REACH_UNKNOWN = -1; // Bookkeeping for graph searches. May be overwritten by multiple algorithms. int index; /* Marks the order in which this node was visited. */ int lowest_index; /* The lowest index node this node has in its subtree. */ bool in_stack; /* Is this node currently is the search stack. */ // Results from algorithm runs. Should only be overwritten when server data has been queried. int cycle; /* Which cycle is this node part of, if any. */ int reach; /* How many servers replicate from this server or its children. */ ServerArray parents; /* Which nodes is this node replicating from. External masters * excluded. */ ServerArray children; /* Which nodes are replicating from this node. */ std::vector external_masters; /* Server id:s of external masters. */ NodeData(); /** * Reset result data to default values. Should be ran when starting an iteration. */ void reset_results(); /** * Reset index data. Should be ran before an algorithm run. */ void reset_indexes(); }; /** * Monitor specific information about a server. Eventually, this will be the primary data structure handled * by the monitor. These are initialized in @c init_server_info. */ class MariaDBServer { public: MariaDBServer(MXS_MONITORED_SERVER* monitored_server, int config_index, bool assume_unique_hostnames, bool query_events); class EventInfo { public: std::string name; /**< Event name in form */ std::string definer; /**< Definer of the event */ std::string status; /**< Status of the event */ }; enum class server_type { UNKNOWN, /* Totally unknown. Server has not been connected to yet. */ NORMAL, /* A normal MariaDB/MySQL server, possibly supported. */ BINLOG_ROUTER /* MaxScale binlog server. Requires special handling. */ }; enum class BinlogMode { BINLOG_ON, BINLOG_OFF }; // Class which encapsulates server capabilities depending on its version. class Capabilities { public: bool basic_support = false; // Is the server version supported by the monitor at all? bool gtid = false; // Supports MariaDB gtid? Required for failover etc. bool max_statement_time = false; // Supports max_statement_time? }; // This class groups some miscellaneous replication related settings together. class ReplicationSettings { public: bool gtid_strict_mode = false; /* Enable additional checks for replication */ bool log_bin = false; /* Is binary logging enabled? */ bool log_slave_updates = false; /* Does the slave write replicated events to binlog? */ }; /* Monitored server base class/struct. MariaDBServer does not own the struct, it is not freed * (or connection closed) when a MariaDBServer is destroyed. */ MXS_MONITORED_SERVER* m_server_base = NULL; /* What position this server has in the monitor config? Used for tiebreaking between servers. */ int m_config_index = 0; server_type m_srv_type = server_type::UNKNOWN; /* Server type. */ Capabilities m_capabilities; /* Server capabilities */ int64_t m_server_id = SERVER_ID_UNKNOWN; /* Value of @@server_id. Valid values are * 32bit unsigned. */ int64_t m_gtid_domain_id = GTID_DOMAIN_UNKNOWN; /* The value of gtid_domain_id, the domain which is used * for new non-replicated events. */ bool m_read_only = false; /* Value of @@read_only */ GtidList m_gtid_current_pos; /* Gtid of latest event. */ GtidList m_gtid_binlog_pos; /* Gtid of latest event written to binlog. */ SlaveStatusArray m_slave_status; /* Data returned from SHOW (ALL) SLAVE(S) STATUS */ NodeData m_node; /* Replication topology data */ /* Replication lag of the server. Used during calculation so that the actual SERVER struct is * only written to once. */ int m_replication_lag = MXS_RLAG_UNDEFINED; /* Copy of same field in monitor object. TODO: pass in struct when adding concurrent updating. */ bool m_assume_unique_hostnames = true; /* Has anything that could affect replication topology changed this iteration? * Causes: server id, slave connections, read-only. */ bool m_topology_changed = true; /* Miscellaneous replication related settings. These are not normally queried from the server, call * 'update_replication_settings' before use. */ ReplicationSettings m_rpl_settings; bool m_query_events; /* Copy of monitor->m_handle_event_scheduler. TODO: move elsewhere */ EventNameSet m_enabled_events; /* Enabled scheduled events */ bool m_print_update_errormsg = true; /* Should an update error be printed? */ /** * Print server information to a json object. * * @return Json diagnostics object */ json_t* to_json() const; /** * Print server information to a string. * * @return Diagnostics string */ std::string diagnostics() const; /** * Query this server. */ void monitor_server(); /** * Update server version. This method should be called after (re)connecting to a * backend. Calling this every monitoring loop is overkill. */ void update_server_version(); /** * Checks monitor permissions on the server. Sets/clears the SERVER_AUTH_ERROR bit. */ void check_permissions(); /** * Calculate how many events are left in the relay log of the slave connection. * * @param slave_conn The slave connection to calculate for * @return Number of events in relay log. Always 0 or greater. */ uint64_t relay_log_events(const SlaveStatus& slave_conn); /** * Execute a query which returns data. The results are returned as a unique pointer to a QueryResult * object. The column names of the results are assumed unique. * * @param query The query * @param errmsg_out Where to store an error message if query fails. Can be null. * @param errno_out Error code output. Can be null. * @return Pointer to query results, or an empty pointer on failure */ std::unique_ptr execute_query(const std::string& query, std::string* errmsg_out = NULL, unsigned int* errno_out = NULL); /** * execute_cmd_ex with query retry ON. */ bool execute_cmd(const std::string& cmd, std::string* errmsg_out = NULL); /** * execute_cmd_ex with query retry OFF. */ bool execute_cmd_no_retry(const std::string& cmd, std::string* errmsg_out = NULL, unsigned int* errno_out = NULL); /** * Update server slave connection information. * * @param gtid_domain Which gtid domain should be parsed. * @param errmsg_out Where to store an error message if query fails. Can be null. * @return True on success */ bool do_show_slave_status(std::string* errmsg_out = NULL); /** * Query gtid_current_pos and gtid_binlog_pos and save the values to the server. * * @param errmsg_out Where to store an error message if query fails. Can be null. * @return True if query succeeded */ bool update_gtids(std::string* errmsg_out = NULL); /** * Query a few miscellaneous replication settings. * * @param error_out Query error output * @return True on success */ bool update_replication_settings(std::string* error_out = NULL); /** * Query and save server_id, read_only and (if 10.X) gtid_domain_id. * * @param errmsg_out Where to store an error message if query fails. Can be null. * @return True on success. */ bool read_server_variables(std::string* errmsg_out = NULL); /** * Print warnings if gtid_strict_mode or log_slave_updates is off. Does not query the server, * so 'update_replication_settings' should have been called recently to update the values. */ void warn_replication_settings() const; /** * Wait until server catches up to demotion target. Only considers gtid domains common * to this server and the target. The gtid compared to on the demotion target is 'gtid_binlog_pos'. * It is not updated during this method. * * The gtid used for comparison on this server is 'gtid_binlog_pos' if this server has both 'log_bin' * and 'log_slave_updates' on, and 'gtid_current_pos' otherwise. This server is updated during the * method. * * @return True, if target server gtid was reached within allotted time */ bool catchup_to_master(GeneralOpData& op, const GtidList& target); /** * Find slave connection to the target server. If the IO thread is trying to connect * ("Connecting"), the connection is only accepted if the 'Master_Server_Id' is known to be correct. * If the IO or the SQL thread is stopped, the connection is not returned. * * @param target Immediate master or relay server * @return The slave status info of the slave thread, or NULL if not found or not accepted */ const SlaveStatus* slave_connection_status(const MariaDBServer* target) const; /** * Find slave connection to the target server. Only considers host and port when selecting * the connection. A matching connection is accepted even if its IO or SQL thread is stopped. * * @param target Immediate master or relay server * @return The slave status info of the slave thread, or NULL if not found */ const SlaveStatus* slave_connection_status_host_port(const MariaDBServer* target) const; /** * Checks if this server can replicate from master. Only considers gtid:s and only detects obvious * errors. The non-detected errors will mostly be detected once the slave tries to start replicating. * Before calling this, update the gtid:s of the master so that the the gtid:s of the master are more * recent than those of this server. * * @param master_info Master server * @param reason_out Details the reason for a negative result * @return True if slave can replicate from master */ bool can_replicate_from(MariaDBServer* master, std::string* reason_out); /** * Redirect one slave server to another master * * @param change_cmd Change master command, usually generated by generate_change_master_cmd() * @return True if slave accepted all commands */ bool redirect_one_slave(const std::string& change_cmd); /** * Check if the server can be demoted by switchover. * * @param reason_out Output explaining why server cannot be demoted * @return True if server can be demoted */ bool can_be_demoted_switchover(std::string* reason_out); /** * Check if the server can be demoted by failover. * * @param operation Switchover or failover * @param reason_out Output explaining why server cannot be demoted * @return True if server can be demoted */ bool can_be_demoted_failover(std::string* reason_out); /** * Check if the server can be promoted by switchover or failover. * * @param op Switchover or failover * @param demotion_target The server this should be promoted to * @param reason_out Output for the reason server cannot be promoted * @return True, if suggested new master is a viable promotion candidate */ bool can_be_promoted(OperationType op, const MariaDBServer* demotion_target, std::string* reason_out); /** * Read the file contents and send them as sql queries to the server. Any data * returned by the queries is discarded. * * @param server Server to send queries to * @param path Text file path. * @param error_out Error output * @return True if file was read and all commands were completed successfully */ bool run_sql_from_file(const std::string& path, json_t** error_out); /** * Enable any "SLAVESIDE_DISABLED" or "DISABLED events. Event scheduler is not touched. Only events * with names matching an element in the event_names set are enabled. * * @param event_names Names of events that should be enabled * @param error_out Error output * @return True if all SLAVESIDE_DISABLED events were enabled */ bool enable_events(const EventNameSet& event_names, json_t** error_out); /** * Disable any "ENABLED" events. Event scheduler is not touched. * * @param binlog_mode If OFF, binlog event creation is disabled for the session during method execution. * @param error_out Error output * @return True if all ENABLED events were disabled */ bool disable_events(BinlogMode binlog_mode, json_t** error_out); /** * Stop and delete all slave connections. * * @param error_out Error output * @return True if successful. If false, some connections may have been successfully deleted. */ bool reset_all_slave_conns(json_t** error_out); /** * Promote this server to take role of demotion target. Remove slave connections from this server. * If target is/was a master, set read-only to OFF. Copy slave connections from target. * * @param op Cluster operation descriptor * @return True if successful */ bool promote(GeneralOpData& op, ServerOperation& promotion, OperationType type, const MariaDBServer* demotion_target); /** * Demote this server. Removes all slave connections. If server was master, sets read_only. * * @param op Cluster operation descriptor * @return True if successful */ bool demote(GeneralOpData& general, ServerOperation& op); /** * Redirect the slave connection going to old master to replicate from new master. * * @param op Operation descriptor * @param old_conn The connection which is redirected * @param new_master The new master for the redirected connection * @return True on success */ bool redirect_existing_slave_conn(GeneralOpData& op, const SlaveStatus& old_conn, const MariaDBServer* new_master); /** * Copy slave connections to this server. This is usually needed during switchover promotion and on * the demoted server. It is assumed that all slave connections of this server have * been stopped & removed so there will be no conflicts with existing connections. * A special rule is applied to a connection which points to this server itself: that connection * is directed towards the 'replacement'. This is required to properly handle connections between * the promotion and demotion targets. * * @param op Operation descriptor * @params conns_to_copy The connections to add to the server * @params replacement Which server should rep * @return True on success */ bool copy_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_copy, const MariaDBServer* replacement); /** * Create a new slave connection on the server and start it. * * @param op Operation descriptor * @param slave_conn Existing connection to emulate * @return True on success */ bool create_start_slave(GeneralOpData& op, const SlaveStatus& slave_conn); /** * Kill the connections of any super-users except for the monitor itself. * * @param op Operation descriptor * @return True on success. If super-users cannot be queried because of insufficient privileges, * return true as it means the user does not want this feature. */ bool kick_out_super_users(GeneralOpData& op); /** * Is binary log on? 'update_replication_settings' should be ran before this function to query the data. * * @return True if server has binary log enabled */ bool binlog_on() const; /** * Check if server is a running master. * * @return True if server is a master */ bool is_master() const; /** * Check if server is a running slave. * * @return True if server is a slave */ bool is_slave() const; /** * Check if server is a slave of an external server. * * @return True if server is a slave of an external server */ bool is_slave_of_ext_master() const; /** * Check if server is running and not in maintenance. * * @return True if server is usable */ bool is_usable() const; /** * Check if server is running. * * @return True if server is running */ bool is_running() const; /** * Check if server is down. * * @return True if server is down */ bool is_down() const; /** * Check if server is in maintenance. */ bool is_in_maintenance() const; /** * Is the server a relay master. */ bool is_relay_master() const; /** * Is the server low on disk space? */ bool is_low_on_disk_space() const; /** * Check if server has the given bits on in 'pending_status'. * * @param bits Bits to check * @return True if all given bits are on */ bool has_status(uint64_t bits) const; /** * Check if server has the given bits on in 'mon_prev_status'. * * @param bits Bits to check * @return True if all given bits are on */ bool had_status(uint64_t bits) const; /** * Getter for m_read_only. * * @return True if server is in read_only mode */ bool is_read_only() const; /** * Returns the server name. * * @return Server unique name */ const char* name() const; /** * Clear server pending status flags. * * @param bits Which flags to clear */ void clear_status(uint64_t bits); /** * Set server pending status flags. * * @param bits Which flags to set */ void set_status(uint64_t bits); private: typedef std::function ManipulatorFunc; enum class StopMode { STOP_ONLY, RESET, RESET_ALL }; enum class QueryRetryMode { ENABLED, DISABLED }; enum class ReadOnlySetting { ENABLE, DISABLE }; /* Protects array-like fields from concurrent access. This is only required for fields which can be * read from another thread while the monitor is running. In practice, these are fields read during * diagnostics-methods. Reading inside monitor thread does not need to be mutexed, as outside threads * only read the values. */ mutable std::mutex m_arraylock; bool update_slave_status(std::string* errmsg_out = NULL); bool sstatus_array_topology_equal(const SlaveStatusArray& new_slave_status); const SlaveStatus* sstatus_find_previous_row(const SlaveStatus& new_row, size_t guess); void warn_event_scheduler(); bool events_foreach(ManipulatorFunc& func, json_t** error_out); bool alter_event(const EventInfo& event, const std::string& target_status, json_t** error_out); bool stop_slave_conn(const std::string& conn_name, StopMode mode, maxbase::Duration time_limit, json_t** error_out); bool remove_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_remove); bool execute_cmd_ex(const std::string& cmd, QueryRetryMode mode, std::string* errmsg_out = NULL, unsigned int* errno_out = NULL); bool execute_cmd_time_limit(const std::string& cmd, maxbase::Duration time_limit, std::string* errmsg_out); bool set_read_only(ReadOnlySetting value, maxbase::Duration time_limit, json_t** error_out); bool merge_slave_conns(GeneralOpData& op, const SlaveStatusArray& conns_to_merge); std::string generate_change_master_cmd(GeneralOpData& op, const SlaveStatus& slave_conn); bool update_enabled_events(); };