285 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			285 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
#pragma once
 | 
						|
 | 
						|
/*
 | 
						|
 * Copyright (c) 2016 MariaDB Corporation Ab
 | 
						|
 *
 | 
						|
 * Use of this software is governed by the Business Source License included
 | 
						|
 * in the LICENSE.TXT file and at www.mariadb.com/bsl11.
 | 
						|
 *
 | 
						|
 * Change Date: 2022-01-01
 | 
						|
 *
 | 
						|
 * On the date above, in accordance with the Business Source License, use
 | 
						|
 * of this software will be governed by version 2 or later of the General
 | 
						|
 * Public License.
 | 
						|
 */
 | 
						|
#include "mariadbmon_common.hh"
 | 
						|
#include <condition_variable>
 | 
						|
#include <functional>
 | 
						|
#include <string>
 | 
						|
#include <tr1/unordered_map>
 | 
						|
#include <vector>
 | 
						|
 | 
						|
#include <maxscale/monitor.hh>
 | 
						|
#include <maxscale/thread.h>
 | 
						|
 | 
						|
#include "mariadbserver.hh"
 | 
						|
 | 
						|
extern const char * const CN_AUTO_FAILOVER;
 | 
						|
extern const char * const CN_PROMOTION_SQL_FILE;
 | 
						|
extern const char * const CN_DEMOTION_SQL_FILE;
 | 
						|
 | 
						|
// Map of base struct to MariaDBServer. Does not own the server objects. May not be needed at the end.
 | 
						|
typedef std::tr1::unordered_map<MXS_MONITORED_SERVER*, MariaDBServer*> ServerInfoMap;
 | 
						|
// Map of server id:s to MariaDBServer. Useful when constructing the replication graph.
 | 
						|
typedef std::tr1::unordered_map<int64_t, MariaDBServer*> IdToServerMap;
 | 
						|
// Map of cycle number to cycle members
 | 
						|
typedef std::tr1::unordered_map<int, ServerArray> CycleMap;
 | 
						|
 | 
						|
// MariaDB Monitor instance data
 | 
						|
class MariaDBMonitor : public maxscale::MonitorInstance
 | 
						|
{
 | 
						|
private:
 | 
						|
    MariaDBMonitor(const MariaDBMonitor&);
 | 
						|
    MariaDBMonitor& operator = (const MariaDBMonitor&);
 | 
						|
public:
 | 
						|
    // Helper class used for testing
 | 
						|
    class Test;
 | 
						|
    friend class Test;
 | 
						|
 | 
						|
    ~MariaDBMonitor();
 | 
						|
 | 
						|
    /**
 | 
						|
     * Print diagnostics.
 | 
						|
     *
 | 
						|
     * @param dcb DCB to print to
 | 
						|
     */
 | 
						|
    void diagnostics(DCB *dcb) const;
 | 
						|
 | 
						|
    /**
 | 
						|
     * Print diagnostics to json object.
 | 
						|
     *
 | 
						|
     * @return Diagnostics messages
 | 
						|
     */
 | 
						|
    json_t* diagnostics_json() const;
 | 
						|
 | 
						|
    /**
 | 
						|
     * Create the monitor instance and return the instance data.
 | 
						|
     *
 | 
						|
     * @param monitor General monitor data
 | 
						|
     * @return A pointer to MariaDBMonitor specific data.
 | 
						|
     */
 | 
						|
    static MariaDBMonitor* create(MXS_MONITOR *monitor);
 | 
						|
 | 
						|
    /**
 | 
						|
     * Perform user-activated switchover.
 | 
						|
     *
 | 
						|
     * @param new_master      The specified new master. If NULL, monitor will autoselect.
 | 
						|
     * @param current_master  The specified current master. If NULL, monitor will autoselect.
 | 
						|
     * @param error_out       Json error output
 | 
						|
     * @return True if switchover was performed
 | 
						|
     */
 | 
						|
    bool run_manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
 | 
						|
 | 
						|
    /**
 | 
						|
     * Perform user-activated failover.
 | 
						|
     *
 | 
						|
     * @param error_out Json error output
 | 
						|
     * @return True if failover was performed
 | 
						|
     */
 | 
						|
    bool run_manual_failover(json_t** error_out);
 | 
						|
 | 
						|
    /**
 | 
						|
     * Perform user-activated rejoin
 | 
						|
     *
 | 
						|
     * @param rejoin_server Server to join
 | 
						|
     * @param error_out Json error output
 | 
						|
     * @return True if rejoin was performed
 | 
						|
     */
 | 
						|
    bool run_manual_rejoin(SERVER* rejoin_server, json_t** error_out);
 | 
						|
 | 
						|
protected:
 | 
						|
    void pre_loop();
 | 
						|
    void tick();
 | 
						|
    void process_state_changes();
 | 
						|
 | 
						|
private:
 | 
						|
 | 
						|
    struct CycleInfo
 | 
						|
    {
 | 
						|
        int cycle_id = NodeData::CYCLE_NONE;
 | 
						|
        ServerArray cycle_members;
 | 
						|
    };
 | 
						|
 | 
						|
    /* Structure used to communicate commands and results between the MaxAdmin and monitor threads.
 | 
						|
     * The monitor can only process one manual command at a time, which is already enforced by
 | 
						|
     * the admin thread. */
 | 
						|
    struct ManualCommand
 | 
						|
    {
 | 
						|
    public:
 | 
						|
        std::mutex mutex;                    /**< Mutex used by the condition variables */
 | 
						|
        std::condition_variable has_command; /**< Notified when a command is waiting execution */
 | 
						|
        bool command_waiting_exec = false;   /**< Guard variable for the above */
 | 
						|
        std::function<void (void)> method;   /**< The method to run when executing the command */
 | 
						|
        std::condition_variable has_result;  /**< Notified when the command has ran */
 | 
						|
        bool result_waiting = false;         /**< Guard variable for the above */
 | 
						|
    };
 | 
						|
 | 
						|
    unsigned long m_id;                  /**< Monitor ID */
 | 
						|
    ServerArray m_servers;               /**< Servers of the monitor */
 | 
						|
    ServerInfoMap m_server_info;         /**< Map from server base struct to MariaDBServer */
 | 
						|
    ManualCommand m_manual_cmd;          /**< Communicates manual commands and results */
 | 
						|
 | 
						|
    // Values updated by monitor
 | 
						|
    MariaDBServer* m_master;             /**< Master server for Master/Slave replication */
 | 
						|
    MariaDBServer* m_next_master;        /**< When master changes because of a failover/switchover, the new
 | 
						|
                                           *  master is written here so the next monitor loop picks it up. */
 | 
						|
    IdToServerMap m_servers_by_id;       /**< Map from server id:s to MariaDBServer */
 | 
						|
    int64_t m_master_gtid_domain;        /**< gtid_domain_id most recently seen on the master  */
 | 
						|
    std::string m_external_master_host;  /**< External master host, for fail/switchover */
 | 
						|
    int m_external_master_port;          /**< External master port */
 | 
						|
    bool m_cluster_topology_changed;     /**< Has cluster topology changed since last monitor loop? */
 | 
						|
    bool m_cluster_modified;             /**< Has a failover/switchover/rejoin been performed this loop? */
 | 
						|
    CycleMap m_cycles;                   /**< Map from cycle number to cycle member servers */
 | 
						|
    CycleInfo m_master_cycle_status;     /**< Info about master server cycle from previous round */
 | 
						|
 | 
						|
    // Replication topology detection settings
 | 
						|
    bool m_allow_cluster_recovery;       /**< Allow failed servers to rejoin the cluster */
 | 
						|
    bool m_detect_replication_lag;       /**< Monitor flag for MySQL replication heartbeat */
 | 
						|
    bool m_detect_multimaster;           /**< Detect and handle multi-master topologies */
 | 
						|
    bool m_detect_stale_master;          /**< Monitor flag for MySQL replication Stale Master detection */
 | 
						|
    bool m_detect_stale_slave;           /**< Monitor flag for MySQL replication Stale Slave detection */
 | 
						|
    bool m_detect_standalone_master;     /**< If standalone master are detected */
 | 
						|
    bool m_ignore_external_masters;      /**< Ignore masters outside of the monitor configuration */
 | 
						|
    bool m_mysql51_replication;          /**< Use MySQL 5.1 replication */
 | 
						|
 | 
						|
    // Failover, switchover and rejoin settings
 | 
						|
    bool m_auto_failover;                /**< Is automatic master failover is enabled? */
 | 
						|
    bool m_auto_rejoin;                  /**< Is automatic rejoin enabled? */
 | 
						|
    int m_failcount;                     /**< Numer of cycles master must be down before auto-failover begins */
 | 
						|
    std::string m_replication_user;      /**< Replication user for CHANGE MASTER TO-commands */
 | 
						|
    std::string m_replication_password;  /**< Replication password for CHANGE MASTER TO-commands */
 | 
						|
    uint32_t m_failover_timeout;         /**< Time limit in seconds for master failover */
 | 
						|
    uint32_t m_switchover_timeout;       /**< Time limit in seconds for master switchover */
 | 
						|
    bool m_verify_master_failure;        /**< Is master failure is verified via slaves? */
 | 
						|
    int m_master_failure_timeout;        /**< Master failure verification (via slaves) time in seconds */
 | 
						|
    ServerArray m_excluded_servers;      /**< Servers banned for master promotion during auto-failover or
 | 
						|
                                          *   autoselect switchover. */
 | 
						|
    std::string m_promote_sql_file;      /**< File with sql commands which are ran to a server being promoted. */
 | 
						|
    std::string m_demote_sql_file;       /**< File with sql commands which are ran to a server being demoted. */
 | 
						|
    bool m_enforce_read_only_slaves;     /**< Should the monitor set read-only=1 on any slave servers. */
 | 
						|
    bool m_switchover_on_low_disk_space; /**< Should the monitor do a switchover on low disk space. */
 | 
						|
 | 
						|
    // Other settings
 | 
						|
    std::string m_script;                /**< Script to call when state changes occur on servers */
 | 
						|
    uint64_t m_events;                   /**< enabled events */
 | 
						|
    bool m_warn_set_standalone_master;   /**< Log a warning when setting standalone master */
 | 
						|
    bool m_log_no_master;                /**< Should it be logged that there is no master */
 | 
						|
    bool m_warn_no_valid_in_cycle;       /**< Log a warning when a replication cycle has no valid master */
 | 
						|
    bool m_warn_no_valid_outside_cycle;  /**< Log a warning when a replication topology has no valid master
 | 
						|
                                          *   outside of a cycle. */
 | 
						|
    bool m_warn_failover_precond;        /**< Print failover preconditions error message? */
 | 
						|
    bool m_warn_cannot_rejoin;           /**< Print warning if auto_rejoin fails because of invalid gtid:s? */
 | 
						|
 | 
						|
    // Base methods
 | 
						|
    MariaDBMonitor(MXS_MONITOR* monitor_base);
 | 
						|
    void reset_server_info();
 | 
						|
    void clear_server_info();
 | 
						|
    void reset_node_index_info();
 | 
						|
    bool configure(const MXS_CONFIG_PARAMETER* params);
 | 
						|
    bool set_replication_credentials(const MXS_CONFIG_PARAMETER* params);
 | 
						|
    MariaDBServer* get_server_info(MXS_MONITORED_SERVER* db);
 | 
						|
    MariaDBServer* get_server(int64_t id);
 | 
						|
    bool execute_manual_command(std::function<void (void)> command, json_t** error_out);
 | 
						|
 | 
						|
    // Cluster discovery and status assignment methods
 | 
						|
    void update_server(MariaDBServer& server);
 | 
						|
    void find_graph_cycles();
 | 
						|
    bool standalone_master_required();
 | 
						|
    bool set_standalone_master();
 | 
						|
    void log_master_changes();
 | 
						|
    void update_gtid_domain();
 | 
						|
    void update_external_master();
 | 
						|
    void set_master_heartbeat(MariaDBServer*);
 | 
						|
    void set_slave_heartbeat(MariaDBServer*);
 | 
						|
    void measure_replication_lag();
 | 
						|
    void check_maxscale_schema_replication();
 | 
						|
    void build_replication_graph();
 | 
						|
    void tarjan_scc_visit_node(MariaDBServer *node, ServerArray* stack, int *index, int *cycle);
 | 
						|
    void assign_cycle_roles(int cycle);
 | 
						|
    MariaDBServer* find_topology_master_server(std::string* msg_out);
 | 
						|
    MariaDBServer* find_best_reach_server(const ServerArray& candidates);
 | 
						|
    void calculate_node_reach(MariaDBServer* node);
 | 
						|
    MariaDBServer* find_master_inside_cycle(ServerArray& cycle_servers);
 | 
						|
    void assign_master_and_slave();
 | 
						|
    void assign_slave_and_relay_master(MariaDBServer* node);
 | 
						|
    bool master_is_valid(std::string* reason_out);
 | 
						|
    bool cycle_has_master_server(ServerArray& cycle_servers);
 | 
						|
    void update_master_cycle_info();
 | 
						|
 | 
						|
    // Switchover methods
 | 
						|
    bool manual_switchover(SERVER* new_master, SERVER* current_master, json_t** error_out);
 | 
						|
    bool switchover_check(SERVER* new_master, SERVER* current_master,
 | 
						|
                          MariaDBServer** new_master_out, MariaDBServer** current_master_out,
 | 
						|
                          json_t** error_out);
 | 
						|
    bool switchover_check_new(const MariaDBServer* new_master_cand, json_t** error);
 | 
						|
    bool switchover_check_current(const MXS_MONITORED_SERVER* suggested_curr_master,
 | 
						|
                                  json_t** error_out) const;
 | 
						|
    bool do_switchover(MariaDBServer** current_master, MariaDBServer** new_master, json_t** err_out);
 | 
						|
    bool switchover_check_preferred_master(MariaDBServer* preferred, json_t** err_out);
 | 
						|
    bool switchover_demote_master(MariaDBServer* current_master,
 | 
						|
                                  json_t** err_out);
 | 
						|
    bool switchover_wait_slaves_catchup(const ServerArray& slaves, const GtidList& gtid, int total_timeout,
 | 
						|
                                        json_t** err_out);
 | 
						|
    bool switchover_start_slave(MariaDBServer* old_master, MariaDBServer* new_master);
 | 
						|
 | 
						|
    // Failover methods
 | 
						|
    bool manual_failover(json_t** output);
 | 
						|
    void handle_auto_failover();
 | 
						|
    bool failover_not_possible();
 | 
						|
    bool slave_receiving_events();
 | 
						|
    bool failover_check(std::string* error_out);
 | 
						|
    bool do_failover(json_t** err_out);
 | 
						|
 | 
						|
    // Rejoin methods
 | 
						|
    bool manual_rejoin(SERVER* rejoin_server, json_t** output);
 | 
						|
    bool cluster_can_be_joined();
 | 
						|
    void handle_auto_rejoin();
 | 
						|
    bool get_joinable_servers(ServerArray* output);
 | 
						|
    bool server_is_rejoin_suspect(MariaDBServer* rejoin_cand, json_t** output);
 | 
						|
    uint32_t do_rejoin(const ServerArray& joinable_servers, json_t** output);
 | 
						|
 | 
						|
    // Methods common to failover/switchover/rejoin
 | 
						|
    MariaDBServer* select_new_master(ServerArray* slaves_out, json_t** err_out);
 | 
						|
    bool server_is_excluded(const MariaDBServer* server);
 | 
						|
    bool is_candidate_better(const MariaDBServer* current_best, const MariaDBServer* candidate,
 | 
						|
                             uint32_t gtid_domain, std::string* reason_out = NULL);
 | 
						|
    bool promote_new_master(MariaDBServer* new_master, json_t** err_out);
 | 
						|
    int redirect_slaves(MariaDBServer* new_master, const ServerArray& slaves,
 | 
						|
                        ServerArray* redirected_slaves);
 | 
						|
    std::string generate_change_master_cmd(const std::string& master_host, int master_port);
 | 
						|
    bool start_external_replication(MariaDBServer* new_master, json_t** err_out);
 | 
						|
    bool wait_cluster_stabilization(MariaDBServer* new_master, const ServerArray& slaves,
 | 
						|
                                    int seconds_remaining);
 | 
						|
 | 
						|
    // Other methods
 | 
						|
    void disable_setting(const char* setting);
 | 
						|
    bool check_sql_files();
 | 
						|
    void enforce_read_only_on_slaves();
 | 
						|
};
 | 
						|
 | 
						|
/**
 | 
						|
 * Generates a list of server names separated by ', '
 | 
						|
 *
 | 
						|
 * @param servers The servers
 | 
						|
 * @return Server names
 | 
						|
 */
 | 
						|
std::string monitored_servers_to_string(const ServerArray& servers);
 | 
						|
 | 
						|
/**
 | 
						|
 * Get MariaDB connection error strings from all the given servers, form one string.
 | 
						|
 *
 | 
						|
 * @param servers Servers with errors
 | 
						|
 * @return Concatenated string.
 | 
						|
 */
 | 
						|
std::string get_connection_errors(const ServerArray& servers);
 |