diff --git a/server/modules/monitor/clustrixmon/clustrixmonitor.cc b/server/modules/monitor/clustrixmon/clustrixmonitor.cc index 503c6cde0..ecd8e7399 100644 --- a/server/modules/monitor/clustrixmon/clustrixmonitor.cc +++ b/server/modules/monitor/clustrixmon/clustrixmonitor.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include "../../../core/internal/config_runtime.hh" #include "../../../core/internal/service.hh" @@ -51,6 +52,28 @@ static const char SQL_UPSERT_FORMAT[] = static const char SQL_DELETE_FORMAT[] = "DELETE FROM clustrix_nodes WHERE id = %d"; + +static const char SQL_SELECT[] = + "SELECT ip, mysql_port FROM clustrix_nodes"; + +using HostPortPair = std::pair; +using HostPortPairs = std::vector; + +// sqlite3 callback. +int select_cb(void* pData, int nColumns, char** ppColumn, char** ppNames) +{ + std::vector* pNodes = static_cast*>(pData); + + mxb_assert(nColumns == 2); + + std::string host(ppColumn[0]); + int port = atoi(ppColumn[1]); + + pNodes->emplace_back(host, port); + + return 0; +} + } namespace @@ -88,7 +111,7 @@ sqlite3* open_or_create_db(const std::string& path) if (unlink(path.c_str()) != 0) { MXS_ERROR("Failed to delete database %s that could not be properly " - "initialized. You should delete the database manually.", path.c_str()); + "initialized. It should be deleted manually.", path.c_str()); sqlite3_close_v2(pDb); pDb = nullptr; } @@ -282,10 +305,14 @@ void ClustrixMonitor::choose_hub(Clustrix::Softfailed softfailed) // then we check the bootstrap servers, and if (!choose_bootstrap_hub(softfailed, ips)) { - // finally, if all else fails, we check servers that have been persisted. - // In practise we will only get here at startup (no dynamic servers) - // if the bootstrap servers cannot be contacted. - choose_persisted_hub(softfailed, ips); + // finally, if all else fails - in practise we will only get here at + // startup (no dynamic servers) if the bootstrap servers cannot be + // contacted - we try to refresh the nodes using persisted information + if (refresh_using_persisted_nodes(ips)) + { + // and then select a hub from the dynamic ones. + choose_dynamic_hub(softfailed, ips); + } } } @@ -345,32 +372,108 @@ bool ClustrixMonitor::choose_bootstrap_hub(Clustrix::Softfailed softfailed, std: return m_pHub_con != nullptr; } -bool ClustrixMonitor::choose_persisted_hub(Clustrix::Softfailed softfailed, std::set& ips_checked) +bool ClustrixMonitor::refresh_using_persisted_nodes(std::set& ips_checked) { - // TODO: Check persisted servers. - return false; + MXS_NOTICE("Attempting to find a Clustrix bootstrap node from one of the nodes " + "used during the previous run of MaxScale."); + + bool refreshed = false; + + HostPortPairs nodes; + char* pError = nullptr; + int rv = sqlite3_exec(m_pDb, SQL_SELECT, select_cb, &nodes, &pError); + + if (rv == SQLITE_OK) + { + const std::string& username = m_settings.conn_settings.username; + const std::string& password = m_settings.conn_settings.password; + char* zPassword = decrypt_password(password.c_str()); + + auto it = nodes.begin(); + + while (!refreshed && (it != nodes.end())) + { + const auto& node = *it; + + const std::string& host = node.first; + + if (ips_checked.find(host) == ips_checked.end()) + { + ips_checked.insert(host); + int port = node.second; + + MXS_NOTICE("Trying to find out cluster nodes from %s:%d.", host.c_str(), port); + + MYSQL* pHub_con = mysql_init(NULL); + + if (mysql_real_connect(pHub_con, host.c_str(), + username.c_str(), zPassword, + nullptr, + port, nullptr, 0)) + { + if (Clustrix::is_part_of_the_quorum(name(), pHub_con)) + { + if (refresh_nodes(pHub_con)) + { + MXS_NOTICE("Cluster nodes refreshed."); + refreshed = true; + } + } + else + { + MXS_WARNING("%s:%d is not part of the quorum, ignoring.", host.c_str(), port); + } + } + else + { + MXS_WARNING("Could not connect to %s:%d.", host.c_str(), port); + } + + mysql_close(pHub_con); + } + + ++it; + } + + MXS_FREE(zPassword); + } + else + { + MXS_ERROR("Could not look up persisted nodes: %s", pError ? pError : "Unknown error"); + } + + return refreshed; } -void ClustrixMonitor::refresh_nodes() +bool ClustrixMonitor::refresh_nodes() { mxb_assert(m_pHub_con); + return refresh_nodes(m_pHub_con); +} + +bool ClustrixMonitor::refresh_nodes(MYSQL* pHub_con) +{ + mxb_assert(pHub_con); + map memberships; - if (check_cluster_membership(&memberships)) + bool refreshed = check_cluster_membership(pHub_con, &memberships); + + if (refreshed) { const char ZQUERY[] = "SELECT ni.nodeid, ni.iface_ip, ni.mysql_port, ni.healthmon_port, sn.nodeid " "FROM system.nodeinfo AS ni " "LEFT JOIN system.softfailed_nodes AS sn ON ni.nodeid = sn.nodeid"; - if (mysql_query(m_pHub_con, ZQUERY) == 0) + if (mysql_query(pHub_con, ZQUERY) == 0) { - MYSQL_RES* pResult = mysql_store_result(m_pHub_con); + MYSQL_RES* pResult = mysql_store_result(pHub_con); if (pResult) { - mxb_assert(mysql_field_count(m_pHub_con) == 5); + mxb_assert(mysql_field_count(pHub_con) == 5); set nids; for (const auto& element : m_nodes) @@ -404,25 +507,7 @@ void ClustrixMonitor::refresh_nodes() ClustrixNode& node = nit->second; - bool changed = false; - - if (node.ip() != ip) - { - node.set_ip(ip); - changed = true; - } - - if (node.mysql_port() != mysql_port) - { - node.set_mysql_port(mysql_port); - changed = true; - } - - if (node.health_port() != health_port) - { - node.set_health_port(health_port); - changed = true; - } + node.update(ip, mysql_port, health_port); bool is_draining = node.server()->is_draining(); @@ -443,11 +528,6 @@ void ClustrixMonitor::refresh_nodes() node.server()->clear_status(SERVER_DRAINING); } - if (changed) - { - persist_node(node); - } - nids.erase(id); } else if (mit != memberships.end()) @@ -473,11 +553,10 @@ void ClustrixMonitor::refresh_nodes() const ClustrixMembership& membership = mit->second; int health_check_threshold = m_config.health_check_threshold(); - ClustrixNode node(membership, ip, mysql_port, health_port, + ClustrixNode node(this, membership, ip, mysql_port, health_port, health_check_threshold, pServer); m_nodes.insert(make_pair(id, node)); - persist_node(node); // New server, so it needs to be added to all services that // use this monitor for defining its cluster of servers. @@ -517,7 +596,6 @@ void ClustrixMonitor::refresh_nodes() ClustrixNode& node = it->second; node.set_running(false, ClustrixNode::APPROACH_OVERRIDE); - unpersist_node(node); } vector health_urls; @@ -536,15 +614,17 @@ void ClustrixMonitor::refresh_nodes() else { MXS_WARNING("%s: No result returned for '%s' on %s.", - name(), ZQUERY, m_pHub_server->address); + name(), ZQUERY, mysql_get_host_info(pHub_con)); } } else { MXS_ERROR("%s: Could not execute '%s' on %s: %s", - name(), ZQUERY, m_pHub_server->address, mysql_error(m_pHub_con)); + name(), ZQUERY, mysql_get_host_info(pHub_con), mysql_error(pHub_con)); } } + + return refreshed; } void ClustrixMonitor::check_cluster(Clustrix::Softfailed softfailed) @@ -578,24 +658,23 @@ void ClustrixMonitor::check_hub(Clustrix::Softfailed softfailed) } } -bool ClustrixMonitor::check_cluster_membership(std::map* pMemberships) +bool ClustrixMonitor::check_cluster_membership(MYSQL* pHub_con, + std::map* pMemberships) { + mxb_assert(pHub_con); mxb_assert(pMemberships); - mxb_assert(m_pHub_con); - mxb_assert(m_pHub_server); - bool rv = false; const char ZQUERY[] = "SELECT nid, status, instance, substate FROM system.membership"; - if (mysql_query(m_pHub_con, ZQUERY) == 0) + if (mysql_query(pHub_con, ZQUERY) == 0) { - MYSQL_RES* pResult = mysql_store_result(m_pHub_con); + MYSQL_RES* pResult = mysql_store_result(pHub_con); if (pResult) { - mxb_assert(mysql_field_count(m_pHub_con) == 4); + mxb_assert(mysql_field_count(pHub_con) == 4); set nids; for (const auto& element : m_nodes) @@ -666,7 +745,7 @@ bool ClustrixMonitor::check_cluster_membership(std::map else { MXS_ERROR("%s: Could not execute '%s' on %s: %s", - name(), ZQUERY, m_pHub_server->address, mysql_error(m_pHub_con)); + name(), ZQUERY, mysql_get_host_info(pHub_con), mysql_error(pHub_con)); } return rv; @@ -894,7 +973,7 @@ bool ClustrixMonitor::perform_operation(Operation operation, return performed; } -void ClustrixMonitor::persist_node(const ClustrixNode& node) +void ClustrixMonitor::persist(const ClustrixNode& node) { if (!m_pDb) { @@ -923,7 +1002,7 @@ void ClustrixMonitor::persist_node(const ClustrixNode& node) } } -void ClustrixMonitor::unpersist_node(const ClustrixNode& node) +void ClustrixMonitor::unpersist(const ClustrixNode& node) { if (!m_pDb) { diff --git a/server/modules/monitor/clustrixmon/clustrixmonitor.hh b/server/modules/monitor/clustrixmon/clustrixmonitor.hh index 40a239a42..a0b33eb2a 100644 --- a/server/modules/monitor/clustrixmon/clustrixmonitor.hh +++ b/server/modules/monitor/clustrixmon/clustrixmonitor.hh @@ -21,7 +21,8 @@ #include "clustrixmembership.hh" #include "clustrixnode.hh" -class ClustrixMonitor : public maxscale::MonitorWorker +class ClustrixMonitor : public maxscale::MonitorWorker, + private ClustrixNode::Persister { ClustrixMonitor(const ClustrixMonitor&) = delete; ClustrixMonitor& operator=(const ClustrixMonitor&) = delete; @@ -91,10 +92,12 @@ private: bool choose_dynamic_hub(Clustrix::Softfailed softfailed, std::set& ips_checked); bool choose_bootstrap_hub(Clustrix::Softfailed softfailed, std::set& ips_checked); - bool choose_persisted_hub(Clustrix::Softfailed softfailed, std::set& ips_checked); + bool refresh_using_persisted_nodes(std::set& ips_checked); - void refresh_nodes(); - bool check_cluster_membership(std::map* pMemberships); + bool refresh_nodes(); + bool refresh_nodes(MYSQL* pHub_con); + bool check_cluster_membership(MYSQL* pHub_con, + std::map* pMemberships); void update_server_statuses(); @@ -136,8 +139,9 @@ private: return mxb::WorkerLoad::get_time_ms(); } - void persist_node(const ClustrixNode& node); - void unpersist_node(const ClustrixNode& node); + // ClustrixNode::Persister + void persist(const ClustrixNode& node); + void unpersist(const ClustrixNode& node); private: Config m_config; diff --git a/server/modules/monitor/clustrixmon/clustrixnode.hh b/server/modules/monitor/clustrixmon/clustrixnode.hh index 5f85caee0..086bbab47 100644 --- a/server/modules/monitor/clustrixmon/clustrixnode.hh +++ b/server/modules/monitor/clustrixmon/clustrixnode.hh @@ -22,6 +22,13 @@ class ClustrixNode { public: + class Persister + { + public: + virtual void persist(const ClustrixNode& node) = 0; + virtual void unpersist(const ClustrixNode& node) = 0; + }; + enum { DEFAULT_MYSQL_PORT = 3306, @@ -34,13 +41,15 @@ public: APPROACH_DEFAULT }; - ClustrixNode(const ClustrixMembership& membership, + ClustrixNode(Persister* pPersister, + const ClustrixMembership& membership, const std::string& ip, int mysql_port, int health_port, int health_check_threshold, SERVER* pServer) - : m_id(membership.id()) + : m_persister(*pPersister) + , m_id(membership.id()) , m_status(membership.status()) , m_substate(membership.substate()) , m_instance(membership.instance()) @@ -52,6 +61,8 @@ public: , m_pServer(pServer) , m_pCon(nullptr) { + m_pServer->set_status(SERVER_MASTER | SERVER_RUNNING); + m_persister.persist(*this); } ~ClustrixNode() @@ -87,33 +98,16 @@ public: return m_ip; } - void set_ip(const std::string& ip) - { - m_ip = ip; - m_pServer->server_update_address(ip); - } - int mysql_port() const { return m_mysql_port; } - void set_mysql_port(int port) - { - m_mysql_port = port; - m_pServer->update_port(port); - } - int health_port() const { return m_health_port; } - void set_health_port(int port) - { - m_health_port = port; - } - bool is_running() const { return m_nRunning > 0; @@ -123,9 +117,13 @@ public: { if (running) { - m_nRunning = m_health_check_threshold; + if (m_nRunning == 0) + { + m_pServer->set_status(SERVER_MASTER | SERVER_RUNNING); + m_persister.persist(*this); + } - m_pServer->set_status(SERVER_MASTER | SERVER_RUNNING); + m_nRunning = m_health_check_threshold; } else { @@ -143,11 +141,43 @@ public: if (m_nRunning == 0) { m_pServer->clear_status(SERVER_MASTER | SERVER_RUNNING); + m_persister.unpersist(*this); } } } } + void update(const std::string& ip, + int mysql_port, + int health_port) + { + bool changed = false; + + if (ip != m_ip) + { + m_ip = ip; + changed = true; + } + + if (mysql_port != m_mysql_port) + { + m_mysql_port = mysql_port; + m_pServer->update_port(m_mysql_port); + changed = true; + } + + if (health_port != m_health_port) + { + m_health_port = health_port; + changed = true; + } + + if (changed) + { + m_persister.persist(*this); + } + } + void update(Clustrix::Status status, Clustrix::SubState substate, int instance) { m_status = status; @@ -158,6 +188,7 @@ public: void deactivate_server() { m_pServer->is_active = false; + m_persister.unpersist(*this); } bool can_be_used_as_hub(const char* zName, @@ -194,6 +225,7 @@ public: } private: + Persister& m_persister; int m_id; Clustrix::Status m_status; Clustrix::SubState m_substate;