From b8c78ca9fe4e382c3faa266afd59c48e5d59bf34 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 24 Jan 2018 11:00:42 +0200 Subject: [PATCH 1/6] Remove erroneous casts --- server/core/atomic.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/core/atomic.cc b/server/core/atomic.cc index d2475c761..0d7a3efaa 100644 --- a/server/core/atomic.cc +++ b/server/core/atomic.cc @@ -58,7 +58,7 @@ int atomic_load_int32(const int *variable) #ifdef MXS_USE_ATOMIC_BUILTINS return __atomic_load_n(variable, __ATOMIC_SEQ_CST); #else - return __sync_fetch_and_or((volatile int *)variable, 0); + return __sync_fetch_and_or(variable, 0); #endif } @@ -67,7 +67,7 @@ int64_t atomic_load_int64(const int64_t *variable) #ifdef MXS_USE_ATOMIC_BUILTINS return __atomic_load_n(variable, __ATOMIC_SEQ_CST); #else - return __sync_fetch_and_or((volatile int *)variable, 0); + return __sync_fetch_and_or(variable, 0); #endif } @@ -76,7 +76,7 @@ uint64_t atomic_load_uint64(const uint64_t *variable) #ifdef MXS_USE_ATOMIC_BUILTINS return __atomic_load_n(variable, __ATOMIC_SEQ_CST); #else - return __sync_fetch_and_or((volatile int *)variable, 0); + return __sync_fetch_and_or(variable, 0); #endif } From 9093f19c8bfc3e0c4cfa9a83458f05e720660432 Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 24 Jan 2018 11:10:23 +0200 Subject: [PATCH 2/6] Clean up atomic_load-functions --- include/maxscale/atomic.h | 4 +++- server/core/atomic.cc | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/maxscale/atomic.h b/include/maxscale/atomic.h index edcf503ff..fc8045de4 100644 --- a/include/maxscale/atomic.h +++ b/include/maxscale/atomic.h @@ -57,8 +57,10 @@ uint64_t atomic_add_uint64(uint64_t *variable, int64_t value); * @param variable Pointer the the variable to load from * @return The stored value */ -int atomic_load_int32(const int *variable); +int atomic_load_int(const int *variable); +int32_t atomic_load_int32(const int32_t *variable); 
int64_t atomic_load_int64(const int64_t *variable); +uint32_t atomic_load_uint32(const uint32_t *variable); uint64_t atomic_load_uint64(const uint64_t *variable); void* atomic_load_ptr(void * const *variable); diff --git a/server/core/atomic.cc b/server/core/atomic.cc index 0d7a3efaa..8e611f60a 100644 --- a/server/core/atomic.cc +++ b/server/core/atomic.cc @@ -53,7 +53,16 @@ uint64_t atomic_add_uint64(uint64_t *variable, int64_t value) #endif } -int atomic_load_int32(const int *variable) +int atomic_load_int(const int *variable) +{ +#ifdef MXS_USE_ATOMIC_BUILTINS + return __atomic_load_n(variable, __ATOMIC_SEQ_CST); +#else + return __sync_fetch_and_or(variable, 0); +#endif +} + +int32_t atomic_load_int32(const int32_t *variable) { #ifdef MXS_USE_ATOMIC_BUILTINS return __atomic_load_n(variable, __ATOMIC_SEQ_CST); @@ -71,6 +80,15 @@ int64_t atomic_load_int64(const int64_t *variable) #endif } +uint32_t atomic_load_uint32(const uint32_t *variable) +{ +#ifdef MXS_USE_ATOMIC_BUILTINS + return __atomic_load_n(variable, __ATOMIC_SEQ_CST); +#else + return __sync_fetch_and_or(variable, 0); +#endif +} + uint64_t atomic_load_uint64(const uint64_t *variable) { #ifdef MXS_USE_ATOMIC_BUILTINS From d681d0f2faac6d3da0833642b9824ce7a7193f70 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Tue, 23 Jan 2018 14:09:29 +0200 Subject: [PATCH 3/6] Add manual rejoin tests --- maxscale-system-test/.gitignore | 2 + maxscale-system-test/CMakeLists.txt | 4 + ...xscale.cnf.template.mysqlmon_rejoin_manual | 94 ++++++++++++ .../mysqlmon_rejoin_manual.cpp | 101 +++++++++++++ .../mysqlmon_rejoin_manual2.cpp | 143 ++++++++++++++++++ 5 files changed, 344 insertions(+) create mode 100644 maxscale-system-test/cnf/maxscale.cnf.template.mysqlmon_rejoin_manual create mode 100644 maxscale-system-test/mysqlmon_rejoin_manual.cpp create mode 100644 maxscale-system-test/mysqlmon_rejoin_manual2.cpp diff --git a/maxscale-system-test/.gitignore b/maxscale-system-test/.gitignore index ce111daa4..5d831b984 100644 
--- a/maxscale-system-test/.gitignore +++ b/maxscale-system-test/.gitignore @@ -183,6 +183,8 @@ mysqlmon_failover_manual mysqlmon_rejoin_good mysqlmon_rejoin_bad mysqlmon_rejoin_bad2 +mysqlmon_rejoin_manual +mysqlmon_rejoin_manual2 namedserverfilter no_password non_native_setup diff --git a/maxscale-system-test/CMakeLists.txt b/maxscale-system-test/CMakeLists.txt index 775f75e15..f497a20fc 100644 --- a/maxscale-system-test/CMakeLists.txt +++ b/maxscale-system-test/CMakeLists.txt @@ -281,6 +281,10 @@ add_test_executable(mysqlmon_rejoin_bad.cpp mysqlmon_rejoin_bad mysqlmon_rejoin_ # MySQL Monitor Rejoin (bad2) Test, use template for Rejoin (good) add_test_executable(mysqlmon_rejoin_bad2.cpp mysqlmon_rejoin_bad2 mysqlmon_rejoin_good LABELS mysqlmon REPL_BACKEND) +# MySQL Monitor Rejoin tests +add_test_executable(mysqlmon_rejoin_manual.cpp mysqlmon_rejoin_manual mysqlmon_rejoin_manual LABELS mysqlmon REPL_BACKEND) +add_test_executable(mysqlmon_rejoin_manual2.cpp mysqlmon_rejoin_manual2 mysqlmon_rejoin_manual LABELS mysqlmon REPL_BACKEND) + # MySQL Monitor rolling master add_test_executable(mysqlmon_failover_rolling_master.cpp mysqlmon_failover_rolling_master mysqlmon_failover_rolling_master LABELS mysqlmon REPL_BACKEND) diff --git a/maxscale-system-test/cnf/maxscale.cnf.template.mysqlmon_rejoin_manual b/maxscale-system-test/cnf/maxscale.cnf.template.mysqlmon_rejoin_manual new file mode 100644 index 000000000..dc66d89ce --- /dev/null +++ b/maxscale-system-test/cnf/maxscale.cnf.template.mysqlmon_rejoin_manual @@ -0,0 +1,94 @@ +[maxscale] +threads=###threads### + +[MySQL-Monitor] +type=monitor +module=mysqlmon +servers= server1, server2, server3, server4 +user=maxskysql +passwd= skysql +monitor_interval=1000 +detect_standalone_master=true +failcount=1 +allow_cluster_recovery=true +replication_user=repl +replication_password=repl +backend_connect_timeout=3 +backend_read_timeout=3 +backend_write_timeout=3 +auto_failover=true + +[RW Split Router] +type=service +router= 
readwritesplit +servers=server1, server2, server3, server4 +user=maxskysql +passwd=skysql + +[Read Connection Router Slave] +type=service +router=readconnroute +router_options= slave +servers=server1, server2, server3, server4 +user=maxskysql +passwd=skysql + +[Read Connection Router Master] +type=service +router=readconnroute +router_options=master +servers=server1, server2, server3, server4 +user=maxskysql +passwd=skysql + +[RW Split Listener] +type=listener +service=RW Split Router +protocol=MySQLClient +port=4006 + +[Read Connection Listener Slave] +type=listener +service=Read Connection Router Slave +protocol=MySQLClient +port=4009 + +[Read Connection Listener Master] +type=listener +service=Read Connection Router Master +protocol=MySQLClient +port=4008 + +[CLI] +type=service +router=cli + +[CLI Listener] +type=listener +service=CLI +protocol=maxscaled +socket=default + +[server1] +type=server +address=###node_server_IP_1### +port=###node_server_port_1### +protocol=MySQLBackend + +[server2] +type=server +address=###node_server_IP_2### +port=###node_server_port_2### +protocol=MySQLBackend + +[server3] +type=server +address=###node_server_IP_3### +port=###node_server_port_3### +protocol=MySQLBackend + +[server4] +type=server +address=###node_server_IP_4### +port=###node_server_port_4### +protocol=MySQLBackend diff --git a/maxscale-system-test/mysqlmon_rejoin_manual.cpp b/maxscale-system-test/mysqlmon_rejoin_manual.cpp new file mode 100644 index 000000000..51739b34e --- /dev/null +++ b/maxscale-system-test/mysqlmon_rejoin_manual.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. 
+ */ + +#include "fail_switch_rejoin_common.cpp" +#include + +using std::string; +using std::cout; +using std::endl; + +int main(int argc, char** argv) +{ + interactive = strcmp(argv[argc - 1], "interactive") == 0; + Mariadb_nodes::require_gtid(true); + TestConnections test(argc, argv); + int ec; + MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0); + // Set up test table + basic_test(test); + // Delete binlogs to sync gtid:s + delete_slave_binlogs(test); + char result_tmp[bufsize]; + // Advance gtid:s a bit so that gtid variables are updated. + generate_traffic_and_check(test, maxconn, 10); + sleep(1); + test.tprintf(LINE); + print_gtids(test); + get_input(); + + cout << "Stopping master and waiting for failover. Check that another server is promoted." << endl; + const int old_master_id = get_master_server_id(test); // Read master id now before shutdown. + const int master_index = test.repl->master; + test.repl->stop_node(master_index); + sleep(10); + // Recreate maxscale session + mysql_close(maxconn); + maxconn = test.maxscales->open_rwsplit_connection(0); + get_output(test); + int master_id = get_master_server_id(test); + cout << "Master server id is " << master_id << endl; + const bool failover_ok = (master_id > 0 && master_id != old_master_id); + test.assert(failover_ok, "Master did not change or no master detected."); + string gtid_final; + if (failover_ok) + { + cout << "Sending more inserts." << endl; + generate_traffic_and_check(test, maxconn, 5); + sleep(1); + if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + gtid_final = result_tmp; + } + print_gtids(test); + cout << "Bringing old master back online..." 
<< endl; + test.repl->start_node(master_index, (char*) ""); + sleep(5); + test.repl->connect(); + get_output(test); + test.tprintf("and manually rejoining it to cluster."); + const char REJOIN_CMD[] = "maxadmin call command mariadbmon rejoin MySQL-Monitor server1"; + test.maxscales->ssh_node_output(0, REJOIN_CMD , true, &ec); + sleep(2); + get_output(test); + + string gtid_old_master; + if (find_field(test.repl->nodes[master_index], GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + gtid_old_master = result_tmp; + } + cout << LINE << "\n"; + print_gtids(test); + cout << LINE << "\n"; + test.assert(gtid_final == gtid_old_master, "Old master did not successfully rejoin the cluster."); + // Switch master back to server1 so last check is faster + int ec; + test.maxscales->ssh_node_output(0, "maxadmin call command mysqlmon switchover " + "MySQL-Monitor server1 server2" , true, &ec); + sleep(5); // Wait for monitor to update status + get_output(test); + master_id = get_master_server_id(test); + test.assert(master_id == old_master_id, "Switchover back to server1 failed."); + } + else + { + test.repl->start_node(master_index, (char*) ""); + sleep(10); + } + + test.repl->fix_replication(); + return test.global_result; +} diff --git a/maxscale-system-test/mysqlmon_rejoin_manual2.cpp b/maxscale-system-test/mysqlmon_rejoin_manual2.cpp new file mode 100644 index 000000000..451023bcd --- /dev/null +++ b/maxscale-system-test/mysqlmon_rejoin_manual2.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2020-01-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. 
+ */ + +#include + +#include "testconnections.h" +#include "fail_switch_rejoin_common.cpp" +#include + +using std::string; +using std::cout; +using std::endl; + +int main(int argc, char** argv) +{ + char result_tmp[bufsize]; + interactive = strcmp(argv[argc - 1], "interactive") == 0; + Mariadb_nodes::require_gtid(true); + TestConnections test(argc, argv); + MYSQL* maxconn = test.maxscales->open_rwsplit_connection(0); + + // Set up test table + basic_test(test); + // Delete binlogs to sync gtid:s + delete_slave_binlogs(test); + // Advance gtid:s a bit so that gtid variables are updated. + generate_traffic_and_check(test, maxconn, 10); + test.repl->sync_slaves(0); + + cout << LINE << "\n"; + print_gtids(test); + cout << LINE << "\n"; + string gtid_begin; + if (find_field(maxconn, GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + gtid_begin = result_tmp; + } + mysql_close(maxconn); + + // Leave first of three slaves connected so it's clear which one is the master server. + const char STOP_SLAVE[] = "STOP SLAVE;"; + const char RESET_SLAVE[] = "RESET SLAVE ALL;"; + const char READ_ONLY_OFF[] = "SET GLOBAL read_only=0;"; + test.repl->connect(); + const int FIRST_MOD_NODE = 2; // Modify nodes 2 & 3 + const int NODE_COUNT = test.repl->N; + MYSQL** nodes = test.repl->nodes; + + for (int i = FIRST_MOD_NODE; i < NODE_COUNT; i++) + { + if (mysql_query(nodes[i], STOP_SLAVE) != 0 || + mysql_query(nodes[i], RESET_SLAVE) != 0 || + mysql_query(nodes[i], READ_ONLY_OFF) != 0) + { + test.assert(false, "Could not stop slave connections and/or disable read_only for node %d.", i); + return test.global_result; + } + } + + // Add more events to node3. 
+ string gtid_node2, gtid_node3; + cout << "Sending more inserts to server 4.\n"; + generate_traffic_and_check(test, nodes[3], 10); + // Save gtids + if (find_field(nodes[2], GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + gtid_node2 = result_tmp; + } + if (find_field(nodes[3], GTID_QUERY, GTID_FIELD, result_tmp) == 0) + { + gtid_node3 = result_tmp; + } + print_gtids(test); + bool gtids_ok = (gtid_begin == gtid_node2 && gtid_node2 < gtid_node3); + test.assert(gtids_ok, "Gtid:s have not advanced correctly."); + if (!gtids_ok) + { + return test.global_result; + } + cout << "Sending rejoin commands for servers 3 & 4. Server 4 should not rejoin the cluster.\n"; + const string REJOIN_CMD = "maxadmin call command mariadbmon rejoin MySQL-Monitor"; + int ec; + string rejoin_s3 = REJOIN_CMD + " server3"; + string rejoin_s4 = REJOIN_CMD + " server4"; + test.maxscales->ssh_node_output(0, rejoin_s3.c_str() , true, &ec); + test.maxscales->ssh_node_output(0, rejoin_s4.c_str() , true, &ec); + sleep(5); + get_output(test); + + StringSet node2_states = test.get_server_status("server3"); + StringSet node3_states = test.get_server_status("server4"); + bool states_n2_ok = (node2_states.find("Slave") != node2_states.end()); + bool states_n3_ok = (node3_states.find("Slave") == node3_states.end()); + test.assert(states_n2_ok, "Node 2 has not rejoined when it should have."); + test.assert(states_n3_ok, "Node 3 rejoined when it shouldn't have."); + if (!states_n2_ok || !states_n3_ok) + { + return test.global_result; + } + // Finally, fix replication by telling the current master to replicate from server4 + test.tprintf("Setting server 1 to replicate from server 4. 
Manually rejoin servers 2 and 3."); + const char CHANGE_CMD_FMT[] = "CHANGE MASTER TO MASTER_HOST = '%s', MASTER_PORT = %d, " + "MASTER_USE_GTID = current_pos, MASTER_USER='repl', MASTER_PASSWORD = 'repl';"; + char cmd[256]; + snprintf(cmd, sizeof(cmd), CHANGE_CMD_FMT, test.repl->IP[3], test.repl->port[3]); + mysql_query(nodes[0], cmd); + mysql_query(nodes[0], "START SLAVE;"); + sleep(5); + string rejoin_s2 = REJOIN_CMD + " server2"; + test.maxscales->ssh_node_output(0, rejoin_s2.c_str() , true, &ec); + test.maxscales->ssh_node_output(0, rejoin_s3.c_str() , true, &ec); + sleep(2); + get_output(test); + int master_id = get_master_server_id(test); + test.assert(master_id == 4, "Server 4 should be the cluster master."); + StringSet node0_states = test.get_server_status("server1"); + bool states_n0_ok = (node0_states.find("Slave") != node0_states.end() && + node0_states.find("Relay Master") == node0_states.end()); + test.assert(states_n0_ok, "Server 1 is not a slave when it should be."); + if (states_n0_ok) + { + int ec; + test.maxscales->ssh_node_output(0, + "maxadmin call command mysqlmon switchover MySQL-Monitor server1 server4" , true, &ec); + sleep(1); + master_id = get_master_server_id(test); + test.assert(master_id == 1, "Server 1 should be the cluster master."); + get_output(test); + } + + test.repl->fix_replication(); + return test.global_result; +} From c86ee1f53adc55b63e6ed6469d4e1249a31a6fe0 Mon Sep 17 00:00:00 2001 From: Esa Korhonen Date: Tue, 23 Jan 2018 15:45:42 +0200 Subject: [PATCH 4/6] Clarify documentation on verify_master_failure --- Documentation/Monitors/MariaDB-Monitor.md | 38 +++++++++-------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/Documentation/Monitors/MariaDB-Monitor.md b/Documentation/Monitors/MariaDB-Monitor.md index d4e0aeae2..2044952bd 100644 --- a/Documentation/Monitors/MariaDB-Monitor.md +++ b/Documentation/Monitors/MariaDB-Monitor.md @@ -379,34 +379,24 @@ from passive to active will wait for a failover to 
take place after an apparent loss of a master server. If no new master server is detected within the configured time period, failover will be initiated again. -#### `verify_master_failure` +#### `verify_master_failure` and `master_failure_timeout` -Enable master failure verification for automatic failover. This parameter -expects a boolean value and the feature is enabled by default. +Enable additional master failure verification for automatic failover. +`verify_master_failure` is a boolean value (default: true) which enables this +feature and `master_failure_timeout` defines the timeout in seconds (default: 10). -The failure of a master can be verified by checking whether the slaves are still -connected to the master. The timeout for master failure verification is -controlled by the `master_failure_timeout` parameter. +The failure verification is performed by checking whether the slaves are still +connected to the master and receiving events. Effectively, if a slave has +received an event within `master_failure_timeout` seconds, the master is not +considered down when deciding whether to auto_failover. -#### `master_failure_timeout` +If every slave loses its connection to the master (*Slave_IO_Running* is not +"Yes"), master failure is considered verified regardless of timeout. This allows +a faster failover when the master server crashes, as that causes immediate +disconnection. -This parameter controls the period of time, in seconds, that the monitor must -wait before it can declare that the master has failed. The default value is 10 -seconds. For failover to activate, the `failcount` requirement must also be met. - -The failure of a master is verified by tracking when the last change to the -relay log was done and when the last replication heartbeat was received. If the -period of time between the last received event and the time of the check exceeds -the configured value, the slave's connection to the master is considered to be -broken. 
- -When all slaves of a failed master are no longer connected to the master, the -master failure is verified and the failover can be safely performed. - -If the slaves lose their connections to the master before the configured timeout -is exceeded, the failover is performed immediately. This allows a faster -failover when the master server crashes causing immediate disconnection of the -the network connections. +For automatic failover to activate, the `failcount` requirement must also be +met. #### `switchover_timeout` From 11b0f84b8e1dfdcd956c5c8df6dda1838953c18d Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 24 Jan 2018 11:28:38 +0200 Subject: [PATCH 5/6] MXS-1623 Maintain count of current/total descriptors --- server/core/internal/worker.hh | 40 +++++++++++++++++++++------------- server/core/worker.cc | 21 ++++++++++++++++-- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/server/core/internal/worker.hh b/server/core/internal/worker.hh index 802733155..fa7daf063 100644 --- a/server/core/internal/worker.hh +++ b/server/core/internal/worker.hh @@ -172,6 +172,14 @@ public: return m_statistics; } + /** + * Return the count of descriptors. + * + * @param pnCurrent On output the current number of descriptors. + * @param pnTotal On output the total number of descriptors. + */ + void get_descriptor_counts(uint32_t* pnCurrent, uint64_t* pnTotal); + /** * Add a file descriptor to the epoll instance of the worker. * @@ -508,21 +516,23 @@ private: uint32_t handle_epoll_events(uint32_t events); private: - int m_id; /*< The id of the worker. */ - state_t m_state; /*< The state of the worker */ - int m_epoll_fd; /*< The epoll file descriptor. */ - STATISTICS m_statistics; /*< Worker statistics. */ - MessageQueue* m_pQueue; /*< The message queue of the worker. */ - THREAD m_thread; /*< The thread handle of the worker. */ - bool m_started; /*< Whether the thread has been started or not. */ - bool m_should_shutdown; /*< Whether shutdown should be performed. 
*/ - bool m_shutdown_initiated; /*< Whether shutdown has been initated. */ - SessionsById m_sessions; /*< A mapping of session_id->MXS_SESSION. The map - * should contain sessions exclusive to this - * worker and not e.g. listener sessions. For now, - * it's up to the protocol to decide whether a new - * session is added to the map. */ - Zombies m_zombies; /*< DCBs to be deleted. */ + int m_id; /*< The id of the worker. */ + state_t m_state; /*< The state of the worker */ + int m_epoll_fd; /*< The epoll file descriptor. */ + STATISTICS m_statistics; /*< Worker statistics. */ + MessageQueue* m_pQueue; /*< The message queue of the worker. */ + THREAD m_thread; /*< The thread handle of the worker. */ + bool m_started; /*< Whether the thread has been started or not. */ + bool m_should_shutdown; /*< Whether shutdown should be performed. */ + bool m_shutdown_initiated; /*< Whether shutdown has been initiated. */ + SessionsById m_sessions; /*< A mapping of session_id->MXS_SESSION. The map + * should contain sessions exclusive to this + * worker and not e.g. listener sessions. For now, + * it's up to the protocol to decide whether a new + * session is added to the map. */ + Zombies m_zombies; /*< DCBs to be deleted. */ + uint32_t m_nCurrent_descriptors; /*< Current number of descriptors. */ + uint64_t m_nTotal_descriptors; /*< Total number of descriptors. 
*/ }; } diff --git a/server/core/worker.cc b/server/core/worker.cc index 97f477b7f..f957ac8af 100644 --- a/server/core/worker.cc +++ b/server/core/worker.cc @@ -162,6 +162,8 @@ Worker::Worker(int id, , m_started(false) , m_should_shutdown(false) , m_shutdown_initiated(false) + , m_nCurrent_descriptors(0) + , m_nTotal_descriptors(0) { MXS_POLL_DATA::handler = &Worker::epoll_instance_handler; MXS_POLL_DATA::thread.id = id; @@ -421,6 +423,12 @@ int64_t Worker::get_one_statistic(POLL_STAT what) return rv; } +void Worker::get_descriptor_counts(uint32_t* pnCurrent, uint64_t* pnTotal) +{ + *pnCurrent = atomic_load_uint32(&m_nCurrent_descriptors); + *pnTotal = atomic_load_uint64(&m_nTotal_descriptors); +} + bool Worker::add_fd(int fd, uint32_t events, MXS_POLL_DATA* pData) { bool rv = true; @@ -435,7 +443,12 @@ bool Worker::add_fd(int fd, uint32_t events, MXS_POLL_DATA* pData) pData->thread.id = m_id; - if (epoll_ctl(m_epoll_fd, EPOLL_CTL_ADD, fd, &ev) != 0) + if (epoll_ctl(m_epoll_fd, EPOLL_CTL_ADD, fd, &ev) == 0) + { + atomic_add_uint32(&m_nCurrent_descriptors, 1); + atomic_add_uint64(&m_nTotal_descriptors, 1); + } + else { poll_resolve_error(fd, errno, EPOLL_CTL_ADD); rv = false; @@ -479,7 +492,11 @@ bool Worker::remove_fd(int fd) struct epoll_event ev = {}; - if (epoll_ctl(m_epoll_fd, EPOLL_CTL_DEL, fd, &ev) != 0) + if (epoll_ctl(m_epoll_fd, EPOLL_CTL_DEL, fd, &ev) == 0) + { + atomic_add_uint32(&m_nCurrent_descriptors, -1); + } + else { poll_resolve_error(fd, errno, EPOLL_CTL_DEL); rv = false; From dcd57ea21bab0f6834ec878fea1a623cc7ce11bd Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Wed, 24 Jan 2018 11:39:31 +0200 Subject: [PATCH 6/6] MXS-1623 Expose descriptor counts through maxadmin --- server/core/poll.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/server/core/poll.cc b/server/core/poll.cc index ddefc7de3..b40cae50a 100644 --- a/server/core/poll.cc +++ b/server/core/poll.cc @@ -240,8 +240,8 @@ dShowThreads(DCB *dcb) { 
dcb_printf(dcb, "Polling Threads.\n\n"); - dcb_printf(dcb, " ID | State \n"); - dcb_printf(dcb, "----+------------\n"); + dcb_printf(dcb, " ID | State | #descriptors (curr) | #descriptors (tot) |\n"); + dcb_printf(dcb, "----+------------+---------------------+---------------------+\n"); for (int i = 0; i < n_threads; i++) { Worker* worker = Worker::get(i); @@ -271,7 +271,12 @@ dShowThreads(DCB *dcb) ss_dassert(!true); } - dcb_printf(dcb, " %2d | %s\n", i, state); + uint32_t nCurrent; + uint64_t nTotal; + + worker->get_descriptor_counts(&nCurrent, &nTotal); + + dcb_printf(dcb, " %2d | %10s | %19" PRIu32 " | %19" PRIu64 " |\n", i, state, nCurrent, nTotal); } }