diff --git a/include/maxscale/monitor.h b/include/maxscale/monitor.h index 37b507769..6cf5bb225 100644 --- a/include/maxscale/monitor.h +++ b/include/maxscale/monitor.h @@ -235,6 +235,8 @@ int mon_parse_event_string(bool* events, size_t count, char* string); connect_result_t mon_connect_to_db(MONITOR* mon, MONITOR_SERVERS *database); void mon_log_connect_error(MONITOR_SERVERS* database, connect_result_t rval); void mon_log_state_change(MONITOR_SERVERS *ptr); +void lock_monitor_servers(MONITOR *monitor); +void release_monitor_servers(MONITOR *monitor); /** * @brief Hangup connections to failed servers diff --git a/include/maxscale/server.h b/include/maxscale/server.h index 93b6aadd7..7f04f9c31 100644 --- a/include/maxscale/server.h +++ b/include/maxscale/server.h @@ -264,8 +264,8 @@ extern void dprintPersistentDCBs(DCB *, SERVER *); extern void dListServers(DCB *); extern char *server_status(SERVER *); extern void server_clear_set_status(SERVER *server, int specified_bits, int bits_to_set); -extern void server_set_status(SERVER *, int); -extern void server_clear_status(SERVER *, int); +extern void server_set_status_nolock(SERVER *, int); +extern void server_clear_status_nolock(SERVER *, int); extern void server_transfer_status(SERVER *dest_server, SERVER *source_server); extern void serverAddMonUser(SERVER *, char *, char *); extern void serverAddParameter(SERVER *, char *, char *); @@ -278,4 +278,7 @@ extern RESULTSET *serverGetList(); extern unsigned int server_map_status(char *str); extern bool server_set_version_string(SERVER* server, const char* string); +extern void server_set_status(SERVER *, int); +extern void server_clear_status(SERVER *, int); + MXS_END_DECLS diff --git a/server/core/monitor.c b/server/core/monitor.c index 9f18037be..d0d10e481 100644 --- a/server/core/monitor.c +++ b/server/core/monitor.c @@ -1451,3 +1451,26 @@ void mon_hangup_failed_servers(MONITOR *monitor) } } } +/** + * Acquire locks on all servers monitored my this monitor. There should + * only be max 1 monitor per server. + * @param monitor The target monitor + */ +void lock_monitor_servers(MONITOR *monitor) +{ + MONITOR_SERVERS *ptr = monitor->databases; + while (ptr) + { + spinlock_acquire(&ptr->server->lock); + ptr = ptr->next; + } +} +void release_monitor_servers(MONITOR *monitor) +{ + MONITOR_SERVERS *ptr = monitor->databases; + while (ptr) + { + spinlock_release(&ptr->server->lock); + ptr = ptr->next; + } +} \ No newline at end of file diff --git a/server/core/server.c b/server/core/server.c index 291a6bd0c..ab1ec724e 100644 --- a/server/core/server.c +++ b/server/core/server.c @@ -671,48 +671,50 @@ server_status(SERVER *server) { return NULL; } + + unsigned int server_status = server->status; status[0] = 0; - if (server->status & SERVER_MAINT) + if (server_status & SERVER_MAINT) { strcat(status, "Maintenance, "); } - if (server->status & SERVER_MASTER) + if (server_status & SERVER_MASTER) { strcat(status, "Master, "); } - if (server->status & SERVER_RELAY_MASTER) + if (server_status & SERVER_RELAY_MASTER) { strcat(status, "Relay Master, "); } - if (server->status & SERVER_SLAVE) + if (server_status & SERVER_SLAVE) { strcat(status, "Slave, "); } - if (server->status & SERVER_JOINED) + if (server_status & SERVER_JOINED) { strcat(status, "Synced, "); } - if (server->status & SERVER_NDB) + if (server_status & SERVER_NDB) { strcat(status, "NDB, "); } - if (server->status & SERVER_SLAVE_OF_EXTERNAL_MASTER) + if (server_status & SERVER_SLAVE_OF_EXTERNAL_MASTER) { strcat(status, "Slave of External Server, "); } - if (server->status & SERVER_STALE_STATUS) + if (server_status & SERVER_STALE_STATUS) { strcat(status, "Stale Status, "); } - if (server->status & SERVER_MASTER_STICKINESS) + if (server_status & SERVER_MASTER_STICKINESS) { strcat(status, "Master Stickiness, "); } - if (server->status & SERVER_AUTH_ERROR) + if (server_status & SERVER_AUTH_ERROR) { strcat(status, "Auth Error, "); } - if (server->status & SERVER_RUNNING) + if (server_status & SERVER_RUNNING) { strcat(status, "Running"); } @@ -724,13 +726,13 @@ server_status(SERVER *server) } /** - * Set a status bit in the server + * Set a status bit in the server without locking * * @param server The server to update * @param bit The bit to set for the server */ void -server_set_status(SERVER *server, int bit) +server_set_status_nolock(SERVER *server, int bit) { server->status |= bit; @@ -745,6 +747,8 @@ server_set_status(SERVER *server, int bit) * Set one or more status bit(s) from a specified set, clearing any others * in the specified set * + * @attention This function does no locking + * * @param server The server to update * @param bit The bit to set for the server */ @@ -764,13 +768,13 @@ server_clear_set_status(SERVER *server, int specified_bits, int bits_to_set) } /** - * Clear a status bit in the server + * Clear a status bit in the server without locking * * @param server The server to update * @param bit The bit to clear for the server */ void -server_clear_status(SERVER *server, int bit) +server_clear_status_nolock(SERVER *server, int bit) { server->status &= ~bit; } @@ -778,6 +782,8 @@ server_clear_status(SERVER *server, int bit) /** * Transfer status bitstring from one server to another * + * @attention This function does no locking + * * @param dest_server The server to be updated * @param source_server The server to provide the new bit string */ @@ -1071,10 +1077,8 @@ bool server_set_version_string(SERVER* server, const char* string) if (string) { - spinlock_acquire(&server->lock); char* old = server->server_string; server->server_string = (char*)string; - spinlock_release(&server->lock); MXS_FREE(old); } else @@ -1254,3 +1258,31 @@ SERVER* server_find_destroyed(const char *name, const char *protocol, return server; } +/** + * Set a status bit in the server under a lock. This ensures synchronization + * with the server monitor thread. Calling this inside the monitor will likely + * cause a deadlock. + * + * @param server The server to update + * @param bit The bit to set for the server + */ +void server_set_status(SERVER *server, int bit) +{ + spinlock_acquire(&server->lock); + server_set_status_nolock(server, bit); + spinlock_release(&server->lock); +} +/** + * Clear a status bit in the server under a lock. This ensures synchronization + * with the server monitor thread. Calling this inside the monitor will likely + * cause a deadlock. + * + * @param server The server to update + * @param bit The bit to clear for the server + */ +void server_clear_status(SERVER *server, int bit) +{ + spinlock_acquire(&server->lock); + server_clear_status_nolock(server, bit); + spinlock_release(&server->lock); +} diff --git a/server/core/test/testserver.c b/server/core/test/testserver.c index 481281834..536ba7238 100644 --- a/server/core/test/testserver.c +++ b/server/core/test/testserver.c @@ -82,11 +82,11 @@ test1() { MXS_FREE(status); } - server_set_status(server, SERVER_MASTER); + server_set_status_nolock(server, SERVER_MASTER); status = server_status(server); mxs_log_flush_sync(); ss_info_dassert(0 == strcmp("Master, Running", status), "Should find correct status."); - server_clear_status(server, SERVER_MASTER); + server_clear_status_nolock(server, SERVER_MASTER); MXS_FREE(status); status = server_status(server); mxs_log_flush_sync(); diff --git a/server/modules/monitor/auroramon/auroramon.c b/server/modules/monitor/auroramon/auroramon.c index 81196ff39..c3c0835ec 100644 --- a/server/modules/monitor/auroramon/auroramon.c +++ b/server/modules/monitor/auroramon/auroramon.c @@ -79,7 +79,7 @@ void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database) if (!SERVER_IN_MAINT(database->server)) { SERVER temp_server = {.status = database->server->status}; - server_clear_status(&temp_server, SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE | SERVER_AUTH_ERROR); + server_clear_status_nolock(&temp_server, SERVER_RUNNING | SERVER_MASTER | SERVER_SLAVE | SERVER_AUTH_ERROR); database->mon_prev_status = database->server->status; /** Try to connect to or ping the database */ @@ -87,7 +87,7 @@ void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database) if (rval == MONITOR_CONN_OK) { - server_set_status(&temp_server, SERVER_RUNNING); + server_set_status_nolock(&temp_server, SERVER_RUNNING); MYSQL_RES *result; /** Connection is OK, query for replica status */ @@ -106,7 +106,7 @@ void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database) status = SERVER_MASTER; } - server_set_status(&temp_server, status); + server_set_status_nolock(&temp_server, status); mysql_free_result(result); } else @@ -122,7 +122,7 @@ void update_server_status(MONITOR *monitor, MONITOR_SERVERS *database) /** Failed to connect to the database */ if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) { - server_set_status(&temp_server, SERVER_AUTH_ERROR); + server_set_status_nolock(&temp_server, SERVER_AUTH_ERROR); } if (mon_status_changed(database) && mon_print_fail_status(database)) @@ -187,6 +187,7 @@ monitorMain(void *arg) while (!handle->shutdown) { + lock_monitor_servers(monitor); for (MONITOR_SERVERS *ptr = monitor->databases; ptr; ptr = ptr->next) { update_server_status(monitor, ptr); @@ -221,6 +222,7 @@ monitorMain(void *arg) } } + release_monitor_servers(monitor); /** Sleep until the next monitoring interval */ int ms = 0; while (ms < monitor->interval && !handle->shutdown) diff --git a/server/modules/monitor/galeramon/galeramon.c b/server/modules/monitor/galeramon/galeramon.c index f98f3ff70..c62c01c06 100644 --- a/server/modules/monitor/galeramon/galeramon.c +++ b/server/modules/monitor/galeramon/galeramon.c @@ -285,20 +285,20 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database) database->mon_prev_status = database->server->status; server_transfer_status(&temp_server, database->server); - server_clear_status(&temp_server, SERVER_RUNNING); + server_clear_status_nolock(&temp_server, SERVER_RUNNING); /* Also clear Joined */ - server_clear_status(&temp_server, SERVER_JOINED); + server_clear_status_nolock(&temp_server, SERVER_JOINED); connect_result_t rval = mon_connect_to_db(mon, database); if (rval != MONITOR_CONN_OK) { if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) { - server_set_status(&temp_server, SERVER_AUTH_ERROR); + server_set_status_nolock(&temp_server, SERVER_AUTH_ERROR); } else { - server_clear_status(&temp_server, SERVER_AUTH_ERROR); + server_clear_status_nolock(&temp_server, SERVER_AUTH_ERROR); } database->server->node_id = -1; @@ -314,7 +314,7 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database) } /* If we get this far then we have a working connection */ - server_set_status(&temp_server, SERVER_RUNNING); + server_set_status_nolock(&temp_server, SERVER_RUNNING); /* get server version string */ server_string = (char *) mysql_get_server_info(database->con); @@ -406,11 +406,11 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database) mysql_free_result(result); } - server_set_status(&temp_server, SERVER_JOINED); + server_set_status_nolock(&temp_server, SERVER_JOINED); } else { - server_clear_status(&temp_server, SERVER_JOINED); + server_clear_status_nolock(&temp_server, SERVER_JOINED); } /* clear bits for non member nodes */ @@ -419,11 +419,11 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database) database->server->depth = -1; /* clear M/S status */ - server_clear_status(&temp_server, SERVER_SLAVE); - server_clear_status(&temp_server, SERVER_MASTER); + server_clear_status_nolock(&temp_server, SERVER_SLAVE); + server_clear_status_nolock(&temp_server, SERVER_MASTER); /* clear master sticky status */ - server_clear_status(&temp_server, SERVER_MASTER_STICKINESS); + server_clear_status_nolock(&temp_server, SERVER_MASTER_STICKINESS); } server_transfer_status(database->server, &temp_server); @@ -488,8 +488,9 @@ monitorMain(void *arg) /* reset cluster members counter */ is_cluster = 0; - ptr = mon->databases; + lock_monitor_servers(mon); + ptr = mon->databases; while (ptr) { ptr->mon_prev_status = ptr->server->status; @@ -615,6 +616,7 @@ monitorMain(void *arg) } mon_hangup_failed_servers(mon); + release_monitor_servers(mon); } } diff --git a/server/modules/monitor/mmmon/mmmon.c b/server/modules/monitor/mmmon/mmmon.c index 1826ec328..1989429a7 100644 --- a/server/modules/monitor/mmmon/mmmon.c +++ b/server/modules/monitor/mmmon/mmmon.c @@ -252,20 +252,20 @@ monitorDatabase(MONITOR* mon, MONITOR_SERVERS *database) { if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) { - server_set_status(database->server, SERVER_AUTH_ERROR); + server_set_status_nolock(database->server, SERVER_AUTH_ERROR); monitor_set_pending_status(database, SERVER_AUTH_ERROR); } - server_clear_status(database->server, SERVER_RUNNING); + server_clear_status_nolock(database->server, SERVER_RUNNING); monitor_clear_pending_status(database, SERVER_RUNNING); /* Also clear M/S state in both server and monitor server pending struct */ - server_clear_status(database->server, SERVER_SLAVE); - server_clear_status(database->server, SERVER_MASTER); + server_clear_status_nolock(database->server, SERVER_SLAVE); + server_clear_status_nolock(database->server, SERVER_MASTER); monitor_clear_pending_status(database, SERVER_SLAVE); monitor_clear_pending_status(database, SERVER_MASTER); /* Clean addition status too */ - server_clear_status(database->server, SERVER_STALE_STATUS); + server_clear_status_nolock(database->server, SERVER_STALE_STATUS); monitor_clear_pending_status(database, SERVER_STALE_STATUS); if (mon_status_changed(database) && mon_print_fail_status(database)) @@ -276,12 +276,12 @@ monitorDatabase(MONITOR* mon, MONITOR_SERVERS *database) } else { - server_clear_status(database->server, SERVER_AUTH_ERROR); + server_clear_status_nolock(database->server, SERVER_AUTH_ERROR); monitor_clear_pending_status(database, SERVER_AUTH_ERROR); } /* Store current status in both server and monitor server pending struct */ - server_set_status(database->server, SERVER_RUNNING); + server_set_status_nolock(database->server, SERVER_RUNNING); monitor_set_pending_status(database, SERVER_RUNNING); /* get server version from current server */ @@ -558,6 +558,7 @@ monitorMain(void *arg) } nrounds += 1; + lock_monitor_servers(mon); /* start from the first server in the list */ ptr = mon->databases; @@ -612,7 +613,7 @@ monitorMain(void *arg) "use it again even if it could be a stale master, you have " "been warned!", ptr->server->name, ptr->server->port); /* Set the STALE bit for this server in server struct */ - server_set_status(ptr->server, SERVER_STALE_STATUS); + server_set_status_nolock(ptr->server, SERVER_STALE_STATUS); } else { @@ -642,6 +643,7 @@ monitorMain(void *arg) } mon_hangup_failed_servers(mon); + release_monitor_servers(mon); } } diff --git a/server/modules/monitor/mysqlmon/mysql_mon.c b/server/modules/monitor/mysqlmon/mysql_mon.c index adb03135f..ac1df9edd 100644 --- a/server/modules/monitor/mysqlmon/mysql_mon.c +++ b/server/modules/monitor/mysqlmon/mysql_mon.c @@ -685,7 +685,7 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database) connect_result_t rval; if ((rval = mon_connect_to_db(mon, database)) == MONITOR_CONN_OK) { - server_clear_status(database->server, SERVER_AUTH_ERROR); + server_clear_status_nolock(database->server, SERVER_AUTH_ERROR); monitor_clear_pending_status(database, SERVER_AUTH_ERROR); } else @@ -697,24 +697,24 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database) */ if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) { - server_set_status(database->server, SERVER_AUTH_ERROR); + server_set_status_nolock(database->server, SERVER_AUTH_ERROR); monitor_set_pending_status(database, SERVER_AUTH_ERROR); } - server_clear_status(database->server, SERVER_RUNNING); + server_clear_status_nolock(database->server, SERVER_RUNNING); monitor_clear_pending_status(database, SERVER_RUNNING); /* Also clear M/S state in both server and monitor server pending struct */ - server_clear_status(database->server, SERVER_SLAVE); - server_clear_status(database->server, SERVER_MASTER); - server_clear_status(database->server, SERVER_RELAY_MASTER); + server_clear_status_nolock(database->server, SERVER_SLAVE); + server_clear_status_nolock(database->server, SERVER_MASTER); + server_clear_status_nolock(database->server, SERVER_RELAY_MASTER); monitor_clear_pending_status(database, SERVER_SLAVE); monitor_clear_pending_status(database, SERVER_MASTER); monitor_clear_pending_status(database, SERVER_RELAY_MASTER); /* Clean addition status too */ - server_clear_status(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER); - server_clear_status(database->server, SERVER_STALE_STATUS); - server_clear_status(database->server, SERVER_STALE_SLAVE); + server_clear_status_nolock(database->server, SERVER_SLAVE_OF_EXTERNAL_MASTER); + server_clear_status_nolock(database->server, SERVER_STALE_STATUS); + server_clear_status_nolock(database->server, SERVER_STALE_SLAVE); monitor_clear_pending_status(database, SERVER_SLAVE_OF_EXTERNAL_MASTER); monitor_clear_pending_status(database, SERVER_STALE_STATUS); monitor_clear_pending_status(database, SERVER_STALE_SLAVE); @@ -729,7 +729,7 @@ monitorDatabase(MONITOR *mon, MONITOR_SERVERS *database) } } /* Store current status in both server and monitor server pending struct */ - server_set_status(database->server, SERVER_RUNNING); + server_set_status_nolock(database->server, SERVER_RUNNING); monitor_set_pending_status(database, SERVER_RUNNING); /* get server version from current server */ @@ -828,7 +828,7 @@ struct graph_node * https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */ static void visit_node(struct graph_node *node, struct graph_node **stack, - int *stacksize, int *index, int *cycle) + int *stacksize, int *index, int *cycle) { /** Assign an index to this node */ node->lowest_index = node->index = *index; @@ -1087,7 +1087,7 @@ void do_failover(MYSQL_MONITOR *handle, MONITOR_SERVERS *db) } else { - server_set_status(db->server, SERVER_MAINT); + server_set_status_nolock(db->server, SERVER_MAINT); monitor_set_pending_status(db, SERVER_MAINT); } db = db->next; @@ -1161,6 +1161,7 @@ monitorMain(void *arg) /* reset num_servers */ num_servers = 0; + lock_monitor_servers(mon); /* start from the first server in the list */ ptr = mon->databases; @@ -1302,7 +1303,7 @@ monitorMain(void *arg) * In this case server->status will not be updated from pending_status * Set the STALE bit for this server in server struct */ - server_set_status(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER); + server_set_status_nolock(ptr->server, SERVER_STALE_STATUS | SERVER_MASTER); ptr->pending_status |= SERVER_STALE_STATUS | SERVER_MASTER; /** Log the message only if the master server didn't have @@ -1451,6 +1452,7 @@ monitorMain(void *arg) } mon_hangup_failed_servers(mon); + release_monitor_servers(mon); } /*< while (1) */ } @@ -1841,7 +1843,7 @@ static MONITOR_SERVERS *get_replication_tree(MONITOR *mon, int num_servers) current->node_id); master->server->depth = current->depth - 1; - if(handle->master && master->server->depth < handle->master->server->depth) + if (handle->master && master->server->depth < handle->master->server->depth) { /** A master with a lower depth was found, remove the master status from the previous master. */ diff --git a/server/modules/monitor/ndbclustermon/ndbclustermon.c b/server/modules/monitor/ndbclustermon/ndbclustermon.c index a8cab02b4..a1cef4274 100644 --- a/server/modules/monitor/ndbclustermon/ndbclustermon.c +++ b/server/modules/monitor/ndbclustermon/ndbclustermon.c @@ -234,11 +234,11 @@ monitorDatabase(MONITOR_SERVERS *database, char *defaultUser, char *defaultPassw connect_result_t rval = mon_connect_to_db(mon, database); if (rval != MONITOR_CONN_OK) { - server_clear_status(database->server, SERVER_RUNNING); + server_clear_status_nolock(database->server, SERVER_RUNNING); if (mysql_errno(database->con) == ER_ACCESS_DENIED_ERROR) { - server_set_status(database->server, SERVER_AUTH_ERROR); + server_set_status_nolock(database->server, SERVER_AUTH_ERROR); } database->server->node_id = -1; @@ -250,9 +250,9 @@ monitorDatabase(MONITOR_SERVERS *database, char *defaultUser, char *defaultPassw return; } - server_clear_status(database->server, SERVER_AUTH_ERROR); + server_clear_status_nolock(database->server, SERVER_AUTH_ERROR); /* If we get this far then we have a working connection */ - server_set_status(database->server, SERVER_RUNNING); + server_set_status_nolock(database->server, SERVER_RUNNING); /* get server version string */ server_string = (char *) mysql_get_server_info(database->con); @@ -313,12 +313,12 @@ monitorDatabase(MONITOR_SERVERS *database, char *defaultUser, char *defaultPassw if (isjoined) { - server_set_status(database->server, SERVER_NDB); + server_set_status_nolock(database->server, SERVER_NDB); database->server->depth = 0; } else { - server_clear_status(database->server, SERVER_NDB); + server_clear_status_nolock(database->server, SERVER_NDB); database->server->depth = -1; } } @@ -373,8 +373,9 @@ monitorMain(void *arg) continue; } nrounds += 1; - ptr = mon->databases; + lock_monitor_servers(mon); + ptr = mon->databases; while (ptr) { ptr->mon_prev_status = ptr->server->status; @@ -414,6 +415,7 @@ monitorMain(void *arg) } mon_hangup_failed_servers(mon); + release_monitor_servers(mon); } }