blr branch merge
blr branch merge
This commit is contained in:
commit
7245d1baa1
@ -52,3 +52,12 @@ endif
|
||||
#
|
||||
ERRMSG := $(HOME)/usr/share/mysql
|
||||
|
||||
#
|
||||
# Build a binary that produces profile data
|
||||
#
|
||||
PROFILE := N
|
||||
|
||||
#
|
||||
# Build a binary that produces code coverage data
|
||||
#
|
||||
GCOV := N
|
||||
|
10
makefile.inc
10
makefile.inc
@ -41,3 +41,13 @@ endif
|
||||
ifdef PROF
|
||||
CFLAGS := $(CFLAGS) -DSS_PROF
|
||||
endif
|
||||
|
||||
ifeq "$(PROFILE)" "Y"
|
||||
CFLAGS += -pg
|
||||
LDFLAGS += -pg
|
||||
endif
|
||||
|
||||
ifeq "$(GCOV)" "Y"
|
||||
CFLAGS += -fprofile-arcs -ftest-coverage
|
||||
LIBS += -lgcov
|
||||
endif
|
||||
|
@ -34,6 +34,7 @@
|
||||
# gateway needs mysql client lib, not qc.
|
||||
# 24/07/13 Mark Ridoch Addition of encryption routines
|
||||
# 30/05/14 Mark Ridoch Filter API added
|
||||
# 29/08/14 Mark Riddoch Added housekeeper
|
||||
|
||||
include ../../build_gateway.inc
|
||||
|
||||
@ -47,17 +48,23 @@ CFLAGS=-c -I/usr/include -I../include -I../modules/include -I../inih \
|
||||
-I$(LOGPATH) -I$(UTILSPATH) \
|
||||
-Wall -g
|
||||
|
||||
include ../../makefile.inc
|
||||
|
||||
LDFLAGS=-rdynamic -L$(LOGPATH) \
|
||||
-Wl,-rpath,$(DEST)/lib \
|
||||
-Wl,-rpath,$(LOGPATH) -Wl,-rpath,$(UTILSPATH) \
|
||||
-Wl,-rpath,$(EMBEDDED_LIB)
|
||||
|
||||
|
||||
LIBS=-L$(EMBEDDED_LIB) \
|
||||
-lmysqld \
|
||||
-lz -lm -lcrypt -lcrypto -ldl -laio -lrt -pthread -llog_manager \
|
||||
-L../inih/extra -linih -lssl -lstdc++
|
||||
|
||||
include ../../makefile.inc
|
||||
|
||||
SRCS= atomic.c buffer.c spinlock.c gateway.c \
|
||||
gw_utils.c utils.c dcb.c load_utils.c session.c service.c server.c \
|
||||
poll.c config.c users.c hashtable.c dbusers.c thread.c gwbitmask.c \
|
||||
monitor.c adminusers.c secrets.c filter.c modutil.c
|
||||
monitor.c adminusers.c secrets.c filter.c modutil.c housekeeper.c
|
||||
|
||||
HDRS= ../include/atomic.h ../include/buffer.h ../include/dcb.h \
|
||||
../include/gw.h ../modules/include/mysql_client_server_protocol.h \
|
||||
@ -65,18 +72,13 @@ HDRS= ../include/atomic.h ../include/buffer.h ../include/dcb.h \
|
||||
../include/modules.h ../include/poll.h ../include/config.h \
|
||||
../include/users.h ../include/hashtable.h ../include/gwbitmask.h \
|
||||
../include/adminusers.h ../include/version.h ../include/maxscale.h \
|
||||
../include/filter.h modutil.h
|
||||
../include/filter.h ../include/modutil.h ../include/housekeeper.h
|
||||
|
||||
OBJ=$(SRCS:.c=.o)
|
||||
|
||||
KOBJS=maxkeys.o secrets.o utils.o
|
||||
POBJS=maxpasswd.o secrets.o utils.o
|
||||
|
||||
LIBS=-L$(EMBEDDED_LIB) \
|
||||
-lmysqld \
|
||||
-lz -lm -lcrypt -lcrypto -ldl -laio -lrt -pthread -llog_manager \
|
||||
-L../inih/extra -linih -lssl -lstdc++
|
||||
|
||||
all: maxscale maxkeys maxpasswd
|
||||
|
||||
cleantests:
|
||||
|
@ -32,6 +32,8 @@
|
||||
* 11/07/13 Mark Riddoch Add reference count mechanism
|
||||
* 16/07/2013 Massimiliano Pinto Added command type to gwbuf struct
|
||||
* 24/06/2014 Mark Riddoch Addition of gwbuf_trim
|
||||
* 28/08/2014 Mark Riddoch Adition of tail pointer to speed
|
||||
* the gwbuf_append process
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
@ -82,6 +84,7 @@ SHARED_BUF *sbuf;
|
||||
sbuf->refcount = 1;
|
||||
rval->sbuf = sbuf;
|
||||
rval->next = NULL;
|
||||
rval->tail = rval;
|
||||
rval->gwbuf_type = GWBUF_TYPE_UNDEFINED;
|
||||
rval->command = 0;
|
||||
CHK_GWBUF(rval);
|
||||
@ -131,6 +134,7 @@ GWBUF *rval;
|
||||
rval->end = buf->end;
|
||||
rval->gwbuf_type = buf->gwbuf_type;
|
||||
rval->next = NULL;
|
||||
rval->tail = rval;
|
||||
CHK_GWBUF(rval);
|
||||
return rval;
|
||||
}
|
||||
@ -157,6 +161,7 @@ GWBUF *gwbuf_clone_portion(
|
||||
clonebuf->end = (void *)((char *)clonebuf->start)+length;
|
||||
clonebuf->gwbuf_type = buf->gwbuf_type; /*< clone the type for now */
|
||||
clonebuf->next = NULL;
|
||||
clonebuf->tail = clonebuf;
|
||||
CHK_GWBUF(clonebuf);
|
||||
return clonebuf;
|
||||
|
||||
@ -233,11 +238,8 @@ GWBUF *ptr = head;
|
||||
if (!head)
|
||||
return tail;
|
||||
CHK_GWBUF(head);
|
||||
while (ptr->next)
|
||||
{
|
||||
ptr = ptr->next;
|
||||
}
|
||||
ptr->next = tail;
|
||||
head->tail->next = tail;
|
||||
head->tail = tail->tail;
|
||||
|
||||
return head;
|
||||
}
|
||||
@ -262,6 +264,7 @@ GWBUF *
|
||||
gwbuf_consume(GWBUF *head, unsigned int length)
|
||||
{
|
||||
GWBUF *rval = head;
|
||||
|
||||
CHK_GWBUF(head);
|
||||
GWBUF_CONSUME(head, length);
|
||||
CHK_GWBUF(head);
|
||||
@ -269,8 +272,13 @@ GWBUF *rval = head;
|
||||
if (GWBUF_EMPTY(head))
|
||||
{
|
||||
rval = head->next;
|
||||
if (head->next)
|
||||
head->next->tail = head->tail;
|
||||
gwbuf_free(head);
|
||||
}
|
||||
|
||||
ss_dassert(rval->end > rval->start);
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
@ -302,6 +310,8 @@ int rval = 0;
|
||||
* buffer has n_bytes or less then it will be freed and
|
||||
* NULL will be returned.
|
||||
*
|
||||
* This routine assumes the buffer is not part of a chain
|
||||
*
|
||||
* @param buf The buffer to trim
|
||||
* @param n_bytes The number of bytes to trim off
|
||||
* @return The buffer chain or NULL if buffer has <= n_bytes
|
||||
@ -309,6 +319,8 @@ int rval = 0;
|
||||
GWBUF *
|
||||
gwbuf_trim(GWBUF *buf, unsigned int n_bytes)
|
||||
{
|
||||
ss_dassert(buf->next == NULL);
|
||||
|
||||
if (GWBUF_LENGTH(buf) <= n_bytes)
|
||||
{
|
||||
gwbuf_consume(buf, GWBUF_LENGTH(buf));
|
||||
|
@ -923,6 +923,15 @@ config_threadcount()
|
||||
return gateway.n_threads;
|
||||
}
|
||||
|
||||
static struct {
|
||||
char *logname;
|
||||
logfile_id_t logfile;
|
||||
} lognames[] = {
|
||||
{ "log_messages", LOGFILE_MESSAGE },
|
||||
{ "log_trace", LOGFILE_TRACE },
|
||||
{ "log_debug", LOGFILE_DEBUG },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
/**
|
||||
* Configuration handler for items in the global [MaxScale] section
|
||||
*
|
||||
@ -933,10 +942,20 @@ config_threadcount()
|
||||
static int
|
||||
handle_global_item(const char *name, const char *value)
|
||||
{
|
||||
int i;
|
||||
if (strcmp(name, "threads") == 0) {
|
||||
gateway.n_threads = atoi(value);
|
||||
} else {
|
||||
return 0;
|
||||
for (i = 0; lognames[i].logname; i++)
|
||||
{
|
||||
if (strcasecmp(name, lognames[i].logname) == 0)
|
||||
{
|
||||
if (atoi(value))
|
||||
skygw_log_enable(lognames[i].logfile);
|
||||
else
|
||||
skygw_log_disable(lognames[i].logfile);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
@ -89,7 +89,13 @@ static int dcb_null_write(DCB *dcb, GWBUF *buf);
|
||||
static int dcb_null_close(DCB *dcb);
|
||||
static int dcb_null_auth(DCB *dcb, SERVER *server, SESSION *session, GWBUF *buf);
|
||||
|
||||
DCB* dcb_get_zombies(void)
|
||||
/**
|
||||
* Return the pointer to the lsit of zombie DCB's
|
||||
*
|
||||
* @return Zombies DCB list
|
||||
*/
|
||||
DCB *
|
||||
dcb_get_zombies(void)
|
||||
{
|
||||
return zombies;
|
||||
}
|
||||
@ -128,6 +134,12 @@ DCB *rval;
|
||||
spinlock_init(&rval->delayqlock);
|
||||
spinlock_init(&rval->authlock);
|
||||
spinlock_init(&rval->cb_lock);
|
||||
spinlock_init(&rval->pollinlock);
|
||||
spinlock_init(&rval->polloutlock);
|
||||
rval->pollinbusy = 0;
|
||||
rval->readcheck = 0;
|
||||
rval->polloutbusy = 0;
|
||||
rval->writecheck = 0;
|
||||
rval->fd = -1;
|
||||
memset(&rval->stats, 0, sizeof(DCBSTATS)); // Zero the statistics
|
||||
rval->state = DCB_STATE_ALLOC;
|
||||
@ -376,11 +388,6 @@ DCB_CALLBACK *cb;
|
||||
}
|
||||
spinlock_release(&dcb->cb_lock);
|
||||
|
||||
if (dcb->dcb_readqueue)
|
||||
{
|
||||
GWBUF* queue = dcb->dcb_readqueue;
|
||||
while ((queue = gwbuf_consume(queue, GWBUF_LENGTH(queue))) != NULL);
|
||||
}
|
||||
bitmask_free(&dcb->memdata.bitmask);
|
||||
simple_mutex_done(&dcb->dcb_read_lock);
|
||||
simple_mutex_done(&dcb->dcb_write_lock);
|
||||
@ -399,7 +406,7 @@ DCB_CALLBACK *cb;
|
||||
*
|
||||
* @param threadid The thread ID of the caller
|
||||
*/
|
||||
DCB*
|
||||
DCB *
|
||||
dcb_process_zombies(int threadid)
|
||||
{
|
||||
DCB *ptr, *lptr;
|
||||
@ -1187,7 +1194,7 @@ printDCB(DCB *dcb)
|
||||
if (dcb->remote)
|
||||
printf("\tConnected to: %s\n", dcb->remote);
|
||||
if (dcb->user)
|
||||
printf("\tUsername to: %s\n", dcb->user);
|
||||
printf("\tUsername to: %s\n", dcb->user);
|
||||
if (dcb->writeq)
|
||||
printf("\tQueued write data: %d\n",gwbuf_length(dcb->writeq));
|
||||
printf("\tStatistics:\n");
|
||||
@ -1204,6 +1211,19 @@ printDCB(DCB *dcb)
|
||||
printf("\t\tNo. of Low Water Events: %d\n",
|
||||
dcb->stats.n_low_water);
|
||||
}
|
||||
/**
|
||||
* Display an entry from the spinlock statistics data
|
||||
*
|
||||
* @param dcb The DCB to print to
|
||||
* @param desc Description of the statistic
|
||||
* @param value The statistic value
|
||||
*/
|
||||
static void
|
||||
spin_reporter(void *dcb, char *desc, int value)
|
||||
{
|
||||
dcb_printf((DCB *)dcb, "\t\t%-35s %d\n", desc, value);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Diagnostic to print all DCB allocated in the system
|
||||
@ -1233,6 +1253,12 @@ void dprintAllDCBs(DCB *pdcb)
|
||||
DCB *dcb;
|
||||
|
||||
spinlock_acquire(&dcbspin);
|
||||
#if SPINLOCK_PROFILE
|
||||
dcb_printf(pdcb, "DCB List Spinlock Statistics:\n");
|
||||
spinlock_stats(&dcbspin, spin_reporter, pdcb);
|
||||
dcb_printf(pdcb, "Zombie Queue Lock Statistics:\n");
|
||||
spinlock_stats(&zombiespin, spin_reporter, pdcb);
|
||||
#endif
|
||||
dcb = allDCBs;
|
||||
while (dcb)
|
||||
{
|
||||
@ -1252,12 +1278,16 @@ DCB *dcb;
|
||||
dcb_printf(pdcb, "\tQueued write data: %d\n",
|
||||
gwbuf_length(dcb->writeq));
|
||||
dcb_printf(pdcb, "\tStatistics:\n");
|
||||
dcb_printf(pdcb, "\t\tNo. of Reads: %d\n", dcb->stats.n_reads);
|
||||
dcb_printf(pdcb, "\t\tNo. of Writes: %d\n", dcb->stats.n_writes);
|
||||
dcb_printf(pdcb, "\t\tNo. of Buffered Writes: %d\n", dcb->stats.n_buffered);
|
||||
dcb_printf(pdcb, "\t\tNo. of Accepts: %d\n", dcb->stats.n_accepts);
|
||||
dcb_printf(pdcb, "\t\tNo. of High Water Events: %d\n", dcb->stats.n_high_water);
|
||||
dcb_printf(pdcb, "\t\tNo. of Low Water Events: %d\n", dcb->stats.n_low_water);
|
||||
dcb_printf(pdcb, "\t\tNo. of Reads: %d\n", dcb->stats.n_reads);
|
||||
dcb_printf(pdcb, "\t\tNo. of Writes: %d\n", dcb->stats.n_writes);
|
||||
dcb_printf(pdcb, "\t\tNo. of Buffered Writes: %d\n", dcb->stats.n_buffered);
|
||||
dcb_printf(pdcb, "\t\tNo. of Accepts: %d\n", dcb->stats.n_accepts);
|
||||
dcb_printf(pdcb, "\t\tNo. of busy polls: %d\n", dcb->stats.n_busypolls);
|
||||
dcb_printf(pdcb, "\t\tNo. of read rechecks: %d\n", dcb->stats.n_readrechecks);
|
||||
dcb_printf(pdcb, "\t\tNo. of busy write polls: %d\n", dcb->stats.n_busywrpolls);
|
||||
dcb_printf(pdcb, "\t\tNo. of write rechecks: %d\n", dcb->stats.n_writerechecks);
|
||||
dcb_printf(pdcb, "\t\tNo. of High Water Events: %d\n", dcb->stats.n_high_water);
|
||||
dcb_printf(pdcb, "\t\tNo. of Low Water Events: %d\n", dcb->stats.n_low_water);
|
||||
if (dcb->flags & DCBF_CLONE)
|
||||
dcb_printf(pdcb, "\t\tDCB is a clone.\n");
|
||||
dcb = dcb->next;
|
||||
@ -1278,20 +1308,20 @@ DCB *dcb;
|
||||
spinlock_acquire(&dcbspin);
|
||||
dcb = allDCBs;
|
||||
dcb_printf(pdcb, "Descriptor Control Blocks\n");
|
||||
dcb_printf(pdcb, "------------+----------------------------+----------------------+----------\n");
|
||||
dcb_printf(pdcb, " %-10s | %-26s | %-20s | %s\n",
|
||||
dcb_printf(pdcb, "------------------+----------------------------+--------------------+----------\n");
|
||||
dcb_printf(pdcb, " %-16s | %-26s | %-18s | %s\n",
|
||||
"DCB", "State", "Service", "Remote");
|
||||
dcb_printf(pdcb, "------------+----------------------------+----------------------+----------\n");
|
||||
dcb_printf(pdcb, "------------------+----------------------------+--------------------+----------\n");
|
||||
while (dcb)
|
||||
{
|
||||
dcb_printf(pdcb, " %10p | %-26s | %-20s | %s\n",
|
||||
dcb_printf(pdcb, " %-16p | %-26s | %-18s | %s\n",
|
||||
dcb, gw_dcb_state2string(dcb->state),
|
||||
(dcb->session->service ?
|
||||
dcb->session->service->name : ""),
|
||||
|
||||
((dcb->session && dcb->session->service) ? dcb->session->service->name : ""),
|
||||
(dcb->remote ? dcb->remote : ""));
|
||||
dcb = dcb->next;
|
||||
}
|
||||
dcb_printf(pdcb, "------------+----------------------------+----------------------+----------\n\n");
|
||||
dcb_printf(pdcb, "------------------+----------------------------+--------------------+----------\n\n");
|
||||
spinlock_release(&dcbspin);
|
||||
}
|
||||
|
||||
@ -1308,16 +1338,16 @@ DCB *dcb;
|
||||
spinlock_acquire(&dcbspin);
|
||||
dcb = allDCBs;
|
||||
dcb_printf(pdcb, "Client Connections\n");
|
||||
dcb_printf(pdcb, "-----------------+------------+----------------------+------------\n");
|
||||
dcb_printf(pdcb, " %-15s | %-10s | %-20s | %s\n",
|
||||
dcb_printf(pdcb, "-----------------+------------------+----------------------+------------\n");
|
||||
dcb_printf(pdcb, " %-15s | %-16s | %-20s | %s\n",
|
||||
"Client", "DCB", "Service", "Session");
|
||||
dcb_printf(pdcb, "-----------------+------------+----------------------+------------\n");
|
||||
dcb_printf(pdcb, "-----------------+------------------+----------------------+------------\n");
|
||||
while (dcb)
|
||||
{
|
||||
if (dcb_isclient(dcb)
|
||||
&& dcb->dcb_role == DCB_ROLE_REQUEST_HANDLER)
|
||||
{
|
||||
dcb_printf(pdcb, " %-15s | %10p | %-20s | %10p\n",
|
||||
dcb_printf(pdcb, " %-15s | %16p | %-20s | %10p\n",
|
||||
(dcb->remote ? dcb->remote : ""),
|
||||
dcb, (dcb->session->service ?
|
||||
dcb->session->service->name : ""),
|
||||
@ -1325,7 +1355,7 @@ DCB *dcb;
|
||||
}
|
||||
dcb = dcb->next;
|
||||
}
|
||||
dcb_printf(pdcb, "-----------------+------------+----------------------+------------\n\n");
|
||||
dcb_printf(pdcb, "-----------------+------------------+----------------------+------------\n\n");
|
||||
spinlock_release(&dcbspin);
|
||||
}
|
||||
|
||||
@ -1342,16 +1372,18 @@ dprintDCB(DCB *pdcb, DCB *dcb)
|
||||
dcb_printf(pdcb, "DCB: %p\n", (void *)dcb);
|
||||
dcb_printf(pdcb, "\tDCB state: %s\n", gw_dcb_state2string(dcb->state));
|
||||
if (dcb->session && dcb->session->service)
|
||||
dcb_printf(pdcb, "\tService: %s\n",
|
||||
dcb_printf(pdcb, "\tService: %s\n",
|
||||
dcb->session->service->name);
|
||||
if (dcb->remote)
|
||||
dcb_printf(pdcb, "\tConnected to: %s\n", dcb->remote);
|
||||
if (dcb->user)
|
||||
dcb_printf(pdcb, "\tUsername: %s\n",
|
||||
dcb_printf(pdcb, "\tUsername: %s\n",
|
||||
dcb->user);
|
||||
dcb_printf(pdcb, "\tOwning Session: %p\n", dcb->session);
|
||||
if (dcb->writeq)
|
||||
dcb_printf(pdcb, "\tQueued write data: %d\n", gwbuf_length(dcb->writeq));
|
||||
if (dcb->delayq)
|
||||
dcb_printf(pdcb, "\tDelayed write data: %d\n", gwbuf_length(dcb->delayq));
|
||||
dcb_printf(pdcb, "\tStatistics:\n");
|
||||
dcb_printf(pdcb, "\t\tNo. of Reads: %d\n",
|
||||
dcb->stats.n_reads);
|
||||
@ -1361,12 +1393,30 @@ dprintDCB(DCB *pdcb, DCB *dcb)
|
||||
dcb->stats.n_buffered);
|
||||
dcb_printf(pdcb, "\t\tNo. of Accepts: %d\n",
|
||||
dcb->stats.n_accepts);
|
||||
dcb_printf(pdcb, "\t\tNo. of busy polls: %d\n", dcb->stats.n_busypolls);
|
||||
dcb_printf(pdcb, "\t\tNo. of read rechecks: %d\n", dcb->stats.n_readrechecks);
|
||||
dcb_printf(pdcb, "\t\tNo. of busy write polls: %d\n", dcb->stats.n_busywrpolls);
|
||||
dcb_printf(pdcb, "\t\tNo. of write rechecks: %d\n", dcb->stats.n_writerechecks);
|
||||
dcb_printf(pdcb, "\t\tNo. of High Water Events: %d\n",
|
||||
dcb->stats.n_high_water);
|
||||
dcb_printf(pdcb, "\t\tNo. of Low Water Events: %d\n",
|
||||
dcb->stats.n_low_water);
|
||||
if (dcb->flags & DCBF_CLONE)
|
||||
dcb_printf(pdcb, "\t\tDCB is a clone.\n");
|
||||
#if SPINLOCK_PROFILE
|
||||
dcb_printf(pdcb, "\tInitlock Statistics:\n");
|
||||
spinlock_stats(&dcb->dcb_initlock, spin_reporter, pdcb);
|
||||
dcb_printf(pdcb, "\tWrite Queue Lock Statistics:\n");
|
||||
spinlock_stats(&dcb->writeqlock, spin_reporter, pdcb);
|
||||
dcb_printf(pdcb, "\tDelay Queue Lock Statistics:\n");
|
||||
spinlock_stats(&dcb->delayqlock, spin_reporter, pdcb);
|
||||
dcb_printf(pdcb, "\tPollin Lock Statistics:\n");
|
||||
spinlock_stats(&dcb->pollinlock, spin_reporter, pdcb);
|
||||
dcb_printf(pdcb, "\tPollout Lock Statistics:\n");
|
||||
spinlock_stats(&dcb->polloutlock, spin_reporter, pdcb);
|
||||
dcb_printf(pdcb, "\tCallback Lock Statistics:\n");
|
||||
spinlock_stats(&dcb->cb_lock, spin_reporter, pdcb);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1719,10 +1769,7 @@ int gw_write(
|
||||
* @return Non-zero (true) if the callback was added
|
||||
*/
|
||||
int
|
||||
dcb_add_callback(
|
||||
DCB *dcb,
|
||||
DCB_REASON reason,
|
||||
int (*callback)(struct dcb *, DCB_REASON, void *), void *userdata)
|
||||
dcb_add_callback(DCB *dcb, DCB_REASON reason, int (*callback)(struct dcb *, DCB_REASON, void *), void *userdata)
|
||||
{
|
||||
DCB_CALLBACK *cb, *ptr;
|
||||
int rval = 1;
|
||||
@ -1754,7 +1801,10 @@ int rval = 1;
|
||||
return 0;
|
||||
}
|
||||
if (cb->next == NULL)
|
||||
{
|
||||
cb->next = ptr;
|
||||
break;
|
||||
}
|
||||
cb = cb->next;
|
||||
}
|
||||
spinlock_release(&dcb->cb_lock);
|
||||
@ -1775,7 +1825,7 @@ int rval = 1;
|
||||
* @return Non-zero (true) if the callback was removed
|
||||
*/
|
||||
int
|
||||
dcb_remove_callback(DCB *dcb, DCB_REASON reason, int (*callback)(struct dcb *, DCB_REASON), void *userdata)
|
||||
dcb_remove_callback(DCB *dcb, DCB_REASON reason, int (*callback)(struct dcb *, DCB_REASON, void *), void *userdata)
|
||||
{
|
||||
DCB_CALLBACK *cb, *pcb = NULL;
|
||||
int rval = 0;
|
||||
@ -1868,8 +1918,102 @@ int rval = 0;
|
||||
return rval;
|
||||
}
|
||||
|
||||
static DCB* dcb_get_next (
|
||||
DCB* dcb)
|
||||
/**
|
||||
* Called by the EPOLLIN event. Take care of calling the protocol
|
||||
* read entry point and managing multiple threads competing for the DCB
|
||||
* without blocking those threads.
|
||||
*
|
||||
* This mechanism does away with the need for a mutex on the EPOLLIN event
|
||||
* and instead implements a queuing mechanism in which nested events are
|
||||
* queued on the DCB such that when the thread processing the first event
|
||||
* returns it will read the queued event and process it. This allows the
|
||||
* thread that woudl otherwise have to wait to process the nested event
|
||||
* to return immediately and and process other events.
|
||||
*
|
||||
* @param dcb The DCB that has data available
|
||||
*/
|
||||
void
|
||||
dcb_pollin(DCB *dcb, int thread_id)
|
||||
{
|
||||
|
||||
spinlock_acquire(&dcb->pollinlock);
|
||||
if (dcb->pollinbusy == 0)
|
||||
{
|
||||
dcb->pollinbusy = 1;
|
||||
do {
|
||||
if (dcb->readcheck)
|
||||
{
|
||||
dcb->stats.n_readrechecks++;
|
||||
dcb_process_zombies(thread_id);
|
||||
}
|
||||
dcb->readcheck = 0;
|
||||
spinlock_release(&dcb->pollinlock);
|
||||
dcb->func.read(dcb);
|
||||
spinlock_acquire(&dcb->pollinlock);
|
||||
} while (dcb->readcheck);
|
||||
dcb->pollinbusy = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
dcb->stats.n_busypolls++;
|
||||
dcb->readcheck = 1;
|
||||
}
|
||||
spinlock_release(&dcb->pollinlock);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Called by the EPOLLOUT event. Take care of calling the protocol
|
||||
* write_ready entry point and managing multiple threads competing for the DCB
|
||||
* without blocking those threads.
|
||||
*
|
||||
* This mechanism does away with the need for a mutex on the EPOLLOUT event
|
||||
* and instead implements a queuing mechanism in which nested events are
|
||||
* queued on the DCB such that when the thread processing the first event
|
||||
* returns it will read the queued event and process it. This allows the
|
||||
* thread that would otherwise have to wait to process the nested event
|
||||
* to return immediately and and process other events.
|
||||
*
|
||||
* @param dcb The DCB thats available for writes
|
||||
*/
|
||||
void
|
||||
dcb_pollout(DCB *dcb, int thread_id)
|
||||
{
|
||||
|
||||
spinlock_acquire(&dcb->polloutlock);
|
||||
if (dcb->polloutbusy == 0)
|
||||
{
|
||||
dcb->polloutbusy = 1;
|
||||
do {
|
||||
if (dcb->writecheck)
|
||||
{
|
||||
dcb_process_zombies(thread_id);
|
||||
dcb->stats.n_writerechecks++;
|
||||
}
|
||||
dcb->writecheck = 0;
|
||||
spinlock_release(&dcb->polloutlock);
|
||||
dcb->func.write_ready(dcb);
|
||||
spinlock_acquire(&dcb->polloutlock);
|
||||
} while (dcb->writecheck);
|
||||
dcb->polloutbusy = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
dcb->stats.n_busywrpolls++;
|
||||
dcb->writecheck = 1;
|
||||
}
|
||||
spinlock_release(&dcb->polloutlock);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the next DCB in the list of all DCB's
|
||||
*
|
||||
* @param dcb The current DCB
|
||||
* @return The pointer to the next DCB or NULL if this is the last
|
||||
*/
|
||||
static DCB *
|
||||
dcb_get_next (DCB* dcb)
|
||||
{
|
||||
DCB* p;
|
||||
|
||||
@ -1903,8 +2047,13 @@ static DCB* dcb_get_next (
|
||||
return dcb;
|
||||
}
|
||||
|
||||
void dcb_call_foreach (
|
||||
DCB_REASON reason)
|
||||
/**
|
||||
* Call all the callbacks on all DCB's that match the reason given
|
||||
*
|
||||
* @param reason The DCB_REASON that triggers the callback
|
||||
*/
|
||||
void
|
||||
dcb_call_foreach(DCB_REASON reason)
|
||||
{
|
||||
switch (reason) {
|
||||
case DCB_REASON_CLOSE:
|
||||
|
@ -51,6 +51,7 @@
|
||||
#include <modules.h>
|
||||
#include <config.h>
|
||||
#include <poll.h>
|
||||
#include <housekeeper.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
@ -1510,6 +1511,12 @@ int main(int argc, char **argv)
|
||||
log_flush_thr = thread_start(
|
||||
log_flush_cb,
|
||||
(void *)&log_flush_timeout_ms);
|
||||
|
||||
/*
|
||||
* Start the housekeeper thread
|
||||
*/
|
||||
hkinit();
|
||||
|
||||
/*<
|
||||
* Start the polling threads, note this is one less than is
|
||||
* configured as the main thread will also poll.
|
||||
|
@ -28,7 +28,7 @@
|
||||
* and value and to free them.
|
||||
*
|
||||
* The hashtable is arrange as a set of linked lists, the number of linked
|
||||
* lists beign the hashsize as requested by the user. Entries are hashed by
|
||||
* lists being the hashsize as requested by the user. Entries are hashed by
|
||||
* calling the hash function that is passed in by the user, this is used as
|
||||
* an index into the array of linked lists, usign modulo hashsize.
|
||||
*
|
||||
|
195
server/core/housekeeper.c
Normal file
195
server/core/housekeeper.c
Normal file
@ -0,0 +1,195 @@
|
||||
/*
|
||||
* This file is distributed as part of the SkySQL Gateway. It is free
|
||||
* software: you can redistribute it and/or modify it under the terms of the
|
||||
* GNU General Public License as published by the Free Software Foundation,
|
||||
* version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Copyright SkySQL Ab 2014
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include <housekeeper.h>
|
||||
#include <thread.h>
|
||||
#include <spinlock.h>
|
||||
|
||||
/**
|
||||
* @file housekeeper.c Provide a mechanism to run periodic tasks
|
||||
*
|
||||
* @verbatim
|
||||
* Revision History
|
||||
*
|
||||
* Date Who Description
|
||||
* 29/08/14 Mark Riddoch Initial implementation
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
|
||||
/**
|
||||
* List of all tasks that need to be run
|
||||
*/
|
||||
static HKTASK *tasks = NULL;
|
||||
/**
|
||||
* Spinlock to protect the tasks list
|
||||
*/
|
||||
static SPINLOCK tasklock = SPINLOCK_INIT;
|
||||
|
||||
static void hkthread(void *);
|
||||
|
||||
/**
|
||||
* Initialise the housekeeper thread
|
||||
*/
|
||||
void
|
||||
hkinit()
|
||||
{
|
||||
thread_start(hkthread, NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new task to the housekeepers lists of tasks that should be
|
||||
* run periodically.
|
||||
*
|
||||
* The task will be first run frequency seconds after this call is
|
||||
* made and will the be executed repeatedly every frequency seconds
|
||||
* until the task is removed.
|
||||
*
|
||||
* Task names must be unique.
|
||||
*
|
||||
* @param name The unique name for this housekeeper task
|
||||
* @param taskfn The function to call for the task
|
||||
* @param data Data to pass to the task function
|
||||
* @param frequency How often to run the task, expressed in seconds
|
||||
* @return Return the tiem in seconds when the task will be first run if the task was added, otherwise 0
|
||||
*/
|
||||
int
|
||||
hktask_add(char *name, void (*taskfn)(void *), void *data, int frequency)
|
||||
{
|
||||
HKTASK *task, *ptr;
|
||||
|
||||
if ((task = (HKTASK *)malloc(sizeof(HKTASK))) == NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
if ((task->name = strdup(name)) == NULL)
|
||||
{
|
||||
free(task);
|
||||
return 0;
|
||||
}
|
||||
task->task = taskfn;
|
||||
task->data = data;
|
||||
task->frequency = frequency;
|
||||
task->nextdue = time(0) + frequency;
|
||||
task->next = NULL;
|
||||
spinlock_acquire(&tasklock);
|
||||
ptr = tasks;
|
||||
while (ptr && ptr->next)
|
||||
{
|
||||
if (strcmp(ptr->name, name) == 0)
|
||||
{
|
||||
spinlock_release(&tasklock);
|
||||
free(task->name);
|
||||
free(task);
|
||||
return 0;
|
||||
}
|
||||
ptr = ptr->next;
|
||||
}
|
||||
if (ptr)
|
||||
ptr->next = task;
|
||||
else
|
||||
tasks = task;
|
||||
spinlock_release(&tasklock);
|
||||
|
||||
return task->nextdue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a named task from the housekeepers task list
|
||||
*
|
||||
* @param name The task name to remove
|
||||
* @return Returns 0 if the task could not be removed
|
||||
*/
|
||||
int
|
||||
hktask_remove(char *name)
|
||||
{
|
||||
HKTASK *ptr, *lptr = NULL;
|
||||
|
||||
spinlock_acquire(&tasklock);
|
||||
ptr = tasks;
|
||||
while (ptr && strcmp(ptr->name, name) != 0)
|
||||
{
|
||||
lptr = ptr;
|
||||
ptr = ptr->next;
|
||||
}
|
||||
if (ptr && lptr)
|
||||
lptr->next = ptr->next;
|
||||
else if (ptr)
|
||||
tasks = ptr->next;
|
||||
spinlock_release(&tasklock);
|
||||
|
||||
if (ptr)
|
||||
{
|
||||
free(ptr->name);
|
||||
free(ptr);
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* The housekeeper thread implementation.
|
||||
*
|
||||
* This function is responsible for executing the housekeeper tasks.
|
||||
*
|
||||
* The implementation of the callng of the task functions is such that
|
||||
* the tasks are called without the tasklock spinlock being held. This
|
||||
* allows manipulation of the housekeeper task list during execution of
|
||||
* one of the tasks. The resutl is that upon completion of a task the
|
||||
* search for tasks to run must restart from the start of the queue.
|
||||
* It is vital that the task->nextdue tiem is updated before the task
|
||||
* is run.
|
||||
*
|
||||
* @param data Unused, here to satisfy the thread system
|
||||
*/
|
||||
void
|
||||
hkthread(void *data)
|
||||
{
|
||||
HKTASK *ptr;
|
||||
time_t now;
|
||||
void (*taskfn)(void *);
|
||||
void *taskdata;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
thread_millisleep(1000);
|
||||
now = time(0);
|
||||
spinlock_acquire(&tasklock);
|
||||
ptr = tasks;
|
||||
while (ptr)
|
||||
{
|
||||
if (ptr->nextdue <= now)
|
||||
{
|
||||
ptr->nextdue = now + ptr->frequency;
|
||||
taskfn = ptr->task;
|
||||
taskdata = ptr->data;
|
||||
spinlock_release(&tasklock);
|
||||
(*taskfn)(taskdata);
|
||||
spinlock_acquire(&tasklock);
|
||||
ptr = tasks;
|
||||
}
|
||||
else
|
||||
ptr = ptr->next;
|
||||
}
|
||||
spinlock_release(&tasklock);
|
||||
}
|
||||
}
|
@ -28,6 +28,8 @@
|
||||
#include <skygw_utils.h>
|
||||
#include <log_manager.h>
|
||||
#include <gw.h>
|
||||
#include <config.h>
|
||||
#include <housekeeper.h>
|
||||
|
||||
extern int lm_enabled_logfiles_bitmask;
|
||||
|
||||
@ -41,14 +43,63 @@ extern int lm_enabled_logfiles_bitmask;
|
||||
* 19/06/13 Mark Riddoch Initial implementation
|
||||
* 28/06/13 Mark Riddoch Added poll mask support and DCB
|
||||
* zombie management
|
||||
* 29/08/14 Mark Riddoch Addition of thread status data, load average
|
||||
* etc.
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
|
||||
static int epoll_fd = -1; /*< The epoll file descriptor */
|
||||
static int do_shutdown = 0; /*< Flag the shutdown of the poll subsystem */
|
||||
static int do_shutdown = 0; /*< Flag the shutdown of the poll subsystem */
|
||||
static GWBITMASK poll_mask;
|
||||
static simple_mutex_t epoll_wait_mutex; /*< serializes calls to epoll_wait */
|
||||
static int n_waiting = 0; /*< No. of threads in epoll_wait */
|
||||
|
||||
/**
|
||||
* Thread load average, this is the average number of descriptors in each
|
||||
* poll completion, a value of 1 or less is the ideal.
|
||||
*/
|
||||
static double load_average = 0.0;
|
||||
static int load_samples = 0;
|
||||
static int load_nfds = 0;
|
||||
static double current_avg = 0.0;
|
||||
static double *avg_samples = NULL;
|
||||
static int next_sample = 0;
|
||||
static int n_avg_samples;
|
||||
|
||||
/* Thread statistics data */
|
||||
static int n_threads; /*< No. of threads */
|
||||
|
||||
/**
|
||||
* Internal MaxScale thread states
|
||||
*/
|
||||
typedef enum { THREAD_STOPPED, THREAD_IDLE,
|
||||
THREAD_POLLING, THREAD_PROCESSING,
|
||||
THREAD_ZPROCESSING } THREAD_STATE;
|
||||
|
||||
/**
|
||||
* Thread data used to report the current state and activity related to
|
||||
* a thread
|
||||
*/
|
||||
typedef struct {
|
||||
THREAD_STATE state; /*< Current thread state */
|
||||
int n_fds; /*< No. of descriptors thread is processing */
|
||||
DCB *cur_dcb; /*< Current DCB being processed */
|
||||
uint32_t event; /*< Current event being processed */
|
||||
} THREAD_DATA;
|
||||
|
||||
static THREAD_DATA *thread_data = NULL; /*< Status of each thread */
|
||||
|
||||
/**
|
||||
* The number of buckets used to gather statistics about how many
|
||||
* descriptors where processed on each epoll completion.
|
||||
*
|
||||
* An array of wakeup counts is created, with the number of descriptors used
|
||||
* to index that array. Each time a completion occurs the n_fds - 1 value is
|
||||
* used to index this array and increment the count held there.
|
||||
* If n_fds - 1 >= MAXFDS then the count at MAXFDS -1 is incremented.
|
||||
*/
|
||||
#define MAXNFDS 10
|
||||
|
||||
/**
|
||||
* The polling statistics
|
||||
@ -60,8 +111,20 @@ static struct {
|
||||
int n_hup; /*< Number of hangup events */
|
||||
int n_accept; /*< Number of accept events */
|
||||
int n_polls; /*< Number of poll cycles */
|
||||
int n_nothreads; /*< Number of times no threads are polling */
|
||||
int n_fds[MAXNFDS]; /*< Number of wakeups with particular
|
||||
n_fds value */
|
||||
} pollStats;
|
||||
|
||||
/**
|
||||
* How frequently to call the poll_loadav function used to monitor the load
|
||||
* average of the poll subsystem.
|
||||
*/
|
||||
#define POLL_LOAD_FREQ 10
|
||||
/**
|
||||
* Periodic function to collect load data for average calculations
|
||||
*/
|
||||
static void poll_loadav(void *);
|
||||
|
||||
/**
|
||||
* Initialise the polling system we are using for the gateway.
|
||||
@ -71,6 +134,8 @@ static struct {
|
||||
void
|
||||
poll_init()
|
||||
{
|
||||
int i;
|
||||
|
||||
if (epoll_fd != -1)
|
||||
return;
|
||||
if ((epoll_fd = epoll_create(MAX_EVENTS)) == -1)
|
||||
@ -80,7 +145,23 @@ poll_init()
|
||||
}
|
||||
memset(&pollStats, 0, sizeof(pollStats));
|
||||
bitmask_init(&poll_mask);
|
||||
n_threads = config_threadcount();
|
||||
if ((thread_data =
|
||||
(THREAD_DATA *)malloc(n_threads * sizeof(THREAD_DATA))) != NULL)
|
||||
{
|
||||
for (i = 0; i < n_threads; i++)
|
||||
{
|
||||
thread_data[i].state = THREAD_STOPPED;
|
||||
}
|
||||
}
|
||||
simple_mutex_init(&epoll_wait_mutex, "epoll_wait_mutex");
|
||||
|
||||
hktask_add("Load Average", poll_loadav, NULL, POLL_LOAD_FREQ);
|
||||
n_avg_samples = 15 * 60 / POLL_LOAD_FREQ;
|
||||
avg_samples = (double *)malloc(sizeof(double *) * n_avg_samples);
|
||||
for (i = 0; i < n_avg_samples; i++)
|
||||
avg_samples[i] = 0.0;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
@ -100,7 +181,7 @@ poll_add_dcb(DCB *dcb)
|
||||
|
||||
CHK_DCB(dcb);
|
||||
|
||||
ev.events = EPOLLIN | EPOLLOUT | EPOLLET;
|
||||
ev.events = EPOLLIN | EPOLLOUT | EPOLLRDHUP | EPOLLHUP | EPOLLET;
|
||||
ev.data.ptr = dcb;
|
||||
|
||||
/*<
|
||||
@ -245,20 +326,26 @@ return_rc:
|
||||
void
|
||||
poll_waitevents(void *arg)
|
||||
{
|
||||
struct epoll_event events[MAX_EVENTS];
|
||||
int i, nfds;
|
||||
int thread_id = (int)arg;
|
||||
bool no_op = false;
|
||||
static bool process_zombies_only = false; /*< flag for all threads */
|
||||
DCB *zombies = NULL;
|
||||
struct epoll_event events[MAX_EVENTS];
|
||||
int i, nfds;
|
||||
int thread_id = (int)arg;
|
||||
bool no_op = false;
|
||||
static bool process_zombies_only = false; /*< flag for all threads */
|
||||
DCB *zombies = NULL;
|
||||
|
||||
/* Add this thread to the bitmask of running polling threads */
|
||||
bitmask_set(&poll_mask, thread_id);
|
||||
if (thread_data)
|
||||
{
|
||||
thread_data[thread_id].state = THREAD_IDLE;
|
||||
}
|
||||
|
||||
while (1)
|
||||
{
|
||||
atomic_add(&n_waiting, 1);
|
||||
#if BLOCKINGPOLL
|
||||
nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
|
||||
atomic_add(&n_waiting, -1);
|
||||
#else /* BLOCKINGPOLL */
|
||||
if (!no_op) {
|
||||
LOGIF(LD, (skygw_log_write(
|
||||
@ -272,9 +359,14 @@ poll_waitevents(void *arg)
|
||||
#if 0
|
||||
simple_mutex_lock(&epoll_wait_mutex, TRUE);
|
||||
#endif
|
||||
if (thread_data)
|
||||
{
|
||||
thread_data[thread_id].state = THREAD_POLLING;
|
||||
}
|
||||
|
||||
if ((nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, 0)) == -1)
|
||||
{
|
||||
atomic_add(&n_waiting, -1);
|
||||
int eno = errno;
|
||||
errno = 0;
|
||||
LOGIF(LD, (skygw_log_write(
|
||||
@ -288,6 +380,7 @@ poll_waitevents(void *arg)
|
||||
}
|
||||
else if (nfds == 0)
|
||||
{
|
||||
atomic_add(&n_waiting, -1);
|
||||
if (process_zombies_only) {
|
||||
#if 0
|
||||
simple_mutex_unlock(&epoll_wait_mutex);
|
||||
@ -310,6 +403,13 @@ poll_waitevents(void *arg)
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
atomic_add(&n_waiting, -1);
|
||||
}
|
||||
|
||||
if (n_waiting == 0)
|
||||
atomic_add(&pollStats.n_nothreads, 1);
|
||||
#if 0
|
||||
simple_mutex_unlock(&epoll_wait_mutex);
|
||||
#endif
|
||||
@ -322,6 +422,20 @@ poll_waitevents(void *arg)
|
||||
pthread_self(),
|
||||
nfds)));
|
||||
atomic_add(&pollStats.n_polls, 1);
|
||||
if (thread_data)
|
||||
{
|
||||
thread_data[thread_id].n_fds = nfds;
|
||||
thread_data[thread_id].cur_dcb = NULL;
|
||||
thread_data[thread_id].event = 0;
|
||||
thread_data[thread_id].state = THREAD_PROCESSING;
|
||||
}
|
||||
|
||||
pollStats.n_fds[(nfds < MAXNFDS ? (nfds - 1) : MAXNFDS - 1)]++;
|
||||
|
||||
load_average = (load_average * load_samples + nfds)
|
||||
/ (load_samples + 1);
|
||||
atomic_add(&load_samples, 1);
|
||||
atomic_add(&load_nfds, nfds);
|
||||
|
||||
for (i = 0; i < nfds; i++)
|
||||
{
|
||||
@ -329,6 +443,11 @@ poll_waitevents(void *arg)
|
||||
__uint32_t ev = events[i].events;
|
||||
|
||||
CHK_DCB(dcb);
|
||||
if (thread_data)
|
||||
{
|
||||
thread_data[thread_id].cur_dcb = dcb;
|
||||
thread_data[thread_id].event = ev;
|
||||
}
|
||||
|
||||
#if defined(SS_DEBUG)
|
||||
if (dcb_fake_write_ev[dcb->fd] != 0) {
|
||||
@ -364,6 +483,7 @@ poll_waitevents(void *arg)
|
||||
eno = gw_getsockerrno(dcb->fd);
|
||||
|
||||
if (eno == 0) {
|
||||
#if MUTEX_BLOCK
|
||||
simple_mutex_lock(
|
||||
&dcb->dcb_write_lock,
|
||||
true);
|
||||
@ -378,6 +498,11 @@ poll_waitevents(void *arg)
|
||||
dcb->dcb_write_active = FALSE;
|
||||
simple_mutex_unlock(
|
||||
&dcb->dcb_write_lock);
|
||||
#else
|
||||
atomic_add(&pollStats.n_write,
|
||||
1);
|
||||
dcb_pollout(dcb, thread_id);
|
||||
#endif
|
||||
} else {
|
||||
LOGIF(LD, (skygw_log_write(
|
||||
LOGFILE_DEBUG,
|
||||
@ -393,11 +518,13 @@ poll_waitevents(void *arg)
|
||||
}
|
||||
if (ev & EPOLLIN)
|
||||
{
|
||||
#if MUTEX_BLOCK
|
||||
simple_mutex_lock(&dcb->dcb_read_lock,
|
||||
true);
|
||||
ss_info_dassert(!dcb->dcb_read_active,
|
||||
"Read already active");
|
||||
dcb->dcb_read_active = TRUE;
|
||||
#endif
|
||||
|
||||
if (dcb->state == DCB_STATE_LISTENING)
|
||||
{
|
||||
@ -421,11 +548,17 @@ poll_waitevents(void *arg)
|
||||
dcb,
|
||||
dcb->fd)));
|
||||
atomic_add(&pollStats.n_read, 1);
|
||||
#if MUTEX_BLOCK
|
||||
dcb->func.read(dcb);
|
||||
#else
|
||||
dcb_pollin(dcb, thread_id);
|
||||
#endif
|
||||
}
|
||||
#if MUTEX_BLOCK
|
||||
dcb->dcb_read_active = FALSE;
|
||||
simple_mutex_unlock(
|
||||
&dcb->dcb_read_lock);
|
||||
#endif
|
||||
}
|
||||
if (ev & EPOLLERR)
|
||||
{
|
||||
@ -475,10 +608,33 @@ poll_waitevents(void *arg)
|
||||
atomic_add(&pollStats.n_hup, 1);
|
||||
dcb->func.hangup(dcb);
|
||||
}
|
||||
|
||||
if (ev & EPOLLRDHUP)
|
||||
{
|
||||
int eno = 0;
|
||||
eno = gw_getsockerrno(dcb->fd);
|
||||
|
||||
LOGIF(LD, (skygw_log_write(
|
||||
LOGFILE_DEBUG,
|
||||
"%lu [poll_waitevents] "
|
||||
"EPOLLRDHUP on dcb %p, fd %d. "
|
||||
"Errno %d, %s.",
|
||||
pthread_self(),
|
||||
dcb,
|
||||
dcb->fd,
|
||||
eno,
|
||||
strerror(eno))));
|
||||
atomic_add(&pollStats.n_hup, 1);
|
||||
dcb->func.hangup(dcb);
|
||||
}
|
||||
} /*< for */
|
||||
no_op = FALSE;
|
||||
}
|
||||
process_zombies:
|
||||
if (thread_data)
|
||||
{
|
||||
thread_data[thread_id].state = THREAD_ZPROCESSING;
|
||||
}
|
||||
zombies = dcb_process_zombies(thread_id);
|
||||
|
||||
if (zombies == NULL) {
|
||||
@ -491,9 +647,17 @@ poll_waitevents(void *arg)
|
||||
* Remove the thread from the bitmask of running
|
||||
* polling threads.
|
||||
*/
|
||||
if (thread_data)
|
||||
{
|
||||
thread_data[thread_id].state = THREAD_STOPPED;
|
||||
}
|
||||
bitmask_clear(&poll_mask, thread_id);
|
||||
return;
|
||||
}
|
||||
if (thread_data)
|
||||
{
|
||||
thread_data[thread_id].state = THREAD_IDLE;
|
||||
}
|
||||
} /*< while(1) */
|
||||
}
|
||||
|
||||
@ -525,10 +689,194 @@ poll_bitmask()
|
||||
void
|
||||
dprintPollStats(DCB *dcb)
|
||||
{
|
||||
dcb_printf(dcb, "Number of epoll cycles: %d\n", pollStats.n_polls);
|
||||
dcb_printf(dcb, "Number of read events: %d\n", pollStats.n_read);
|
||||
dcb_printf(dcb, "Number of write events: %d\n", pollStats.n_write);
|
||||
dcb_printf(dcb, "Number of error events: %d\n", pollStats.n_error);
|
||||
dcb_printf(dcb, "Number of hangup events: %d\n", pollStats.n_hup);
|
||||
dcb_printf(dcb, "Number of accept events: %d\n", pollStats.n_accept);
|
||||
int i;
|
||||
|
||||
dcb_printf(dcb, "Number of epoll cycles: %d\n",
|
||||
pollStats.n_polls);
|
||||
dcb_printf(dcb, "Number of read events: %d\n",
|
||||
pollStats.n_read);
|
||||
dcb_printf(dcb, "Number of write events: %d\n",
|
||||
pollStats.n_write);
|
||||
dcb_printf(dcb, "Number of error events: %d\n",
|
||||
pollStats.n_error);
|
||||
dcb_printf(dcb, "Number of hangup events: %d\n",
|
||||
pollStats.n_hup);
|
||||
dcb_printf(dcb, "Number of accept events: %d\n",
|
||||
pollStats.n_accept);
|
||||
dcb_printf(dcb, "Number of times no threads polling: %d\n",
|
||||
pollStats.n_nothreads);
|
||||
|
||||
dcb_printf(dcb, "No of poll completions with descriptors\n");
|
||||
dcb_printf(dcb, "\tNo. of descriptors\tNo. of poll completions.\n");
|
||||
for (i = 0; i < MAXNFDS - 1; i++)
|
||||
{
|
||||
dcb_printf(dcb, "\t%2d\t\t\t%d\n", i + 1, pollStats.n_fds[i]);
|
||||
}
|
||||
dcb_printf(dcb, "\t>= %d\t\t\t%d\n", MAXNFDS,
|
||||
pollStats.n_fds[MAXNFDS-1]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an EPOLL event mask into a printable string
|
||||
*
|
||||
* @param event The event mask
|
||||
* @return A string representation, the caller must free the string
|
||||
*/
|
||||
static char *
|
||||
event_to_string(uint32_t event)
|
||||
{
|
||||
char *str;
|
||||
|
||||
str = malloc(22); // 22 is max returned string length
|
||||
if (str == NULL)
|
||||
return NULL;
|
||||
*str = 0;
|
||||
if (event & EPOLLIN)
|
||||
{
|
||||
strcat(str, "IN");
|
||||
}
|
||||
if (event & EPOLLOUT)
|
||||
{
|
||||
if (*str)
|
||||
strcat(str, "|");
|
||||
strcat(str, "OUT");
|
||||
}
|
||||
if (event & EPOLLERR)
|
||||
{
|
||||
if (*str)
|
||||
strcat(str, "|");
|
||||
strcat(str, "ERR");
|
||||
}
|
||||
if (event & EPOLLHUP)
|
||||
{
|
||||
if (*str)
|
||||
strcat(str, "|");
|
||||
strcat(str, "HUP");
|
||||
}
|
||||
if (event & EPOLLRDHUP)
|
||||
{
|
||||
if (*str)
|
||||
strcat(str, "|");
|
||||
strcat(str, "RDHUP");
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print the thread status for all the polling threads
|
||||
*
|
||||
* @param dcb The DCB to send the thread status data
|
||||
*/
|
||||
void
|
||||
dShowThreads(DCB *dcb)
|
||||
{
|
||||
int i, j, n;
|
||||
char *state;
|
||||
double avg1 = 0.0, avg5 = 0.0, avg15 = 0.0;
|
||||
|
||||
|
||||
dcb_printf(dcb, "Polling Threads.\n\n");
|
||||
dcb_printf(dcb, "Historic Thread Load Average: %.2f.\n", load_average);
|
||||
dcb_printf(dcb, "Current Thread Load Average: %.2f.\n", current_avg);
|
||||
|
||||
/* Average all the samples to get the 15 minute average */
|
||||
for (i = 0; i < n_avg_samples; i++)
|
||||
avg15 += avg_samples[i];
|
||||
avg15 = avg15 / n_avg_samples;
|
||||
|
||||
/* Average the last third of the samples to get the 5 minute average */
|
||||
n = 5 * 60 / POLL_LOAD_FREQ;
|
||||
i = next_sample - (n + 1);
|
||||
if (i < 0)
|
||||
i += n_avg_samples;
|
||||
for (j = i; j < i + n; j++)
|
||||
avg5 += avg_samples[j % n_avg_samples];
|
||||
avg5 = (3 * avg5) / (n_avg_samples);
|
||||
|
||||
/* Average the last 15th of the samples to get the 1 minute average */
|
||||
n = 60 / POLL_LOAD_FREQ;
|
||||
i = next_sample - (n + 1);
|
||||
if (i < 0)
|
||||
i += n_avg_samples;
|
||||
for (j = i; j < i + n; j++)
|
||||
avg1 += avg_samples[j % n_avg_samples];
|
||||
avg1 = (15 * avg1) / (n_avg_samples);
|
||||
|
||||
dcb_printf(dcb, "15 Minute Average: %.2f, 5 Minute Average: %.2f, "
|
||||
"1 Minute Average: %.2f\n\n", avg15, avg5, avg1);
|
||||
|
||||
if (thread_data == NULL)
|
||||
return;
|
||||
dcb_printf(dcb, " ID | State | # fds | Descriptor | Event\n");
|
||||
dcb_printf(dcb, "----+------------+--------+------------------+---------------\n");
|
||||
for (i = 0; i < n_threads; i++)
|
||||
{
|
||||
switch (thread_data[i].state)
|
||||
{
|
||||
case THREAD_STOPPED:
|
||||
state = "Stopped";
|
||||
break;
|
||||
case THREAD_IDLE:
|
||||
state = "Idle";
|
||||
break;
|
||||
case THREAD_POLLING:
|
||||
state = "Polling";
|
||||
break;
|
||||
case THREAD_PROCESSING:
|
||||
state = "Processing";
|
||||
break;
|
||||
case THREAD_ZPROCESSING:
|
||||
state = "Collecting";
|
||||
break;
|
||||
}
|
||||
if (thread_data[i].state != THREAD_PROCESSING)
|
||||
dcb_printf(dcb,
|
||||
" %2d | %-10s | | |\n",
|
||||
i, state);
|
||||
else if (thread_data[i].cur_dcb == NULL)
|
||||
dcb_printf(dcb,
|
||||
" %2d | %-10s | %6d | |\n",
|
||||
i, state, thread_data[i].n_fds);
|
||||
else
|
||||
{
|
||||
char *event_string
|
||||
= event_to_string(thread_data[i].event);
|
||||
if (event_string == NULL)
|
||||
event_string = "??";
|
||||
dcb_printf(dcb,
|
||||
" %2d | %-10s | %6d | %-16p | %s\n",
|
||||
i, state, thread_data[i].n_fds,
|
||||
thread_data[i].cur_dcb, event_string);
|
||||
free(event_string);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The function used to calculate time based load data. This is called by the
|
||||
* housekeeper every POLL_LOAD_FREQ seconds.
|
||||
*
|
||||
* @param data Argument required by the housekeeper but not used here
|
||||
*/
|
||||
static void
|
||||
poll_loadav(void *data)
|
||||
{
|
||||
static int last_samples = 0, last_nfds = 0;
|
||||
int new_samples, new_nfds;
|
||||
|
||||
new_samples = load_samples - last_samples;
|
||||
new_nfds = load_nfds - last_nfds;
|
||||
last_samples = load_samples;
|
||||
last_nfds = load_nfds;
|
||||
|
||||
/* POLL_LOAD_FREQ average is... */
|
||||
if (new_samples)
|
||||
current_avg = new_nfds / new_samples;
|
||||
else
|
||||
current_avg = 0.0;
|
||||
avg_samples[next_sample] = current_avg;
|
||||
next_sample++;
|
||||
if (next_sample >= n_avg_samples)
|
||||
next_sample = 0;
|
||||
}
|
||||
|
@ -40,9 +40,12 @@ void
|
||||
spinlock_init(SPINLOCK *lock)
|
||||
{
|
||||
lock->lock = 0;
|
||||
#ifdef DEBUG
|
||||
#ifdef SPINLOCK_PROFILE
|
||||
lock->spins = 0;
|
||||
lock->acquired = 0;
|
||||
lock->waiting = 0;
|
||||
lock->max_waiting = 0;
|
||||
lock->contended = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -54,16 +57,29 @@ spinlock_init(SPINLOCK *lock)
|
||||
void
|
||||
spinlock_acquire(SPINLOCK *lock)
|
||||
{
|
||||
#ifdef SPINLOCK_PROFILE
|
||||
int spins = 0;
|
||||
|
||||
atomic_add(&(lock->waiting), 1);
|
||||
#endif
|
||||
while (atomic_add(&(lock->lock), 1) != 0)
|
||||
{
|
||||
atomic_add(&(lock->lock), -1);
|
||||
#ifdef DEBUG
|
||||
#ifdef SPINLOCK_PROFILE
|
||||
atomic_add(&(lock->spins), 1);
|
||||
spins++;
|
||||
#endif
|
||||
}
|
||||
#ifdef DEBUG
|
||||
#ifdef SPINLOCK_PROFILE
|
||||
if (spins)
|
||||
{
|
||||
lock->contended++;
|
||||
if (lock->maxspins < spins)
|
||||
lock->maxspins = spins;
|
||||
}
|
||||
lock->acquired++;
|
||||
lock->owner = THREAD_SHELF();
|
||||
atomic_add(&(lock->waiting), -1);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -71,7 +87,7 @@ spinlock_acquire(SPINLOCK *lock)
|
||||
* Acquire a spinlock if it is not already locked.
|
||||
*
|
||||
* @param lock The spinlock to acquire
|
||||
* @return True ifthe spinlock was acquired, otherwise false
|
||||
* @return True if the spinlock was acquired, otherwise false
|
||||
*/
|
||||
int
|
||||
spinlock_acquire_nowait(SPINLOCK *lock)
|
||||
@ -81,7 +97,7 @@ spinlock_acquire_nowait(SPINLOCK *lock)
|
||||
atomic_add(&(lock->lock), -1);
|
||||
return FALSE;
|
||||
}
|
||||
#ifdef DEBUG
|
||||
#ifdef SPINLOCK_PROFILE
|
||||
lock->acquired++;
|
||||
lock->owner = THREAD_SHELF();
|
||||
#endif
|
||||
@ -96,5 +112,45 @@ spinlock_acquire_nowait(SPINLOCK *lock)
|
||||
void
|
||||
spinlock_release(SPINLOCK *lock)
|
||||
{
|
||||
#ifdef SPINLOCK_PROFILE
|
||||
if (lock->waiting > lock->max_waiting)
|
||||
lock->max_waiting = lock->waiting;
|
||||
#endif
|
||||
atomic_add(&(lock->lock), -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Report statistics on a spinlock. This only has an effect if the
|
||||
* spinlock code has been compiled with the SPINLOCK_PROFILE option set.
|
||||
*
|
||||
* NB A callback function is used to return the data rather than
|
||||
* merely printing to a DCB in order to avoid a dependency on the DCB
|
||||
* form the spinlock code and also to facilitate other uses of the
|
||||
* statistics reporting.
|
||||
*
|
||||
* @param lock The spinlock to report on
|
||||
* @param reporter The callback function to pass the statistics to
|
||||
* @param hdl A handle that is passed to the reporter function
|
||||
*/
|
||||
void
|
||||
spinlock_stats(SPINLOCK *lock, void (*reporter)(void *, char *, int), void *hdl)
|
||||
{
|
||||
#ifdef SPINLOCK_PROFILE
|
||||
reporter(hdl, "Spinlock acquired", lock->acquired);
|
||||
if (lock->acquired)
|
||||
{
|
||||
reporter(hdl, "Total no. of spins", lock->spins);
|
||||
reporter(hdl, "Average no. of spins (overall)",
|
||||
lock->spins / lock->acquired);
|
||||
if (lock->contended)
|
||||
reporter(hdl, "Average no. of spins (when contended)",
|
||||
lock->spins / lock->contended);
|
||||
reporter(hdl, "Maximum no. of spins", lock->maxspins);
|
||||
reporter(hdl, "Maximim no. of blocked threads",
|
||||
lock->max_waiting);
|
||||
reporter(hdl, "Contended locks", lock->contended);
|
||||
reporter(hdl, "Contention percentage",
|
||||
(lock->contended * 100) / lock->acquired);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -83,6 +83,7 @@ typedef struct {
|
||||
*/
|
||||
typedef struct gwbuf {
|
||||
struct gwbuf *next; /*< Next buffer in a linked chain of buffers */
|
||||
struct gwbuf *tail; /*< Last buffer in a linked chain of buffers */
|
||||
void *start; /*< Start of the valid data */
|
||||
void *end; /*< First byte after the valid data */
|
||||
SHARED_BUF *sbuf; /*< The shared buffer with the real data */
|
||||
|
@ -53,6 +53,7 @@ struct service;
|
||||
* 07/02/2014 Massimiliano Pinto Added ipv4 data struct into for dcb
|
||||
* 07/05/2014 Mark Riddoch Addition of callback mechanism
|
||||
* 08/05/2014 Mark Riddoch Addition of writeq high and low watermarks
|
||||
* 27/08/2014 Mark Ridddoch Addition of write event queuing
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
@ -107,12 +108,16 @@ typedef struct gw_protocol {
|
||||
* The statitics gathered on a descriptor control block
|
||||
*/
|
||||
typedef struct dcbstats {
|
||||
int n_reads; /*< Number of reads on this descriptor */
|
||||
int n_writes; /*< Number of writes on this descriptor */
|
||||
int n_accepts; /*< Number of accepts on this descriptor */
|
||||
int n_buffered; /*< Number of buffered writes */
|
||||
int n_high_water; /*< Number of crosses of high water mark */
|
||||
int n_low_water; /*< Number of crosses of low water mark */
|
||||
int n_reads; /*< Number of reads on this descriptor */
|
||||
int n_writes; /*< Number of writes on this descriptor */
|
||||
int n_accepts; /*< Number of accepts on this descriptor */
|
||||
int n_buffered; /*< Number of buffered writes */
|
||||
int n_high_water; /*< Number of crosses of high water mark */
|
||||
int n_low_water; /*< Number of crosses of low water mark */
|
||||
int n_busypolls; /*< Number of read polls whiel reading */
|
||||
int n_readrechecks; /*< Number of rechecks for reads */
|
||||
int n_busywrpolls; /*< Number of write polls while writing */
|
||||
int n_writerechecks;/*< Number of rechecks for writes */
|
||||
} DCBSTATS;
|
||||
|
||||
/**
|
||||
@ -231,6 +236,13 @@ typedef struct dcb {
|
||||
DCBMM memdata; /**< The data related to DCB memory management */
|
||||
SPINLOCK cb_lock; /**< The lock for the callbacks linked list */
|
||||
DCB_CALLBACK *callbacks; /**< The list of callbacks for the DCB */
|
||||
SPINLOCK pollinlock;
|
||||
int pollinbusy;
|
||||
int readcheck;
|
||||
|
||||
SPINLOCK polloutlock;
|
||||
int polloutbusy;
|
||||
int writecheck;
|
||||
|
||||
unsigned int high_water; /**< High water mark */
|
||||
unsigned int low_water; /**< Low water mark */
|
||||
@ -259,6 +271,8 @@ int fail_accept_errno;
|
||||
#define DCB_BELOW_LOW_WATER(x) ((x)->low_water && (x)->writeqlen < (x)->low_water)
|
||||
#define DCB_ABOVE_HIGH_WATER(x) ((x)->high_water && (x)->writeqlen > (x)->high_water)
|
||||
|
||||
void dcb_pollin(DCB *, int);
|
||||
void dcb_pollout(DCB *, int);
|
||||
DCB *dcb_get_zombies(void);
|
||||
int gw_write(
|
||||
#if defined(SS_DEBUG)
|
||||
@ -289,7 +303,7 @@ void dcb_hashtable_stats(DCB *, void *); /**< Print statisitics */
|
||||
void dcb_add_to_zombieslist(DCB* dcb);
|
||||
int dcb_add_callback(DCB *, DCB_REASON, int (*)(struct dcb *, DCB_REASON, void *),
|
||||
void *);
|
||||
int dcb_remove_callback(DCB *, DCB_REASON, int (*)(struct dcb *, DCB_REASON),
|
||||
int dcb_remove_callback(DCB *, DCB_REASON, int (*)(struct dcb *, DCB_REASON, void *),
|
||||
void *);
|
||||
int dcb_isvalid(DCB *); /* Check the DCB is in the linked list */
|
||||
|
||||
|
50
server/include/housekeeper.h
Normal file
50
server/include/housekeeper.h
Normal file
@ -0,0 +1,50 @@
|
||||
#ifndef _HOUSEKEEPER_H
|
||||
#define _HOUSEKEEPER_H
|
||||
/*
|
||||
* This file is distributed as part of the SkySQL Gateway. It is free
|
||||
* software: you can redistribute it and/or modify it under the terms of the
|
||||
* GNU General Public License as published by the Free Software Foundation,
|
||||
* version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Copyright SkySQL Ab 2014
|
||||
*/
|
||||
#include <time.h>
|
||||
|
||||
/**
|
||||
* @file housekeeper.h A mechanism to have task run periodically
|
||||
*
|
||||
* @verbatim
|
||||
* Revision History
|
||||
*
|
||||
* Date Who Description
|
||||
* 29/08/14 Mark Riddoch Initial implementation
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
|
||||
/**
|
||||
* The housekeeper task list
|
||||
*/
|
||||
typedef struct hktask {
|
||||
char *name; /*< A simple task name */
|
||||
void (*task)(void *data); /*< The task to call */
|
||||
void *data; /*< Data to pass the task */
|
||||
int frequency; /*< How often to call the tasks (seconds) */
|
||||
time_t nextdue; /*< When the task should be next run */
|
||||
struct hktask
|
||||
*next; /*< Next task in the list */
|
||||
} HKTASK;
|
||||
|
||||
extern void hkinit();
|
||||
extern int hktask_add(char *name, void (*task)(void *), void *data, int frequency);
|
||||
extern int hktask_remove(char *name);
|
||||
#endif
|
@ -41,4 +41,5 @@ extern void poll_waitevents(void *);
|
||||
extern void poll_shutdown();
|
||||
extern GWBITMASK *poll_bitmask();
|
||||
extern void dprintPollStats(DCB *);
|
||||
extern void dShowThreads(DCB *dcb);
|
||||
#endif
|
||||
|
@ -21,7 +21,7 @@
|
||||
/**
|
||||
* @file spinlock.h
|
||||
*
|
||||
* Spinlock implementation for ther gateway.
|
||||
* Spinlock implementation for MaxScale.
|
||||
*
|
||||
* Spinlocks are cheap locks that can be used to protect short code blocks, they are
|
||||
* generally wasteful as any blocked threads will spin, consuming CPU cycles, waiting
|
||||
@ -31,12 +31,28 @@
|
||||
#include <thread.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define SPINLOCK_PROFILE 1
|
||||
|
||||
/**
|
||||
* The spinlock structure.
|
||||
*
|
||||
* In normal builds the structure merely contains a lock value which
|
||||
* is 0 if the spinlock is not taken and greater than zero if it is held.
|
||||
*
|
||||
* In builds with the SPINLOCK_PROFILE option set this structure also holds
|
||||
* a number of profile related fields that count the number of spins, number
|
||||
* of waiting threads and the number of times the lock has been acquired.
|
||||
*/
|
||||
typedef struct spinlock {
|
||||
int lock;
|
||||
#if DEBUG
|
||||
int spins;
|
||||
int acquired;
|
||||
THREAD owner;
|
||||
int lock; /*< Is the lock held? */
|
||||
#if SPINLOCK_PROFILE
|
||||
int spins; /*< Number of spins on this lock */
|
||||
int maxspins; /*< Max no of spins to acquire lock */
|
||||
int acquired; /*< No. of times lock was acquired */
|
||||
int waiting; /*< No. of threads acquiring this lock */
|
||||
int max_waiting; /*< Max no of threads waiting for lock */
|
||||
int contended; /*< No. of times acquire was contended */
|
||||
THREAD owner; /*< Last owner of this lock */
|
||||
#endif
|
||||
} SPINLOCK;
|
||||
|
||||
@ -47,8 +63,8 @@ typedef struct spinlock {
|
||||
#define FALSE false
|
||||
#endif
|
||||
|
||||
#if DEBUG
|
||||
#define SPINLOCK_INIT { 0, 0, 0, NULL }
|
||||
#if SPINLOCK_PROFILE
|
||||
#define SPINLOCK_INIT { 0, 0, 0, 0, 0, 0, 0, 0 }
|
||||
#else
|
||||
#define SPINLOCK_INIT { 0 }
|
||||
#endif
|
||||
@ -59,4 +75,6 @@ extern void spinlock_init(SPINLOCK *lock);
|
||||
extern void spinlock_acquire(SPINLOCK *lock);
|
||||
extern int spinlock_acquire_nowait(SPINLOCK *lock);
|
||||
extern void spinlock_release(SPINLOCK *lock);
|
||||
extern void spinlock_stats(SPINLOCK *lock,
|
||||
void (*reporter)(void *, char *, int), void *hdl);
|
||||
#endif
|
||||
|
367
server/modules/include/blr.h
Normal file
367
server/modules/include/blr.h
Normal file
@ -0,0 +1,367 @@
|
||||
#ifndef _BLR_H
|
||||
#define _BLR_H
|
||||
/*
|
||||
* This file is distributed as part of MaxScale. It is free
|
||||
* software: you can redistribute it and/or modify it under the terms of the
|
||||
* GNU General Public License as published by the Free Software Foundation,
|
||||
* version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Copyright SkySQL Ab 2014
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file blr.h - The binlog router header file
|
||||
*
|
||||
* @verbatim
|
||||
* Revision History
|
||||
*
|
||||
* Date Who Description
|
||||
* 02/04/14 Mark Riddoch Initial implementation
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
#include <dcb.h>
|
||||
#include <buffer.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#define BINLOG_FNAMELEN 16
|
||||
#define BLR_PROTOCOL "MySQLBackend"
|
||||
#define BINLOG_MAGIC { 0xfe, 0x62, 0x69, 0x6e }
|
||||
#define BINLOG_NAMEFMT "%s.%06d"
|
||||
#define BINLOG_NAME_ROOT "mysql-bin"
|
||||
|
||||
/**
|
||||
* High and Low water marks for the slave dcb. These values can be overriden
|
||||
* by the router options highwater and lowwater.
|
||||
*/
|
||||
#define DEF_LOW_WATER 20000
|
||||
#define DEF_HIGH_WATER 300000
|
||||
|
||||
/**
|
||||
* Some useful macros for examining the MySQL Response packets
|
||||
*/
|
||||
#define MYSQL_RESPONSE_OK(buf) (*((uint8_t *)GWBUF_DATA(buf) + 4) == 0x00)
|
||||
#define MYSQL_RESPONSE_EOF(buf) (*((uint8_t *)GWBUF_DATA(buf) + 4) == 0xfe)
|
||||
#define MYSQL_RESPONSE_ERR(buf) (*((uint8_t *)GWBUF_DATA(buf) + 4) == 0xff)
|
||||
#define MYSQL_ERROR_CODE(buf) (*((uint8_t *)GWBUF_DATA(buf) + 5))
|
||||
#define MYSQL_ERROR_MSG(buf) ((uint8_t *)GWBUF_DATA(buf) + 6)
|
||||
#define MYSQL_COMMAND(buf) (*((uint8_t *)GWBUF_DATA(buf) + 4))
|
||||
|
||||
/**
|
||||
* Slave statistics
|
||||
*/
|
||||
typedef struct {
|
||||
int n_events; /*< Number of events sent */
|
||||
int n_bursts; /*< Number of bursts sent */
|
||||
int n_requests; /*< Number of requests received */
|
||||
int n_flows; /*< Number of flow control restarts */
|
||||
int n_catchupnr; /*< No. of times catchup resulted in not entering loop */
|
||||
int n_alreadyupd;
|
||||
int n_upd;
|
||||
int n_cb;
|
||||
int n_cbna;
|
||||
int n_dcb;
|
||||
int n_above;
|
||||
int n_failed_read;
|
||||
int n_overrun;
|
||||
int n_actions[3];
|
||||
} SLAVE_STATS;
|
||||
|
||||
/**
|
||||
* The client session structure used within this router. This represents
|
||||
* the slaves that are replicating binlogs from MaxScale.
|
||||
*/
|
||||
typedef struct router_slave {
|
||||
#if defined(SS_DEBUG)
|
||||
skygw_chk_t rses_chk_top;
|
||||
#endif
|
||||
DCB *dcb; /*< The slave server DCB */
|
||||
int state; /*< The state of this slave */
|
||||
int binlog_pos; /*< Binlog position for this slave */
|
||||
char binlogfile[BINLOG_FNAMELEN+1];
|
||||
/*< Current binlog file for this slave */
|
||||
int serverid; /*< Server-id of the slave */
|
||||
char *hostname; /*< Hostname of the slave, if known */
|
||||
char *user; /*< Username if given */
|
||||
char *passwd; /*< Password if given */
|
||||
short port; /*< MySQL port */
|
||||
int nocrc; /*< Disable CRC */
|
||||
int overrun;
|
||||
uint32_t rank; /*< Replication rank */
|
||||
uint8_t seqno; /*< Replication dump sequence no */
|
||||
SPINLOCK catch_lock; /*< Event catchup lock */
|
||||
unsigned int cstate; /*< Catch up state */
|
||||
SPINLOCK rses_lock; /*< Protects rses_deleted */
|
||||
pthread_t pthread;
|
||||
struct router_instance
|
||||
*router; /*< Pointer to the owning router */
|
||||
struct router_slave *next;
|
||||
SLAVE_STATS stats; /*< Slave statistics */
|
||||
#if defined(SS_DEBUG)
|
||||
skygw_chk_t rses_chk_tail;
|
||||
#endif
|
||||
} ROUTER_SLAVE;
|
||||
|
||||
|
||||
/**
|
||||
* The statistics for this router instance
|
||||
*/
|
||||
typedef struct {
|
||||
int n_slaves; /*< Number slave sessions created */
|
||||
int n_reads; /*< Number of record reads */
|
||||
uint64_t n_binlogs; /*< Number of binlog records from master */
|
||||
uint64_t n_binlog_errors;/*< Number of binlog records from master */
|
||||
uint64_t n_rotates; /*< Number of binlog rotate events */
|
||||
uint64_t n_cachehits; /*< Number of hits on the binlog cache */
|
||||
uint64_t n_cachemisses; /*< Number of misses on the binlog cache */
|
||||
int n_registered; /*< Number of registered slaves */
|
||||
int n_masterstarts; /*< Number of times connection restarted */
|
||||
int n_delayedreconnects;
|
||||
int n_residuals; /*< Number of times residual data was buffered */
|
||||
int n_heartbeats; /*< Number of heartbeat messages */
|
||||
time_t lastReply;
|
||||
uint64_t n_fakeevents; /*< Fake events not written to disk */
|
||||
uint64_t n_artificial; /*< Artificial events not written to disk */
|
||||
uint64_t events[0x24]; /*< Per event counters */
|
||||
} ROUTER_STATS;
|
||||
|
||||
/**
|
||||
* Saved responses from the master that will be forwarded to slaves
|
||||
*/
|
||||
typedef struct {
|
||||
GWBUF *server_id; /*< Master server id */
|
||||
GWBUF *heartbeat; /*< Heartbeat period */
|
||||
GWBUF *chksum1; /*< Binlog checksum 1st response */
|
||||
GWBUF *chksum2; /*< Binlog checksum 2nd response */
|
||||
GWBUF *gtid_mode; /*< GTID Mode response */
|
||||
GWBUF *uuid; /*< Master UUID */
|
||||
GWBUF *setslaveuuid; /*< Set Slave UUID */
|
||||
GWBUF *setnames; /*< Set NAMES latin1 */
|
||||
GWBUF *utf8; /*< Set NAMES utf8 */
|
||||
GWBUF *select1; /*< select 1 */
|
||||
GWBUF *selectver; /*< select version() */
|
||||
uint8_t *fde_event; /*< Format Description Event */
|
||||
int fde_len; /*< Length of fde_event */
|
||||
} MASTER_RESPONSES;
|
||||
|
||||
/**
|
||||
* The binlog record structure. This contains the actual packet received from the
|
||||
* master, the binlog position of the data in the packet, a point to the data and
|
||||
* the length of the binlog record.
|
||||
*
|
||||
* This allows requests for binlog records in the cache to be serviced by simply
|
||||
* sending the exact same packet as was received by MaxScale from the master.
|
||||
* Items are written to the backing file as soon as they are received. The binlog
|
||||
* cache is flushed of old records periodically, releasing the GWBUF's back to the
|
||||
* free memory pool.
|
||||
*/
|
||||
typedef struct {
|
||||
unsigned long position; /*< binlog record position for this cache entry */
|
||||
GWBUF *pkt; /*< The packet received from the master */
|
||||
unsigned char *data; /*< Pointer to the data within the packet */
|
||||
unsigned int record_len; /*< Binlog record length */
|
||||
} BLCACHE_RECORD;
|
||||
|
||||
/**
|
||||
* The binlog cache. A cache exists for each file that hold cached bin log records.
|
||||
* Typically the router will hold two binlog caches, one for the current file and one
|
||||
* for the previous file.
|
||||
*/
|
||||
typedef struct {
|
||||
char filename[BINLOG_FNAMELEN+1];
|
||||
BLCACHE_RECORD *first;
|
||||
BLCACHE_RECORD *current;
|
||||
int cnt;
|
||||
} BLCACHE;
|
||||
|
||||
|
||||
/**
|
||||
* The per instance data for the router.
|
||||
*/
|
||||
typedef struct router_instance {
|
||||
SERVICE *service; /*< Pointer to the service using this router */
|
||||
ROUTER_SLAVE *slaves; /*< Link list of all the slave connections */
|
||||
SPINLOCK lock; /*< Spinlock for the instance data */
|
||||
char *uuid; /*< UUID for the router to use w/master */
|
||||
int masterid; /*< Server ID of the master */
|
||||
int serverid; /*< Server ID to use with master */
|
||||
char *user; /*< User name to use with master */
|
||||
char *password; /*< Password to use with master */
|
||||
char *fileroot; /*< Root of binlog filename */
|
||||
DCB *master; /*< DCB for master connection */
|
||||
DCB *client; /*< DCB for dummy client */
|
||||
SESSION *session; /*< Fake session for master connection */
|
||||
unsigned int master_state; /*< State of the master FSM */
|
||||
uint8_t lastEventReceived;
|
||||
GWBUF *residual; /*< Any residual binlog event */
|
||||
MASTER_RESPONSES saved_master; /*< Saved master responses */
|
||||
char binlog_name[BINLOG_FNAMELEN+1];
|
||||
/*< Name of the current binlog file */
|
||||
uint64_t binlog_position;
|
||||
/*< Current binlog position */
|
||||
int binlog_fd; /*< File descriptor of the binlog
|
||||
* file being written
|
||||
*/
|
||||
unsigned int low_water; /*< Low water mark for client DCB */
|
||||
unsigned int high_water; /*< High water mark for client DCB */
|
||||
BLCACHE *cache[2];
|
||||
ROUTER_STATS stats; /*< Statistics for this router */
|
||||
int active_logs;
|
||||
int reconnect_pending;
|
||||
int handling_threads;
|
||||
struct router_instance
|
||||
*next;
|
||||
} ROUTER_INSTANCE;
|
||||
|
||||
/**
|
||||
* Packet header for replication messages
|
||||
*/
|
||||
typedef struct rep_header {
|
||||
int payload_len; /*< Payload length (24 bits) */
|
||||
uint8_t seqno; /*< Response sequence number */
|
||||
uint8_t ok; /*< OK Byte from packet */
|
||||
uint32_t timestamp; /*< Timestamp - start of binlog record */
|
||||
uint8_t event_type; /*< Binlog event type */
|
||||
uint32_t serverid; /*< Server id of master */
|
||||
uint32_t event_size; /*< Size of header, post-header and body */
|
||||
uint32_t next_pos; /*< Position of next event */
|
||||
uint16_t flags; /*< Event flags */
|
||||
} REP_HEADER;
|
||||
|
||||
/**
|
||||
* State machine for the master to MaxScale replication
|
||||
*/
|
||||
#define BLRM_UNCONNECTED 0x0000
|
||||
#define BLRM_AUTHENTICATED 0x0001
|
||||
#define BLRM_TIMESTAMP 0x0002
|
||||
#define BLRM_SERVERID 0x0003
|
||||
#define BLRM_HBPERIOD 0x0004
|
||||
#define BLRM_CHKSUM1 0x0005
|
||||
#define BLRM_CHKSUM2 0x0006
|
||||
#define BLRM_GTIDMODE 0x0007
|
||||
#define BLRM_MUUID 0x0008
|
||||
#define BLRM_SUUID 0x0009
|
||||
#define BLRM_LATIN1 0x000A
|
||||
#define BLRM_UTF8 0x000B
|
||||
#define BLRM_SELECT1 0x000C
|
||||
#define BLRM_SELECTVER 0x000D
|
||||
#define BLRM_REGISTER 0x000E
|
||||
#define BLRM_BINLOGDUMP 0x000F
|
||||
|
||||
#define BLRM_MAXSTATE 0x000F
|
||||
|
||||
static char *blrm_states[] = { "Unconnected", "Authenticated", "Timestamp retrieval",
|
||||
"Server ID retrieval", "HeartBeat Period setup", "binlog checksum config",
|
||||
"binlog checksum rerieval", "GTID Mode retrieval", "Master UUID retrieval",
|
||||
"Set Slave UUID", "Set Names latin1", "Set Names utf8", "select 1",
|
||||
"select version()", "Register slave", "Binlog Dump" };
|
||||
|
||||
#define BLRS_CREATED 0x0000
|
||||
#define BLRS_UNREGISTERED 0x0001
|
||||
#define BLRS_REGISTERED 0x0002
|
||||
#define BLRS_DUMPING 0x0003
|
||||
|
||||
#define BLRS_MAXSTATE 0x0003
|
||||
|
||||
static char *blrs_states[] = { "Created", "Unregistered", "Registered",
|
||||
"Sending binlogs" };
|
||||
|
||||
/**
|
||||
* Slave catch-up status
|
||||
*/
|
||||
#define CS_READING 0x0001
|
||||
#define CS_INNERLOOP 0x0002
|
||||
#define CS_UPTODATE 0x0004
|
||||
#define CS_EXPECTCB 0x0008
|
||||
#define CS_DIST 0x0010
|
||||
#define CS_DISTLATCH 0x0020
|
||||
|
||||
/**
|
||||
* MySQL protocol OpCodes needed for replication
|
||||
*/
|
||||
#define COM_QUIT 0x01
|
||||
#define COM_QUERY 0x03
|
||||
#define COM_REGISTER_SLAVE 0x15
|
||||
#define COM_BINLOG_DUMP 0x12
|
||||
|
||||
/**
|
||||
* Binlog event types
|
||||
*/
|
||||
#define START_EVENT_V3 0x01
|
||||
#define QUERY_EVENT 0x02
|
||||
#define STOP_EVENT 0x03
|
||||
#define ROTATE_EVENT 0x04
|
||||
#define INTVAR_EVENT 0x05
|
||||
#define LOAD_EVENT 0x06
|
||||
#define SLAVE_EVENT 0x07
|
||||
#define CREATE_FILE_EVENT 0x08
|
||||
#define APPEND_BLOCK_EVENT 0x09
|
||||
#define EXEC_LOAD_EVENT 0x0A
|
||||
#define DELETE_FILE_EVENT 0x0B
|
||||
#define NEW_LOAD_EVENT 0x0C
|
||||
#define RAND_EVENT 0x0D
|
||||
#define USER_VAR_EVENT 0x0E
|
||||
#define FORMAT_DESCRIPTION_EVENT 0x0F
|
||||
#define XID_EVENT 0x10
|
||||
#define BEGIN_LOAD_QUERY_EVENT 0x11
|
||||
#define EXECUTE_LOAD_QUERY_EVENT 0x12
|
||||
#define TABLE_MAP_EVENT 0x13
|
||||
#define WRITE_ROWS_EVENTv0 0x14
|
||||
#define UPDATE_ROWS_EVENTv0 0x15
|
||||
#define DELETE_ROWS_EVENTv0 0x16
|
||||
#define WRITE_ROWS_EVENTv1 0x17
|
||||
#define UPDATE_ROWS_EVENTv1 0x18
|
||||
#define DELETE_ROWS_EVENTv1 0x19
|
||||
#define INCIDENT_EVENT 0x1A
|
||||
#define HEARTBEAT_EVENT 0x1B
|
||||
#define IGNORABLE_EVENT 0x1C
|
||||
#define ROWS_QUERY_EVENT 0x1D
|
||||
#define WRITE_ROWS_EVENTv2 0x1E
|
||||
#define UPDATE_ROWS_EVENTv2 0x1F
|
||||
#define DELETE_ROWS_EVENTv2 0x20
|
||||
#define GTID_EVENT 0x21
|
||||
#define ANONYMOUS_GTID_EVENT 0x22
|
||||
#define PREVIOUS_GTIDS_EVENT 0x23
|
||||
|
||||
/**
|
||||
* Binlog event flags
|
||||
*/
|
||||
#define LOG_EVENT_BINLOG_IN_USE_F 0x0001
|
||||
#define LOG_EVENT_FORCED_ROTATE_F 0x0002
|
||||
#define LOG_EVENT_THREAD_SPECIFIC_F 0x0004
|
||||
#define LOG_EVENT_SUPPRESS_USE_F 0x0008
|
||||
#define LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F 0x0010
|
||||
#define LOG_EVENT_ARTIFICIAL_F 0x0020
|
||||
#define LOG_EVENT_RELAY_LOG_F 0x0040
|
||||
#define LOG_EVENT_IGNORABLE_F 0x0080
|
||||
#define LOG_EVENT_NO_FILTER_F 0x0100
|
||||
#define LOG_EVENT_MTS_ISOLATE_F 0x0200
|
||||
|
||||
/*
|
||||
* Externals within the router
|
||||
*/
|
||||
extern void blr_start_master(ROUTER_INSTANCE *);
|
||||
extern void blr_master_response(ROUTER_INSTANCE *, GWBUF *);
|
||||
extern void blr_master_reconnect(ROUTER_INSTANCE *);
|
||||
|
||||
extern int blr_slave_request(ROUTER_INSTANCE *, ROUTER_SLAVE *, GWBUF *);
|
||||
extern void blr_slave_rotate(ROUTER_SLAVE *slave, uint8_t *ptr);
|
||||
extern int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave);
|
||||
extern void blr_init_cache(ROUTER_INSTANCE *);
|
||||
|
||||
extern void blr_file_init(ROUTER_INSTANCE *);
|
||||
extern int blr_open_binlog(ROUTER_INSTANCE *, char *);
|
||||
extern void blr_write_binlog_record(ROUTER_INSTANCE *, REP_HEADER *,uint8_t *);
|
||||
extern void blr_file_rotate(ROUTER_INSTANCE *, char *, uint64_t);
|
||||
extern void blr_file_flush(ROUTER_INSTANCE *);
|
||||
extern GWBUF *blr_read_binlog(int, unsigned int, REP_HEADER *);
|
||||
#endif
|
@ -235,6 +235,7 @@ maxscaled_error(DCB *dcb)
|
||||
static int
|
||||
maxscaled_hangup(DCB *dcb)
|
||||
{
|
||||
dcb_close(dcb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -313,9 +314,11 @@ maxscaled_close(DCB *dcb)
|
||||
MAXSCALED *maxscaled = dcb->protocol;
|
||||
|
||||
if (maxscaled && maxscaled->username)
|
||||
{
|
||||
free(maxscaled->username);
|
||||
maxscaled->username = NULL;
|
||||
}
|
||||
|
||||
dcb_close(dcb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -497,7 +497,7 @@ static int gw_read_backend_event(DCB *dcb) {
|
||||
{
|
||||
if (nbytes_read < 5)
|
||||
{
|
||||
gwbuf_append(dcb->dcb_readqueue, read_buffer);
|
||||
dcb->dcb_readqueue = gwbuf_append(dcb->dcb_readqueue, read_buffer);
|
||||
rc = 0;
|
||||
goto return_rc;
|
||||
}
|
||||
|
@ -798,7 +798,7 @@ int gw_read_client_event(
|
||||
}
|
||||
|
||||
/** succeed */
|
||||
if (rc == 1) {
|
||||
if (rc) {
|
||||
rc = 0; /**< here '0' means success */
|
||||
} else {
|
||||
GWBUF* errbuf;
|
||||
|
@ -51,6 +51,8 @@ MODULES= libdebugcli.so libreadconnroute.so libtestroute.so libcli.so
|
||||
|
||||
|
||||
all: $(MODULES)
|
||||
(cd readwritesplit; make)
|
||||
(cd binlog; make)
|
||||
|
||||
libtestroute.so: $(TESTOBJ)
|
||||
$(CC) $(LDFLAGS) $(TESTOBJ) $(LIBS) -o $@
|
||||
@ -73,19 +75,23 @@ libreadwritesplit.so:
|
||||
clean:
|
||||
$(DEL) $(OBJ) $(MODULES)
|
||||
(cd readwritesplit; touch depend.mk; make clean)
|
||||
(cd binlog; touch depend.mk; make clean)
|
||||
|
||||
tags:
|
||||
ctags $(SRCS) $(HDRS)
|
||||
(cd readwritesplit; make tags)
|
||||
(cd binlog; make tags)
|
||||
|
||||
depend:
|
||||
@$(DEL) depend.mk
|
||||
cc -M $(CFLAGS) $(SRCS) > depend.mk
|
||||
(cd readwritesplit; touch depend.mk ; make depend)
|
||||
(cd binlog; touch depend.mk ; make depend)
|
||||
|
||||
install: $(MODULES)
|
||||
install -D $(MODULES) $(DEST)/modules
|
||||
(cd readwritesplit; make DEST=$(DEST) install)
|
||||
(cd binlog; make DEST=$(DEST) install)
|
||||
|
||||
cleantests:
|
||||
$(MAKE) -C readwritesplit/test cleantests
|
||||
|
65
server/modules/routing/binlog/Makefile
Normal file
65
server/modules/routing/binlog/Makefile
Normal file
@ -0,0 +1,65 @@
|
||||
# This file is distributed as part of the SkySQL Gateway. It is free
|
||||
# software: you can redistribute it and/or modify it under the terms of the
|
||||
# GNU General Public License as published by the Free Software Foundation,
|
||||
# version 2.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
# details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#
|
||||
# Copyright SkySQL Ab 2013
|
||||
#
|
||||
# Revision History
|
||||
# Date Who Description
|
||||
# 2/04/14 Mark Riddoch Initial framework put in place
|
||||
|
||||
include ../../../../build_gateway.inc
|
||||
|
||||
LOGPATH := $(ROOT_PATH)/log_manager
|
||||
UTILSPATH := $(ROOT_PATH)/utils
|
||||
QCLASSPATH := $(ROOT_PATH)/query_classifier
|
||||
|
||||
CC=cc
|
||||
CFLAGS=-c -fPIC -I/usr/include -I../../include -I../../../include \
|
||||
-I$(LOGPATH) -I$(UTILSPATH) -I$(QCLASSPATH) \
|
||||
$(MYSQL_HEADERS) -Wall -g
|
||||
|
||||
include ../../../../makefile.inc
|
||||
|
||||
LDFLAGS=-shared -L$(LOGPATH) -L$(QCLASSPATH) -L$(EMBEDDED_LIB) \
|
||||
-Wl,-rpath,$(DEST)/lib \
|
||||
-Wl,-rpath,$(LOGPATH) -Wl,-rpath,$(UTILSPATH) -Wl,-rpath,$(QCLASSPATH) \
|
||||
-Wl,-rpath,$(EMBEDDED_LIB)
|
||||
|
||||
SRCS=blr.c blr_master.c blr_cache.c blr_slave.c blr_file.c
|
||||
OBJ=$(SRCS:.c=.o)
|
||||
LIBS=-lssl -pthread -llog_manager -lmysqld
|
||||
MODULES=libbinlogrouter.so
|
||||
|
||||
all: $(MODULES)
|
||||
|
||||
$(MODULES): $(OBJ)
|
||||
$(CC) $(LDFLAGS) $(OBJ) $(UTILSPATH)/skygw_utils.o $(LIBS) -o $@
|
||||
|
||||
.c.o:
|
||||
$(CC) $(CFLAGS) $< -o $@
|
||||
|
||||
clean:
|
||||
rm -f $(OBJ) $(MODULES)
|
||||
|
||||
tags:
|
||||
ctags $(SRCS) $(HDRS)
|
||||
|
||||
depend:
|
||||
@rm -f depend.mk
|
||||
cc -M $(CFLAGS) $(SRCS) > depend.mk
|
||||
|
||||
install: $(MODULES)
|
||||
install -D $(MODULES) $(DEST)/MaxScale/modules
|
||||
|
||||
include depend.mk
|
53
server/modules/routing/binlog/README
Normal file
53
server/modules/routing/binlog/README
Normal file
@ -0,0 +1,53 @@
|
||||
The binlog router is not a "normal" MaxScale router, it is not
|
||||
designed to be used to route client requests to a database in the
|
||||
usual proxy fashion. Rather it is designed to allow MaxScale to be
|
||||
used as a relay server in a MySQL replication environment.
|
||||
|
||||
In this environment MaxScale sits between a master MySQL server and
|
||||
a set of slave servers. The slaves servers execute a change master
|
||||
to the MaxScale server, otehrwise they are configured in exactly
|
||||
the same way as a normal MySQL slave server.
|
||||
|
||||
The master server configuration is unaltered, it simply sees a
|
||||
single slave server.
|
||||
|
||||
MaxScale is configured as usual, with a service definition that
|
||||
references the binlog router. The major configuration option to
|
||||
consider is the router_options paramter, in the binlog router this
|
||||
provides the binlog specific configuration parameters.
|
||||
|
||||
uuid=
|
||||
This is the UUID that MaxScale uses when it connects
|
||||
to the real master. It will report the master's
|
||||
UUID to slaves that connect to it.
|
||||
|
||||
server-id=
|
||||
The server-id that MaxScale uses when it connects
|
||||
to the real master server. Again it will reports
|
||||
the master's server-id to the slaves that connect
|
||||
to it.
|
||||
user=
|
||||
The user that MaxScale uses to login to the real
|
||||
master
|
||||
password=
|
||||
The password that MaxScale uses to login to the
|
||||
real master
|
||||
master-id=
|
||||
The server-id of the real master. MaxScale should
|
||||
get this by sending a query, but at the moment it
|
||||
is in the configuration file for ease of implementation
|
||||
|
||||
|
||||
An example binlog service configuration is shown below:
|
||||
|
||||
[Binlog Service]
|
||||
type=service
|
||||
router=binlogrouter
|
||||
servers=master
|
||||
router_options=uuid=f12fcb7f-b97b-11e3-bc5e-0401152c4c22,server-id=3,user=repl,password=slavepass,master-id=1
|
||||
user=maxscale
|
||||
passwd=Mhu87p2D
|
||||
|
||||
The servers list for a binlog router service should contain just
|
||||
the master server. In future a list will be given and the monitor
|
||||
used to determine which server is the current master server.
|
13
server/modules/routing/binlog/STATUS
Normal file
13
server/modules/routing/binlog/STATUS
Normal file
@ -0,0 +1,13 @@
|
||||
The binlog router contained here is a prototype implementation and
|
||||
should not be consider as production ready.
|
||||
|
||||
The router has been written and tested with MySQL 5.6 as a reference
|
||||
for the replication behaviour, more investigation and implementation
|
||||
is likely to be needed in order to use other versions of MySQL,
|
||||
MariaDB or Percona Server.
|
||||
|
||||
To Do List:
|
||||
|
||||
1. The router does not implement the replication heartbeat mechanism.
|
||||
|
||||
2. Performance measurements have yet to be made.
|
770
server/modules/routing/binlog/blr.c
Normal file
770
server/modules/routing/binlog/blr.c
Normal file
@ -0,0 +1,770 @@
|
||||
/*
|
||||
* This file is distributed as part of MaxScale. It is free
|
||||
* software: you can redistribute it and/or modify it under the terms of the
|
||||
* GNU General Public License as published by the Free Software Foundation,
|
||||
* version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Copyright SkySQL Ab 2014
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file blr.c - binlog router, allows MaxScale to act as an intermediatory for replication
|
||||
*
|
||||
* The binlog router is designed to be used in replication environments to
|
||||
* increase the replication fanout of a master server. It provides a transparant
|
||||
* mechanism to read the binlog entries for multiple slaves while requiring
|
||||
* only a single connection to the actual master to support the slaves.
|
||||
*
|
||||
* The current prototype implement is designed to support MySQL 5.6 and has
|
||||
* a number of limitations. This prototype is merely a proof of concept and
|
||||
* should not be considered production ready.
|
||||
*
|
||||
* @verbatim
|
||||
* Revision History
|
||||
*
|
||||
* Date Who Description
|
||||
* 02/04/2014 Mark Riddoch Initial implementation
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <service.h>
|
||||
#include <server.h>
|
||||
#include <router.h>
|
||||
#include <atomic.h>
|
||||
#include <spinlock.h>
|
||||
#include <blr.h>
|
||||
#include <dcb.h>
|
||||
#include <spinlock.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <skygw_types.h>
|
||||
#include <skygw_utils.h>
|
||||
#include <log_manager.h>
|
||||
|
||||
#include <mysql_client_server_protocol.h>
|
||||
|
||||
extern int lm_enabled_logfiles_bitmask;
|
||||
|
||||
static char *version_str = "V1.0.6";
|
||||
|
||||
/* The router entry points */
|
||||
static ROUTER *createInstance(SERVICE *service, char **options);
|
||||
static void *newSession(ROUTER *instance, SESSION *session);
|
||||
static void closeSession(ROUTER *instance, void *router_session);
|
||||
static void freeSession(ROUTER *instance, void *router_session);
|
||||
static int routeQuery(ROUTER *instance, void *router_session, GWBUF *queue);
|
||||
static void diagnostics(ROUTER *instance, DCB *dcb);
|
||||
static void clientReply(
|
||||
ROUTER *instance,
|
||||
void *router_session,
|
||||
GWBUF *queue,
|
||||
DCB *backend_dcb);
|
||||
static void errorReply(
|
||||
ROUTER *instance,
|
||||
void *router_session,
|
||||
GWBUF *message,
|
||||
DCB *backend_dcb,
|
||||
error_action_t action,
|
||||
bool *succp);
|
||||
static uint8_t getCapabilities (ROUTER* inst, void* router_session);
|
||||
|
||||
|
||||
/** The module object definition */
|
||||
static ROUTER_OBJECT MyObject = {
|
||||
createInstance,
|
||||
newSession,
|
||||
closeSession,
|
||||
freeSession,
|
||||
routeQuery,
|
||||
diagnostics,
|
||||
clientReply,
|
||||
errorReply,
|
||||
getCapabilities
|
||||
};
|
||||
|
||||
static bool rses_begin_locked_router_action(ROUTER_SLAVE *);
|
||||
static void rses_end_locked_router_action(ROUTER_SLAVE *);
|
||||
|
||||
static SPINLOCK instlock;
|
||||
static ROUTER_INSTANCE *instances;
|
||||
|
||||
/**
|
||||
* Implementation of the mandatory version entry point
|
||||
*
|
||||
* @return version string of the module
|
||||
*/
|
||||
char *
|
||||
version()
|
||||
{
|
||||
return version_str;
|
||||
}
|
||||
|
||||
/**
|
||||
* The module initialisation routine, called when the module
|
||||
* is first loaded.
|
||||
*/
|
||||
void
|
||||
ModuleInit()
|
||||
{
|
||||
LOGIF(LM, (skygw_log_write(
|
||||
LOGFILE_MESSAGE,
|
||||
"Initialise binlog router module %s.\n", version_str)));
|
||||
spinlock_init(&instlock);
|
||||
instances = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* The module entry point routine. It is this routine that
|
||||
* must populate the structure that is referred to as the
|
||||
* "module object", this is a structure with the set of
|
||||
* external entry points for this module.
|
||||
*
|
||||
* @return The module object
|
||||
*/
|
||||
ROUTER_OBJECT *
|
||||
GetModuleObject()
|
||||
{
|
||||
return &MyObject;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of the router for a particular service
|
||||
* within MaxScale.
|
||||
*
|
||||
* The process of creating the instance causes the router to register
|
||||
* with the master server and begin replication of the binlogs from
|
||||
* the master server to MaxScale.
|
||||
*
|
||||
* @param service The service this router is being create for
|
||||
* @param options An array of options for this query router
|
||||
*
|
||||
* @return The instance data for this new instance
|
||||
*/
|
||||
static ROUTER *
|
||||
createInstance(SERVICE *service, char **options)
|
||||
{
|
||||
ROUTER_INSTANCE *inst;
|
||||
char *value;
|
||||
int i;
|
||||
|
||||
if ((inst = calloc(1, sizeof(ROUTER_INSTANCE))) == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
memset(&inst->stats, 0, sizeof(ROUTER_STATS));
|
||||
memset(&inst->saved_master, 0, sizeof(MASTER_RESPONSES));
|
||||
|
||||
inst->service = service;
|
||||
spinlock_init(&inst->lock);
|
||||
|
||||
inst->low_water = DEF_LOW_WATER;
|
||||
inst->high_water = DEF_HIGH_WATER;
|
||||
|
||||
/*
|
||||
* We only support one server behind this router, since the server is
|
||||
* the master from which we replicate binlog records. Therefore check
|
||||
* that only one server has been defined.
|
||||
*
|
||||
* A later improvement will be to define multiple servers and have the
|
||||
* router use the information that is supplied by the monitor to find
|
||||
* which of these servers is currently the master and replicate from
|
||||
* that server.
|
||||
*/
|
||||
if (service->databases == NULL || service->databases->nextdb != NULL)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"Error : Exactly one database server may be "
|
||||
"for use with the binlog router.")));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Process the options.
|
||||
* We have an array of attrbute values passed to us that we must
|
||||
* examine. Supported attributes are:
|
||||
* uuid=
|
||||
* server-id=
|
||||
* user=
|
||||
* password=
|
||||
* master-id=
|
||||
* filestem=
|
||||
* lowwater=
|
||||
* highwater=
|
||||
*/
|
||||
if (options)
|
||||
{
|
||||
for (i = 0; options[i]; i++)
|
||||
{
|
||||
if ((value = strchr(options[i], '=')) == NULL)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR, "Warning : Unsupported router "
|
||||
"option %s for binlog router.",
|
||||
options[i])));
|
||||
}
|
||||
else
|
||||
{
|
||||
*value = 0;
|
||||
value++;
|
||||
if (strcmp(options[i], "uuid") == 0)
|
||||
{
|
||||
inst->uuid = strdup(value);
|
||||
}
|
||||
else if (strcmp(options[i], "server-id") == 0)
|
||||
{
|
||||
inst->serverid = atoi(value);
|
||||
}
|
||||
else if (strcmp(options[i], "user") == 0)
|
||||
{
|
||||
inst->user = strdup(value);
|
||||
}
|
||||
else if (strcmp(options[i], "password") == 0)
|
||||
{
|
||||
inst->password = strdup(value);
|
||||
}
|
||||
else if (strcmp(options[i], "master-id") == 0)
|
||||
{
|
||||
inst->masterid = atoi(value);
|
||||
}
|
||||
else if (strcmp(options[i], "filestem") == 0)
|
||||
{
|
||||
inst->fileroot = strdup(value);
|
||||
}
|
||||
else if (strcmp(options[i], "lowwater") == 0)
|
||||
{
|
||||
inst->low_water = atoi(value);
|
||||
}
|
||||
else if (strcmp(options[i], "highwater") == 0)
|
||||
{
|
||||
inst->high_water = atoi(value);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"Warning : Unsupported router "
|
||||
"option %s for binlog router.",
|
||||
options[i])));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (inst->fileroot == NULL)
|
||||
inst->fileroot = strdup(BINLOG_NAME_ROOT);
|
||||
}
|
||||
|
||||
/*
|
||||
* We have completed the creation of the instance data, so now
|
||||
* insert this router instance into the linked list of routers
|
||||
* that have been created with this module.
|
||||
*/
|
||||
spinlock_acquire(&instlock);
|
||||
inst->next = instances;
|
||||
instances = inst;
|
||||
spinlock_release(&instlock);
|
||||
|
||||
inst->active_logs = 0;
|
||||
inst->reconnect_pending = 0;
|
||||
inst->handling_threads = 0;
|
||||
inst->residual = NULL;
|
||||
inst->slaves = NULL;
|
||||
inst->next = NULL;
|
||||
|
||||
/*
|
||||
* Initialise the binlog file and position
|
||||
*/
|
||||
blr_file_init(inst);
|
||||
LOGIF(LT, (skygw_log_write(
|
||||
LOGFILE_TRACE,
|
||||
"Binlog router: current binlog file is: %s, current position %u\n",
|
||||
inst->binlog_name, inst->binlog_position)));
|
||||
|
||||
/*
|
||||
* Initialise the binlog cache for this router instance
|
||||
*/
|
||||
blr_init_cache(inst);
|
||||
|
||||
/*
|
||||
* Now start the replication from the master to MaxScale
|
||||
*/
|
||||
blr_start_master(inst);
|
||||
|
||||
return (ROUTER *)inst;
|
||||
}
|
||||
|
||||
/**
|
||||
* Associate a new session with this instance of the router.
|
||||
*
|
||||
* In the case of the binlog router a new session equates to a new slave
|
||||
* connecting to MaxScale and requesting binlog records. We need to go
|
||||
* through the slave registration process for this new slave.
|
||||
*
|
||||
* @param instance The router instance data
|
||||
* @param session The session itself
|
||||
* @return Session specific data for this session
|
||||
*/
|
||||
static void *
|
||||
newSession(ROUTER *instance, SESSION *session)
|
||||
{
|
||||
ROUTER_INSTANCE *inst = (ROUTER_INSTANCE *)instance;
|
||||
ROUTER_SLAVE *slave;
|
||||
|
||||
LOGIF(LD, (skygw_log_write_flush(
|
||||
LOGFILE_DEBUG,
|
||||
"binlog router: %lu [newSession] new router session with "
|
||||
"session %p, and inst %p.",
|
||||
pthread_self(),
|
||||
session,
|
||||
inst)));
|
||||
|
||||
|
||||
if ((slave = (ROUTER_SLAVE *)calloc(1, sizeof(ROUTER_SLAVE))) == NULL)
|
||||
{
|
||||
LOGIF(LD, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR,
|
||||
"Insufficient memory to create new slave session for binlog router")));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(SS_DEBUG)
|
||||
slave->rses_chk_top = CHK_NUM_ROUTER_SES;
|
||||
slave->rses_chk_tail = CHK_NUM_ROUTER_SES;
|
||||
#endif
|
||||
|
||||
memset(&slave->stats, 0, sizeof(SLAVE_STATS));
|
||||
atomic_add(&inst->stats.n_slaves, 1);
|
||||
slave->state = BLRS_CREATED; /* Set initial state of the slave */
|
||||
slave->cstate = 0;
|
||||
slave->pthread = 0;
|
||||
slave->overrun = 0;
|
||||
spinlock_init(&slave->catch_lock);
|
||||
slave->dcb = session->client;
|
||||
slave->router = inst;
|
||||
|
||||
/**
|
||||
* Add this session to the list of active sessions.
|
||||
*/
|
||||
spinlock_acquire(&inst->lock);
|
||||
slave->next = inst->slaves;
|
||||
inst->slaves = slave;
|
||||
spinlock_release(&inst->lock);
|
||||
|
||||
CHK_CLIENT_RSES(slave);
|
||||
|
||||
return (void *)slave;
|
||||
}
|
||||
|
||||
/**
|
||||
* The session is no longer required. Shutdown all operation and free memory
|
||||
* associated with this session. In this case a single session is associated
|
||||
* to a slave of MaxScale. Therefore this is called when that slave is no
|
||||
* longer active and should remove of reference to that slave, free memory
|
||||
* and prevent any further forwarding of binlog records to that slave.
|
||||
*
|
||||
* Parameters:
|
||||
* @param router_instance The instance of the router
|
||||
* @param router_cli_ses The particular session to free
|
||||
*
|
||||
*/
|
||||
static void freeSession(
|
||||
ROUTER* router_instance,
|
||||
void* router_client_ses)
|
||||
{
|
||||
ROUTER_INSTANCE *router = (ROUTER_INSTANCE *)router_instance;
|
||||
ROUTER_SLAVE *slave = (ROUTER_SLAVE *)router_client_ses;
|
||||
int prev_val;
|
||||
|
||||
prev_val = atomic_add(&router->stats.n_slaves, -1);
|
||||
ss_dassert(prev_val > 0);
|
||||
|
||||
/*
|
||||
* Remove the slave session form the list of slaves that are using the
|
||||
* router currently.
|
||||
*/
|
||||
spinlock_acquire(&router->lock);
|
||||
if (router->slaves == slave) {
|
||||
router->slaves = slave->next;
|
||||
} else {
|
||||
ROUTER_SLAVE *ptr = router->slaves;
|
||||
|
||||
while (ptr != NULL && ptr->next != slave) {
|
||||
ptr = ptr->next;
|
||||
}
|
||||
|
||||
if (ptr != NULL) {
|
||||
ptr->next = slave->next;
|
||||
}
|
||||
}
|
||||
spinlock_release(&router->lock);
|
||||
|
||||
LOGIF(LD, (skygw_log_write_flush(
|
||||
LOGFILE_DEBUG,
|
||||
"%lu [freeSession] Unlinked router_client_session %p from "
|
||||
"router %p. Connections : %d. ",
|
||||
pthread_self(),
|
||||
slave,
|
||||
router,
|
||||
prev_val-1)));
|
||||
|
||||
if (slave->hostname)
|
||||
free(slave->hostname);
|
||||
if (slave->user)
|
||||
free(slave->user);
|
||||
if (slave->passwd)
|
||||
free(slave->passwd);
|
||||
free(slave);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Close a session with the router, this is the mechanism
|
||||
* by which a router may cleanup data structure etc.
|
||||
*
|
||||
* @param instance The router instance data
|
||||
* @param router_session The session being closed
|
||||
*/
|
||||
static void
|
||||
closeSession(ROUTER *instance, void *router_session)
|
||||
{
|
||||
ROUTER_INSTANCE *router = (ROUTER_INSTANCE *)instance;
|
||||
ROUTER_SLAVE *slave = (ROUTER_SLAVE *)router_session;
|
||||
|
||||
if (slave == NULL)
|
||||
{
|
||||
/*
|
||||
* We must be closing the master session.
|
||||
*
|
||||
* TODO: Handle closure of master session
|
||||
*/
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR, "Binlog router close session with master")));
|
||||
blr_master_reconnect(router);
|
||||
return;
|
||||
}
|
||||
CHK_CLIENT_RSES(slave);
|
||||
/**
|
||||
* Lock router client session for secure read and update.
|
||||
*/
|
||||
if (rses_begin_locked_router_action(slave))
|
||||
{
|
||||
/* decrease server registered slaves counter */
|
||||
atomic_add(&router->stats.n_registered, -1);
|
||||
|
||||
/*
|
||||
* Mark the slave as unregistered to prevent the forwarding
|
||||
* of any more binlog records to this slave.
|
||||
*/
|
||||
slave->state = BLRS_UNREGISTERED;
|
||||
|
||||
/* Unlock */
|
||||
rses_end_locked_router_action(slave);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* We have data from the client, this is likely to be packets related to
|
||||
* the registration of the slave to receive binlog records. Unlike most
|
||||
* MaxScale routers there is no forwarding to the backend database, merely
|
||||
* the return of either predefined server responses that have been cached
|
||||
* or binlog records.
|
||||
*
|
||||
* @param instance The router instance
|
||||
* @param router_session The router session returned from the newSession call
|
||||
* @param queue The queue of data buffers to route
|
||||
* @return The number of bytes sent
|
||||
*/
|
||||
static int
|
||||
routeQuery(ROUTER *instance, void *router_session, GWBUF *queue)
|
||||
{
|
||||
ROUTER_INSTANCE *router = (ROUTER_INSTANCE *)instance;
|
||||
ROUTER_SLAVE *slave = (ROUTER_SLAVE *)router_session;
|
||||
|
||||
return blr_slave_request(router, slave, queue);
|
||||
}
|
||||
|
||||
static char *event_names[] = {
|
||||
"Invalid", "Start Event V3", "Query Event", "Stop Event", "Rotate Event",
|
||||
"Integer Session Variable", "Load Event", "Slave Event", "Create File Event",
|
||||
"Append Block Event", "Exec Load Event", "Delete File Event",
|
||||
"New Load Event", "Rand Event", "User Variable Event", "Format Description Event",
|
||||
"Transaction ID Event (2 Phase Commit)", "Begin Load Query Event",
|
||||
"Execute Load Query Event", "Table Map Event", "Write Rows Event (v0)",
|
||||
"Update Rows Event (v0)", "Delete Rows Event (v0)", "Write Rows Event (v1)",
|
||||
"Update Rows Event (v1)", "Delete Rows Event (v1)", "Incident Event",
|
||||
"Heartbeat Event", "Ignorable Event", "Rows Query Event", "Write Rows Event (v2)",
|
||||
"Update Rows Event (v2)", "Delete Rows Event (v2)", "GTID Event",
|
||||
"Anonymous GTID Event", "Previous GTIDS Event"
|
||||
};
|
||||
|
||||
/**
|
||||
* Display an entry from the spinlock statistics data
|
||||
*
|
||||
* @param dcb The DCB to print to
|
||||
* @param desc Description of the statistic
|
||||
* @param value The statistic value
|
||||
*/
|
||||
static void
|
||||
spin_reporter(void *dcb, char *desc, int value)
|
||||
{
|
||||
dcb_printf((DCB *)dcb, "\t\t%-35s %d\n", desc, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Display router diagnostics
|
||||
*
|
||||
* @param instance Instance of the router
|
||||
* @param dcb DCB to send diagnostics to
|
||||
*/
|
||||
static void
|
||||
diagnostics(ROUTER *router, DCB *dcb)
|
||||
{
|
||||
ROUTER_INSTANCE *router_inst = (ROUTER_INSTANCE *)router;
|
||||
ROUTER_SLAVE *session;
|
||||
int i = 0;
|
||||
char buf[40];
|
||||
struct tm tm;
|
||||
|
||||
spinlock_acquire(&router_inst->lock);
|
||||
session = router_inst->slaves;
|
||||
while (session)
|
||||
{
|
||||
i++;
|
||||
session = session->next;
|
||||
}
|
||||
spinlock_release(&router_inst->lock);
|
||||
|
||||
dcb_printf(dcb, "\tMaster connection DCB: %p\n",
|
||||
router_inst->master);
|
||||
dcb_printf(dcb, "\tMaster connection state: %s\n",
|
||||
blrm_states[router_inst->master_state]);
|
||||
|
||||
localtime_r(&router_inst->stats.lastReply, &tm);
|
||||
asctime_r(&tm, buf);
|
||||
|
||||
dcb_printf(dcb, "\tNumber of master connects: %d\n",
|
||||
router_inst->stats.n_masterstarts);
|
||||
dcb_printf(dcb, "\tNumber of delayed reconnects: %d\n",
|
||||
router_inst->stats.n_delayedreconnects);
|
||||
dcb_printf(dcb, "\tCurrent binlog file: %s\n",
|
||||
router_inst->binlog_name);
|
||||
dcb_printf(dcb, "\tCurrent binlog position: %u\n",
|
||||
router_inst->binlog_position);
|
||||
dcb_printf(dcb, "\tNumber of slave servers: %u\n",
|
||||
router_inst->stats.n_slaves);
|
||||
dcb_printf(dcb, "\tNumber of binlog events received: %u\n",
|
||||
router_inst->stats.n_binlogs);
|
||||
dcb_printf(dcb, "\tNumber of fake binlog events: %u\n",
|
||||
router_inst->stats.n_fakeevents);
|
||||
dcb_printf(dcb, "\tNumber of artificial binlog events: %u\n",
|
||||
router_inst->stats.n_artificial);
|
||||
dcb_printf(dcb, "\tNumber of binlog events in error: %u\n",
|
||||
router_inst->stats.n_binlog_errors);
|
||||
dcb_printf(dcb, "\tNumber of binlog rotate events: %u\n",
|
||||
router_inst->stats.n_rotates);
|
||||
dcb_printf(dcb, "\tNumber of binlog cache hits: %u\n",
|
||||
router_inst->stats.n_cachehits);
|
||||
dcb_printf(dcb, "\tNumber of binlog cache misses: %u\n",
|
||||
router_inst->stats.n_cachemisses);
|
||||
dcb_printf(dcb, "\tNumber of heartbeat events: %u\n",
|
||||
router_inst->stats.n_heartbeats);
|
||||
dcb_printf(dcb, "\tNumber of packets received: %u\n",
|
||||
router_inst->stats.n_reads);
|
||||
dcb_printf(dcb, "\tNumber of residual data packets: %u\n",
|
||||
router_inst->stats.n_residuals);
|
||||
dcb_printf(dcb, "\tAverage events per packet %.1f\n",
|
||||
(double)router_inst->stats.n_binlogs / router_inst->stats.n_reads);
|
||||
dcb_printf(dcb, "\tLast event from master at: %s",
|
||||
buf);
|
||||
dcb_printf(dcb, "\t (%d seconds ago)\n",
|
||||
time(0) - router_inst->stats.lastReply);
|
||||
dcb_printf(dcb, "\tLast event from master: 0x%x\n",
|
||||
router_inst->lastEventReceived);
|
||||
if (router_inst->active_logs)
|
||||
dcb_printf(dcb, "\tRouter processing binlog records\n");
|
||||
if (router_inst->reconnect_pending)
|
||||
dcb_printf(dcb, "\tRouter pending reconnect to master\n");
|
||||
dcb_printf(dcb, "\tEvents received:\n");
|
||||
for (i = 0; i < 0x24; i++)
|
||||
{
|
||||
dcb_printf(dcb, "\t\t%-38s: %u\n", event_names[i], router_inst->stats.events[i]);
|
||||
}
|
||||
|
||||
#if SPINLOCK_PROFILE
|
||||
dcb_printf(dcb, "\tSpinlock statistics (instlock):\n");
|
||||
spinlock_stats(&instlock, spin_reporter, dcb);
|
||||
dcb_printf(dcb, "\tSpinlock statistics (instance lock):\n");
|
||||
spinlock_stats(&router_inst->lock, spin_reporter, dcb);
|
||||
#endif
|
||||
|
||||
if (router_inst->slaves)
|
||||
{
|
||||
dcb_printf(dcb, "\tSlaves:\n");
|
||||
spinlock_acquire(&router_inst->lock);
|
||||
session = router_inst->slaves;
|
||||
while (session)
|
||||
{
|
||||
dcb_printf(dcb, "\t\tServer-id: %d\n", session->serverid);
|
||||
if (session->hostname)
|
||||
dcb_printf(dcb, "\t\tHostname: %s\n", session->hostname);
|
||||
dcb_printf(dcb, "\t\tSlave DCB: %p\n", session->dcb);
|
||||
dcb_printf(dcb, "\t\tNext Sequence No: %d\n", session->seqno);
|
||||
dcb_printf(dcb, "\t\tState: %s\n", blrs_states[session->state]);
|
||||
dcb_printf(dcb, "\t\tBinlog file: %s\n", session->binlogfile);
|
||||
dcb_printf(dcb, "\t\tBinlog position: %u\n", session->binlog_pos);
|
||||
if (session->nocrc)
|
||||
dcb_printf(dcb, "\t\tMaster Binlog CRC: None\n");
|
||||
dcb_printf(dcb, "\t\tNo. requests: %u\n", session->stats.n_requests);
|
||||
dcb_printf(dcb, "\t\tNo. events sent: %u\n", session->stats.n_events);
|
||||
dcb_printf(dcb, "\t\tNo. bursts sent: %u\n", session->stats.n_bursts);
|
||||
dcb_printf(dcb, "\t\tNo. flow control: %u\n", session->stats.n_flows);
|
||||
dcb_printf(dcb, "\t\tNo. catchup NRs: %u\n", session->stats.n_catchupnr);
|
||||
dcb_printf(dcb, "\t\tNo. already up to date: %u\n", session->stats.n_alreadyupd);
|
||||
dcb_printf(dcb, "\t\tNo. up to date: %u\n", session->stats.n_upd);
|
||||
dcb_printf(dcb, "\t\tNo. of low water cbs %u\n", session->stats.n_cb);
|
||||
dcb_printf(dcb, "\t\tNo. of drained cbs %u\n", session->stats.n_dcb);
|
||||
dcb_printf(dcb, "\t\tNo. of low water cbs N/A %u\n", session->stats.n_cbna);
|
||||
dcb_printf(dcb, "\t\tNo. of events > high water %u\n", session->stats.n_above);
|
||||
dcb_printf(dcb, "\t\tNo. of failed reads %u\n", session->stats.n_failed_read);
|
||||
dcb_printf(dcb, "\t\tNo. of nested distribute events %u\n", session->stats.n_overrun);
|
||||
dcb_printf(dcb, "\t\tNo. of distribute action 1 %u\n", session->stats.n_actions[0]);
|
||||
dcb_printf(dcb, "\t\tNo. of distribute action 2 %u\n", session->stats.n_actions[1]);
|
||||
dcb_printf(dcb, "\t\tNo. of distribute action 3 %u\n", session->stats.n_actions[2]);
|
||||
if ((session->cstate & CS_UPTODATE) == 0)
|
||||
{
|
||||
dcb_printf(dcb, "\t\tSlave is in catchup mode. %s\n",
|
||||
((session->cstate & CS_EXPECTCB) == 0 ? "" :
|
||||
"Waiting for DCB queue to drain."));
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
dcb_printf(dcb, "\t\tSlave is in normal mode.\n");
|
||||
if (session->binlog_pos != router_inst->binlog_position)
|
||||
{
|
||||
dcb_printf(dcb, "\t\tSlave reports up to date however "
|
||||
"the slave binlog position does not match the master\n");
|
||||
}
|
||||
}
|
||||
#if SPINLOCK_PROFILE
|
||||
dcb_printf(dcb, "\tSpinlock statistics (catch_lock):\n");
|
||||
spinlock_stats(&session->catch_lock, spin_reporter, dcb);
|
||||
dcb_printf(dcb, "\tSpinlock statistics (rses_lock):\n");
|
||||
spinlock_stats(&session->rses_lock, spin_reporter, dcb);
|
||||
#endif
|
||||
|
||||
session = session->next;
|
||||
}
|
||||
spinlock_release(&router_inst->lock);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Client Reply routine - in this case this is a message from the
|
||||
* master server, It should be sent to the state machine that manages
|
||||
* master packets as it may be binlog records or part of the registration
|
||||
* handshake that takes part during connection establishment.
|
||||
*
|
||||
*
|
||||
* @param instance The router instance
|
||||
* @param router_session The router session
|
||||
* @param master_dcb The DCB for the connection to the master
|
||||
* @param queue The GWBUF with reply data
|
||||
*/
|
||||
static void
|
||||
clientReply(ROUTER *instance, void *router_session, GWBUF *queue, DCB *backend_dcb)
|
||||
{
|
||||
ROUTER_INSTANCE *router = (ROUTER_INSTANCE *)instance;
|
||||
|
||||
atomic_add(&router->stats.n_reads, 1);
|
||||
blr_master_response(router, queue);
|
||||
router->stats.lastReply = time(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Error Reply routine
|
||||
*
|
||||
* The routine will reply to client errors and/or closing the session
|
||||
* or try to open a new backend connection.
|
||||
*
|
||||
* @param instance The router instance
|
||||
* @param router_session The router session
|
||||
* @param message The error message to reply
|
||||
* @param backend_dcb The backend DCB
|
||||
* @param action The action: REPLY, REPLY_AND_CLOSE, NEW_CONNECTION
|
||||
* @param succp Result of action
|
||||
*
|
||||
*/
|
||||
static void
|
||||
errorReply(ROUTER *instance, void *router_session, GWBUF *message, DCB *backend_dcb, error_action_t action, bool *succp)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write_flush(
|
||||
LOGFILE_ERROR, "Erorr Reply '%s'", message)));
|
||||
*succp = false;
|
||||
}
|
||||
|
||||
/** to be inline'd */
|
||||
/**
|
||||
* @node Acquires lock to router client session if it is not closed.
|
||||
*
|
||||
* Parameters:
|
||||
* @param rses - in, use
|
||||
*
|
||||
*
|
||||
* @return true if router session was not closed. If return value is true
|
||||
* it means that router is locked, and must be unlocked later. False, if
|
||||
* router was closed before lock was acquired.
|
||||
*
|
||||
*
|
||||
* @details (write detailed description here)
|
||||
*
|
||||
*/
|
||||
static bool rses_begin_locked_router_action(ROUTER_SLAVE *rses)
|
||||
{
|
||||
bool succp = false;
|
||||
|
||||
CHK_CLIENT_RSES(rses);
|
||||
|
||||
spinlock_acquire(&rses->rses_lock);
|
||||
succp = true;
|
||||
|
||||
return succp;
|
||||
}
|
||||
|
||||
/** to be inline'd */
|
||||
/**
|
||||
* @node Releases router client session lock.
|
||||
*
|
||||
* Parameters:
|
||||
* @param rses - <usage>
|
||||
* <description>
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
*
|
||||
* @details (write detailed description here)
|
||||
*
|
||||
*/
|
||||
static void rses_end_locked_router_action(ROUTER_SLAVE * rses)
|
||||
{
|
||||
CHK_CLIENT_RSES(rses);
|
||||
spinlock_release(&rses->rses_lock);
|
||||
}
|
||||
|
||||
|
||||
static uint8_t getCapabilities(ROUTER *inst, void *router_session)
|
||||
{
|
||||
return 0;
|
||||
}
|
69
server/modules/routing/binlog/blr_cache.c
Normal file
69
server/modules/routing/binlog/blr_cache.c
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* This file is distributed as part of MaxScale. It is free
|
||||
* software: you can redistribute it and/or modify it under the terms of the
|
||||
* GNU General Public License as published by the Free Software Foundation,
|
||||
* version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Copyright SkySQL Ab 2014
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file blr_cache.c - binlog router cache, manage the binlog cache
|
||||
*
|
||||
* The binlog router is designed to be used in replication environments to
|
||||
* increase the replication fanout of a master server. It provides a transparant
|
||||
* mechanism to read the binlog entries for multiple slaves while requiring
|
||||
* only a single connection to the actual master to support the slaves.
|
||||
*
|
||||
* The current prototype implement is designed to support MySQL 5.6 and has
|
||||
* a number of limitations. This prototype is merely a proof of concept and
|
||||
* should not be considered production ready.
|
||||
*
|
||||
* @verbatim
|
||||
* Revision History
|
||||
*
|
||||
* Date Who Description
|
||||
* 07/04/2014 Mark Riddoch Initial implementation
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <service.h>
|
||||
#include <server.h>
|
||||
#include <router.h>
|
||||
#include <atomic.h>
|
||||
#include <spinlock.h>
|
||||
#include <blr.h>
|
||||
#include <dcb.h>
|
||||
#include <spinlock.h>
|
||||
|
||||
#include <skygw_types.h>
|
||||
#include <skygw_utils.h>
|
||||
#include <log_manager.h>
|
||||
|
||||
|
||||
extern int lm_enabled_logfiles_bitmask;
|
||||
|
||||
|
||||
/**
|
||||
* Initialise the cache for this instanceof the binlog router. As a side
|
||||
* effect also determine the binlog file to read and the position to read
|
||||
* from.
|
||||
*
|
||||
* @param router The router instance
|
||||
*/
|
||||
void
|
||||
blr_init_cache(ROUTER_INSTANCE *router)
|
||||
{
|
||||
}
|
346
server/modules/routing/binlog/blr_file.c
Normal file
346
server/modules/routing/binlog/blr_file.c
Normal file
@ -0,0 +1,346 @@
|
||||
/*
|
||||
* This file is distributed as part of MaxScale. It is free
|
||||
* software: you can redistribute it and/or modify it under the terms of the
|
||||
* GNU General Public License as published by the Free Software Foundation,
|
||||
* version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Copyright SkySQL Ab 2014
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file blr_file.c - contains code for the router binlog file management
|
||||
*
|
||||
*
|
||||
* @verbatim
|
||||
* Revision History
|
||||
*
|
||||
* Date Who Description
|
||||
* 14/04/2014 Mark Riddoch Initial implementation
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <service.h>
|
||||
#include <server.h>
|
||||
#include <router.h>
|
||||
#include <atomic.h>
|
||||
#include <spinlock.h>
|
||||
#include <blr.h>
|
||||
#include <dcb.h>
|
||||
#include <spinlock.h>
|
||||
|
||||
#include <skygw_types.h>
|
||||
#include <skygw_utils.h>
|
||||
#include <log_manager.h>
|
||||
|
||||
|
||||
extern int lm_enabled_logfiles_bitmask;
|
||||
|
||||
static void blr_file_create(ROUTER_INSTANCE *router, char *file);
|
||||
static void blr_file_append(ROUTER_INSTANCE *router, char *file);
|
||||
static uint32_t extract_field(uint8_t *src, int bits);
|
||||
|
||||
/**
|
||||
* Initialise the binlog file for this instance. MaxScale will look
|
||||
* for all the binlogs that it has on local disk, determien the next
|
||||
* binlog to use and initialise it for writing, determining the
|
||||
* next record to be fetched from the real master.
|
||||
*
|
||||
* @param router The router instance this defines the master for this replication chain
|
||||
*/
|
||||
void
|
||||
blr_file_init(ROUTER_INSTANCE *router)
|
||||
{
|
||||
char *ptr, path[1024], filename[1050];
|
||||
int file_found, n = 1;
|
||||
int root_len, i;
|
||||
DIR *dirp;
|
||||
struct dirent *dp;
|
||||
|
||||
strcpy(path, "/usr/local/skysql/MaxScale");
|
||||
if ((ptr = getenv("MAXSCALE_HOME")) != NULL)
|
||||
{
|
||||
strcpy(path, ptr);
|
||||
}
|
||||
strcat(path, "/");
|
||||
strcat(path, router->service->name);
|
||||
|
||||
if (access(path, R_OK) == -1)
|
||||
mkdir(path, 0777);
|
||||
|
||||
/* First try to find a binlog file number by reading the directory */
|
||||
root_len = strlen(router->fileroot);
|
||||
dirp = opendir(path);
|
||||
while ((dp = readdir(dirp)) != NULL)
|
||||
{
|
||||
if (strncmp(dp->d_name, router->fileroot, root_len) == 0)
|
||||
{
|
||||
i = atoi(dp->d_name + root_len + 1);
|
||||
if (i > n)
|
||||
n = i;
|
||||
}
|
||||
}
|
||||
closedir(dirp);
|
||||
|
||||
|
||||
file_found = 0;
|
||||
do {
|
||||
sprintf(filename, "%s/" BINLOG_NAMEFMT, path, router->fileroot, n);
|
||||
if (access(filename, R_OK) != -1)
|
||||
{
|
||||
file_found = 1;
|
||||
n++;
|
||||
}
|
||||
else
|
||||
file_found = 0;
|
||||
} while (file_found);
|
||||
n--;
|
||||
|
||||
if (n == 0) // No binlog files found
|
||||
{
|
||||
sprintf(filename, BINLOG_NAMEFMT, router->fileroot, 1);
|
||||
blr_file_create(router, filename);
|
||||
}
|
||||
else
|
||||
{
|
||||
sprintf(filename, BINLOG_NAMEFMT, router->fileroot, n);
|
||||
blr_file_append(router, filename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
blr_file_rotate(ROUTER_INSTANCE *router, char *file, uint64_t pos)
|
||||
{
|
||||
blr_file_create(router, file);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a new binlog file for the router to use.
|
||||
*
|
||||
* @param router The router instance
|
||||
* @param file The binlog file name
|
||||
*/
|
||||
static void
|
||||
blr_file_create(ROUTER_INSTANCE *router, char *file)
|
||||
{
|
||||
char *ptr, path[1024];
|
||||
int fd;
|
||||
unsigned char magic[] = BINLOG_MAGIC;
|
||||
|
||||
strcpy(path, "/usr/local/skysql/MaxScale");
|
||||
if ((ptr = getenv("MAXSCALE_HOME")) != NULL)
|
||||
{
|
||||
strcpy(path, ptr);
|
||||
}
|
||||
strcat(path, "/");
|
||||
strcat(path, router->service->name);
|
||||
strcat(path, "/");
|
||||
strcat(path, file);
|
||||
|
||||
if ((fd = open(path, O_RDWR|O_CREAT, 0666)) != -1)
|
||||
{
|
||||
write(fd, magic, 4);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Failed to create binlog file %s\n", path)));
|
||||
}
|
||||
fsync(fd);
|
||||
close(router->binlog_fd);
|
||||
strcpy(router->binlog_name, file);
|
||||
router->binlog_position = 4; /* Initial position after the magic number */
|
||||
router->binlog_fd = fd;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Prepare an existing binlog file to be appened to.
|
||||
*
|
||||
* @param router The router instance
|
||||
* @param file The binlog file name
|
||||
*/
|
||||
static void
|
||||
blr_file_append(ROUTER_INSTANCE *router, char *file)
|
||||
{
|
||||
char *ptr, path[1024];
|
||||
int fd;
|
||||
|
||||
strcpy(path, "/usr/local/skysql/MaxScale");
|
||||
if ((ptr = getenv("MAXSCALE_HOME")) != NULL)
|
||||
{
|
||||
strcpy(path, ptr);
|
||||
}
|
||||
strcat(path, "/");
|
||||
strcat(path, router->service->name);
|
||||
strcat(path, "/");
|
||||
strcat(path, file);
|
||||
|
||||
if ((fd = open(path, O_RDWR|O_APPEND, 0666)) == -1)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Failed to open binlog file %s for append.\n",
|
||||
path)));
|
||||
return;
|
||||
}
|
||||
fsync(fd);
|
||||
close(router->binlog_fd);
|
||||
strcpy(router->binlog_name, file);
|
||||
router->binlog_position = lseek(fd, 0L, SEEK_END);
|
||||
router->binlog_fd = fd;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write a binlog entry to disk.
|
||||
*
|
||||
* @param router The router instance
|
||||
* @param buf The binlog record
|
||||
* @param len The length of the binlog record
|
||||
*/
|
||||
void
|
||||
blr_write_binlog_record(ROUTER_INSTANCE *router, REP_HEADER *hdr, uint8_t *buf)
|
||||
{
|
||||
pwrite(router->binlog_fd, buf, hdr->event_size, hdr->next_pos - hdr->event_size);
|
||||
router->binlog_position = hdr->next_pos;
|
||||
}
|
||||
|
||||
/**
|
||||
* Flush the content of the binlog file to disk.
|
||||
*
|
||||
* @param router The binlog router
|
||||
*/
|
||||
void
|
||||
blr_file_flush(ROUTER_INSTANCE *router)
|
||||
{
|
||||
fsync(router->binlog_fd);
|
||||
}
|
||||
|
||||
int
|
||||
blr_open_binlog(ROUTER_INSTANCE *router, char *binlog)
|
||||
{
|
||||
char *ptr, path[1024];
|
||||
int rval;
|
||||
|
||||
strcpy(path, "/usr/local/skysql/MaxScale");
|
||||
if ((ptr = getenv("MAXSCALE_HOME")) != NULL)
|
||||
{
|
||||
strcpy(path, ptr);
|
||||
}
|
||||
strcat(path, "/");
|
||||
strcat(path, router->service->name);
|
||||
strcat(path, "/");
|
||||
strcat(path, binlog);
|
||||
|
||||
if ((rval = open(path, O_RDONLY, 0666)) == -1)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Failed to open binlog file %s\n", path)));
|
||||
}
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a replication event into a GWBUF structure.
|
||||
*
|
||||
* @param fd File descriptor of the binlog file
|
||||
* @param pos Position of binlog record to read
|
||||
* @param hdr Binlog header to populate
|
||||
* @return The binlog record wrapped in a GWBUF structure
|
||||
*/
|
||||
GWBUF *
|
||||
blr_read_binlog(int fd, unsigned int pos, REP_HEADER *hdr)
|
||||
{
|
||||
uint8_t hdbuf[19];
|
||||
GWBUF *result;
|
||||
unsigned char *data;
|
||||
int n;
|
||||
|
||||
if (lseek(fd, pos, SEEK_SET) != pos)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Failed to seek for binlog entry, "
|
||||
"at %d.\n", pos)));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Read the header information from the file */
|
||||
if ((n = read(fd, hdbuf, 19)) != 19)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Failed to read header for binlog entry, "
|
||||
"at %d (%s).\n", pos, strerror(errno))));
|
||||
if (n> 0 && n < 19)
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Short read when reading the header. "
|
||||
"Expected 19 bytes got %d bytes.\n",
|
||||
n)));
|
||||
return NULL;
|
||||
}
|
||||
hdr->timestamp = extract_field(hdbuf, 32);
|
||||
hdr->event_type = hdbuf[4];
|
||||
hdr->serverid = extract_field(&hdbuf[5], 32);
|
||||
hdr->event_size = extract_field(&hdbuf[9], 32);
|
||||
hdr->next_pos = extract_field(&hdbuf[13], 32);
|
||||
hdr->flags = extract_field(&hdbuf[17], 16);
|
||||
if ((result = gwbuf_alloc(hdr->event_size)) == NULL)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Failed to allocate memory for binlog entry, "
|
||||
"size %d at %d.\n",
|
||||
hdr->event_size, pos)));
|
||||
return NULL;
|
||||
}
|
||||
data = GWBUF_DATA(result);
|
||||
memcpy(data, hdbuf, 19); // Copy the header in
|
||||
if ((n = read(fd, &data[19], hdr->event_size - 19))
|
||||
!= hdr->event_size - 19) // Read the balance
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Short read when reading the event at %d. "
|
||||
"Expected %d bytes got %d bytes.\n",
|
||||
pos, n)));
|
||||
gwbuf_consume(result, hdr->event_size);
|
||||
return NULL;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a numeric field from a packet of the specified number of bits
|
||||
*
|
||||
* @param src The raw packet source
|
||||
* @param birs The number of bits to extract (multiple of 8)
|
||||
*/
|
||||
static uint32_t
|
||||
extract_field(uint8_t *src, int bits)
|
||||
{
|
||||
uint32_t rval = 0, shift = 0;
|
||||
|
||||
while (bits > 0)
|
||||
{
|
||||
rval |= (*src++) << shift;
|
||||
shift += 8;
|
||||
bits -= 8;
|
||||
}
|
||||
return rval;
|
||||
}
|
1024
server/modules/routing/binlog/blr_master.c
Normal file
1024
server/modules/routing/binlog/blr_master.c
Normal file
File diff suppressed because it is too large
Load Diff
944
server/modules/routing/binlog/blr_slave.c
Normal file
944
server/modules/routing/binlog/blr_slave.c
Normal file
@ -0,0 +1,944 @@
|
||||
/*
|
||||
* This file is distributed as part of MaxScale. It is free
|
||||
* software: you can redistribute it and/or modify it under the terms of the
|
||||
* GNU General Public License as published by the Free Software Foundation,
|
||||
* version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with
|
||||
* this program; if not, write to the Free Software Foundation, Inc., 51
|
||||
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Copyright SkySQL Ab 2014
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file blr_slave.c - contains code for the router to slave communication
|
||||
*
|
||||
* The binlog router is designed to be used in replication environments to
|
||||
* increase the replication fanout of a master server. It provides a transparant
|
||||
* mechanism to read the binlog entries for multiple slaves while requiring
|
||||
* only a single connection to the actual master to support the slaves.
|
||||
*
|
||||
* The current prototype implement is designed to support MySQL 5.6 and has
|
||||
* a number of limitations. This prototype is merely a proof of concept and
|
||||
* should not be considered production ready.
|
||||
*
|
||||
* @verbatim
|
||||
* Revision History
|
||||
*
|
||||
* Date Who Description
|
||||
* 14/04/2014 Mark Riddoch Initial implementation
|
||||
*
|
||||
* @endverbatim
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <service.h>
|
||||
#include <server.h>
|
||||
#include <router.h>
|
||||
#include <atomic.h>
|
||||
#include <spinlock.h>
|
||||
#include <blr.h>
|
||||
#include <dcb.h>
|
||||
#include <spinlock.h>
|
||||
|
||||
#include <skygw_types.h>
|
||||
#include <skygw_utils.h>
|
||||
#include <log_manager.h>
|
||||
|
||||
|
||||
static uint32_t extract_field(uint8_t *src, int bits);
|
||||
static void encode_value(unsigned char *data, unsigned int value, int len);
|
||||
static int blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
|
||||
static int blr_slave_replay(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *master);
|
||||
static void blr_slave_send_error(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, char *msg);
|
||||
static int blr_slave_send_timestamp(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave);
|
||||
static int blr_slave_register(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
|
||||
static int blr_slave_binlog_dump(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue);
|
||||
int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave);
|
||||
static uint8_t *blr_build_header(GWBUF *pkt, REP_HEADER *hdr);
|
||||
static int blr_slave_callback(DCB *dcb, DCB_REASON reason, void *data);
|
||||
|
||||
extern int lm_enabled_logfiles_bitmask;
|
||||
|
||||
/**
|
||||
* Process a request packet from the slave server.
|
||||
*
|
||||
* The router can handle a limited subset of requests from the slave, these
|
||||
* include a subset of general SQL queries, a slave registeration command and
|
||||
* the binlog dump command.
|
||||
*
|
||||
* The strategy for responding to these commands is to use caches responses
|
||||
* for the the same commands that have previously been made to the real master
|
||||
* if this is possible, if it is not then the router itself will synthesize a
|
||||
* response.
|
||||
*
|
||||
* @param router The router instance this defines the master for this replication chain
|
||||
* @param slave The slave specific data
|
||||
* @param queue The incoming request packet
|
||||
*/
|
||||
int
|
||||
blr_slave_request(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
|
||||
{
|
||||
if (slave->state < 0 || slave->state > BLRS_MAXSTATE)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR, "Invalid slave state machine state (%d) for binlog router.\n",
|
||||
slave->state)));
|
||||
gwbuf_consume(queue, gwbuf_length(queue));
|
||||
return 0;
|
||||
}
|
||||
|
||||
atomic_add(&slave->stats.n_requests, 1);
|
||||
switch (MYSQL_COMMAND(queue))
|
||||
{
|
||||
case COM_QUERY:
|
||||
return blr_slave_query(router, slave, queue);
|
||||
break;
|
||||
case COM_REGISTER_SLAVE:
|
||||
return blr_slave_register(router, slave, queue);
|
||||
break;
|
||||
case COM_BINLOG_DUMP:
|
||||
return blr_slave_binlog_dump(router, slave, queue);
|
||||
break;
|
||||
case COM_QUIT:
|
||||
LOGIF(LD, (skygw_log_write(LOGFILE_DEBUG,
|
||||
"COM_QUIT received from slave with server_id %d\n",
|
||||
slave->serverid)));
|
||||
break;
|
||||
default:
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"Unexpected MySQL Command (%d) received from slave\n",
|
||||
MYSQL_COMMAND(queue))));
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle a query from the slave. This is expected to be one of the "standard"
|
||||
* queries we expect as part of the registraton process. Most of these can
|
||||
* be dealt with by replying the stored responses we got from the master
|
||||
* when MaxScale registered as a slave. The exception to the rule is the
|
||||
* request to obtain the current timestamp value of the server.
|
||||
*
|
||||
* Five select statements are currently supported:
|
||||
* SELECT UNIX_TIMESTAMP();
|
||||
* SELECT @master_binlog_checksum
|
||||
* SELECT @@GLOBAL.GTID_MODE
|
||||
* SELECT VERSION()
|
||||
* SELECT 1
|
||||
*
|
||||
* Two show commands are supported:
|
||||
* SHOW VARIABLES LIKE 'SERVER_ID'
|
||||
* SHOW VARIABLES LIKE 'SERVER_UUID'
|
||||
*
|
||||
* Five set commands are supported:
|
||||
* SET @master_binlog_checksum = @@global.binlog_checksum
|
||||
* SET @master_heartbeat_period=...
|
||||
* SET @slave_slave_uuid=...
|
||||
* SET NAMES latin1
|
||||
* SET NAMES utf8
|
||||
*
|
||||
* @param router The router instance this defines the master for this replication chain
|
||||
* @param slave The slave specific data
|
||||
* @param queue The incoming request packet
|
||||
* @return Non-zero if data has been sent
|
||||
*/
|
||||
static int
|
||||
blr_slave_query(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
|
||||
{
|
||||
char *qtext, *query_text;
|
||||
char *sep = " ,=";
|
||||
char *word, *brkb;
|
||||
int query_len;
|
||||
|
||||
qtext = GWBUF_DATA(queue);
|
||||
query_len = extract_field((uint8_t *)qtext, 24) - 1;
|
||||
qtext += 5; // Skip header and first byte of the payload
|
||||
query_text = strndup(qtext, query_len);
|
||||
|
||||
LOGIF(LT, (skygw_log_write(
|
||||
LOGFILE_TRACE, "Execute statement from the slave '%s'\n", query_text)));
|
||||
/*
|
||||
* Implement a very rudimental "parsing" of the query text by extarcting the
|
||||
* words from the statement and matchng them against the subset of queries we
|
||||
* are expecting from the slave. We already have responses to these commands,
|
||||
* except for the select of UNIX_TIMESTAMP(), that we have saved from MaxScale's
|
||||
* own interaction with the real master. We simply replay these saved responses
|
||||
* to the slave.
|
||||
*/
|
||||
word = strtok_r(query_text, sep, &brkb);
|
||||
if (strcasecmp(word, "SELECT") == 0)
|
||||
{
|
||||
word = strtok_r(NULL, sep, &brkb);
|
||||
if (strcasecmp(word, "UNIX_TIMESTAMP()") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_send_timestamp(router, slave);
|
||||
}
|
||||
else if (strcasecmp(word, "@master_binlog_checksum") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.chksum2);
|
||||
}
|
||||
else if (strcasecmp(word, "@@GLOBAL.GTID_MODE") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.gtid_mode);
|
||||
}
|
||||
else if (strcasecmp(word, "1") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.select1);
|
||||
}
|
||||
else if (strcasecmp(word, "VERSION()") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.selectver);
|
||||
}
|
||||
}
|
||||
else if (strcasecmp(word, "SHOW") == 0)
|
||||
{
|
||||
word = strtok_r(NULL, sep, &brkb);
|
||||
if (strcasecmp(word, "VARIABLES") == 0)
|
||||
{
|
||||
word = strtok_r(NULL, sep, &brkb);
|
||||
if (strcasecmp(word, "LIKE") == 0)
|
||||
{
|
||||
word = strtok_r(NULL, sep, &brkb);
|
||||
if (strcasecmp(word, "'SERVER_ID'") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.server_id);
|
||||
}
|
||||
else if (strcasecmp(word, "'SERVER_UUID'") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.uuid);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (strcasecmp(query_text, "SET") == 0)
|
||||
{
|
||||
word = strtok_r(NULL, sep, &brkb);
|
||||
if (strcasecmp(word, "@master_heartbeat_period") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.heartbeat);
|
||||
}
|
||||
else if (strcasecmp(word, "@master_binlog_checksum") == 0)
|
||||
{
|
||||
word = strtok_r(NULL, sep, &brkb);
|
||||
if (strcasecmp(word, "'none'") == 0)
|
||||
slave->nocrc = 1;
|
||||
else
|
||||
slave->nocrc = 0;
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.chksum1);
|
||||
}
|
||||
else if (strcasecmp(word, "@slave_uuid") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.setslaveuuid);
|
||||
}
|
||||
else if (strcasecmp(word, "NAMES") == 0)
|
||||
{
|
||||
word = strtok_r(NULL, sep, &brkb);
|
||||
if (strcasecmp(word, "latin1") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.setnames);
|
||||
}
|
||||
else if (strcasecmp(word, "utf8") == 0)
|
||||
{
|
||||
free(query_text);
|
||||
return blr_slave_replay(router, slave, router->saved_master.utf8);
|
||||
}
|
||||
}
|
||||
}
|
||||
free(query_text);
|
||||
|
||||
query_text = strndup(qtext, query_len);
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR, "Unexpected query from slave server %s\n", query_text)));
|
||||
free(query_text);
|
||||
blr_slave_send_error(router, slave, "Unexpected SQL query received from slave.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Send a reply to a command we have received from the slave. The reply itself
|
||||
* is merely a copy of a previous message we received from the master when we
|
||||
* registered as a slave. Hence we just replay this saved reply.
|
||||
*
|
||||
* @param router The binlog router instance
|
||||
* @param slave The slave server to which we are sending the response
|
||||
* @param master The saved master response
|
||||
* @return Non-zero if data was sent
|
||||
*/
|
||||
static int
|
||||
blr_slave_replay(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *master)
|
||||
{
|
||||
GWBUF *clone;
|
||||
|
||||
if (!master)
|
||||
return 0;
|
||||
if ((clone = gwbuf_clone(master)) != NULL)
|
||||
{
|
||||
return slave->dcb->func.write(slave->dcb, clone);
|
||||
}
|
||||
else
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR,
|
||||
"Failed to clone server response to send to slave.\n")));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct an error response
|
||||
*
|
||||
* @param router The router instance
|
||||
* @param slave The slave server instance
|
||||
* @param msg The error message to send
|
||||
*/
|
||||
static void
|
||||
blr_slave_send_error(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, char *msg)
|
||||
{
|
||||
GWBUF *pkt;
|
||||
unsigned char *data;
|
||||
int len;
|
||||
|
||||
if ((pkt = gwbuf_alloc(strlen(msg) + 13)) == NULL)
|
||||
return;
|
||||
data = GWBUF_DATA(pkt);
|
||||
len = strlen(msg) + 1;
|
||||
encode_value(&data[0], len, 24); // Payload length
|
||||
data[3] = 0; // Sequence id
|
||||
// Payload
|
||||
data[4] = 0xff; // Error indicator
|
||||
data[5] = 0; // Error Code
|
||||
data[6] = 0; // Error Code
|
||||
strncpy((char *)&data[7], "#00000", 6);
|
||||
memcpy(&data[13], msg, strlen(msg)); // Error Message
|
||||
slave->dcb->func.write(slave->dcb, pkt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Some standard packets that have been captured from a network trace of server
|
||||
* interactions. These packets are the schema definition sent in response to
|
||||
* a SELECT UNIX_TIMESTAMP() statement and the EOF packet that marks the end
|
||||
* of transmission of the result set.
|
||||
*/
|
||||
static uint8_t timestamp_def[] = {
|
||||
0x01, 0x00, 0x00, 0x01, 0x01, 0x26, 0x00, 0x00, 0x02, 0x03, 0x64, 0x65, 0x66, 0x00, 0x00, 0x00,
|
||||
0x10, 0x55, 0x4e, 0x49, 0x58, 0x5f, 0x54, 0x49, 0x4d, 0x45, 0x53, 0x54, 0x41, 0x4d, 0x50, 0x28,
|
||||
0x29, 0x00, 0x0c, 0x3f, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x08, 0x81, 0x00, 0x00, 0x00, 0x00, 0x05,
|
||||
0x00, 0x00, 0x03, 0xfe, 0x00, 0x00, 0x02, 0x00
|
||||
};
|
||||
static uint8_t timestamp_eof[] = { 0x05, 0x00, 0x00, 0x05, 0xfe, 0x00, 0x00, 0x02, 0x00 };
|
||||
|
||||
/**
|
||||
* Send a response to a "SELECT UNIX_TIMESTAMP()" request. This differs from the other
|
||||
* requests since we do not save a copy of the original interaction with the master
|
||||
* and simply replay it. We want to always send the current time. We have stored a typcial
|
||||
* response, which gives us the schema information normally returned. This is sent to the
|
||||
* client and then we add a dynamic part that will insert the current timestamp data.
|
||||
* Finally we send a preprepaed EOF packet to end the response stream.
|
||||
*
|
||||
* @param router The binlog router instance
|
||||
* @param slave The slave server to which we are sending the response
|
||||
* @return Non-zero if data was sent
|
||||
*/
|
||||
static int
|
||||
blr_slave_send_timestamp(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave)
|
||||
{
|
||||
GWBUF *pkt;
|
||||
char timestamp[20];
|
||||
uint8_t *ptr;
|
||||
int len, ts_len;
|
||||
|
||||
sprintf(timestamp, "%ld", time(0));
|
||||
ts_len = strlen(timestamp);
|
||||
len = sizeof(timestamp_def) + sizeof(timestamp_eof) + 5 + ts_len;
|
||||
if ((pkt = gwbuf_alloc(len)) == NULL)
|
||||
return 0;
|
||||
ptr = GWBUF_DATA(pkt);
|
||||
memcpy(ptr, timestamp_def, sizeof(timestamp_def)); // Fixed preamble
|
||||
ptr += sizeof(timestamp_def);
|
||||
encode_value(ptr, ts_len + 1, 24); // Add length of data packet
|
||||
ptr += 3;
|
||||
*ptr++ = 0x04; // Sequence number in response
|
||||
*ptr++ = ts_len; // Length of result string
|
||||
strncpy((char *)ptr, timestamp, ts_len); // Result string
|
||||
ptr += ts_len;
|
||||
memcpy(ptr, timestamp_eof, sizeof(timestamp_eof)); // EOF packet to terminate result
|
||||
return slave->dcb->func.write(slave->dcb, pkt);
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a slave replication registration message.
|
||||
*
|
||||
* We store the various bits of information the slave gives us and generate
|
||||
* a reply message.
|
||||
*
|
||||
* @param router The router instance
|
||||
* @param slave The slave server
|
||||
* @param queue The BINLOG_DUMP packet
|
||||
* @return Non-zero if data was sent
|
||||
*/
|
||||
static int
|
||||
blr_slave_register(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
|
||||
{
|
||||
GWBUF *resp;
|
||||
uint8_t *ptr;
|
||||
int len, slen;
|
||||
|
||||
ptr = GWBUF_DATA(queue);
|
||||
len = extract_field(ptr, 24);
|
||||
ptr += 4; // Skip length and sequence number
|
||||
if (*ptr++ != COM_REGISTER_SLAVE)
|
||||
return 0;
|
||||
slave->serverid = extract_field(ptr, 32);
|
||||
ptr += 4;
|
||||
slen = *ptr++;
|
||||
if (slen != 0)
|
||||
{
|
||||
slave->hostname = strndup((char *)ptr, slen);
|
||||
ptr += slen;
|
||||
}
|
||||
else
|
||||
slave->hostname = NULL;
|
||||
slen = *ptr++;
|
||||
if (slen != 0)
|
||||
{
|
||||
ptr += slen;
|
||||
slave->user = strndup((char *)ptr, slen);
|
||||
}
|
||||
else
|
||||
slave->user = NULL;
|
||||
slen = *ptr++;
|
||||
if (slen != 0)
|
||||
{
|
||||
slave->passwd = strndup((char *)ptr, slen);
|
||||
ptr += slen;
|
||||
}
|
||||
else
|
||||
slave->passwd = NULL;
|
||||
slave->port = extract_field(ptr, 16);
|
||||
ptr += 2;
|
||||
slave->rank = extract_field(ptr, 32);
|
||||
|
||||
/*
|
||||
* Now construct a response
|
||||
*/
|
||||
if ((resp = gwbuf_alloc(11)) == NULL)
|
||||
return 0;
|
||||
ptr = GWBUF_DATA(resp);
|
||||
encode_value(ptr, 7, 24); // Payload length
|
||||
ptr += 3;
|
||||
*ptr++ = 1; // Sequence number
|
||||
encode_value(ptr, 0, 24);
|
||||
ptr += 3;
|
||||
encode_value(ptr, slave->serverid, 32);
|
||||
slave->state = BLRS_REGISTERED;
|
||||
return slave->dcb->func.write(slave->dcb, resp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a COM_BINLOG_DUMP message from the slave. This is the
|
||||
* final step in the process of registration. The new master, MaxScale
|
||||
* must send a response packet and generate a fake BINLOG_ROTATE event
|
||||
* with the binlog file requested by the slave. And then send a
|
||||
* FORMAT_DESCRIPTION_EVENT that has been saved from the real master.
|
||||
*
|
||||
* Once send MaxScale must continue to send binlog events to the slave.
|
||||
*
|
||||
* @param router The router instance
|
||||
* @param slave The slave server
|
||||
* @param queue The BINLOG_DUMP packet
|
||||
* @return The number of bytes written to the slave
|
||||
*/
|
||||
static int
|
||||
blr_slave_binlog_dump(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, GWBUF *queue)
|
||||
{
|
||||
GWBUF *resp;
|
||||
uint8_t *ptr;
|
||||
int len, flags, serverid, rval;
|
||||
REP_HEADER hdr;
|
||||
uint32_t chksum;
|
||||
|
||||
ptr = GWBUF_DATA(queue);
|
||||
len = extract_field(ptr, 24);
|
||||
ptr += 4; // Skip length and sequence number
|
||||
if (*ptr++ != COM_BINLOG_DUMP)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"blr_slave_binlog_dump expected a COM_BINLOG_DUMP but received %d\n",
|
||||
*(ptr-1))));
|
||||
return 0;
|
||||
}
|
||||
|
||||
slave->binlog_pos = extract_field(ptr, 32);
|
||||
ptr += 4;
|
||||
flags = extract_field(ptr, 16);
|
||||
ptr += 2;
|
||||
serverid = extract_field(ptr, 32);
|
||||
ptr += 4;
|
||||
strncpy(slave->binlogfile, (char *)ptr, BINLOG_FNAMELEN);
|
||||
|
||||
slave->state = BLRS_DUMPING;
|
||||
slave->seqno = 1;
|
||||
|
||||
if (slave->nocrc)
|
||||
len = 0x2b;
|
||||
else
|
||||
len = 0x2f;
|
||||
|
||||
// Build a fake rotate event
|
||||
resp = gwbuf_alloc(len + 5);
|
||||
hdr.payload_len = len + 1;
|
||||
hdr.seqno = slave->seqno++;
|
||||
hdr.ok = 0;
|
||||
hdr.timestamp = 0L;
|
||||
hdr.event_type = ROTATE_EVENT;
|
||||
hdr.serverid = router->masterid;
|
||||
hdr.event_size = len;
|
||||
hdr.next_pos = 0;
|
||||
hdr.flags = 0x20;
|
||||
ptr = blr_build_header(resp, &hdr);
|
||||
encode_value(ptr, slave->binlog_pos, 64);
|
||||
ptr += 8;
|
||||
memcpy(ptr, slave->binlogfile, BINLOG_FNAMELEN);
|
||||
ptr += BINLOG_FNAMELEN;
|
||||
|
||||
if (!slave->nocrc)
|
||||
{
|
||||
/*
|
||||
* Now add the CRC to the fake binlog rotate event.
|
||||
*
|
||||
* The algorithm is first to compute the checksum of an empty buffer
|
||||
* and then the checksum of the event portion of the message, ie we do not
|
||||
* include the length, sequence number and ok byte that makes up the first
|
||||
* 5 bytes of the message. We also do not include the 4 byte checksum itself.
|
||||
*/
|
||||
chksum = crc32(0L, NULL, 0);
|
||||
chksum = crc32(chksum, GWBUF_DATA(resp) + 5, hdr.event_size - 4);
|
||||
encode_value(ptr, chksum, 32);
|
||||
}
|
||||
|
||||
rval = slave->dcb->func.write(slave->dcb, resp);
|
||||
|
||||
/* Send the FORMAT_DESCRIPTION_EVENT */
|
||||
if (router->saved_master.fde_event)
|
||||
{
|
||||
resp = gwbuf_alloc(router->saved_master.fde_len + 5);
|
||||
ptr = GWBUF_DATA(resp);
|
||||
encode_value(ptr, router->saved_master.fde_len + 1, 24); // Payload length
|
||||
ptr += 3;
|
||||
*ptr++ = slave->seqno++;
|
||||
*ptr++ = 0; // OK
|
||||
memcpy(ptr, router->saved_master.fde_event, router->saved_master.fde_len);
|
||||
encode_value(ptr, time(0), 32); // Overwrite timestamp
|
||||
/*
|
||||
* Since we have changed the timestamp we must recalculate the CRC
|
||||
*
|
||||
* Position ptr to the start of the event header,
|
||||
* calculate a new checksum
|
||||
* and write it into the header
|
||||
*/
|
||||
ptr = GWBUF_DATA(resp) + 5 + router->saved_master.fde_len - 4;
|
||||
chksum = crc32(0L, NULL, 0);
|
||||
chksum = crc32(chksum, GWBUF_DATA(resp) + 5, router->saved_master.fde_len - 4);
|
||||
encode_value(ptr, chksum, 32);
|
||||
rval = slave->dcb->func.write(slave->dcb, resp);
|
||||
}
|
||||
|
||||
slave->dcb->low_water = router->low_water;
|
||||
slave->dcb->high_water = router->high_water;
|
||||
dcb_add_callback(slave->dcb, DCB_REASON_LOW_WATER, blr_slave_callback, slave);
|
||||
dcb_add_callback(slave->dcb, DCB_REASON_DRAINED, blr_slave_callback, slave);
|
||||
|
||||
if (slave->binlog_pos != router->binlog_position ||
|
||||
strcmp(slave->binlogfile, router->binlog_name) != 0)
|
||||
{
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_UPTODATE;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
rval = blr_slave_catchup(router, slave);
|
||||
}
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a numeric field from a packet of the specified number of bits,
|
||||
* the number of bits must be a multiple of 8.
|
||||
*
|
||||
* @param src The raw packet source
|
||||
* @param bits The number of bits to extract (multiple of 8)
|
||||
* @return The extracted value
|
||||
*/
|
||||
static uint32_t
|
||||
extract_field(uint8_t *src, int bits)
|
||||
{
|
||||
uint32_t rval = 0, shift = 0;
|
||||
|
||||
while (bits > 0)
|
||||
{
|
||||
rval |= (*src++) << shift;
|
||||
shift += 8;
|
||||
bits -= 8;
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a value into a number of bits in a MySQL packet
|
||||
*
|
||||
* @param data Pointer to location in target packet
|
||||
* @param value The value to encode into the buffer
|
||||
* @param len Number of bits to encode value into
|
||||
*/
|
||||
static void
|
||||
encode_value(unsigned char *data, unsigned int value, int len)
|
||||
{
|
||||
while (len > 0)
|
||||
{
|
||||
*data++ = value & 0xff;
|
||||
value >>= 8;
|
||||
len -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Populate a header structure for a replication message from a GWBUF structure.
|
||||
*
|
||||
* @param pkt The incoming packet in a GWBUF chain
|
||||
* @param hdr The packet header to populate
|
||||
* @return A pointer to the first byte following the event header
|
||||
*/
|
||||
static uint8_t *
|
||||
blr_build_header(GWBUF *pkt, REP_HEADER *hdr)
|
||||
{
|
||||
uint8_t *ptr;
|
||||
|
||||
ptr = GWBUF_DATA(pkt);
|
||||
|
||||
encode_value(ptr, hdr->payload_len, 24);
|
||||
ptr += 3;
|
||||
*ptr++ = hdr->seqno;
|
||||
*ptr++ = hdr->ok;
|
||||
encode_value(ptr, hdr->timestamp, 32);
|
||||
ptr += 4;
|
||||
*ptr++ = hdr->event_type;
|
||||
encode_value(ptr, hdr->serverid, 32);
|
||||
ptr += 4;
|
||||
encode_value(ptr, hdr->event_size, 32);
|
||||
ptr += 4;
|
||||
encode_value(ptr, hdr->next_pos, 32);
|
||||
ptr += 4;
|
||||
encode_value(ptr, hdr->flags, 16);
|
||||
ptr += 2;
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* We have a registered slave that is behind the current leading edge of the
|
||||
* binlog. We must replay the log entries to bring this node up to speed.
|
||||
*
|
||||
* There may be a large numebr of records to send to the slave, the process
|
||||
* is triggered by the slave COM_BINLOG_DUMP message and all the events must
|
||||
* be sent without receiving any new event. This measn there is no trigger into
|
||||
* MaxScale other than this initial message. However, if we simply send all the
|
||||
* events we end up with an extremely long write queue on the DCB and risk running
|
||||
* the server out of resources.
|
||||
*
|
||||
* To resolve this the concept of high and low water marks within the DCB has been
|
||||
* added, with the ability for the DCB code to call user defined callbacks when the
|
||||
* write queue is completely drained, when it crosses above the high water mark and
|
||||
* when it crosses below the low water mark.
|
||||
*
|
||||
* The blr_slave_catchup routine will send binlog events to the slave until the high
|
||||
* water mark is reached, at which point it will return. Later, when a low water mark
|
||||
* callback is generated by the code that drains the DCB of data the blr_slave_catchup
|
||||
* routine will again be called to write more events. The process is repeated until
|
||||
* the slave has caught up with the master.
|
||||
*
|
||||
* Note: an additional check that the DCB is still above the low water mark is done
|
||||
* prior to the return from this function to allow for any delays due to the call to
|
||||
* the close system call, since this may cause thread rescheduling.
|
||||
*
|
||||
* @param router The binlog router
|
||||
* @param slave The slave that is behind
|
||||
* @return The number of bytes written
|
||||
*/
|
||||
int
|
||||
blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave)
|
||||
{
|
||||
GWBUF *head, *record;
|
||||
REP_HEADER hdr;
|
||||
int written, fd, rval = 1, burst = 0;
|
||||
uint8_t *ptr;
|
||||
struct timespec req;
|
||||
|
||||
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_EXPECTCB;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
doitagain:
|
||||
/*
|
||||
* We have a slightly complex syncronisation mechansim here,
|
||||
* we need to make sure that we do not have multiple threads
|
||||
* running the catchup loop, but we need to be very careful
|
||||
* that we do not loose a call that is coming via a callback
|
||||
* call as this will stall the binlog catchup process.
|
||||
*
|
||||
* We don't want to simply use a traditional mutex here for
|
||||
* the loop, since this would block a MaxScale thread for
|
||||
* an unacceptable length of time.
|
||||
*
|
||||
* We have two status bits, the CS_READING that says we are
|
||||
* in the outer loop and the CS_INNERLOOP, to say we are in
|
||||
* the inner loop.
|
||||
*
|
||||
* If just CS_READING is set the other thread may be about to
|
||||
* enter the inner loop or may be about to exit the function
|
||||
* completely. Therefore we have to wait to see if CS_READING
|
||||
* is cleared or CS_INNERLOOP is set.
|
||||
*
|
||||
* If CS_READING gets cleared then this thread should proceed
|
||||
* into the loop.
|
||||
*
|
||||
* If CS_INNERLOOP get's set then this thread does not need to
|
||||
* proceed.
|
||||
*
|
||||
* If CS_READING is not set then this thread simply enters the
|
||||
* loop.
|
||||
*/
|
||||
req.tv_sec = 0;
|
||||
req.tv_nsec = 1000;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
if (slave->cstate & CS_UPTODATE)
|
||||
{
|
||||
LOGIF(LM, (skygw_log_write(LOGFILE_MESSAGE,
|
||||
"blr_slave_catchup called with up to date slave %d at "
|
||||
"%s@%d. Reading position %s@%d\n",
|
||||
slave->serverid, slave->binlogfile,
|
||||
slave->binlog_pos, router->binlog_name,
|
||||
router->binlog_position)));
|
||||
slave->stats.n_alreadyupd++;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
return 1;
|
||||
}
|
||||
while (slave->cstate & CS_READING)
|
||||
{
|
||||
// Wait until we know what the other thread is doing
|
||||
while ((slave->cstate & (CS_READING|CS_INNERLOOP)) == CS_READING)
|
||||
{
|
||||
spinlock_release(&slave->catch_lock);
|
||||
nanosleep(&req, NULL);
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
}
|
||||
// Other thread is in the innerloop
|
||||
if ((slave->cstate & (CS_READING|CS_INNERLOOP)) == (CS_READING|CS_INNERLOOP))
|
||||
{
|
||||
spinlock_release(&slave->catch_lock);
|
||||
LOGIF(LM, (skygw_log_write(
|
||||
LOGFILE_MESSAGE,
|
||||
"blr_slave_catchup thread returning due to "
|
||||
"lock being held by another thread. %s@%d\n",
|
||||
slave->binlogfile,
|
||||
slave->binlog_pos)));
|
||||
slave->stats.n_catchupnr++;
|
||||
return 1; // We cheat here and return 1 because otherwise
|
||||
// an error would be sent and we do not want that
|
||||
}
|
||||
|
||||
/* Release the lock for a short time to allow the other
|
||||
* thread to exit the outer reading loop.
|
||||
*/
|
||||
spinlock_release(&slave->catch_lock);
|
||||
nanosleep(&req, NULL);
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
}
|
||||
if (slave->pthread)
|
||||
LOGIF(LD, (skygw_log_write(LOGFILE_DEBUG, "Multiple threads sending to same thread.\n")));
|
||||
slave->pthread = pthread_self();
|
||||
slave->cstate |= CS_READING;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
|
||||
if (DCB_ABOVE_HIGH_WATER(slave->dcb))
|
||||
LOGIF(LT, (skygw_log_write(LOGFILE_TRACE, "blr_slave_catchup above high water on entry.\n")));
|
||||
|
||||
do {
|
||||
if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
|
||||
{
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_READING;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"blr_slave_catchup failed to open binlog file %s\n",
|
||||
slave->binlogfile)));
|
||||
return 0;
|
||||
}
|
||||
atomic_add(&slave->stats.n_bursts, 1);
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate |= CS_INNERLOOP;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
while ((!DCB_ABOVE_HIGH_WATER(slave->dcb)) &&
|
||||
(record = blr_read_binlog(fd, slave->binlog_pos, &hdr)) != NULL)
|
||||
{
|
||||
if (hdr.event_size > DEF_HIGH_WATER) slave->stats.n_above++;
|
||||
head = gwbuf_alloc(5);
|
||||
ptr = GWBUF_DATA(head);
|
||||
encode_value(ptr, hdr.event_size + 1, 24);
|
||||
ptr += 3;
|
||||
*ptr++ = slave->seqno++;
|
||||
*ptr++ = 0; // OK
|
||||
head = gwbuf_append(head, record);
|
||||
if (hdr.event_type == ROTATE_EVENT)
|
||||
{
|
||||
close(fd);
|
||||
blr_slave_rotate(slave, GWBUF_DATA(record));
|
||||
if ((fd = blr_open_binlog(router, slave->binlogfile)) == -1)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(
|
||||
LOGFILE_ERROR,
|
||||
"blr_slave_catchup failed to open binlog file %s\n",
|
||||
slave->binlogfile)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
written = slave->dcb->func.write(slave->dcb, head);
|
||||
if (written && hdr.event_type != ROTATE_EVENT)
|
||||
{
|
||||
slave->binlog_pos = hdr.next_pos;
|
||||
}
|
||||
rval = written;
|
||||
atomic_add(&slave->stats.n_events, 1);
|
||||
burst++;
|
||||
}
|
||||
if (record == NULL)
|
||||
slave->stats.n_failed_read++;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate &= ~CS_INNERLOOP;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
|
||||
close(fd);
|
||||
} while (record && DCB_BELOW_LOW_WATER(slave->dcb));
|
||||
if (record)
|
||||
{
|
||||
atomic_add(&slave->stats.n_flows, 1);
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
slave->cstate |= CS_EXPECTCB;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
}
|
||||
else
|
||||
{
|
||||
int state_change = 0;
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
if ((slave->cstate & CS_UPTODATE) == 0)
|
||||
{
|
||||
atomic_add(&slave->stats.n_upd, 1);
|
||||
slave->cstate |= CS_UPTODATE;
|
||||
state_change = 1;
|
||||
}
|
||||
spinlock_release(&slave->catch_lock);
|
||||
if (state_change)
|
||||
LOGIF(LM, (skygw_log_write(LOGFILE_MESSAGE,
|
||||
"blr_slave_catchup slave is up to date %s, %u\n",
|
||||
slave->binlogfile, slave->binlog_pos)));
|
||||
}
|
||||
spinlock_acquire(&slave->catch_lock);
|
||||
#if 0
|
||||
if (slave->pthread != pthread_self())
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR, "Multple threads in catchup for same slave: %x and %x\n", slave->pthread, pthread_self())));
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
slave->pthread = 0;
|
||||
#if 0
|
||||
if (DCB_BELOW_LOW_WATER(slave->dcb) && slave->binlog_pos != router->binlog_position) abort();
|
||||
#endif
|
||||
slave->cstate &= ~CS_READING;
|
||||
spinlock_release(&slave->catch_lock);
|
||||
if (DCB_BELOW_LOW_WATER(slave->dcb) && slave->binlog_pos != router->binlog_position)
|
||||
{
|
||||
LOGIF(LE, (skygw_log_write(LOGFILE_ERROR, "Expected to be above low water\n")));
|
||||
goto doitagain;
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* The DCB callback used by the slave to obtain DCB_REASON_LOW_WATER callbacks
|
||||
* when the server sends all the the queue data for a DCB. This is the mechanism
|
||||
* that is used to implement the flow control mechanism for the sending of
|
||||
* large quantities of binlog records during the catchup process.
|
||||
*
|
||||
* @param dcb The DCB of the slave connection
|
||||
* @param reason The reason the callback was called
|
||||
* @param data The user data, in this case the server structure
|
||||
*/
|
||||
static int
|
||||
blr_slave_callback(DCB *dcb, DCB_REASON reason, void *data)
|
||||
{
|
||||
ROUTER_SLAVE *slave = (ROUTER_SLAVE *)data;
|
||||
ROUTER_INSTANCE *router = slave->router;
|
||||
|
||||
if (reason == DCB_REASON_DRAINED)
|
||||
{
|
||||
if (slave->state == BLRS_DUMPING &&
|
||||
slave->binlog_pos != router->binlog_position)
|
||||
{
|
||||
atomic_add(&slave->stats.n_dcb, 1);
|
||||
blr_slave_catchup(router, slave);
|
||||
}
|
||||
}
|
||||
|
||||
if (reason == DCB_REASON_LOW_WATER)
|
||||
{
|
||||
if (slave->state == BLRS_DUMPING)
|
||||
{
|
||||
atomic_add(&slave->stats.n_cb, 1);
|
||||
blr_slave_catchup(router, slave);
|
||||
}
|
||||
else
|
||||
{
|
||||
atomic_add(&slave->stats.n_cbna, 1);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Rotate the slave to the new binlog file
|
||||
*
|
||||
* @param slave The slave instance
|
||||
* @param ptr The rotate event (minux header and OK byte)
|
||||
*/
|
||||
void
|
||||
blr_slave_rotate(ROUTER_SLAVE *slave, uint8_t *ptr)
|
||||
{
|
||||
ptr += 19; // Skip header
|
||||
slave->binlog_pos = extract_field(ptr, 32);
|
||||
slave->binlog_pos += (extract_field(ptr+4, 32) << 32);
|
||||
memcpy(slave->binlogfile, ptr + 8, BINLOG_FNAMELEN);
|
||||
slave->binlogfile[BINLOG_FNAMELEN] = 0;
|
||||
}
|
@ -160,6 +160,10 @@ struct subcommand showoptions[] = {
|
||||
"Show all active sessions in MaxScale",
|
||||
"Show all active sessions in MaxScale",
|
||||
{0, 0, 0} },
|
||||
{ "threads", 0, dShowThreads,
|
||||
"Show the status of the polling threads in MaxScale",
|
||||
"Show the status of the polling threads in MaxScale",
|
||||
{0, 0, 0} },
|
||||
{ "users", 0, telnetdShowUsers,
|
||||
"Show statistics and user names for the debug interface",
|
||||
"Show statistics and user names for the debug interface",
|
||||
@ -208,6 +212,10 @@ struct subcommand listoptions[] = {
|
||||
"List all the active sessions within MaxScale",
|
||||
"List all the active sessions within MaxScale",
|
||||
{0, 0, 0} },
|
||||
{ "threads", 0, dShowThreads,
|
||||
"List the status of the polling threads in MaxScale",
|
||||
"List the status of the polling threads in MaxScale",
|
||||
{0, 0, 0} },
|
||||
{ NULL, 0, NULL, NULL, NULL,
|
||||
{0, 0, 0} }
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user