Large events are now processed in chunks

The router->last_written is used to store the position where the last event was
written. The replication header is also stored in a separate structure in
the router which is used later when the last packet of a multi-packet event
arrives.
This commit is contained in:
Markus Makela 2016-02-08 17:19:55 +02:00
parent d3e1d4dd2f
commit ae33df3cbc
5 changed files with 115 additions and 77 deletions

View File

@ -198,6 +198,14 @@
#define MYSQL_ERROR_MSG(buf) ((uint8_t *)GWBUF_DATA(buf) + 7)
#define MYSQL_COMMAND(buf) (*((uint8_t *)GWBUF_DATA(buf) + 4))
/**
 * Read state of the event currently being received from the master.
 *
 * An event that does not fit into one MySQL packet arrives as several
 * packets; this state tracks how far the router is through such an event.
 */
enum blr_event_state
{
    BLR_EVENT_DONE = 0, /*< No event in progress; the next packet starts with a replication header */
    BLR_EVENT_STARTED,  /*< First packet of a multi-packet event seen; the OK byte has not been skipped yet */
    BLR_EVENT_ONGOING,  /*< At least one chunk has been written; later chunks carry no OK byte */
    BLR_EVENT_COMPLETE  /*< Last packet received; reset to BLR_EVENT_DONE once the event is processed */
};
/* Master Server configuration struct */
typedef struct master_server_config {
char *host;
@ -415,7 +423,8 @@ typedef struct router_instance {
SPINLOCK binlog_lock; /*< Lock to control update of the binlog position */
int trx_safe; /*< Detect and handle partial transactions */
int pending_transaction; /*< Pending transaction */
int pending_16mb; /*< Pending larger than 16mb transmission */
enum blr_event_state master_event_state; /*< Packet read state */
REP_HEADER stored_header; /*< Replication header of the event the master is sending */
uint64_t last_safe_pos; /* last committed transaction */
char binlog_name[BINLOG_FNAMELEN+1];
/*< Name of the current binlog file */
@ -426,7 +435,8 @@ typedef struct router_instance {
int binlog_fd; /*< File descriptor of the binlog
* file being written
*/
uint64_t last_written; /*< Position of last event written */
uint64_t last_written; /*< Position of the last write operation */
uint64_t last_event_pos; /*< Position of last event written */
uint64_t current_safe_event;
/*< Position of the latest safe event being sent to slaves */
char prevbinlog[BINLOG_FNAMELEN+1];
@ -557,7 +567,7 @@ extern int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool
extern void blr_init_cache(ROUTER_INSTANCE *);
extern int blr_file_init(ROUTER_INSTANCE *);
extern int blr_write_binlog_record(ROUTER_INSTANCE *, REP_HEADER *,uint8_t *);
extern int blr_write_binlog_record(ROUTER_INSTANCE *, REP_HEADER *, uint32_t pos, uint8_t *);
extern int blr_file_rotate(ROUTER_INSTANCE *, char *, uint64_t);
extern void blr_file_flush(ROUTER_INSTANCE *);
extern BLFILE *blr_open_binlog(ROUTER_INSTANCE *, char *);

View File

@ -80,6 +80,9 @@
#define GW_MYSQL_SCRAMBLE_SIZE 20
#define GW_SCRAMBLE_LENGTH_323 8
/** Maximum length of a MySQL packet */
#define MYSQL_PACKET_LENGTH_MAX 0x00ffffff
#ifndef MYSQL_SCRAMBLE_LEN
# define MYSQL_SCRAMBLE_LEN GW_MYSQL_SCRAMBLE_SIZE
#endif

View File

@ -498,6 +498,7 @@ char task_name[BLRM_TASK_NAME_LEN+1] = "";
inst->binlog_position = 0;
inst->current_pos = 0;
inst->current_safe_event = 0;
inst->master_event_state = BLR_EVENT_DONE;
strcpy(inst->binlog_name, "");
strcpy(inst->prevbinlog, "");

View File

@ -235,7 +235,8 @@ blr_file_create(ROUTER_INSTANCE *router, char *file)
router->current_pos = BINLOG_MAGIC_SIZE; /* Initial position after the magic number */
router->binlog_position = BINLOG_MAGIC_SIZE;
router->current_safe_event = BINLOG_MAGIC_SIZE;
router->last_written = 0;
router->last_written = BINLOG_MAGIC_SIZE;
router->last_event_pos = 0;
spinlock_release(&router->binlog_lock);
created = 1;
@ -296,7 +297,8 @@ int fd;
router->current_pos = BINLOG_MAGIC_SIZE;
router->binlog_position = BINLOG_MAGIC_SIZE;
router->current_safe_event = BINLOG_MAGIC_SIZE;
router->last_written = 0;
router->last_written = BINLOG_MAGIC_SIZE;
router->last_event_pos = 0;
} else {
MXS_ERROR("%s: Could not write magic to binlog file.", router->service->name);
}
@ -323,26 +325,27 @@ int fd;
* @return Return the number of bytes written
*/
int
blr_write_binlog_record(ROUTER_INSTANCE *router, REP_HEADER *hdr, uint32_t size, uint8_t *buf)
{
    /*
     * Writes `size` bytes from `buf` into the binlog file at the offset held
     * in router->last_written. `size` may be smaller than hdr->event_size
     * when earlier chunks of the same event were already written.
     *
     * Returns the number of bytes written, or 0 if the write failed or was
     * short; in that case the file is truncated back to router->last_written.
     */
    int n;

    if ((n = pwrite(router->binlog_fd, buf, size,
                    router->last_written)) != size)
    {
        char err_msg[STRERROR_BUFLEN];
        MXS_ERROR("%s: Failed to write binlog record at %lu of %s, %s. "
                  "Truncating to previous record.",
                  router->service->name, router->last_written,
                  router->binlog_name,
                  strerror_r(errno, err_msg, sizeof(err_msg)));
        /* Remove any partial event that was written */
        ftruncate(router->binlog_fd, router->last_written);
        return 0;
    }

    spinlock_acquire(&router->binlog_lock);
    router->current_pos = hdr->next_pos;
    /*
     * Accumulate the write offset: the next write must start where this one
     * ended. (`=+` would assign +size and reset the offset, so later writes
     * would overwrite earlier binlog data.)
     */
    router->last_written += size;
    /* Start position of the event that has just been completed */
    router->last_event_pos = hdr->next_pos - hdr->event_size;
    spinlock_release(&router->binlog_lock);
    return n;
}

View File

@ -99,7 +99,7 @@ extern char * blr_last_event_description(ROUTER_INSTANCE *router);
static void blr_log_identity(ROUTER_INSTANCE *router);
static void blr_distribute_error_message(ROUTER_INSTANCE *router, char *message, char *state, unsigned int err_code);
int blr_write_data_into_binlog(ROUTER_INSTANCE *router, uint32_t data_len, uint32_t pos, uint8_t *buf);
int blr_write_data_into_binlog(ROUTER_INSTANCE *router, uint32_t data_len, uint8_t *buf);
static int keepalive = 1;
@ -291,6 +291,7 @@ blr_master_close(ROUTER_INSTANCE *router)
{
dcb_close(router->master);
router->master_state = BLRM_UNCONNECTED;
router->master_event_state = BLR_EVENT_DONE;
}
/**
@ -937,7 +938,7 @@ uint32_t partialpos = 0;
}
else
{
if (!router->pending_16mb)
if (router->master_event_state == BLR_EVENT_DONE)
{
router->stats.n_binlogs++;
router->stats.n_binlogs_ses++;
@ -945,27 +946,27 @@ uint32_t partialpos = 0;
blr_extract_header(ptr, &hdr);
/* Sanity check */
if (hdr.ok == 0 && (hdr.event_size != len - 5))
if (hdr.ok == 0)
{
if ((hdr.event_size + 1) < 0x00ffffff)
if (hdr.event_size != len - 5 && (hdr.event_size + 1) < MYSQL_PACKET_LENGTH_MAX)
{
MXS_ERROR("Packet length is %d, but event size is %d, "
"binlog file %s position %lu "
"reslen is %d and preslen is %d, "
"length of previous event %d. %s",
len, hdr.event_size,
router->binlog_name,
router->current_pos,
reslen, preslen, prev_length,
(prev_length == -1 ?
(no_residual ? "No residual data from previous call" :
"Residual data from previous call") : ""));
len, hdr.event_size,
router->binlog_name,
router->current_pos,
reslen, preslen, prev_length,
(prev_length == -1 ?
(no_residual ? "No residual data from previous call" :
"Residual data from previous call") : ""));
blr_log_packet(LOG_ERR, "Packet:", ptr, len);
MXS_ERROR("This event (0x%x) was contained in %d GWBUFs, "
"the previous events was contained in %d GWBUFs",
router->lastEventReceived, n_bufs, pn_bufs);
"the previous events was contained in %d GWBUFs",
router->lastEventReceived, n_bufs, pn_bufs);
if (msg)
{
@ -975,16 +976,14 @@ uint32_t partialpos = 0;
break;
}
}
else
{
MXS_INFO("Transmission of event > 16MB");
else
{
MXS_INFO("Transmission of event > 16MB");
router->master_event_state = BLR_EVENT_STARTED;
spinlock_acquire(&router->binlog_lock);
router->pending_16mb = 1;
totalsize = hdr.event_size + 1;
partialpos = router->current_pos;
spinlock_release(&router->binlog_lock);
/** Store the header for later use */
memcpy(&router->stored_header, &hdr, sizeof(hdr));
}
}
if (hdr.ok == 0)
@ -1009,7 +1008,8 @@ uint32_t partialpos = 0;
* First check that the checksum we calculate matches the
* checksum in the packet we received.
*/
if (router->master_chksum && !router->pending_16mb)
if (router->master_chksum &&
router->master_event_state == BLR_EVENT_DONE)
{
uint32_t chksum, pktsum;
@ -1039,41 +1039,47 @@ uint32_t partialpos = 0;
}
}
/* pending 16 mb */
if (router->pending_16mb)
/* pending large event */
if (router->master_event_state != BLR_EVENT_DONE)
{
uint32_t data_len;
if (totalsize >= 0x00ffffff)
data_len = 0x00ffffff;
else
data_len = len-5;
/* pending 16 mb */
/* current partial event is being written to disk file */
if (blr_write_data_into_binlog(router, data_len, partialpos, ptr) == 0)
{
/*
* Failed to write to the
* binlog file, destroy the
* buffer chain and close the
* connection with the master
*/
while ((pkt = gwbuf_consume(pkt,
GWBUF_LENGTH(pkt))) != NULL);
blr_master_close(router);
blr_master_delayed_connect(router);
return;
}
partialpos += data_len;
if (data_len >= 0x00ffffff)
if (len < MYSQL_PACKET_LENGTH_MAX)
{
totalsize -= 0x00ffffff;
/** This is the last packet, we can now proceed to distribute
* the event */
ss_dassert(router->master_event_state != BLR_EVENT_COMPLETE);
router->master_event_state = BLR_EVENT_COMPLETE;
memcpy(&hdr, &router->stored_header, sizeof(hdr));
}
else
{
/* pending 16 mb */
/* current partial event is being written to disk file */
uint32_t offset = 4;
/** Don't write the OK byte into the binlog */
if (router->master_event_state == BLR_EVENT_STARTED)
{
offset++;
router->master_event_state = BLR_EVENT_ONGOING;
}
if (blr_write_data_into_binlog(router, len - offset, ptr + offset) == 0)
{
/*
* Failed to write to the
* binlog file, destroy the
* buffer chain and close the
* connection with the master
*/
while ((pkt = gwbuf_consume(pkt,
GWBUF_LENGTH(pkt))) != NULL);
blr_master_close(router);
blr_master_delayed_connect(router);
return;
}
pkt = gwbuf_consume(pkt, len);
pkt_length -= len;
continue;
}
else
{
router->pending_16mb = 0;
}
}
@ -1108,7 +1114,7 @@ uint32_t partialpos = 0;
* This marks the transaction starts instead of
* QUERY_EVENT with "BEGIN"
*/
if (router->trx_safe) {
if (router->trx_safe && router->master_event_state == BLR_EVENT_DONE) {
if (router->mariadb10_compat) {
if (hdr.event_type == MARIADB10_GTID_EVENT) {
uint64_t n_sequence;
@ -1256,8 +1262,16 @@ uint32_t partialpos = 0;
}
else if (hdr.flags != LOG_EVENT_ARTIFICIAL_F)
{
ptr = ptr + 5; // We don't put the first byte of the payload
// into the binlog file
ptr = ptr + 4; // Skip header
uint32_t offset = 4;
if (router->master_event_state == BLR_EVENT_STARTED ||
router->master_event_state == BLR_EVENT_DONE)
{
ptr++;
offset++;
}
if (hdr.event_type == ROTATE_EVENT)
{
spinlock_acquire(&router->binlog_lock);
@ -1266,7 +1280,7 @@ uint32_t partialpos = 0;
}
/* current event is being written to disk file */
if (blr_write_binlog_record(router, &hdr, ptr) == 0)
if (blr_write_binlog_record(router, &hdr, len - offset, ptr) == 0)
{
/*
* Failed to write to the
@ -1445,6 +1459,12 @@ uint32_t partialpos = 0;
}
}
}
/** A large event is now fully received and processed */
if(router->master_event_state == BLR_EVENT_COMPLETE)
{
router->master_event_state = BLR_EVENT_DONE;
}
}
else
{
@ -1699,7 +1719,7 @@ unsigned int cstate;
*/
slave_action = SLAVE_SEND_EVENT;
}
else if (slave->binlog_pos == router->last_written &&
else if (slave->binlog_pos == router->last_event_pos &&
(strcmp(slave->binlogfile, router->binlog_name) == 0 ||
(hdr->event_type == ROTATE_EVENT &&
strcmp(slave->binlogfile, router->prevbinlog))))
@ -2294,23 +2314,24 @@ ROUTER_SLAVE *slave;
}
/**
 * Write one chunk of a partially received event into the binlog file.
 *
 * The data is written at router->last_written, which is then advanced by
 * data_len so that the next chunk is appended directly after this one.
 *
 * @param router    The router instance
 * @param data_len  Number of bytes to write
 * @param buf       The data to write
 * @return Number of bytes written, or 0 if the write failed or was short
 */
int
blr_write_data_into_binlog(ROUTER_INSTANCE *router, uint32_t data_len, uint8_t *buf)
{
    int written = pwrite(router->binlog_fd, buf, data_len, router->last_written);

    if (written == data_len)
    {
        /* Whole chunk is on disk: move the write offset past it */
        router->last_written += data_len;
        return written;
    }

    char err_msg[STRERROR_BUFLEN];
    MXS_ERROR("%s: Failed to write binlog record at %lu of %s, %s. "
              "Truncating to previous record.",
              router->service->name, router->last_written,
              router->binlog_name,
              strerror_r(errno, err_msg, sizeof(err_msg)));

    /* Drop whatever part of this chunk reached the file */
    ftruncate(router->binlog_fd, router->last_written);
    return 0;
}