Large events are now processed in chunks

router->last_written now stores the position of the last write operation,
and the new router->last_event_pos stores the position of the last event
written. The replication header is also stored in a separate structure in
the router, which is used later when the last packet of a multi-packet event
arrives.
This commit is contained in:
Markus Makela
2016-02-08 17:19:55 +02:00
parent d3e1d4dd2f
commit ae33df3cbc
5 changed files with 115 additions and 77 deletions

View File

@ -198,6 +198,14 @@
#define MYSQL_ERROR_MSG(buf) ((uint8_t *)GWBUF_DATA(buf) + 7) #define MYSQL_ERROR_MSG(buf) ((uint8_t *)GWBUF_DATA(buf) + 7)
#define MYSQL_COMMAND(buf) (*((uint8_t *)GWBUF_DATA(buf) + 4)) #define MYSQL_COMMAND(buf) (*((uint8_t *)GWBUF_DATA(buf) + 4))
/**
 * Progress of the event currently being received from the master.
 * Used to track events that span more than one MySQL packet.
 */
enum blr_event_state
{
    BLR_EVENT_DONE     = 0, /*< No multi-packet event in progress (initial state) */
    BLR_EVENT_STARTED  = 1, /*< First packet of a large event seen, header stored */
    BLR_EVENT_ONGOING  = 2, /*< At least one chunk of the large event was written */
    BLR_EVENT_COMPLETE = 3  /*< Last packet received; reset to DONE once processed */
};
/* Master Server configuration struct */ /* Master Server configuration struct */
typedef struct master_server_config { typedef struct master_server_config {
char *host; char *host;
@ -415,7 +423,8 @@ typedef struct router_instance {
SPINLOCK binlog_lock; /*< Lock to control update of the binlog position */ SPINLOCK binlog_lock; /*< Lock to control update of the binlog position */
int trx_safe; /*< Detect and handle partial transactions */ int trx_safe; /*< Detect and handle partial transactions */
int pending_transaction; /*< Pending transaction */ int pending_transaction; /*< Pending transaction */
int pending_16mb; /*< Pending larger than 16mb transmission */ enum blr_event_state master_event_state; /*< Packet read state */
REP_HEADER stored_header; /*< Relication header of the event the master is sending */
uint64_t last_safe_pos; /* last committed transaction */ uint64_t last_safe_pos; /* last committed transaction */
char binlog_name[BINLOG_FNAMELEN+1]; char binlog_name[BINLOG_FNAMELEN+1];
/*< Name of the current binlog file */ /*< Name of the current binlog file */
@ -426,7 +435,8 @@ typedef struct router_instance {
int binlog_fd; /*< File descriptor of the binlog int binlog_fd; /*< File descriptor of the binlog
* file being written * file being written
*/ */
uint64_t last_written; /*< Position of last event written */ uint64_t last_written; /*< Position of the last write operation */
uint64_t last_event_pos; /*< Position of last event written */
uint64_t current_safe_event; uint64_t current_safe_event;
/*< Position of the latest safe event being sent to slaves */ /*< Position of the latest safe event being sent to slaves */
char prevbinlog[BINLOG_FNAMELEN+1]; char prevbinlog[BINLOG_FNAMELEN+1];
@ -557,7 +567,7 @@ extern int blr_slave_catchup(ROUTER_INSTANCE *router, ROUTER_SLAVE *slave, bool
extern void blr_init_cache(ROUTER_INSTANCE *); extern void blr_init_cache(ROUTER_INSTANCE *);
extern int blr_file_init(ROUTER_INSTANCE *); extern int blr_file_init(ROUTER_INSTANCE *);
extern int blr_write_binlog_record(ROUTER_INSTANCE *, REP_HEADER *,uint8_t *); extern int blr_write_binlog_record(ROUTER_INSTANCE *, REP_HEADER *, uint32_t pos, uint8_t *);
extern int blr_file_rotate(ROUTER_INSTANCE *, char *, uint64_t); extern int blr_file_rotate(ROUTER_INSTANCE *, char *, uint64_t);
extern void blr_file_flush(ROUTER_INSTANCE *); extern void blr_file_flush(ROUTER_INSTANCE *);
extern BLFILE *blr_open_binlog(ROUTER_INSTANCE *, char *); extern BLFILE *blr_open_binlog(ROUTER_INSTANCE *, char *);

View File

@ -80,6 +80,9 @@
#define GW_MYSQL_SCRAMBLE_SIZE 20 #define GW_MYSQL_SCRAMBLE_SIZE 20
#define GW_SCRAMBLE_LENGTH_323 8 #define GW_SCRAMBLE_LENGTH_323 8
/**
 * Maximum length of a MySQL packet: 0x00ffffff (16777215) bytes.
 * While a large event is pending, the binlog router treats a packet
 * shorter than this value as the last packet of that event.
 */
#define MYSQL_PACKET_LENGTH_MAX 0x00ffffff
#ifndef MYSQL_SCRAMBLE_LEN #ifndef MYSQL_SCRAMBLE_LEN
# define MYSQL_SCRAMBLE_LEN GW_MYSQL_SCRAMBLE_SIZE # define MYSQL_SCRAMBLE_LEN GW_MYSQL_SCRAMBLE_SIZE
#endif #endif

View File

@ -498,6 +498,7 @@ char task_name[BLRM_TASK_NAME_LEN+1] = "";
inst->binlog_position = 0; inst->binlog_position = 0;
inst->current_pos = 0; inst->current_pos = 0;
inst->current_safe_event = 0; inst->current_safe_event = 0;
inst->master_event_state = BLR_EVENT_DONE;
strcpy(inst->binlog_name, ""); strcpy(inst->binlog_name, "");
strcpy(inst->prevbinlog, ""); strcpy(inst->prevbinlog, "");

View File

@ -235,7 +235,8 @@ blr_file_create(ROUTER_INSTANCE *router, char *file)
router->current_pos = BINLOG_MAGIC_SIZE; /* Initial position after the magic number */ router->current_pos = BINLOG_MAGIC_SIZE; /* Initial position after the magic number */
router->binlog_position = BINLOG_MAGIC_SIZE; router->binlog_position = BINLOG_MAGIC_SIZE;
router->current_safe_event = BINLOG_MAGIC_SIZE; router->current_safe_event = BINLOG_MAGIC_SIZE;
router->last_written = 0; router->last_written = BINLOG_MAGIC_SIZE;
router->last_event_pos = 0;
spinlock_release(&router->binlog_lock); spinlock_release(&router->binlog_lock);
created = 1; created = 1;
@ -296,7 +297,8 @@ int fd;
router->current_pos = BINLOG_MAGIC_SIZE; router->current_pos = BINLOG_MAGIC_SIZE;
router->binlog_position = BINLOG_MAGIC_SIZE; router->binlog_position = BINLOG_MAGIC_SIZE;
router->current_safe_event = BINLOG_MAGIC_SIZE; router->current_safe_event = BINLOG_MAGIC_SIZE;
router->last_written = 0; router->last_written = BINLOG_MAGIC_SIZE;
router->last_event_pos = 0;
} else { } else {
MXS_ERROR("%s: Could not write magic to binlog file.", router->service->name); MXS_ERROR("%s: Could not write magic to binlog file.", router->service->name);
} }
@ -323,26 +325,27 @@ int fd;
* @return Return the number of bytes written * @return Return the number of bytes written
*/ */
int int
blr_write_binlog_record(ROUTER_INSTANCE *router, REP_HEADER *hdr, uint8_t *buf) blr_write_binlog_record(ROUTER_INSTANCE *router, REP_HEADER *hdr, uint32_t size, uint8_t *buf)
{ {
int n; int n;
if ((n = pwrite(router->binlog_fd, buf, hdr->event_size, if ((n = pwrite(router->binlog_fd, buf, size,
hdr->next_pos - hdr->event_size)) != hdr->event_size) router->last_written)) != size)
{ {
char err_msg[STRERROR_BUFLEN]; char err_msg[STRERROR_BUFLEN];
MXS_ERROR("%s: Failed to write binlog record at %d of %s, %s. " MXS_ERROR("%s: Failed to write binlog record at %lu of %s, %s. "
"Truncating to previous record.", "Truncating to previous record.",
router->service->name, hdr->next_pos - hdr->event_size, router->service->name, router->last_written,
router->binlog_name, router->binlog_name,
strerror_r(errno, err_msg, sizeof(err_msg))); strerror_r(errno, err_msg, sizeof(err_msg)));
/* Remove any partual event that was written */ /* Remove any partual event that was written */
ftruncate(router->binlog_fd, hdr->next_pos - hdr->event_size); ftruncate(router->binlog_fd, router->last_written);
return 0; return 0;
} }
spinlock_acquire(&router->binlog_lock); spinlock_acquire(&router->binlog_lock);
router->current_pos = hdr->next_pos; router->current_pos = hdr->next_pos;
router->last_written = hdr->next_pos - hdr->event_size; router->last_written =+ size;
router->last_event_pos = hdr->next_pos - hdr->event_size;
spinlock_release(&router->binlog_lock); spinlock_release(&router->binlog_lock);
return n; return n;
} }

View File

@ -99,7 +99,7 @@ extern char * blr_last_event_description(ROUTER_INSTANCE *router);
static void blr_log_identity(ROUTER_INSTANCE *router); static void blr_log_identity(ROUTER_INSTANCE *router);
static void blr_distribute_error_message(ROUTER_INSTANCE *router, char *message, char *state, unsigned int err_code); static void blr_distribute_error_message(ROUTER_INSTANCE *router, char *message, char *state, unsigned int err_code);
int blr_write_data_into_binlog(ROUTER_INSTANCE *router, uint32_t data_len, uint32_t pos, uint8_t *buf); int blr_write_data_into_binlog(ROUTER_INSTANCE *router, uint32_t data_len, uint8_t *buf);
static int keepalive = 1; static int keepalive = 1;
@ -291,6 +291,7 @@ blr_master_close(ROUTER_INSTANCE *router)
{ {
dcb_close(router->master); dcb_close(router->master);
router->master_state = BLRM_UNCONNECTED; router->master_state = BLRM_UNCONNECTED;
router->master_event_state = BLR_EVENT_DONE;
} }
/** /**
@ -937,7 +938,7 @@ uint32_t partialpos = 0;
} }
else else
{ {
if (!router->pending_16mb) if (router->master_event_state == BLR_EVENT_DONE)
{ {
router->stats.n_binlogs++; router->stats.n_binlogs++;
router->stats.n_binlogs_ses++; router->stats.n_binlogs_ses++;
@ -945,27 +946,27 @@ uint32_t partialpos = 0;
blr_extract_header(ptr, &hdr); blr_extract_header(ptr, &hdr);
/* Sanity check */ /* Sanity check */
if (hdr.ok == 0 && (hdr.event_size != len - 5)) if (hdr.ok == 0)
{ {
if ((hdr.event_size + 1) < 0x00ffffff) if (hdr.event_size != len - 5 && (hdr.event_size + 1) < MYSQL_PACKET_LENGTH_MAX)
{ {
MXS_ERROR("Packet length is %d, but event size is %d, " MXS_ERROR("Packet length is %d, but event size is %d, "
"binlog file %s position %lu " "binlog file %s position %lu "
"reslen is %d and preslen is %d, " "reslen is %d and preslen is %d, "
"length of previous event %d. %s", "length of previous event %d. %s",
len, hdr.event_size, len, hdr.event_size,
router->binlog_name, router->binlog_name,
router->current_pos, router->current_pos,
reslen, preslen, prev_length, reslen, preslen, prev_length,
(prev_length == -1 ? (prev_length == -1 ?
(no_residual ? "No residual data from previous call" : (no_residual ? "No residual data from previous call" :
"Residual data from previous call") : "")); "Residual data from previous call") : ""));
blr_log_packet(LOG_ERR, "Packet:", ptr, len); blr_log_packet(LOG_ERR, "Packet:", ptr, len);
MXS_ERROR("This event (0x%x) was contained in %d GWBUFs, " MXS_ERROR("This event (0x%x) was contained in %d GWBUFs, "
"the previous events was contained in %d GWBUFs", "the previous events was contained in %d GWBUFs",
router->lastEventReceived, n_bufs, pn_bufs); router->lastEventReceived, n_bufs, pn_bufs);
if (msg) if (msg)
{ {
@ -975,16 +976,14 @@ uint32_t partialpos = 0;
break; break;
} }
} else
else {
{ MXS_INFO("Transmission of event > 16MB");
MXS_INFO("Transmission of event > 16MB"); router->master_event_state = BLR_EVENT_STARTED;
spinlock_acquire(&router->binlog_lock); /** Store the header for later use */
router->pending_16mb = 1; memcpy(&router->stored_header, &hdr, sizeof(hdr));
totalsize = hdr.event_size + 1; }
partialpos = router->current_pos;
spinlock_release(&router->binlog_lock);
} }
if (hdr.ok == 0) if (hdr.ok == 0)
@ -1009,7 +1008,8 @@ uint32_t partialpos = 0;
* First check that the checksum we calculate matches the * First check that the checksum we calculate matches the
* checksum in the packet we received. * checksum in the packet we received.
*/ */
if (router->master_chksum && !router->pending_16mb) if (router->master_chksum &&
router->master_event_state == BLR_EVENT_DONE)
{ {
uint32_t chksum, pktsum; uint32_t chksum, pktsum;
@ -1039,41 +1039,47 @@ uint32_t partialpos = 0;
} }
} }
/* pending 16 mb */ /* pending large event */
if (router->pending_16mb) if (router->master_event_state != BLR_EVENT_DONE)
{ {
uint32_t data_len; if (len < MYSQL_PACKET_LENGTH_MAX)
if (totalsize >= 0x00ffffff)
data_len = 0x00ffffff;
else
data_len = len-5;
/* pending 16 mb */
/* current partial event is being written to disk file */
if (blr_write_data_into_binlog(router, data_len, partialpos, ptr) == 0)
{
/*
* Failed to write to the
* binlog file, destroy the
* buffer chain and close the
* connection with the master
*/
while ((pkt = gwbuf_consume(pkt,
GWBUF_LENGTH(pkt))) != NULL);
blr_master_close(router);
blr_master_delayed_connect(router);
return;
}
partialpos += data_len;
if (data_len >= 0x00ffffff)
{ {
totalsize -= 0x00ffffff; /** This is the last packet, we can now proceed to distribute
* the event */
ss_dassert(router->master_event_state != BLR_EVENT_COMPLETE);
router->master_event_state = BLR_EVENT_COMPLETE;
memcpy(&hdr, &router->stored_header, sizeof(hdr));
}
else
{
/* pending 16 mb */
/* current partial event is being written to disk file */
uint32_t offset = 4;
/** Don't write the OK byte into the binlog */
if (router->master_event_state == BLR_EVENT_STARTED)
{
offset++;
router->master_event_state = BLR_EVENT_ONGOING;
}
if (blr_write_data_into_binlog(router, len - offset, ptr + offset) == 0)
{
/*
* Failed to write to the
* binlog file, destroy the
* buffer chain and close the
* connection with the master
*/
while ((pkt = gwbuf_consume(pkt,
GWBUF_LENGTH(pkt))) != NULL);
blr_master_close(router);
blr_master_delayed_connect(router);
return;
}
pkt = gwbuf_consume(pkt, len);
pkt_length -= len;
continue; continue;
}
else
{
router->pending_16mb = 0;
} }
} }
@ -1108,7 +1114,7 @@ uint32_t partialpos = 0;
* This marks the transaction starts instead of * This marks the transaction starts instead of
* QUERY_EVENT with "BEGIN" * QUERY_EVENT with "BEGIN"
*/ */
if (router->trx_safe) { if (router->trx_safe && router->master_event_state == BLR_EVENT_DONE) {
if (router->mariadb10_compat) { if (router->mariadb10_compat) {
if (hdr.event_type == MARIADB10_GTID_EVENT) { if (hdr.event_type == MARIADB10_GTID_EVENT) {
uint64_t n_sequence; uint64_t n_sequence;
@ -1256,8 +1262,16 @@ uint32_t partialpos = 0;
} }
else if (hdr.flags != LOG_EVENT_ARTIFICIAL_F) else if (hdr.flags != LOG_EVENT_ARTIFICIAL_F)
{ {
ptr = ptr + 5; // We don't put the first byte of the payload ptr = ptr + 4; // Skip header
// into the binlog file uint32_t offset = 4;
if (router->master_event_state == BLR_EVENT_STARTED ||
router->master_event_state == BLR_EVENT_DONE)
{
ptr++;
offset++;
}
if (hdr.event_type == ROTATE_EVENT) if (hdr.event_type == ROTATE_EVENT)
{ {
spinlock_acquire(&router->binlog_lock); spinlock_acquire(&router->binlog_lock);
@ -1266,7 +1280,7 @@ uint32_t partialpos = 0;
} }
/* current event is being written to disk file */ /* current event is being written to disk file */
if (blr_write_binlog_record(router, &hdr, ptr) == 0) if (blr_write_binlog_record(router, &hdr, len - offset, ptr) == 0)
{ {
/* /*
* Failed to write to the * Failed to write to the
@ -1445,6 +1459,12 @@ uint32_t partialpos = 0;
} }
} }
} }
/** A large event is now fully received and processed */
if(router->master_event_state == BLR_EVENT_COMPLETE)
{
router->master_event_state = BLR_EVENT_DONE;
}
} }
else else
{ {
@ -1699,7 +1719,7 @@ unsigned int cstate;
*/ */
slave_action = SLAVE_SEND_EVENT; slave_action = SLAVE_SEND_EVENT;
} }
else if (slave->binlog_pos == router->last_written && else if (slave->binlog_pos == router->last_event_pos &&
(strcmp(slave->binlogfile, router->binlog_name) == 0 || (strcmp(slave->binlogfile, router->binlog_name) == 0 ||
(hdr->event_type == ROTATE_EVENT && (hdr->event_type == ROTATE_EVENT &&
strcmp(slave->binlogfile, router->prevbinlog)))) strcmp(slave->binlogfile, router->prevbinlog))))
@ -2294,23 +2314,24 @@ ROUTER_SLAVE *slave;
} }
int
blr_write_data_into_binlog(ROUTER_INSTANCE *router, uint32_t data_len, uint8_t *buf)
{
    /*
     * Write one chunk of a partially received event at the current write
     * offset. Returns the number of bytes written, or 0 on failure.
     */
    int written = pwrite(router->binlog_fd, buf, data_len, router->last_written);

    if (written == data_len)
    {
        /* Chunk stored: the next write continues right after it */
        router->last_written += data_len;
        return written;
    }

    char err_msg[STRERROR_BUFLEN];
    MXS_ERROR("%s: Failed to write binlog record at %lu of %s, %s. "
              "Truncating to previous record.",
              router->service->name, router->last_written,
              router->binlog_name,
              strerror_r(errno, err_msg, sizeof(err_msg)));
    /* Cut the file back to the offset where this write began */
    ftruncate(router->binlog_fd, router->last_written);
    return 0;
}