2018-01-30 16:28:11 +02:00

361 lines
13 KiB
C

#pragma once
#ifndef _MXS_AVRO_H
#define _MXS_AVRO_H
/*
* Copyright (c) 2016 MariaDB Corporation Ab
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file and at www.mariadb.com/bsl11.
*
* Change Date: 2020-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2 or later of the General
* Public License.
*/
/**
* MaxScale AVRO router
*
*/
#define MXS_MODULE_NAME "avrorouter"
#include <maxscale/cdefs.h>
#include <stdbool.h>
#include <stdint.h>
#include <blr_constants.h>
#include <maxscale/dcb.h>
#include <maxscale/service.h>
#include <maxscale/spinlock.h>
#include <maxscale/mysql_binlog.h>
#include <maxscale/users.h>
#include <avro.h>
#include <cdc.h>
#include <maxscale/pcre2.h>
#include <maxavro.h>
#include <binlog_common.h>
#include <maxscale/sqlite3.h>
#include <maxscale/protocol/mysql.h>
MXS_BEGIN_DECLS
/**
* How often to call the router status function (seconds)
*/
#define AVRO_STATS_FREQ 60
#define AVRO_NSTATS_MINUTES 30
/**
* Avro block grouping defaults
*/
#define AVRO_DEFAULT_BLOCK_TRX_COUNT 1
#define AVRO_DEFAULT_BLOCK_ROW_COUNT 1000
#define MAX_MAPPED_TABLES 1024
#define GTID_TABLE_NAME "gtid"
#define USED_TABLES_TABLE_NAME "used_tables"
#define MEMORY_DATABASE_NAME "memory"
#define MEMORY_TABLE_NAME MEMORY_DATABASE_NAME".mem_used_tables"
#define INDEX_TABLE_NAME "indexing_progress"
/** Name of the file where the binlog to Avro conversion progress is stored */
#define AVRO_PROGRESS_FILE "avro-conversion.ini"
/** Buffer limits */
#define AVRO_SQL_BUFFER_SIZE 2048
/** Avro filename maxlen */
#ifdef NAME_MAX
#define AVRO_MAX_FILENAME_LEN NAME_MAX
#else
#define AVRO_MAX_FILENAME_LEN 255
#endif
static char *avro_client_states[] = { "Unregistered", "Registered", "Processing", "Errored" };
static char *avro_client_client_mode[] = { "Catch-up", "Busy", "Wait_for_data" };
static const char *avro_domain = "domain";
static const char *avro_server_id = "server_id";
static const char *avro_sequence = "sequence";
static const char *avro_event_number = "event_number";
static const char *avro_event_type = "event_type";
static const char *avro_timestamp = "timestamp";
static char *avro_client_ouput[] = { "Undefined", "JSON", "Avro" };
static inline bool is_reserved_word(const char* word)
{
return strcasecmp(word, avro_domain) == 0 ||
strcasecmp(word, avro_server_id) == 0 ||
strcasecmp(word, avro_sequence) == 0 ||
strcasecmp(word, avro_event_number) == 0 ||
strcasecmp(word, avro_event_type) == 0 ||
strcasecmp(word, avro_timestamp) == 0;
}
static inline void fix_reserved_word(char *tok)
{
if (is_reserved_word(tok))
{
strcat(tok, "_");
}
}
/** How a binlog file is closed */
typedef enum avro_binlog_end
{
AVRO_OK = 0, /**< A newer binlog file exists with a rotate event to that file */
AVRO_LAST_FILE, /**< Last binlog which is closed */
AVRO_OPEN_TRANSACTION, /**< The binlog ends with an open transaction */
AVRO_BINLOG_ERROR /**< An error occurred while processing the binlog file */
} avro_binlog_end_t;
/** How many numbers each table version has (db.table.000001.avro) */
#define TABLE_MAP_VERSION_DIGITS 6
/** Maximum version number*/
#define TABLE_MAP_VERSION_MAX 999999
/** Maximum column name length */
#define TABLE_MAP_MAX_NAME_LEN 64
/** How many bytes each thread tries to send */
#define AVRO_DATA_BURST_SIZE (32 * 1024)
/** A CREATE TABLE abstraction */
typedef struct table_create
{
uint64_t columns;
char **column_names;
char **column_types;
int* column_lengths;
char *table;
char *database;
int version; /**< How many versions of this table have been used */
bool was_used; /**< Has this schema been persisted to disk */
} TABLE_CREATE;
/** A representation of a table map event read from a binary log. A table map
* maps a table to a unique ID which can be used to match row events to table map
* events. The table map event tells us how the table is laid out and gives us
* some meta information on the columns. */
typedef struct table_map
{
uint64_t id;
uint64_t columns;
uint16_t flags;
uint8_t *column_types;
uint8_t *null_bitmap;
uint8_t *column_metadata;
size_t column_metadata_size;
TABLE_CREATE *table_create; /*< The definition of the table */
int version;
char version_string[TABLE_MAP_VERSION_DIGITS + 1];
char *table;
char *database;
} TABLE_MAP;
/**
* The statistics for this AVRO router instance
*/
typedef struct
{
int n_clients; /*< Number slave sessions created */
int n_reads; /*< Number of record reads */
uint64_t n_binlogs; /*< Number of binlog records from master */
uint64_t n_rotates; /*< Number of binlog rotate events */
int n_masterstarts; /*< Number of times connection restarted */
time_t lastReply;
uint64_t events[MAX_EVENT_TYPE_END + 1]; /*< Per event counters */
uint64_t lastsample;
int minno;
int minavgs[AVRO_NSTATS_MINUTES];
} AVRO_ROUTER_STATS;
/**
* Client statistics
*/
typedef struct
{
int n_events; /*< Number of events sent */
unsigned long n_bytes; /*< Number of bytes sent */
int n_requests; /*< Number of requests received */
int n_queries; /*< Number of queries */
int n_failed_read;
uint64_t lastsample;
int minno;
int minavgs[AVRO_NSTATS_MINUTES];
} AVRO_CLIENT_STATS;
typedef struct avro_table_t
{
char* filename; /*< Absolute filename */
char* json_schema; /*< JSON representation of the schema */
avro_file_writer_t avro_file; /*< Current Avro data file */
avro_value_iface_t *avro_writer_iface; /*< Avro C API writer interface */
avro_schema_t avro_schema; /*< Native Avro schema of the table */
} AVRO_TABLE;
/** Data format used when streaming data to the clients */
enum avro_data_format
{
AVRO_FORMAT_UNDEFINED,
AVRO_FORMAT_JSON,
AVRO_FORMAT_AVRO,
};
enum mxs_avro_codec_type
{
MXS_AVRO_CODEC_NULL,
MXS_AVRO_CODEC_DEFLATE,
MXS_AVRO_CODEC_SNAPPY, /**< Not yet implemented */
} ;
typedef struct gtid_pos
{
uint32_t timestamp; /*< GTID event timestamp */
uint64_t domain; /*< Replication domain */
uint64_t server_id; /*< Server ID */
uint64_t seq; /*< Sequence number */
uint64_t event_num; /*< Subsequence number, increases monotonically. This
* is an internal representation of the position of
* an event inside a GTID event and it is used to
* rebuild GTID events in the correct order. */
} gtid_pos_t;
/**
* The client structure used within this router.
* This represents the clients that are requesting AVRO files from MaxScale.
*/
typedef struct avro_client
{
#if defined(SS_DEBUG)
skygw_chk_t rses_chk_top;
#endif
DCB *dcb; /*< The client DCB */
int state; /*< The state of this client */
enum avro_data_format format; /*< Stream JSON or Avro data */
char *uuid; /*< Client UUID */
SPINLOCK catch_lock; /*< Event catchup lock */
SPINLOCK file_lock; /*< Protects rses_deleted */
struct avro_instance *router; /*< Pointer to the owning router */
struct avro_client *next;
MAXAVRO_FILE *file_handle; /*< Current open file handle */
uint64_t last_sent_pos; /*< The last record we sent */
AVRO_CLIENT_STATS stats; /*< Slave statistics */
time_t connect_time; /*< Connect time of slave */
MAXAVRO_FILE avro_file; /*< Avro file struct */
char avro_binfile[AVRO_MAX_FILENAME_LEN + 1];
bool requested_gtid; /*< If the client requested */
gtid_pos_t gtid; /*< Current/requested GTID */
gtid_pos_t gtid_start; /*< First sent GTID */
unsigned int cstate; /*< Catch up state */
sqlite3 *sqlite_handle;
#if defined(SS_DEBUG)
skygw_chk_t rses_chk_tail;
#endif
} AVRO_CLIENT;
/**
* * The per instance data for the AVRO router.
* */
typedef struct avro_instance
{
SERVICE *service; /*< Pointer to the service using this router */
AVRO_CLIENT *clients; /*< Link list of all the CDC client connections */
SPINLOCK lock; /*< Spinlock for the instance data */
int initbinlog; /*< Initial binlog file number */
char *fileroot; /*< Root of binlog filename */
unsigned int state; /*< State of the AVRO router */
uint8_t lastEventReceived; /*< Last even received */
uint32_t lastEventTimestamp; /*< Timestamp from last event */
char *binlogdir; /*< The directory where the binlog files are stored */
char *avrodir; /*< The directory with the AVRO files */
char binlog_name[BINLOG_FNAMELEN + 1];
/*< Name of the current binlog file */
uint64_t binlog_position;
/*< last committed transaction position */
uint64_t current_pos;
/*< Current binlog position */
int binlog_fd; /*< File descriptor of the binlog file being read */
pcre2_code *create_table_re;
pcre2_code *alter_table_re;
uint8_t event_types;
uint8_t event_type_hdr_lens[MAX_EVENT_TYPE_END];
uint8_t binlog_checksum;
gtid_pos_t gtid;
TABLE_MAP *active_maps[MAX_MAPPED_TABLES];
HASHTABLE *table_maps;
HASHTABLE *open_tables;
HASHTABLE *created_tables;
sqlite3 *sqlite_handle;
char prevbinlog[BINLOG_FNAMELEN + 1];
int rotating; /*< Rotation in progress flag */
SPINLOCK fileslock; /*< Lock for the files queue above */
AVRO_ROUTER_STATS stats; /*< Statistics for this router */
int task_delay; /*< Delay in seconds until the next conversion takes place */
uint64_t trx_count; /*< Transactions processed */
uint64_t trx_target; /*< Minimum about of transactions that will trigger
* a flush of all tables */
uint64_t row_count; /*< Row events processed */
uint64_t row_target; /*< Minimum about of row events that will trigger
* a flush of all tables */
uint64_t block_size; /**< Avro datablock size */
enum mxs_avro_codec_type codec; /**< Avro codec type, defaults to `null` */
struct avro_instance *next;
} AVRO_INSTANCE;
extern void read_table_info(uint8_t *ptr, uint8_t post_header_len, uint64_t *table_id,
char* dest, size_t len);
extern TABLE_MAP *table_map_alloc(uint8_t *ptr, uint8_t hdr_len, TABLE_CREATE* create);
extern void table_map_free(TABLE_MAP *map);
extern TABLE_CREATE* table_create_alloc(const char* ident, const char* sql, int len);
extern TABLE_CREATE* table_create_copy(AVRO_INSTANCE *router, const char* sql, size_t len, const char* db);
extern void table_create_free(TABLE_CREATE* value);
extern bool table_create_save(TABLE_CREATE *create, const char *filename);
extern bool table_create_alter(TABLE_CREATE *create, const char *sql, const char *end);
extern void read_table_identifier(const char* db, const char *sql, const char *end, char *dest, int size);
extern int avro_client_handle_request(AVRO_INSTANCE *, AVRO_CLIENT *, GWBUF *);
extern void avro_client_rotate(AVRO_INSTANCE *router, AVRO_CLIENT *client, uint8_t *ptr);
extern bool avro_open_binlog(const char *binlogdir, const char *file, int *fd);
extern void avro_close_binlog(int fd);
extern avro_binlog_end_t avro_read_all_events(AVRO_INSTANCE *router);
extern AVRO_TABLE* avro_table_alloc(const char* filepath, const char* json_schema,
const char *codec, size_t block_size);
extern void avro_table_free(AVRO_TABLE *table);
extern char* json_new_schema_from_table(TABLE_MAP *map);
extern void save_avro_schema(const char *path, const char* schema, TABLE_MAP *map);
extern bool handle_table_map_event(AVRO_INSTANCE *router, REP_HEADER *hdr, uint8_t *ptr);
extern bool handle_row_event(AVRO_INSTANCE *router, REP_HEADER *hdr, uint8_t *ptr);
enum avrorouter_file_op
{
AVROROUTER_SYNC,
AVROROUTER_FLUSH
};
/**
* @brief Flush or sync all tables
*
* @param router Router instance
* @param flush AVROROUTER_SYNC for sync only or AVROROUTER_FLUSH for full flush
*/
extern void avro_flush_all_tables(AVRO_INSTANCE *router, enum avrorouter_file_op flush);
#define AVRO_CLIENT_UNREGISTERED 0x0000
#define AVRO_CLIENT_REGISTERED 0x0001
#define AVRO_CLIENT_REQUEST_DATA 0x0002
#define AVRO_CLIENT_ERRORED 0x0003
#define AVRO_CLIENT_MAXSTATE 0x0003
/**
* Client catch-up status
*/
#define AVRO_CS_BUSY 0x0001
#define AVRO_WAIT_DATA 0x0002
MXS_END_DECLS
#endif