Abstract all row event processing

The code that handles the Avro files is now fully abstracted behind the
AvroConverter class that implements the RowEventHandler interface.

The code still has some Avro-specific behavior in a few places (parsing of
JSON files into TableCreate objects). This can be replaced, if needed, by
querying the master server for the CREATE TABLE statements.
This commit is contained in:
Markus Mäkelä
2018-06-07 17:52:53 +03:00
parent d094e93209
commit 7c18696608
10 changed files with 531 additions and 475 deletions

View File

@ -30,112 +30,6 @@
#include <strings.h>
#include <maxscale/alloc.h>
/**
 * @brief Map a MySQL column type to the name of a compatible Avro type
 *
 * Some of the chosen Avro types are wider than the MySQL type requires, but
 * since the Avro integer compression is quite efficient, the real loss in
 * performance is negligible.
 *
 * @param type MySQL column type (one of the TABLE_COL_TYPE_* values)
 * @return Name of the Avro type as a constant string; any type without an
 *         explicit mapping is stored as "string"
 */
static const char* column_type_to_avro_type(uint8_t type)
{
    // Everything that is not explicitly mapped below is stored as a string
    const char* avro_type = "string";

    switch (type)
    {
    case TABLE_COL_TYPE_TINY:
    case TABLE_COL_TYPE_SHORT:
    case TABLE_COL_TYPE_LONG:
    case TABLE_COL_TYPE_INT24:
    case TABLE_COL_TYPE_BIT:
        avro_type = "int";
        break;

    case TABLE_COL_TYPE_LONGLONG:
        avro_type = "long";
        break;

    case TABLE_COL_TYPE_FLOAT:
        avro_type = "float";
        break;

    case TABLE_COL_TYPE_DOUBLE:
    case TABLE_COL_TYPE_NEWDECIMAL:
        avro_type = "double";
        break;

    case TABLE_COL_TYPE_TINY_BLOB:
    case TABLE_COL_TYPE_MEDIUM_BLOB:
    case TABLE_COL_TYPE_LONG_BLOB:
    case TABLE_COL_TYPE_BLOB:
        avro_type = "bytes";
        break;

    case TABLE_COL_TYPE_NULL:
        avro_type = "null";
        break;

    default:
        break;
    }

    return avro_type;
}
/**
* @brief Create a new JSON Avro schema from the table map and create table abstractions
*
* The schema will always have a GTID field and all records contain the current
* GTID of the transaction.
* @param map TABLE_MAP for this table
* @param create The TABLE_CREATE for this table
* @return New schema or NULL if an error occurred
*/
char* json_new_schema_from_table(const STableMapEvent& map, const STableCreateEvent& create)
{
if (map->version != create->version)
{
MXS_ERROR("Version mismatch for table %s.%s. Table map version is %d and "
"the table definition version is %d.", map->database.c_str(),
map->table.c_str(), map->version, create->version);
ss_dassert(!true); // Should not happen
return NULL;
}
json_error_t err;
memset(&err, 0, sizeof(err));
json_t *schema = json_object();
json_object_set_new(schema, "namespace", json_string("MaxScaleChangeDataSchema.avro"));
json_object_set_new(schema, "type", json_string("record"));
json_object_set_new(schema, "name", json_string("ChangeRecord"));
json_t *array = json_array();
json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name",
avro_domain, "type", "int"));
json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name",
avro_server_id, "type", "int"));
json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name",
avro_sequence, "type", "int"));
json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name",
avro_event_number, "type", "int"));
json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s}", "name",
avro_timestamp, "type", "int"));
/** Enums and other complex types are defined with complete JSON objects
* instead of string values */
json_t *event_types = json_pack_ex(&err, 0, "{s:s, s:s, s:[s,s,s,s]}", "type", "enum",
"name", "EVENT_TYPES", "symbols", "insert",
"update_before", "update_after", "delete");
// Ownership of `event_types` is stolen when using the `o` format
json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:o}", "name", avro_event_type,
"type", event_types));
for (uint64_t i = 0; i < map->columns() && i < create->columns.size(); i++)
{
json_array_append_new(array, json_pack_ex(&err, 0, "{s:s, s:s, s:s, s:i}",
"name", create->columns[i].name.c_str(),
"type", column_type_to_avro_type(map->column_types[i]),
"real_type", create->columns[i].type.c_str(),
"length", create->columns[i].length));
}
json_object_set_new(schema, "fields", array);
char* rval = json_dumps(schema, JSON_PRESERVE_ORDER);
json_decref(schema);
return rval;
}
/**
* @brief Check whether the field is one that was generated by the avrorouter
*
@ -242,34 +136,6 @@ bool json_extract_field_names(const char* filename, std::vector<Column>& columns
return rval;
}
/**
* @brief Save the Avro schema of a table to disk
*
* @param path Schema directory
* @param schema Schema in JSON format
* @param map Table map that @p schema represents
*/
void save_avro_schema(const char *path, const char* schema, STableMapEvent& map, STableCreateEvent& create)
{
char filepath[PATH_MAX];
snprintf(filepath, sizeof(filepath), "%s/%s.%s.%06d.avsc", path,
map->database.c_str(), map->table.c_str(), map->version);
if (access(filepath, F_OK) != 0)
{
if (!create->was_used)
{
FILE *file = fopen(filepath, "wb");
if (file)
{
fprintf(file, "%s\n", schema);
create->was_used = true;
fclose(file);
}
}
}
}
/**
* Extract the table definition from a CREATE TABLE statement
* @param sql The SQL statement