Improve UTF-8 handling in avrorouter

The json_stringn function should be used instead of json_string so that
strings containing null characters, as well as strings that are not
null-terminated, can be embedded in the JSON values.

The CDC example Python programs now decode the data as UTF-8 instead of
ASCII.
This commit is contained in:
Markus Mäkelä 2017-03-15 11:38:12 +02:00
parent 67590d59ac
commit 3396741c21
6 changed files with 11 additions and 8 deletions

View File

@ -103,7 +103,7 @@ uint64_t avro_length_integer(uint64_t val)
*
* @see maxavro_get_error
*/
char* maxavro_read_string(MAXAVRO_FILE* file)
char* maxavro_read_string(MAXAVRO_FILE* file, size_t* size)
{
char *key = NULL;
uint64_t len;
@ -117,6 +117,7 @@ char* maxavro_read_string(MAXAVRO_FILE* file)
if (nread == len)
{
key[len] = '\0';
*size = len;
}
else
{
@ -261,8 +262,9 @@ MAXAVRO_MAP* maxavro_map_read(MAXAVRO_FILE *file)
{
for (long i = 0; i < blocks; i++)
{
size_t size;
MAXAVRO_MAP* val = calloc(1, sizeof(MAXAVRO_MAP));
if (val && (val->key = maxavro_read_string(file)) && (val->value = maxavro_read_string(file)))
if (val && (val->key = maxavro_read_string(file, &size)) && (val->value = maxavro_read_string(file, &size)))
{
val->next = rval;
rval = val;

View File

@ -139,7 +139,7 @@ bool maxavro_datablock_add_double(MAXAVRO_DATABLOCK *file, double val);
/** Reading primitives */
bool maxavro_read_integer(MAXAVRO_FILE *file, uint64_t *val);
char* maxavro_read_string(MAXAVRO_FILE *file);
char* maxavro_read_string(MAXAVRO_FILE *file, size_t *size);
bool maxavro_skip_string(MAXAVRO_FILE* file);
bool maxavro_read_float(MAXAVRO_FILE *file, float *dest);
bool maxavro_read_double(MAXAVRO_FILE *file, double *dest);

View File

@ -99,10 +99,11 @@ static json_t* read_and_pack_value(MAXAVRO_FILE *file, MAXAVRO_SCHEMA_FIELD *fie
case MAXAVRO_TYPE_BYTES:
case MAXAVRO_TYPE_STRING:
{
char *str = maxavro_read_string(file);
size_t len;
char *str = maxavro_read_string(file, &len);
if (str)
{
value = json_string(str);
value = json_stringn(str, len);
free(str);
}
}

View File

@ -37,7 +37,7 @@ def read_json():
rbuf += buf
while True:
rbuf = rbuf.lstrip()
data = decoder.raw_decode(rbuf.decode('ascii'))
data = decoder.raw_decode(rbuf.decode('utf_8'))
rbuf = rbuf[data[1]:]
print(json.dumps(data[0]))
except ValueError as err:

View File

@ -45,7 +45,7 @@ while True:
while True:
rbuf = rbuf.lstrip()
data = decoder.raw_decode(rbuf.decode('ascii'))
data = decoder.raw_decode(rbuf.decode('utf_8'))
rbuf = rbuf[data[1]:]
producer.send(topic=opts.kafka_topic, value=json.dumps(data[0]).encode())
producer.flush()

View File

@ -52,4 +52,4 @@ else:
sock.send(bytes("QUERY-LAST-TRANSACTION".encode()))
response = sock.recv(1024)
print(response.decode('ascii'))
print(response.decode('utf_8'))