Added removal of MySQL style comments to query canonicalization.

Also fixed minor issues with the canonicalization regular expressions.
This commit is contained in:
Markus Makela
2016-01-08 10:43:38 +02:00
parent c067bbe34a
commit e0c84e0fe3
3 changed files with 122 additions and 39 deletions

View File

@ -1443,6 +1443,11 @@ char* qc_get_canonical(GWBUF* querybuf)
memcpy(buffer, (uint8_t*) GWBUF_DATA(querybuf) + 5, bufsize); memcpy(buffer, (uint8_t*) GWBUF_DATA(querybuf) + 5, bufsize);
buffer[bufsize] = '\0'; buffer[bufsize] = '\0';
char* replaced = replace_quoted(buffer); char* replaced = replace_quoted(buffer);
if (replaced == NULL || (querystr = remove_mysql_comments(replaced)) == NULL)
{
querystr = NULL;
}
replaced = querystr;
if (replaced == NULL || (querystr = replace_values(replaced)) == NULL) if (replaced == NULL || (querystr = replace_values(replaced)) == NULL)
{ {
querystr = NULL; querystr = NULL;

View File

@ -2031,10 +2031,63 @@ void skygw_file_close(
} }
} }
static pcre2_code* remove_comments_re = NULL;
static const PCRE2_SPTR remove_comments_pattern = (PCRE2_SPTR)
"((--\\s.*)|(#.*))";
/**
 * Remove MySQL style end-of-line comments from a string
 *
 * Every match of remove_comments_pattern is replaced with an empty string,
 * i.e. a "--" followed by a whitespace character, or a "#", together with the
 * rest of the text matched after it. Inline comments are deliberately left in
 * place because they can alter the behavior of the query.
 *
 * The compiled pattern remove_comments_re must have been initialized by
 * utils_init() before this function is called.
 *
 * @param str String to process, it is not modified
 * @return Pointer to a newly allocated, null-terminated string which the caller
 * must free, or NULL if memory allocation or the substitution failed
 */
char* remove_mysql_comments(const char* str)
{
    static const PCRE2_SPTR replace = (PCRE2_SPTR) "";
    pcre2_match_data* mdata;
    size_t orig_len = strlen(str);
    size_t capacity;
    char* output;
    int rc;

    if (orig_len == 0)
    {
        /** Nothing to strip, hand back an empty copy */
        return strdup(str);
    }

    /** The replacement is empty so the result can never be longer than the
     * input; the extra byte is for the terminating null that
     * pcre2_substitute() writes and counts against the buffer size. */
    capacity = orig_len + 1;

    if ((output = (char*) malloc(capacity)) == NULL)
    {
        return NULL;
    }

    if ((mdata = pcre2_match_data_create_from_pattern(remove_comments_re, NULL)) == NULL)
    {
        free(output);
        return NULL;
    }

    for (;;)
    {
        char* tmp;

        /** The length argument is both input (buffer size) and output (result
         * length). On failure its value is not a usable size, so it is reset
         * from the separately tracked capacity before every attempt. */
        size_t len = capacity;

        rc = pcre2_substitute(remove_comments_re, (PCRE2_SPTR) str, orig_len, 0,
                              PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL,
                              replace, PCRE2_ZERO_TERMINATED,
                              (PCRE2_UCHAR8*) output, &len);

        if (rc != PCRE2_ERROR_NOMEMORY)
        {
            break;
        }

        /** Buffer too small: double it, guarding against size_t wrap-around */
        if (capacity > ((size_t) -1) / 2 ||
            (tmp = (char*) realloc(output, capacity * 2)) == NULL)
        {
            break;
        }

        output = tmp;
        capacity *= 2;
    }

    pcre2_match_data_free(mdata);

    if (rc < 0)
    {
        /** Any error leaves the buffer contents undefined, never return it */
        free(output);
        output = NULL;
    }

    return output;
}
static pcre2_code* replace_values_re = NULL; static pcre2_code* replace_values_re = NULL;
static const PCRE2_SPTR replace_values_pattern = (PCRE2_SPTR) "(?i)([-=,+*/([:space:]]|\\b|[@])" static const PCRE2_SPTR replace_values_pattern = (PCRE2_SPTR) "(?i)([-=,+*/([:space:]]|\\b|[@])"
"(?:[0-9.]+|(?<=[@])[a-z_]+|NULL)([-=,+*/)[:space:];]|$)"; "(?:[0-9.]+|(?<=[@])[a-z_]+|NULL)([-=,+*/)[:space:];]|$)";
/** /**
* Replace every literal number and NULL value with a question mark. * Replace every literal number and NULL value with a question mark.
@ -2047,31 +2100,38 @@ char* replace_values(const char* str)
pcre2_match_data* mdata; pcre2_match_data* mdata;
size_t orig_len = strlen(str); size_t orig_len = strlen(str);
size_t len = orig_len; size_t len = orig_len;
char* output; char* output = NULL;
if ((output = (char*) malloc(len * sizeof (char))) && if (len > 0)
(mdata = pcre2_match_data_create_from_pattern(replace_values_re, NULL)))
{ {
while (pcre2_substitute(replace_values_re, (PCRE2_SPTR) str, orig_len, 0, if ((output = (char*) malloc(len * sizeof (char))) &&
PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL, (mdata = pcre2_match_data_create_from_pattern(replace_values_re, NULL)))
replace, PCRE2_ZERO_TERMINATED,
(PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY)
{ {
char* tmp = (char*) realloc(output, len *= 2); while (pcre2_substitute(replace_values_re, (PCRE2_SPTR) str, orig_len, 0,
if (tmp == NULL) PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL,
replace, PCRE2_ZERO_TERMINATED,
(PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY)
{ {
free(output); char* tmp = (char*) realloc(output, len *= 2);
output = NULL; if (tmp == NULL)
break; {
free(output);
output = NULL;
break;
}
output = tmp;
} }
output = tmp; pcre2_match_data_free(mdata);
}
else
{
free(output);
output = NULL;
} }
pcre2_match_data_free(mdata);
} }
else else
{ {
free(output); output = strdup(str);
output = NULL;
} }
return output; return output;
} }
@ -2168,7 +2228,8 @@ retblock:
} }
static pcre2_code* replace_quoted_re = NULL; static pcre2_code* replace_quoted_re = NULL;
static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR) "(['\"])[^'\"]*(['\"])"; static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR)
"(((?>(?<=[\"]))[^\"]*(?>(?=[\"])))|((?>(?<=[']))[^']*(?>(?=[']))))";
/** /**
* Replace everything inside single or double quotes with question marks. * Replace everything inside single or double quotes with question marks.
@ -2177,35 +2238,41 @@ static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR) "(['\"])[^'\"]*(['
*/ */
char* replace_quoted(const char* str) char* replace_quoted(const char* str)
{ {
static const PCRE2_SPTR replace = (PCRE2_SPTR) "$1?$2"; static const PCRE2_SPTR replace = (PCRE2_SPTR) "?";
pcre2_match_data* mdata; pcre2_match_data* mdata;
size_t orig_len = strlen(str); size_t orig_len = strlen(str);
size_t len = orig_len; size_t len = orig_len;
char* output; char* output = NULL;
if (len > 0)
if ((output = (char*) malloc(len * sizeof (char))) &&
(mdata = pcre2_match_data_create_from_pattern(replace_quoted_re, NULL)))
{ {
while (pcre2_substitute(replace_quoted_re, (PCRE2_SPTR) str, orig_len, 0, if ((output = (char*) malloc(len * sizeof (char))) &&
PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL, (mdata = pcre2_match_data_create_from_pattern(replace_quoted_re, NULL)))
replace, PCRE2_ZERO_TERMINATED,
(PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY)
{ {
char* tmp = (char*) realloc(output, len *= 2); while (pcre2_substitute(replace_quoted_re, (PCRE2_SPTR) str, orig_len, 0,
if (tmp == NULL) PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL,
replace, PCRE2_ZERO_TERMINATED,
(PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY)
{ {
free(output); char* tmp = (char*) realloc(output, len *= 2);
output = NULL; if (tmp == NULL)
break; {
free(output);
output = NULL;
break;
}
output = tmp;
} }
output = tmp; pcre2_match_data_free(mdata);
}
else
{
free(output);
output = NULL;
} }
pcre2_match_data_free(mdata);
} }
else else
{ {
free(output); output = strdup(str);
output = NULL;
} }
return output; return output;
} }
@ -2310,10 +2377,18 @@ bool utils_init()
bool rval = true; bool rval = true;
PCRE2_SIZE erroffset; PCRE2_SIZE erroffset;
int errcore; int errcode;
ss_info_dassert(remove_comments_re == NULL, "utils_init called multiple times");
remove_comments_re = pcre2_compile(remove_comments_pattern, PCRE2_ZERO_TERMINATED, 0, &errcode,
&erroffset, NULL);
if (remove_comments_re == NULL)
{
rval = false;
}
ss_info_dassert(replace_quoted_re == NULL, "utils_init called multiple times"); ss_info_dassert(replace_quoted_re == NULL, "utils_init called multiple times");
replace_quoted_re = pcre2_compile(replace_quoted_pattern, PCRE2_ZERO_TERMINATED, 0, &errcore, replace_quoted_re = pcre2_compile(replace_quoted_pattern, PCRE2_ZERO_TERMINATED, 0, &errcode,
&erroffset, NULL); &erroffset, NULL);
if (replace_quoted_re == NULL) if (replace_quoted_re == NULL)
{ {
@ -2321,7 +2396,7 @@ bool utils_init()
} }
ss_info_dassert(replace_values_re == NULL, "utils_init called multiple times"); ss_info_dassert(replace_values_re == NULL, "utils_init called multiple times");
replace_values_re = pcre2_compile(replace_values_pattern, PCRE2_ZERO_TERMINATED, 0, &errcore, replace_values_re = pcre2_compile(replace_values_pattern, PCRE2_ZERO_TERMINATED, 0, &errcode,
&erroffset, NULL); &erroffset, NULL);
if (replace_values_re == NULL) if (replace_values_re == NULL)
{ {
@ -2336,6 +2411,8 @@ bool utils_init()
*/ */
void utils_end() void utils_end()
{ {
pcre2_code_free(remove_comments_re);
remove_comments_re = NULL;
pcre2_code_free(replace_quoted_re); pcre2_code_free(replace_quoted_re);
replace_quoted_re = NULL; replace_quoted_re = NULL;
pcre2_code_free(replace_values_re); pcre2_code_free(replace_values_re);

View File

@ -276,6 +276,7 @@ EXTERN_C_BLOCK_BEGIN
size_t get_decimal_len(size_t s); size_t get_decimal_len(size_t s);
char* remove_mysql_comments(const char* str);
char* replace_values(const char* str); char* replace_values(const char* str);
char* replace_literal(char* haystack, char* replace_literal(char* haystack,
const char* needle, const char* needle,