Improved query canonicalization

The query no longer needs to be parsed in order to be canonicalized; the
canonicalization is now done with regular expressions from the PCRE2 library.
The regular expressions were changed so that the function which replaces
unquoted literal values is called only once.
This commit is contained in:
Markus Makela 2016-01-06 15:17:28 +02:00
parent b01e8b2eec
commit 995ed8c9d2
5 changed files with 87 additions and 109 deletions

View File

@ -61,6 +61,7 @@
#include <string.h>
#include <stdarg.h>
#define MAX_QUERYBUF_SIZE 2048
typedef struct parsing_info_st
{
#if defined(SS_DEBUG)
@ -1427,107 +1428,27 @@ bool qc_query_has_clause(GWBUF* buf)
}
/*
* Replace user-provided literals with question marks. Return a copy of the
* querystr with replacements.
* Replace user-provided literals with question marks.
*
* @param querybuf GWBUF buffer including necessary parsing info
*
* @return Copy of querystr where literals are replaced with question marks or
* NULL if querystr is NULL, thread context or lex are NULL or if replacement
* function fails.
*
* Replaced literal types are STRING_ITEM,INT_ITEM,DECIMAL_ITEM,REAL_ITEM,
* VARBIN_ITEM,NULL_ITEM
* @param querybuf GWBUF with a COM_QUERY statement
* @return A copy of the query in its canonical form or NULL if an error occurred.
*/
char* qc_get_canonical(GWBUF* querybuf)
{
/* NOTE(review): this listing comes from a diff hunk; it appears to interleave
 * lines of the removed parser-based implementation with lines of the added
 * regex-based one, so it does not read as a single coherent body. */
parsing_info_t* pi;
MYSQL* mysql;
THD* thd;
LEX* lex;
Item* item;
char* querystr = NULL;
if (!querybuf)
char *querystr = NULL;
/* Only buffers longer than 5 bytes for which GWBUF_IS_SQL holds are handled. */
if (GWBUF_LENGTH(querybuf) > 5 && GWBUF_IS_SQL(querybuf))
{
goto retblock;
}
if (!ensure_query_is_parsed(querybuf))
{
goto retblock;
}
pi = (parsing_info_t *) gwbuf_get_buffer_object_data(querybuf, GWBUF_PARSING_INFO);
CHK_PARSING_INFO(pi);
if (pi == NULL)
{
goto retblock;
}
if (pi->pi_query_plain_str == NULL ||
(mysql = (MYSQL *) pi->pi_handle) == NULL ||
(thd = (THD *) mysql->thd) == NULL ||
(lex = thd->lex) == NULL)
{
ss_dassert(pi->pi_query_plain_str != NULL &&
mysql != NULL &&
thd != NULL &&
lex != NULL);
goto retblock;
}
querystr = strdup(pi->pi_query_plain_str);
for (item = thd->free_list; item != NULL; item = item->next)
{
Item::Type itype;
if (item->name == NULL)
/* Copy at most MAX_QUERYBUF_SIZE bytes, starting 5 bytes into the buffer
 * data, into a NUL-terminated stack buffer.
 * NOTE(review): longer statements look like they are silently truncated
 * here - confirm this is intended. */
const size_t bufsize = MIN(MAX_QUERYBUF_SIZE, GWBUF_LENGTH(querybuf) - 5);
char buffer[bufsize + 1];
memcpy(buffer, (uint8_t*) GWBUF_DATA(querybuf) + 5, bufsize);
buffer[bufsize] = '\0';
/* replace_quoted() runs first; replace_values() is then applied to its result. */
char* replaced = replace_quoted(buffer);
if (replaced == NULL || (querystr = replace_values(replaced)) == NULL)
{
continue;
}
itype = item->type();
if (itype == Item::STRING_ITEM)
{
String tokenstr;
String* res = item->val_str_ascii(&tokenstr);
if (res->is_empty()) /*< empty string */
{
querystr = replace_literal(querystr, "\"\"", "\"?\"");
}
else
{
querystr = replace_literal(querystr, res->ptr(), "?");
}
}
else if (itype == Item::INT_ITEM ||
itype == Item::DECIMAL_ITEM ||
itype == Item::REAL_ITEM ||
itype == Item::VARBIN_ITEM ||
itype == Item::NULL_ITEM)
{
querystr = replace_literal(querystr, item->name, "?");
}
} /*< for */
/** Check for SET ... options with no Item classes */
if (thd->free_list == NULL)
{
char *replaced = replace_quoted(querystr);
if (replaced)
{
free(querystr);
querystr = replaced;
querystr = NULL;
}
/* Release the string obtained from replace_quoted(). */
free(replaced);
}
retblock:
return querystr;
}

View File

@ -60,24 +60,26 @@ int main(int argc, char** argv)
while (!feof(infile))
{
fgets(readbuff,4092,infile);
char* nl = strchr(readbuff, '\n');
if(nl)
{
*nl = '\0';
}
psize = strlen(readbuff);
if (psize < 4092)
{
qbuff = gwbuf_alloc(psize + 7);
*(qbuff->sbuf->data + 0) = (unsigned char)psize;
*(qbuff->sbuf->data + 1) = (unsigned char)(psize>>8);
*(qbuff->sbuf->data + 2) = (unsigned char)(psize>>16);
*(qbuff->sbuf->data + 4) = 0x03;
memcpy(qbuff->start + 5,readbuff,psize + 1);
tok = qc_get_canonical(qbuff);
fprintf(outfile,"%s\n",tok);
free(tok);
gwbuf_free(qbuff);
}
qbuff = gwbuf_alloc(psize + 7);
*(qbuff->sbuf->data + 0) = (unsigned char)psize;
*(qbuff->sbuf->data + 1) = (unsigned char)(psize>>8);
*(qbuff->sbuf->data + 2) = (unsigned char)(psize>>16);
*(qbuff->sbuf->data + 4) = 0x03;
memcpy(qbuff->start + 5,readbuff,psize + 1);
tok = qc_get_canonical(qbuff);
fprintf(outfile,"%s\n",tok);
free(tok);
gwbuf_free(qbuff);
}
fclose(infile);
fclose(outfile);
mysql_library_end();
return 0;
return 0;
}

View File

@ -9,7 +9,7 @@ select * from tst where fname like '?';
select * from tst where lname like '?' order by fname;
insert into tst values ("?","?"),("?",?),("?","?");
drop table if exists tst;
create table tst(fname varchar(30), lname varchar(30));
create table tst(fname varchar(?), lname varchar(?));
update tst set lname="?" where fname like '?' or lname like '?';
delete from tst where lname like '?' and fname like '?';
select ? from tst where fname='?' or lname like '?';

View File

@ -2031,6 +2031,50 @@ void skygw_file_close(
}
}
/** Compiled form of replace_values_pattern; set in utils_init() and freed in utils_end(). */
static pcre2_code* replace_values_re = NULL;
/**
 * Case-insensitive pattern used by replace_values(). It has three capture groups:
 *   1. the text before the value: one of  - = , + * / (  or a whitespace
 *      character, or a word boundary (which captures nothing);
 *   2. the value itself: a run of digits and dots, or the word NULL;
 *   3. the text after the value: one of  - = , + * / ) ;  or a whitespace
 *      character, or the end of the subject.
 * Groups 1 and 3 are referenced by the "$1?$3" replacement string in
 * replace_values().
 */
static const PCRE2_SPTR replace_values_pattern = (PCRE2_SPTR) "(?i)([-=,+*/([:space:]]|\\b)([0-9.]+|NULL)([-=,+*/)[:space:];]|$)";
/**
 * Replace every literal number and NULL value with a question mark.
 *
 * The substitution is done with the precompiled replace_values_re pattern.
 * Each match is rewritten as "$1?$3": the text captured before and after the
 * value is kept and only the value itself is replaced.
 *
 * @param str String to modify
 * @return Pointer to a newly allocated, NUL-terminated modified string that
 * the caller must free, or NULL if str is NULL, the pattern has not been
 * compiled, memory allocation failed or the substitution failed
 */
char* replace_values(const char* str)
{
    static const PCRE2_SPTR replace = (PCRE2_SPTR) "$1?$3";

    if (str == NULL || replace_values_re == NULL)
    {
        return NULL;
    }

    const size_t orig_len = strlen(str);
    /* pcre2_substitute() also writes a terminating NUL, so reserve room for
     * it. This also keeps the capacity non-zero for empty input, which lets
     * the doubling below make progress. */
    size_t capacity = orig_len + 1;
    char* output = (char*) malloc(capacity);
    pcre2_match_data* mdata = NULL;

    if (output == NULL ||
        (mdata = pcre2_match_data_create_from_pattern(replace_values_re, NULL)) == NULL)
    {
        free(output);
        return NULL;
    }

    for (;;)
    {
        /* In/out argument: the buffer size on entry, the result length on
         * success. Its value after a failed call is not relied upon, which
         * is why the capacity is tracked separately. */
        size_t len = capacity;
        int rc = pcre2_substitute(replace_values_re, (PCRE2_SPTR) str, orig_len, 0,
                                  PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL,
                                  replace, PCRE2_ZERO_TERMINATED,
                                  (PCRE2_UCHAR8*) output, &len);

        if (rc >= 0)
        {
            /* Success: rc is the number of substitutions that were made. */
            break;
        }

        char* tmp = NULL;
        size_t grown = capacity * 2;

        /* Only a too-small buffer is worth retrying; any other error would
         * leave the buffer contents undefined. The comparison rejects a
         * size_t wrap-around of the doubled capacity. */
        if (rc == PCRE2_ERROR_NOMEMORY && grown > capacity)
        {
            tmp = (char*) realloc(output, grown);
        }

        if (tmp == NULL)
        {
            free(output);
            output = NULL;
            break;
        }

        output = tmp;
        capacity = grown;
    }

    pcre2_match_data_free(mdata);
    return output;
}
/**
* Find the given needle - user-provided literal - and replace it with
* replacement string. Separate user-provided literals from matching table names
@ -2123,7 +2167,7 @@ retblock:
}
static pcre2_code* replace_quoted_re = NULL;
static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR) "(['\"])[^'\"]+(['\"])";
static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR) "(['\"])[^'\"]*(['\"])";
/**
* Replace everything inside single or double quotes with question marks.
@ -2275,6 +2319,14 @@ bool utils_init()
rval = false;
}
ss_info_dassert(replace_values_re == NULL, "utils_init called multiple times");
replace_values_re = pcre2_compile(replace_values_pattern, PCRE2_ZERO_TERMINATED, 0, &errcore,
&erroffset, NULL);
if (replace_values_re == NULL)
{
rval = false;
}
return rval;
}
@ -2285,4 +2337,6 @@ void utils_end()
{
/* Free the compiled patterns and reset the pointers to NULL; utils_init()
 * asserts that replace_values_re is NULL when it runs. */
pcre2_code_free(replace_quoted_re);
replace_quoted_re = NULL;
pcre2_code_free(replace_values_re);
replace_values_re = NULL;
}

View File

@ -276,6 +276,7 @@ EXTERN_C_BLOCK_BEGIN
size_t get_decimal_len(size_t s);
/* Replace every literal number and NULL value in str with a question mark.
 * Returns a new string, or NULL on failure. */
char* replace_values(const char* str);
/* Find needle (a user-provided literal) in haystack and replace it with
 * replacement. NOTE(review): ownership of haystack and of the returned string
 * is not visible from this declaration - check the definition. */
char* replace_literal(char* haystack,
const char* needle,
const char* replacement);