Improved query canonicalization

Canonicalizing a query no longer requires the query to be parsed; the
canonicalization is now done with the PCRE2 library. The regular expressions
were changed so that the function which replaces unquoted literal values is
called only once.
This commit is contained in:
Markus Makela
2016-01-06 15:17:28 +02:00
parent b01e8b2eec
commit 995ed8c9d2
5 changed files with 87 additions and 109 deletions

View File

@ -61,6 +61,7 @@
#include <string.h>
#include <stdarg.h>
#define MAX_QUERYBUF_SIZE 2048
typedef struct parsing_info_st
{
#if defined(SS_DEBUG)
@ -1427,107 +1428,27 @@ bool qc_query_has_clause(GWBUF* buf)
}
/*
* Replace user-provided literals with question marks. Return a copy of the
* querystr with replacements.
* Replace user-provided literals with question marks.
*
* @param querybuf GWBUF buffer including necessary parsing info
*
* @return Copy of querystr where literals are replaced with question marks or
* NULL if querystr is NULL, thread context or lex are NULL or if replacement
* function fails.
*
* Replaced literal types are STRING_ITEM,INT_ITEM,DECIMAL_ITEM,REAL_ITEM,
* VARBIN_ITEM,NULL_ITEM
* @param querybuf GWBUF with a COM_QUERY statement
* @return A copy of the query in its canonical form or NULL if an error occurred.
*/
char* qc_get_canonical(GWBUF* querybuf)
{
parsing_info_t* pi;
MYSQL* mysql;
THD* thd;
LEX* lex;
Item* item;
char* querystr = NULL;
if (!querybuf)
char *querystr = NULL;
if (GWBUF_LENGTH(querybuf) > 5 && GWBUF_IS_SQL(querybuf))
{
goto retblock;
}
if (!ensure_query_is_parsed(querybuf))
{
goto retblock;
}
pi = (parsing_info_t *) gwbuf_get_buffer_object_data(querybuf, GWBUF_PARSING_INFO);
CHK_PARSING_INFO(pi);
if (pi == NULL)
{
goto retblock;
}
if (pi->pi_query_plain_str == NULL ||
(mysql = (MYSQL *) pi->pi_handle) == NULL ||
(thd = (THD *) mysql->thd) == NULL ||
(lex = thd->lex) == NULL)
{
ss_dassert(pi->pi_query_plain_str != NULL &&
mysql != NULL &&
thd != NULL &&
lex != NULL);
goto retblock;
}
querystr = strdup(pi->pi_query_plain_str);
for (item = thd->free_list; item != NULL; item = item->next)
{
Item::Type itype;
if (item->name == NULL)
const size_t bufsize = MIN(MAX_QUERYBUF_SIZE, GWBUF_LENGTH(querybuf) - 5);
char buffer[bufsize + 1];
memcpy(buffer, (uint8_t*) GWBUF_DATA(querybuf) + 5, bufsize);
buffer[bufsize] = '\0';
char* replaced = replace_quoted(buffer);
if (replaced == NULL || (querystr = replace_values(replaced)) == NULL)
{
continue;
}
itype = item->type();
if (itype == Item::STRING_ITEM)
{
String tokenstr;
String* res = item->val_str_ascii(&tokenstr);
if (res->is_empty()) /*< empty string */
{
querystr = replace_literal(querystr, "\"\"", "\"?\"");
}
else
{
querystr = replace_literal(querystr, res->ptr(), "?");
}
}
else if (itype == Item::INT_ITEM ||
itype == Item::DECIMAL_ITEM ||
itype == Item::REAL_ITEM ||
itype == Item::VARBIN_ITEM ||
itype == Item::NULL_ITEM)
{
querystr = replace_literal(querystr, item->name, "?");
}
} /*< for */
/** Check for SET ... options with no Item classes */
if (thd->free_list == NULL)
{
char *replaced = replace_quoted(querystr);
if (replaced)
{
free(querystr);
querystr = replaced;
querystr = NULL;
}
free(replaced);
}
retblock:
return querystr;
}

View File

@ -60,24 +60,26 @@ int main(int argc, char** argv)
while (!feof(infile))
{
fgets(readbuff,4092,infile);
char* nl = strchr(readbuff, '\n');
if(nl)
{
*nl = '\0';
}
psize = strlen(readbuff);
if (psize < 4092)
{
qbuff = gwbuf_alloc(psize + 7);
*(qbuff->sbuf->data + 0) = (unsigned char)psize;
*(qbuff->sbuf->data + 1) = (unsigned char)(psize>>8);
*(qbuff->sbuf->data + 2) = (unsigned char)(psize>>16);
*(qbuff->sbuf->data + 4) = 0x03;
memcpy(qbuff->start + 5,readbuff,psize + 1);
tok = qc_get_canonical(qbuff);
fprintf(outfile,"%s\n",tok);
free(tok);
gwbuf_free(qbuff);
}
qbuff = gwbuf_alloc(psize + 7);
*(qbuff->sbuf->data + 0) = (unsigned char)psize;
*(qbuff->sbuf->data + 1) = (unsigned char)(psize>>8);
*(qbuff->sbuf->data + 2) = (unsigned char)(psize>>16);
*(qbuff->sbuf->data + 4) = 0x03;
memcpy(qbuff->start + 5,readbuff,psize + 1);
tok = qc_get_canonical(qbuff);
fprintf(outfile,"%s\n",tok);
free(tok);
gwbuf_free(qbuff);
}
fclose(infile);
fclose(outfile);
mysql_library_end();
return 0;
return 0;
}

View File

@ -9,7 +9,7 @@ select * from tst where fname like '?';
select * from tst where lname like '?' order by fname;
insert into tst values ("?","?"),("?",?),("?","?");
drop table if exists tst;
create table tst(fname varchar(30), lname varchar(30));
create table tst(fname varchar(?), lname varchar(?));
update tst set lname="?" where fname like '?' or lname like '?';
delete from tst where lname like '?' and fname like '?';
select ? from tst where fname='?' or lname like '?';