diff --git a/query_classifier/query_classifier.cc b/query_classifier/query_classifier.cc index 99672f38f..e5abb2e38 100644 --- a/query_classifier/query_classifier.cc +++ b/query_classifier/query_classifier.cc @@ -61,6 +61,7 @@ #include #include +#define MAX_QUERYBUF_SIZE 2048 typedef struct parsing_info_st { #if defined(SS_DEBUG) @@ -1427,107 +1428,33 @@ bool qc_query_has_clause(GWBUF* buf) } /* - * Replace user-provided literals with question marks. Return a copy of the - * querystr with replacements. + * Replace user-provided literals with question marks. * - * @param querybuf GWBUF buffer including necessary parsing info - * - * @return Copy of querystr where literals are replaces with question marks or - * NULL if querystr is NULL, thread context or lex are NULL or if replacement - * function fails. - * - * Replaced literal types are STRING_ITEM,INT_ITEM,DECIMAL_ITEM,REAL_ITEM, - * VARBIN_ITEM,NULL_ITEM + * At most MAX_QUERYBUF_SIZE bytes of the statement are processed, longer + * statements are truncated. + * + * @param querybuf GWBUF with a COM_QUERY statement, NULL is tolerated + * @return A copy of the query in its canonical form or NULL if an error occurred. + * The returned string is heap allocated and must be freed by the caller. 
*/ char* qc_get_canonical(GWBUF* querybuf) { - parsing_info_t* pi; - MYSQL* mysql; - THD* thd; - LEX* lex; - Item* item; - char* querystr = NULL; - - if (!querybuf) + char *querystr = NULL; + if (querybuf != NULL && GWBUF_LENGTH(querybuf) > 5 && GWBUF_IS_SQL(querybuf)) { - goto retblock; - } - - if (!ensure_query_is_parsed(querybuf)) - { - goto retblock; - } - - pi = (parsing_info_t *) gwbuf_get_buffer_object_data(querybuf, GWBUF_PARSING_INFO); - CHK_PARSING_INFO(pi); - - if (pi == NULL) - { - goto retblock; - } - - if (pi->pi_query_plain_str == NULL || - (mysql = (MYSQL *) pi->pi_handle) == NULL || - (thd = (THD *) mysql->thd) == NULL || - (lex = thd->lex) == NULL) - { - ss_dassert(pi->pi_query_plain_str != NULL && - mysql != NULL && - thd != NULL && - lex != NULL); - goto retblock; - } - - querystr = strdup(pi->pi_query_plain_str); - - for (item = thd->free_list; item != NULL; item = item->next) - { - Item::Type itype; - - if (item->name == NULL) + /* The SQL text starts at offset 5, at most MAX_QUERYBUF_SIZE bytes of it are copied */ + const size_t bufsize = MIN(MAX_QUERYBUF_SIZE, GWBUF_LENGTH(querybuf) - 5); + char buffer[bufsize + 1]; + memcpy(buffer, (uint8_t*) GWBUF_DATA(querybuf) + 5, bufsize); + buffer[bufsize] = '\0'; + /* Quoted strings are replaced first, numbers and NULLs after that */ + char* replaced = replace_quoted(buffer); + if (replaced == NULL || (querystr = replace_values(replaced)) == NULL) { - continue; - } - - itype = item->type(); - - if (itype == Item::STRING_ITEM) - { - String tokenstr; - String* res = item->val_str_ascii(&tokenstr); - - if (res->is_empty()) /*< empty string */ - { - querystr = replace_literal(querystr, "\"\"", "\"?\""); - } - else - { - querystr = replace_literal(querystr, res->ptr(), "?"); - } - } - else if (itype == Item::INT_ITEM || - itype == Item::DECIMAL_ITEM || - itype == Item::REAL_ITEM || - itype == Item::VARBIN_ITEM || - itype == Item::NULL_ITEM) - { - querystr = replace_literal(querystr, item->name, "?"); - } - } /*< for */ - - /** Check for SET ... 
options with no Item classes */ - if (thd->free_list == NULL) - { - char *replaced = replace_quoted(querystr); - - if (replaced) - { - free(querystr); - querystr = replaced; + querystr = NULL; } + free(replaced); } - -retblock: return querystr; } diff --git a/query_classifier/test/canonical_tests/canonizer.c b/query_classifier/test/canonical_tests/canonizer.c index cdf469595..da06c998d 100644 --- a/query_classifier/test/canonical_tests/canonizer.c +++ b/query_classifier/test/canonical_tests/canonizer.c @@ -60,24 +60,26 @@ int main(int argc, char** argv) while (!feof(infile)) { fgets(readbuff,4092,infile); + char* nl = strchr(readbuff, '\n'); + if(nl) + { + *nl = '\0'; + } psize = strlen(readbuff); - if (psize < 4092) - { - qbuff = gwbuf_alloc(psize + 7); - *(qbuff->sbuf->data + 0) = (unsigned char)psize; - *(qbuff->sbuf->data + 1) = (unsigned char)(psize>>8); - *(qbuff->sbuf->data + 2) = (unsigned char)(psize>>16); - *(qbuff->sbuf->data + 4) = 0x03; - memcpy(qbuff->start + 5,readbuff,psize + 1); - tok = qc_get_canonical(qbuff); - fprintf(outfile,"%s\n",tok); - free(tok); - gwbuf_free(qbuff); - } + qbuff = gwbuf_alloc(psize + 7); + *(qbuff->sbuf->data + 0) = (unsigned char)psize; + *(qbuff->sbuf->data + 1) = (unsigned char)(psize>>8); + *(qbuff->sbuf->data + 2) = (unsigned char)(psize>>16); + *(qbuff->sbuf->data + 4) = 0x03; + memcpy(qbuff->start + 5,readbuff,psize + 1); + tok = qc_get_canonical(qbuff); + fprintf(outfile,"%s\n",tok); + free(tok); + gwbuf_free(qbuff); } fclose(infile); fclose(outfile); mysql_library_end(); - return 0; + return 0; } diff --git a/query_classifier/test/canonical_tests/expected.sql b/query_classifier/test/canonical_tests/expected.sql index 7303d585e..b0a712096 100644 --- a/query_classifier/test/canonical_tests/expected.sql +++ b/query_classifier/test/canonical_tests/expected.sql @@ -9,7 +9,7 @@ select * from tst where fname like '?'; select * from tst where lname like '?' 
order by fname; insert into tst values ("?","?"),("?",?),("?","?"); drop table if exists tst; -create table tst(fname varchar(30), lname varchar(30)); +create table tst(fname varchar(?), lname varchar(?)); update tst set lname="?" where fname like '?' or lname like '?'; delete from tst where lname like '?' and fname like '?'; select ? from tst where fname='?' or lname like '?'; diff --git a/utils/skygw_utils.cc b/utils/skygw_utils.cc index 0c59b585d..56dc04d61 100644 --- a/utils/skygw_utils.cc +++ b/utils/skygw_utils.cc @@ -2031,6 +2031,67 @@ void skygw_file_close( } } + +static pcre2_code* replace_values_re = NULL; +static const PCRE2_SPTR replace_values_pattern = (PCRE2_SPTR) "(?i)([-=,+*/([:space:]]|\\b)([0-9.]+|NULL)([-=,+*/)[:space:];]|$)"; + +/** + * Replace every literal number and NULL value with a question mark. + * + * The output buffer capacity is tracked separately from the length passed to + * pcre2_substitute(), because PCRE2 overwrites that length when the call fails. + * + * @param str String to modify + * @return Pointer to a new malloc'ed string which the caller must free, or + * NULL if memory allocation or the substitution failed + */ +char* replace_values(const char* str) +{ + static const PCRE2_SPTR replace = (PCRE2_SPTR) "$1?$3"; + pcre2_match_data* mdata; + size_t orig_len = strlen(str); + /* One extra byte for the terminating zero written by pcre2_substitute() */ + size_t size = orig_len + 1; + char* output; + + if ((output = (char*) malloc(size * sizeof (char))) && + (mdata = pcre2_match_data_create_from_pattern(replace_values_re, NULL))) + { + int rc; + size_t len = size; + + while ((rc = pcre2_substitute(replace_values_re, (PCRE2_SPTR) str, orig_len, 0, + PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL, + replace, PCRE2_ZERO_TERMINATED, + (PCRE2_UCHAR8*) output, &len)) == PCRE2_ERROR_NOMEMORY) + { + char* tmp = (char*) realloc(output, size *= 2); + if (tmp == NULL) + { + free(output); + output = NULL; + break; + } + output = tmp; + /* The failed call clobbered len, reset it to the new capacity */ + len = size; + } + if (output != NULL && rc < 0) + { + /* Any other error leaves the buffer contents undefined */ + free(output); + output = NULL; + } + pcre2_match_data_free(mdata); + } + else + { + free(output); + output = NULL; + } + return output; +} + /** * Find the given needle - user-provided literal - and replace it with * replacement string. 
Separate user-provided literals from matching table names @@ -2123,7 +2184,7 @@ retblock: } static pcre2_code* replace_quoted_re = NULL; -static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR) "(['\"])[^'\"]+(['\"])"; +static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR) "(['\"])[^'\"]*(['\"])"; /** * Replace everything inside single or double quotes with question marks. @@ -2275,6 +2336,14 @@ bool utils_init() rval = false; } + ss_info_dassert(replace_values_re == NULL, "utils_init called multiple times"); + replace_values_re = pcre2_compile(replace_values_pattern, PCRE2_ZERO_TERMINATED, 0, &errcore, + &erroffset, NULL); + if (replace_values_re == NULL) + { + rval = false; + } + return rval; } @@ -2285,4 +2354,6 @@ void utils_end() { pcre2_code_free(replace_quoted_re); replace_quoted_re = NULL; + pcre2_code_free(replace_values_re); + replace_values_re = NULL; } diff --git a/utils/skygw_utils.h b/utils/skygw_utils.h index 68a0f9384..ddffc4237 100644 --- a/utils/skygw_utils.h +++ b/utils/skygw_utils.h @@ -276,6 +276,7 @@ EXTERN_C_BLOCK_BEGIN size_t get_decimal_len(size_t s); +char* replace_values(const char* str); char* replace_literal(char* haystack, const char* needle, const char* replacement);