// Patch summary (qc_sqlite parse.y) -- review annotations placed ahead of the diff header;
// they are not part of any hunk.
//
// Hunk at -621: adds CHARSET_NAME_KW to a token list; the TODO comment in the same hunk
//   talks about tokens in this list being converted into identifiers automatically.
//   Presumably this is the grammar's fallback-to-ID list; the directive itself lies
//   outside the hunk -- TODO confirm.
// Hunk at -1908: adds the rule `term ::= CHARSET_NAME_KW STRING`. Its action calls
//   spanExpr() with the token code of X (the charset keyword) and the token Y (the string).
//   NOTE(review): the two sibling term rules pass the code and the token of the same
//   symbol; here the code comes from X and the token from Y. Confirm that @X rather
//   than @Y is intended.
//   NOTE(review): no %ifdef MAXSCALE guard is visible around the new rule, although the
//   rule touched by the next hunk sits under one -- confirm whether a guard is needed.
// Hunk at -1927: the MAXSCALE-only rule `expr ::= id INTEGER` now requires
//   CHARSET_NAME_KW in place of id.
//   NOTE(review): the unchanged comment inside that rule still states that any identifier
//   followed by an integer is interpreted as a string, which no longer matches the rule.
diff --git a/query_classifier/qc_sqlite/sqlite-src-3110100/src/parse.y b/query_classifier/qc_sqlite/sqlite-src-3110100/src/parse.y index 008ae939a..d7a248609 100644 --- a/query_classifier/qc_sqlite/sqlite-src-3110100/src/parse.y +++ b/query_classifier/qc_sqlite/sqlite-src-3110100/src/parse.y @@ -621,7 +621,7 @@ columnid(A) ::= nm(X). { // TODO: BINARY is a reserved word and should not automatically convert into an identifer. // TODO: However, if not here then rules such as CAST need to be modified. BINARY - CACHE /*CASCADE*/ CAST CLOSE COLUMNKW COLUMNS COMMENT CONCURRENT /*CONFLICT*/ CONNECTION + CACHE /*CASCADE*/ CAST CHARSET_NAME_KW CLOSE COLUMNKW COLUMNS COMMENT CONCURRENT /*CONFLICT*/ CONNECTION DATA DATABASE DEALLOCATE DEFERRED /*DESC*/ /*DETACH*/ DUMPFILE /*EACH*/ END ENGINE ENUM EXCLUSIVE /*EXPLAIN*/ EXTENDED FIELDS FIRST FLUSH /*FOR*/ FORMAT @@ -1908,6 +1908,7 @@ expr(A) ::= nm(X) DOT nm(Y) DOT nm(Z). { } term(A) ::= INTEGER|FLOAT|BLOB(X). {spanExpr(&A, pParse, @X, &X);} term(A) ::= STRING(X). {spanExpr(&A, pParse, @X, &X);} +term(A) ::= CHARSET_NAME_KW(X) STRING(Y). {spanExpr(&A, pParse, @X, &Y);} expr(A) ::= VARIABLE(X). { if( X.n>=2 && X.z[0]=='#' && sqlite3Isdigit(X.z[1]) ){ /* When doing a nested parse, one can include terms in an expression @@ -1927,7 +1928,7 @@ expr(A) ::= VARIABLE(X). { spanSet(&A, &X, &X); } %ifdef MAXSCALE -expr(A) ::= id(X) INTEGER(Y). { +expr(A) ::= CHARSET_NAME_KW(X) INTEGER(Y). { // The sole purpose of this is to interpret something like '_utf8mb4 0xD091D092D093' // as a string. It does not matter that any identifier followed by an integer will // be interpreted as a string, as invalid usage will be caught by the server. 
// Patch summary (qc_sqlite tokenize.c and query_classifier/test/maxscale.test) -- review
// annotations placed ahead of the diff header; they are not part of any hunk.
//
// tokenize.c, hunk at -199 (added after the `#ifdef MAXSCALE` context line):
//   * struct mxs_charset_entry pairs a name pointer with its length.
//   * mxs_charset_names[] lists 40 names. Every stored len equals the length of its
//     literal and the entries are in ascending order, which the bsearch() lookup in
//     mxs_is_charset_name() depends on.
//   * mxs_compare_charset_names() compares case-insensitively (strncasecmp) over the
//     shorter of the two lengths; when that prefix is equal and the lengths differ, the
//     shorter entry orders first. The key is not read past key->len by this comparison,
//     so it does not have to be NUL-terminated.
//   * mxs_is_charset_name(p, n) returns non-zero when the n bytes at p match a table entry.
//   NOTE(review): the table and both functions get external linkage; consider `static`
//   if no other translation unit uses them -- TODO confirm.
//   NOTE(review): strncasecmp(), bsearch() and MIN() are used, but no include or macro
//   definition is part of this hunk -- confirm they are already in scope in tokenize.c.
// tokenize.c, hunk at -558 (inside #ifdef MAXSCALE):
//   * For a token whose first byte is '_', the scan continues over every IdChar() byte
//     and the text after the underscore is looked up in the table. A hit sets
//     TK_CHARSET_NAME_KW and returns the length i; a miss leaves via `break`, which the
//     in-code comment says results in TK_ID.
//   NOTE(review): `(char*)z + 1` drops the const qualifier of z (const unsigned char *
//   in the signature shown as context); `(const char*)z + 1` would be cleaner because
//   the callee takes const char *.
// maxscale.test: adds two statements under "# MXS-2732" that use the form _latin1'...':
//   a plain SELECT, and a CREATE VIEW that the test comment attributes to the bug report.
diff --git a/query_classifier/qc_sqlite/sqlite-src-3110100/src/tokenize.c b/query_classifier/qc_sqlite/sqlite-src-3110100/src/tokenize.c index a698d29ba..05070b107 100644 --- a/query_classifier/qc_sqlite/sqlite-src-3110100/src/tokenize.c +++ b/query_classifier/qc_sqlite/sqlite-src-3110100/src/tokenize.c @@ -199,6 +199,91 @@ int sqlite3IsIdChar(u8 c){ return IdChar(c); } */ #ifdef MAXSCALE extern int maxscaleComment(); + +struct mxs_charset_entry +{ + const char* name; + size_t len; +}; + +// Character set names of MariaDB. +// +// NOTE: MUST be kept in alphabetical order. +const struct mxs_charset_entry mxs_charset_names[] = +{ + { "armscii8", 8 }, + { "ascii", 5 }, + { "big5", 4 }, + { "binary", 6 }, + { "cp1250", 6 }, + { "cp1251", 6 }, + { "cp1256", 6 }, + { "cp1257", 6 }, + { "cp850", 5 }, + { "cp852", 5 }, + { "cp866", 5 }, + { "cp932", 5 }, + { "dec8", 4 }, + { "eucjpms", 7 }, + { "euckr", 5 }, + { "gb2312", 6 }, + { "gbk", 3 }, + { "geostd8", 7 }, + { "greek", 5 }, + { "hebrew", 6 }, + { "hp8", 3 }, + { "keybcs2", 7 }, + { "koi8r", 5 }, + { "koi8u", 5 }, + { "latin1", 6 }, + { "latin2", 6 }, + { "latin5", 6 }, + { "latin7", 6 }, + { "macce", 5 }, + { "macroman", 8 }, + { "sjis", 4 }, + { "swe7", 4 }, + { "tis620", 6 }, + { "ucs2", 4 }, + { "ujis", 4 }, + { "utf16", 5 }, + { "utf16le", 7 }, + { "utf32", 5 }, + { "utf8", 4 }, + { "utf8mb4", 7 } +}; + +#define N_MXS_CHARSET_NAMES (sizeof(mxs_charset_names)/sizeof(mxs_charset_names[0])) + +int mxs_compare_charset_names(const void* l, const void* r) +{ + const struct mxs_charset_entry* key = (const struct mxs_charset_entry*)l; + const struct mxs_charset_entry* value = (const struct mxs_charset_entry*)r; + + int rv = strncasecmp(key->name, value->name, MIN(key->len, value->len)); + + if (key->len != value->len) + { + if (rv == 0) + { + rv = key->len < value->len ? 
-1 : 1; + } + } + + return rv; +} + +int mxs_is_charset_name(const char* p, size_t n) +{ + struct mxs_charset_entry key = { p, n }; + + return bsearch(&key, + mxs_charset_names, N_MXS_CHARSET_NAMES, sizeof(mxs_charset_names[0]), + mxs_compare_charset_names) != 0; +} + + + int sqlite3GetToken(Parse* pParse, const unsigned char *z, int *tokenType){ #else int sqlite3GetToken(const unsigned char *z, int *tokenType){ @@ -558,6 +643,22 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){ /* Not a bit field. It may be a keyword so we flow through */ #endif for(i=1; aiClass[z[i]]<=CC_KYWD; i++){} +#ifdef MAXSCALE + if ( z[0]== '_' ) { + // This can be a case of [_charset_name], so we need to + // accept more. We can eat all characters acceptable for + // an identifier. + while ( IdChar(z[i]) ) { i++; } + + if (mxs_is_charset_name((char*)z + 1, i - 1)) { + *tokenType = TK_CHARSET_NAME_KW; + return i; + } else { + // Token type will be TK_ID. + break; + } + } +#endif if( IdChar(z[i]) ){ /* This token started out using characters that can appear in keywords, ** but z[i] is a character not allowed within keywords, so this must diff --git a/query_classifier/test/maxscale.test b/query_classifier/test/maxscale.test index 052b4935d..1d1685b98 100644 --- a/query_classifier/test/maxscale.test +++ b/query_classifier/test/maxscale.test @@ -140,6 +140,11 @@ select soundex(_utf8mb4 0xD091D092D093) as vx, gray_user_tag from user_extends w # MXS-2713 SET PASSWORD FOR 'user'@'10.0.0.1'='*C50EB75D7CB4C76B5264218B92BC69E6815B057A'; +# MXS-2732 +SELECT * FROM t WHERE f = _latin1'a'; +# This was actually in the bug-report that was made due to a leak. 
+CREATE VIEW v1 AS select sql_no_cache T0001.SERVERNAME AS SERVERNAME, T0003.TABNAME AS TABNAME,T0003.LOCAL_NAME AS LOCAL_NAME,T0002.DBINSTANCE AS DBINSTANCE from t2 T0001 join t1 T0002 join t3 T0003 where ((T0002.SERVERGRP = T0001.SERVERGRP) and (T0002.SERVERGRP = T0003.SERVERGRP) and (T0003.MAPSTATE = _latin1'A') and (T0003.ACTSTATE = _latin1' ')); + # MXS-2432 RESET QUERY CACHE; RESET MASTER;