From 8ee110efa8d8742252095bae75107c86503dc6ff Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 11 Jan 2016 03:03:28 +0200 Subject: [PATCH] Fixed canonicalization and added more tests Fixed mistakes in the canonicalization regular expressions and altered the functions to use a source and destination buffers. This reduces the amount of memory allocations that take place. Added more canonical query tests to the internal test suite. --- query_classifier/query_classifier.cc | 30 ++-- .../test/canonical_tests/CMakeLists.txt | 16 ++- .../test/canonical_tests/alter.expected | 19 +++ .../test/canonical_tests/alter.sql | 19 +++ .../test/canonical_tests/expected.sql | 2 +- .../test/canonical_tests/select.expected | 60 ++++++++ .../test/canonical_tests/select.sql | 60 ++++++++ utils/skygw_utils.cc | 133 ++++++++++++------ utils/skygw_utils.h | 9 +- 9 files changed, 288 insertions(+), 60 deletions(-) create mode 100644 query_classifier/test/canonical_tests/alter.expected create mode 100644 query_classifier/test/canonical_tests/alter.sql create mode 100644 query_classifier/test/canonical_tests/select.expected create mode 100644 query_classifier/test/canonical_tests/select.sql diff --git a/query_classifier/query_classifier.cc b/query_classifier/query_classifier.cc index ab8f07332..602e9d34b 100644 --- a/query_classifier/query_classifier.cc +++ b/query_classifier/query_classifier.cc @@ -61,6 +61,7 @@ #include #include +#define MYSQL_COM_QUERY_HEADER_SIZE 5 /*< 3 bytes size, 1 sequence, 1 command */ #define MAX_QUERYBUF_SIZE 2048 typedef struct parsing_info_st { @@ -1436,23 +1437,24 @@ bool qc_query_has_clause(GWBUF* buf) char* qc_get_canonical(GWBUF* querybuf) { char *querystr = NULL; - if (GWBUF_LENGTH(querybuf) > 5 && GWBUF_IS_SQL(querybuf)) + if (GWBUF_LENGTH(querybuf) > MYSQL_COM_QUERY_HEADER_SIZE && GWBUF_IS_SQL(querybuf)) { - const size_t bufsize = MIN(MAX_QUERYBUF_SIZE, GWBUF_LENGTH(querybuf) - 5); - char buffer[bufsize + 1]; - memcpy(buffer, (uint8_t*) 
GWBUF_DATA(querybuf) + 5, bufsize); - buffer[bufsize] = '\0'; - char* replaced = replace_quoted(buffer); - if (replaced == NULL || (querystr = remove_mysql_comments(replaced)) == NULL) + size_t srcsize = GWBUF_LENGTH(querybuf) - MYSQL_COM_QUERY_HEADER_SIZE; + char *src = (char*) malloc(srcsize); + size_t destsize = 0; + char *dest = NULL; + if (src) { - querystr = NULL; + memcpy(src, (uint8_t*) GWBUF_DATA(querybuf) + MYSQL_COM_QUERY_HEADER_SIZE, + srcsize); + if (replace_quoted((const char**) &src, &srcsize, &dest, &destsize) && + remove_mysql_comments((const char**) &dest, &destsize, &src, &srcsize) && + replace_values((const char**) &src, &srcsize, &dest, &destsize)) + { + querystr = dest; + } + free(src); } - replaced = querystr; - if (replaced == NULL || (querystr = replace_values(replaced)) == NULL) - { - querystr = NULL; - } - free(replaced); } return querystr; } diff --git a/query_classifier/test/canonical_tests/CMakeLists.txt b/query_classifier/test/canonical_tests/CMakeLists.txt index a81aa3e88..0092b7841 100644 --- a/query_classifier/test/canonical_tests/CMakeLists.txt +++ b/query_classifier/test/canonical_tests/CMakeLists.txt @@ -9,9 +9,23 @@ else() endif() add_executable(canonizer canonizer.c ${CMAKE_SOURCE_DIR}/server/core/random_jkiss.c) target_link_libraries(canonizer ${PCRE2_LIBRARIES} utils pthread query_classifier z dl ssl aio crypt crypto rt m ${EMBEDDED_LIB} fullcore stdc++) -add_test(NAME Internal-TestCanonicalQuery COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh +add_test(NAME Internal-CanonicalQuery COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh ${CMAKE_CURRENT_BINARY_DIR}/test.log ${CMAKE_CURRENT_SOURCE_DIR}/input.sql ${CMAKE_CURRENT_BINARY_DIR}/output.sql ${CMAKE_CURRENT_SOURCE_DIR}/expected.sql $) + +add_test(NAME Internal-CanonicalQuerySelect COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh + ${CMAKE_CURRENT_BINARY_DIR}/test.log + ${CMAKE_CURRENT_SOURCE_DIR}/select.sql + ${CMAKE_CURRENT_BINARY_DIR}/select.output + 
${CMAKE_CURRENT_SOURCE_DIR}/select.expected + $) + +add_test(NAME Internal-CanonicalQueryAlter COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh + ${CMAKE_CURRENT_BINARY_DIR}/test.log + ${CMAKE_CURRENT_SOURCE_DIR}/alter.sql + ${CMAKE_CURRENT_BINARY_DIR}/alter.output + ${CMAKE_CURRENT_SOURCE_DIR}/alter.expected + $) diff --git a/query_classifier/test/canonical_tests/alter.expected b/query_classifier/test/canonical_tests/alter.expected new file mode 100644 index 000000000..be94039f6 --- /dev/null +++ b/query_classifier/test/canonical_tests/alter.expected @@ -0,0 +1,19 @@ +ALTER DATABASE `` DEFAULT CHARACTER SET latin2; +ALTER DATABASE `#mysql50#../` UPGRADE DATA DIRECTORY NAME; +ALTER DATABASE `#mysql50#../..` UPGRADE DATA DIRECTORY NAME; +ALTER DATABASE `#mysql51#not-yet` UPGRADE DATA DIRECTORY NAME; +ALTER DATABASE `test-database` CHARACTER SET utf8 COLLATE utf8_unicode_ci ; +ALTER DEFINER=root@? EVENT e1 ON SCHEDULE EVERY ? HOUR; +ALTER EVENT e1 COMMENT '?'; +ALTER EVENT e1 DO SELECT ?; +ALTER EVENT e1 ON SCHEDULE AT '?' 
ON COMPLETION PRESERVE DISABLE; +ALTER TABLE `@0023sql1` RENAME `#sql-1`; +ALTER TABLE t1 ADD INDEX (c13) COMMENT '?'; +ALTER TABLE t1 ADD PARTITION IF NOT EXISTS(PARTITION `p5` VALUES LESS THAN (?)COMMENT '?'); +ALTER TABLE `t1` ADD PRIMARY KEY (`a`); +alter table t1 change a a enum('?','?','?','?','?','?','?','?') character set utf16; +alter table t1 change a a int `FKEY1`='?'; +alter table t1i engine=innodb; +alter table t1 max_rows=?; +ALTER TABLE t2 PARTITION BY RANGE COLUMNS(c)(PARTITION p0 VALUES LESS THAN ('?'), PARTITION p1 VALUES LESS THAN (MAXVALUE)); +alter table table_24562 order by table_24562.subsection ASC, table_24562.section DESC; diff --git a/query_classifier/test/canonical_tests/alter.sql b/query_classifier/test/canonical_tests/alter.sql new file mode 100644 index 000000000..94448d8e5 --- /dev/null +++ b/query_classifier/test/canonical_tests/alter.sql @@ -0,0 +1,19 @@ +ALTER DATABASE `` DEFAULT CHARACTER SET latin2; +ALTER DATABASE `#mysql50#../` UPGRADE DATA DIRECTORY NAME; +ALTER DATABASE `#mysql50#../..` UPGRADE DATA DIRECTORY NAME; # a comment +ALTER DATABASE `#mysql51#not-yet` UPGRADE DATA DIRECTORY NAME; # a comment with backticks `this should work` +ALTER DATABASE `test-database` CHARACTER SET utf8 COLLATE utf8_unicode_ci ; +ALTER DEFINER=root@localhost EVENT e1 ON SCHEDULE EVERY 1 HOUR; +ALTER EVENT e1 COMMENT 'comment'; +ALTER EVENT e1 DO SELECT 2; +ALTER EVENT e1 ON SCHEDULE AT '2000-01-02 00:00:00' ON COMPLETION PRESERVE DISABLE; +ALTER TABLE `@0023sql1` RENAME `#sql-1`; +ALTER TABLE t1 ADD INDEX (c13) COMMENT 
'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcde
fghijabcdefghijabcdefghijabcdefghijabcdefghijabcd'; +ALTER TABLE t1 ADD PARTITION IF NOT EXISTS(PARTITION `p5` VALUES LESS THAN (2010)COMMENT 'APSTART \' APEND'); +ALTER TABLE `t1` ADD PRIMARY KEY (`a`); +alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf16; +alter table t1 change a a int `FKEY1`='v1'; +alter table t1i engine=innodb; +alter table t1 max_rows=100000000000; +ALTER TABLE t2 PARTITION BY RANGE COLUMNS(c)(PARTITION p0 VALUES LESS THAN ('2000-01-01 00:00:00'), PARTITION p1 VALUES LESS THAN (MAXVALUE)); +alter table table_24562 order by table_24562.subsection ASC, table_24562.section DESC; diff --git a/query_classifier/test/canonical_tests/expected.sql b/query_classifier/test/canonical_tests/expected.sql index 8dab956e8..0598742f7 100644 --- a/query_classifier/test/canonical_tests/expected.sql +++ b/query_classifier/test/canonical_tests/expected.sql @@ -7,7 +7,7 @@ select * from tst where lname='?'; select ?,?,?,?,?,? from tst; select * from tst where fname like '?'; select * from tst where lname like '?' order by fname; -insert into tst values ("?","?"),("?",?),("?","?"); +insert into tst values ("?","?"),("?",null),("?","?"); drop table if exists tst; create table tst(fname varchar(?), lname varchar(?)); update tst set lname="?" where fname like '?' or lname like '?'; diff --git a/query_classifier/test/canonical_tests/select.expected b/query_classifier/test/canonical_tests/select.expected new file mode 100644 index 000000000..af7ef85ad --- /dev/null +++ b/query_classifier/test/canonical_tests/select.expected @@ -0,0 +1,60 @@ +select count(*) from t1 where id not in (?,?); +select count(*) from t1 where match a against ('?'); +SELECT COUNT(*) FROM t1 WHERE MATCH(a) AGAINST("?" IN BOOLEAN MODE); +select count(*) from t1 where s1 < ? 
or s1 is null; +SELECT COUNT(*) FROM t1 WHERE s1 = ?; +select count(*) from t1 where x < ?; +select count(*) from t1 where x = ?; +select count(*) from t1 where x > ?; +select count(*) from t1 where x = ?; +select truncate(?,?); +select truncate(?,-?); +select v/?; +select uncompress("?"); +SELECT UNHEX('?'); +select unhex(hex("?")), hex(unhex("?")), unhex("?"), unhex(NULL); +select UpdateXML('?','?','?'); +select UpdateXML(@?, '?', '?'); +SELECT USER(),CURRENT_USER(),@@LOCAL.external_user; +SELECT user(),current_user(),@@?; +SELECT user, host FROM mysql.user where user = '?' order by ?,?; +select user, host, password, plugin, authentication_string from mysql.user where user = '?'; +select userid,count(*) from t1 group by userid desc having ? IN (?,COUNT(*)); +select userid,count(*) from t1 group by userid desc having (count(*)+?) IN (?,?); +SELECT user_id FROM t1 WHERE request_id=?; +SELECT UserId FROM t1 WHERE UserId=? group by Userid; +select userid,pmtotal,pmnew, (select count(rd) from t1 where toid=t2.userid) calc_total, (select count(rd) from t1 where rd=? and toid=t2.userid) calc_new from t2 where userid in (select distinct toid from t1); +select yearweek("?",?) as '?', yearweek("?",?) as '?', yearweek("?",?) as '?',yearweek("?",?) as '?', yearweek("?",?) as '?', yearweek("?",?) as '?', yearweek("?",?) as '?'; +select user() like "?"; +select user,password,plugin,authentication_string from mysql.user where user like '?'; +select user, QUOTE(host) from mysql.user where user="?"; +SELECT UTC_DATE(); +select utext from t1 where utext like '?'; +SELECT _utf32 0x10001=_utf32 0x10002; +select _utf32'?' collate utf32_general_ci = 0xfffd; +SELECT _utf8 0x7E, _utf8 X'?', _utf8 B'?'; +select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'?',_utf8 0xD0B1,_utf8 '?'); +select _utf8'?' 
union select _latin1'?'; +SELECT utf8_f,MIN(comment) FROM t1 GROUP BY ?; +SELECT _utf8mb3'?'; +select _utf8mb4 0xD0B0D0B1D0B2 like concat(_utf8mb4'?',_utf8mb4 0xD0B1,_utf8mb4 '?'); +select (_utf8mb4 X'?'); +SELECT _utf8'?' COLLATE utf8_5624_2; +select (_utf8 X'?'); +select uuid() into @?; +SELECT v1.a, v2? b FROM v1 LEFT OUTER JOIN v2 ON (v1.a=v2.b) AND (v1.a >= ?) GROUP BY v1.a; +SELECT v1.f4 FROM v1 WHERE f1<>? OR f2<>? AND f4='?' AND (f2<>? OR f3<>? AND f5<>? OR f4 LIKE '?'); +select v1.r_object_id, v2.users_names from v1, v2where (v1.group_name='?') and v2.r_object_id=v1.r_object_idorder by users_names; +SELECT v2 FROM t1 WHERE v1 IN ('?', '?', '?', '?' ) AND i = ?; +select "?" as "?"; +SELECT @@?; +select @? = CONVERT(@? USING ujis); +SELECT @?; +select @?, @?, @?=@?; +SELECT @?, @?; +SELECT @?, @?, @?, @?, @?, @?; +SELECT (@v:=a) <> (@v:=?) FROM t1; +select @?, coercibility(@?); +select @@?, @@?, @@?, @@?; +SELECT @?, @?, @?, @?; +SELECT user,host,password,insert_priv FROM user WHERE user=@? 
AND host=@?; diff --git a/query_classifier/test/canonical_tests/select.sql b/query_classifier/test/canonical_tests/select.sql new file mode 100644 index 000000000..8c6f987e8 --- /dev/null +++ b/query_classifier/test/canonical_tests/select.sql @@ -0,0 +1,60 @@ +select count(*) from t1 where id not in (1,2); +select count(*) from t1 where match a against ('000000'); +SELECT COUNT(*) FROM t1 WHERE MATCH(a) AGAINST("+awrd bwrd* +cwrd*" IN BOOLEAN MODE); +select count(*) from t1 where s1 < 0 or s1 is null; +SELECT COUNT(*) FROM t1 WHERE s1 = 1001; +select count(*) from t1 where x < -16; +select count(*) from t1 where x = -16; +select count(*) from t1 where x > -16; +select count(*) from t1 where x = 18446744073709551601; +select truncate(5678.123451,6); +select truncate(99999999999999999999999999999999999999,-31); +select v/10; +select uncompress(""); +SELECT UNHEX('G'); +select unhex(hex("foobar")), hex(unhex("1234567890ABCDEF")), unhex("345678"), unhex(NULL); +select UpdateXML('a1b1c1b2a2','/a/b/c','+++++++++'); +select UpdateXML(@xml, '/a/@aa1', ''); +SELECT USER(),CURRENT_USER(),@@LOCAL.external_user; +SELECT user(),current_user(),@@proxy_user; +SELECT user, host FROM mysql.user where user = 'CUser' order by 1,2; +select user, host, password, plugin, authentication_string from mysql.user where user = 'u1'; +select userid,count(*) from t1 group by userid desc having 3 IN (1,COUNT(*)); +select userid,count(*) from t1 group by userid desc having (count(*)+1) IN (4,3); +SELECT user_id FROM t1 WHERE request_id=9999999999999; +SELECT UserId FROM t1 WHERE UserId=22 group by Userid; +select userid,pmtotal,pmnew, (select count(rd) from t1 where toid=t2.userid) calc_total, (select count(rd) from t1 where rd=0 and toid=t2.userid) calc_new from t2 where userid in (select distinct toid from t1); +select yearweek("2000-01-01",0) as '2000', yearweek("2001-01-01",0) as '2001', yearweek("2002-01-01",0) as '2002',yearweek("2003-01-01",0) as '2003', yearweek("2004-01-01",0) as '2004', 
yearweek("2005-01-01",0) as '2005', yearweek("2006-01-01",0) as '2006'; +select user() like "%@%"; +select user,password,plugin,authentication_string from mysql.user where user like 'foo%'; +select user, QUOTE(host) from mysql.user where user="mysqltest_8"; +SELECT UTC_DATE(); +select utext from t1 where utext like '%%'; +SELECT _utf32 0x10001=_utf32 0x10002; +select _utf32'a' collate utf32_general_ci = 0xfffd; +SELECT _utf8 0x7E, _utf8 X'7E', _utf8 B'01111110'; +select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%'); +select _utf8'12' union select _latin1'12345'; +SELECT utf8_f,MIN(comment) FROM t1 GROUP BY 1; +SELECT _utf8mb3'test'; +select _utf8mb4 0xD0B0D0B1D0B2 like concat(_utf8mb4'%',_utf8mb4 0xD0B1,_utf8mb4 '%'); +select (_utf8mb4 X'616263FF'); +SELECT _utf8'test' COLLATE utf8_5624_2; +select (_utf8 X'616263FF'); +select uuid() into @my_uuid; +SELECT v1.a, v2. b FROM v1 LEFT OUTER JOIN v2 ON (v1.a=v2.b) AND (v1.a >= 3) GROUP BY v1.a; +SELECT v1.f4 FROM v1 WHERE f1<>0 OR f2<>0 AND f4='v' AND (f2<>0 OR f3<>0 AND f5<>0 OR f4 LIKE '%b%'); +select v1.r_object_id, v2.users_names from v1, v2where (v1.group_name='tstgroup1') and v2.r_object_id=v1.r_object_idorder by users_names; +SELECT v2 FROM t1 WHERE v1 IN ('f', 'd', 'h', 'u' ) AND i = 2; +select "-- comment # followed by another comment" as "-- more comments";# this should be removed +SELECT @@tx_isolation; +select @ujis4 = CONVERT(@utf84 USING ujis); +SELECT @user_var; +select @v1def1, @v1def2, @v1def1=@v1def2; +SELECT @v1, @v2; +SELECT @v5, @v6, @v7, @v8, @v9, @v10; +SELECT (@v:=a) <> (@v:=1) FROM t1; +select @v, coercibility(@v); +select @@version, @@version_comment, @@version_compile_machine, @@version_compile_os; +SELECT @x_str_1, @x_int_1, @x_int_2, @x_int_3; +SELECT user,host,password,insert_priv FROM user WHERE user=@u AND host=@h; diff --git a/utils/skygw_utils.cc b/utils/skygw_utils.cc index 61e0caf56..77770982a 100644 --- a/utils/skygw_utils.cc +++ b/utils/skygw_utils.cc @@ -2031,37 
+2031,46 @@ void skygw_file_close( } } +#define BUFFER_GROWTH_RATE 1.2 static pcre2_code* remove_comments_re = NULL; static const PCRE2_SPTR remove_comments_pattern = (PCRE2_SPTR) -"((--\\s.*)|(#.*))"; +"(?:`[^`]*`\\K)|(?:#.*|--[[:space:]].*)"; /** * Remove SQL comments from the end of a string * * The inline comments are not removed due to the fact that they can alter the * behavior of the query. - * @param str String to modify - * @return Pointer to new modified string or NULL if memory allocation failed + * @param src Pointer to the string to modify. + * @param srcsize Pointer to a size_t variable which holds the length of the string to + * be modified. + * @param dest The address of the pointer where the result will be stored. If the + * value pointed by this parameter is NULL, new memory will be allocated as needed. + * @param destsize Pointer to a size_t variable where the size of the result string is stored. + * @return Pointer to new modified string or NULL if memory allocation failed. + * If NULL is returned and the value pointed by @c dest was not NULL, no new + * memory will be allocated, the memory pointed by @c dest will be freed and the + * contents of @c dest and @c destsize will be invalid. */ -char* remove_mysql_comments(const char* str) +char* remove_mysql_comments(const char** src, const size_t* srcsize, char** dest, size_t* destsize) { static const PCRE2_SPTR replace = (PCRE2_SPTR) ""; pcre2_match_data* mdata; - size_t orig_len = strlen(str); - size_t len = orig_len; - char* output = NULL; + char* output = *dest; + size_t orig_len = *srcsize; + size_t len = output ? 
*destsize : orig_len; - if (len > 0) + if (orig_len > 0) { - if ((output = (char*) malloc(len * sizeof (char))) && + if ((output || (output = (char*) malloc(len * sizeof (char)))) && (mdata = pcre2_match_data_create_from_pattern(remove_comments_re, NULL))) { - while (pcre2_substitute(remove_comments_re, (PCRE2_SPTR) str, orig_len, 0, + while (pcre2_substitute(remove_comments_re, (PCRE2_SPTR) * src, orig_len, 0, PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL, replace, PCRE2_ZERO_TERMINATED, (PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY) { - char* tmp = (char*) realloc(output, len *= 2); + char* tmp = (char*) realloc(output, (len = len * BUFFER_GROWTH_RATE + 1)); if (tmp == NULL) { free(output); @@ -2078,41 +2087,56 @@ char* remove_mysql_comments(const char* str) output = NULL; } } - else + else if (output == NULL) { - output = strdup(str); + output = strdup(*src); } + + if (output) + { + *destsize = strlen(output); + *dest = output; + } + return output; } static pcre2_code* replace_values_re = NULL; static const PCRE2_SPTR replace_values_pattern = (PCRE2_SPTR) "(?i)([-=,+*/([:space:]]|\\b|[@])" -"(?:[0-9.]+|(?<=[@])[a-z_]+|NULL)([-=,+*/)[:space:];]|$)"; +"(?:[0-9.-]+|(?<=[@])[a-z_0-9]+)([-=,+*/)[:space:];]|$)"; /** - * Replace every literal number and NULL value with a question mark. - * @param str String to modify - * @return Pointer to new modified string or NULL if memory allocation failed + * Replace literal numbers and user variables with a question mark. + * @param src Pointer to the string to modify. + * @param srcsize Pointer to a size_t variable which holds the length of the string to + * be modified. + * @param dest The address of the pointer where the result will be stored. If the + * value pointed by this parameter is NULL, new memory will be allocated as needed. + * @param destsize Pointer to a size_t variable where the size of the result string is stored. + * @return Pointer to new modified string or NULL if memory allocation failed. 
+ * If NULL is returned and the value pointed by @c dest was not NULL, no new + * memory will be allocated, the memory pointed by @c dest will be freed and the + * contents of @c dest and @c destsize will be invalid. */ -char* replace_values(const char* str) +char* replace_values(const char** src, const size_t* srcsize, char** dest, size_t* destsize) { static const PCRE2_SPTR replace = (PCRE2_SPTR) "$1?$2"; pcre2_match_data* mdata; - size_t orig_len = strlen(str); - size_t len = orig_len; - char* output = NULL; + char* output = *dest; + size_t orig_len = *srcsize; + size_t len = output ? *destsize : orig_len; - if (len > 0) + if (orig_len > 0) { - if ((output = (char*) malloc(len * sizeof (char))) && + if ((output || (output = (char*) malloc(len * sizeof (char)))) && (mdata = pcre2_match_data_create_from_pattern(replace_values_re, NULL))) { - while (pcre2_substitute(replace_values_re, (PCRE2_SPTR) str, orig_len, 0, + while (pcre2_substitute(replace_values_re, (PCRE2_SPTR) * src, orig_len, 0, PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL, replace, PCRE2_ZERO_TERMINATED, (PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY) { - char* tmp = (char*) realloc(output, len *= 2); + char* tmp = (char*) realloc(output, (len = len * BUFFER_GROWTH_RATE + 1)); if (tmp == NULL) { free(output); @@ -2129,10 +2153,17 @@ char* replace_values(const char* str) output = NULL; } } - else + else if (output == NULL) { - output = strdup(str); + output = strdup(*src); } + + if (output) + { + *destsize = strlen(output); + *dest = output; + } + return output; } @@ -2229,31 +2260,40 @@ retblock: static pcre2_code* replace_quoted_re = NULL; static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR) -"(((?>(?<=[\"]))[^\"]*(?>(?=[\"])))|((?>(?<=[']))[^']*(?>(?=[']))))"; +"(?>[^'\"]*)(?|(?:\"\\K(?:(?:(?<=\\\\)\")|[^\"])*(\"))|(?:'\\K(?:(?:(?<=\\\\)')|[^'])*(')))"; /** - * Replace everything inside single or double quotes with question marks. 
- * @param str String to modify - * @return Pointer to new modified string or NULL if memory allocation failed + * Replace contents of single or double quoted strings with question marks. + * @param src Pointer to the string to modify. + * @param srcsize Pointer to a size_t variable which holds the length of the string to + * be modified. + * @param dest The address of the pointer where the result will be stored. If the + * value pointed by this parameter is NULL, new memory will be allocated as needed. + * @param destsize Pointer to a size_t variable where the size of the result string is stored. + * @return Pointer to new modified string or NULL if memory allocation failed. + * If NULL is returned and the value pointed by @c dest was not NULL, no new + * memory will be allocated, the memory pointed by @c dest will be freed and the + * contents of @c dest and @c destsize will be invalid. */ -char* replace_quoted(const char* str) +char* replace_quoted(const char** src, const size_t* srcsize, char** dest, size_t* destsize) { - static const PCRE2_SPTR replace = (PCRE2_SPTR) "?"; + static const PCRE2_SPTR replace = (PCRE2_SPTR) "?$1"; pcre2_match_data* mdata; - size_t orig_len = strlen(str); - size_t len = orig_len; - char* output = NULL; - if (len > 0) + char* output = *dest; + size_t orig_len = *srcsize; + size_t len = output ? 
*destsize : orig_len; + + if (orig_len > 0) { - if ((output = (char*) malloc(len * sizeof (char))) && + if ((output || (output = (char*) malloc(len * sizeof (char)))) && (mdata = pcre2_match_data_create_from_pattern(replace_quoted_re, NULL))) { - while (pcre2_substitute(replace_quoted_re, (PCRE2_SPTR) str, orig_len, 0, + while (pcre2_substitute(replace_quoted_re, (PCRE2_SPTR) * src, orig_len, 0, PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL, replace, PCRE2_ZERO_TERMINATED, (PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY) { - char* tmp = (char*) realloc(output, len *= 2); + char* tmp = (char*) realloc(output, (len = len * BUFFER_GROWTH_RATE + 1)); if (tmp == NULL) { free(output); @@ -2270,10 +2310,21 @@ char* replace_quoted(const char* str) output = NULL; } } + else if (output == NULL) + { + output = strdup(*src); + } + + if (output) + { + *destsize = strlen(output); + *dest = output; + } else { - output = strdup(str); + *dest = NULL; } + return output; } diff --git a/utils/skygw_utils.h b/utils/skygw_utils.h index 01deab65c..e8121d227 100644 --- a/utils/skygw_utils.h +++ b/utils/skygw_utils.h @@ -276,12 +276,15 @@ EXTERN_C_BLOCK_BEGIN size_t get_decimal_len(size_t s); -char* remove_mysql_comments(const char* str); -char* replace_values(const char* str); +char* remove_mysql_comments(const char** src, const size_t* srcsize, char** dest, + size_t* destsize); +char* replace_values(const char** src, const size_t* srcsize, char** dest, + size_t* destsize); char* replace_literal(char* haystack, const char* needle, const char* replacement); -char* replace_quoted(const char* str); +char* replace_quoted(const char** src, const size_t* srcsize, char** dest, + size_t* destsize); bool is_valid_posix_path(char* path); bool strip_escape_chars(char*); int simple_str_hash(char* key);