From 8ee110efa8d8742252095bae75107c86503dc6ff Mon Sep 17 00:00:00 2001
From: Markus Makela <markus.makela@mariadb.com>
Date: Mon, 11 Jan 2016 03:03:28 +0200
Subject: [PATCH] Fixed canonicalization and added more tests

Fixed mistakes in the canonicalization regular expressions and altered the
functions to use source and destination buffers. This reduces the number
of memory allocations that take place.

Added more canonical query tests to the internal test suite.
---
 query_classifier/query_classifier.cc          |  30 ++--
 .../test/canonical_tests/CMakeLists.txt       |  16 ++-
 .../test/canonical_tests/alter.expected       |  19 +++
 .../test/canonical_tests/alter.sql            |  19 +++
 .../test/canonical_tests/expected.sql         |   2 +-
 .../test/canonical_tests/select.expected      |  60 ++++++++
 .../test/canonical_tests/select.sql           |  60 ++++++++
 utils/skygw_utils.cc                          | 133 ++++++++++++------
 utils/skygw_utils.h                           |   9 +-
 9 files changed, 288 insertions(+), 60 deletions(-)
 create mode 100644 query_classifier/test/canonical_tests/alter.expected
 create mode 100644 query_classifier/test/canonical_tests/alter.sql
 create mode 100644 query_classifier/test/canonical_tests/select.expected
 create mode 100644 query_classifier/test/canonical_tests/select.sql

diff --git a/query_classifier/query_classifier.cc b/query_classifier/query_classifier.cc
index ab8f07332..602e9d34b 100644
--- a/query_classifier/query_classifier.cc
+++ b/query_classifier/query_classifier.cc
@@ -61,6 +61,7 @@
 #include <string.h>
 #include <stdarg.h>
 
+#define MYSQL_COM_QUERY_HEADER_SIZE 5 /*< 3 bytes size, 1 sequence, 1 command */
 #define MAX_QUERYBUF_SIZE 2048
 typedef struct parsing_info_st
 {
@@ -1436,23 +1437,38 @@ bool qc_query_has_clause(GWBUF* buf)
 char* qc_get_canonical(GWBUF* querybuf)
 {
     char *querystr = NULL;
-    if (GWBUF_LENGTH(querybuf) > 5 && GWBUF_IS_SQL(querybuf))
+    if (GWBUF_LENGTH(querybuf) > MYSQL_COM_QUERY_HEADER_SIZE && GWBUF_IS_SQL(querybuf))
     {
-        const size_t bufsize = MIN(MAX_QUERYBUF_SIZE, GWBUF_LENGTH(querybuf) - 5);
-        char buffer[bufsize + 1];
-        memcpy(buffer, (uint8_t*) GWBUF_DATA(querybuf) + 5, bufsize);
-        buffer[bufsize] = '\0';
-        char* replaced = replace_quoted(buffer);
-        if (replaced == NULL || (querystr = remove_mysql_comments(replaced)) == NULL)
+        size_t srcsize = GWBUF_LENGTH(querybuf) - MYSQL_COM_QUERY_HEADER_SIZE;
+        char *src = (char*) malloc(srcsize);
+        size_t destsize = 0;
+        char *dest = NULL;
+        if (src)
         {
-            querystr = NULL;
+            memcpy(src, (uint8_t*) GWBUF_DATA(querybuf) + MYSQL_COM_QUERY_HEADER_SIZE,
+                   srcsize);
+            /* Ownership: a helper that returns NULL has already freed its own
+             * output buffer (see the helper docs), so on failure only the
+             * helper's input buffer is still ours to release. */
+            if (!replace_quoted((const char**) &src, &srcsize, &dest, &destsize))
+            {
+                free(src);
+            }
+            else if (!remove_mysql_comments((const char**) &dest, &destsize, &src, &srcsize))
+            {
+                /* src was the output here and is already freed; freeing it
+                 * again would be a double free, while dest would leak. */
+                free(dest);
+            }
+            else
+            {
+                if (replace_values((const char**) &src, &srcsize, &dest, &destsize))
+                {
+                    querystr = dest;
+                }
+                free(src);
+            }
         }
-        replaced = querystr;
-        if (replaced == NULL || (querystr = replace_values(replaced)) == NULL)
-        {
-            querystr = NULL;
-        }
-        free(replaced);
     }
     return querystr;
 }
diff --git a/query_classifier/test/canonical_tests/CMakeLists.txt b/query_classifier/test/canonical_tests/CMakeLists.txt
index a81aa3e88..0092b7841 100644
--- a/query_classifier/test/canonical_tests/CMakeLists.txt
+++ b/query_classifier/test/canonical_tests/CMakeLists.txt
@@ -9,9 +9,23 @@ else()
 endif()
 add_executable(canonizer canonizer.c ${CMAKE_SOURCE_DIR}/server/core/random_jkiss.c)
 target_link_libraries(canonizer ${PCRE2_LIBRARIES} utils pthread query_classifier z dl ssl aio crypt crypto rt m  ${EMBEDDED_LIB} fullcore stdc++)
-add_test(NAME Internal-TestCanonicalQuery COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh 
+add_test(NAME Internal-CanonicalQuery COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh
   ${CMAKE_CURRENT_BINARY_DIR}/test.log
   ${CMAKE_CURRENT_SOURCE_DIR}/input.sql
   ${CMAKE_CURRENT_BINARY_DIR}/output.sql
   ${CMAKE_CURRENT_SOURCE_DIR}/expected.sql
   $<TARGET_FILE:canonizer>)
+
+add_test(NAME Internal-CanonicalQuerySelect COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh
+  ${CMAKE_CURRENT_BINARY_DIR}/test.log
+  ${CMAKE_CURRENT_SOURCE_DIR}/select.sql
+  ${CMAKE_CURRENT_BINARY_DIR}/select.output
+  ${CMAKE_CURRENT_SOURCE_DIR}/select.expected
+  $<TARGET_FILE:canonizer>)
+
+add_test(NAME Internal-CanonicalQueryAlter COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh
+  ${CMAKE_CURRENT_BINARY_DIR}/test.log
+  ${CMAKE_CURRENT_SOURCE_DIR}/alter.sql
+  ${CMAKE_CURRENT_BINARY_DIR}/alter.output
+  ${CMAKE_CURRENT_SOURCE_DIR}/alter.expected
+  $<TARGET_FILE:canonizer>)
diff --git a/query_classifier/test/canonical_tests/alter.expected b/query_classifier/test/canonical_tests/alter.expected
new file mode 100644
index 000000000..be94039f6
--- /dev/null
+++ b/query_classifier/test/canonical_tests/alter.expected
@@ -0,0 +1,19 @@
+ALTER DATABASE `` DEFAULT CHARACTER SET latin2;
+ALTER DATABASE `#mysql50#../` UPGRADE DATA DIRECTORY NAME;
+ALTER DATABASE `#mysql50#../..` UPGRADE DATA DIRECTORY NAME; 
+ALTER DATABASE `#mysql51#not-yet` UPGRADE DATA DIRECTORY NAME; 
+ALTER DATABASE `test-database` CHARACTER SET utf8 COLLATE utf8_unicode_ci ;
+ALTER DEFINER=root@? EVENT e1 ON SCHEDULE EVERY ? HOUR;
+ALTER EVENT e1 COMMENT '?';
+ALTER EVENT e1 DO SELECT ?;
+ALTER EVENT e1 ON SCHEDULE AT '?'  ON COMPLETION PRESERVE DISABLE;
+ALTER TABLE `@0023sql1`  RENAME `#sql-1`;
+ALTER TABLE t1 ADD INDEX (c13) COMMENT '?';
+ALTER TABLE t1 ADD PARTITION IF NOT EXISTS(PARTITION `p5` VALUES LESS THAN (?)COMMENT '?');
+ALTER TABLE `t1` ADD PRIMARY KEY  (`a`);
+alter table t1 change a a enum('?','?','?','?','?','?','?','?') character set utf16;
+alter table t1 change a a int `FKEY1`='?';
+alter table t1i engine=innodb;
+alter table t1 max_rows=?;
+ALTER TABLE t2 PARTITION BY RANGE COLUMNS(c)(PARTITION p0 VALUES LESS THAN ('?'), PARTITION p1 VALUES LESS THAN (MAXVALUE));
+alter table table_24562 order by table_24562.subsection ASC, table_24562.section DESC;
diff --git a/query_classifier/test/canonical_tests/alter.sql b/query_classifier/test/canonical_tests/alter.sql
new file mode 100644
index 000000000..94448d8e5
--- /dev/null
+++ b/query_classifier/test/canonical_tests/alter.sql
@@ -0,0 +1,19 @@
+ALTER DATABASE `` DEFAULT CHARACTER SET latin2;
+ALTER DATABASE `#mysql50#../` UPGRADE DATA DIRECTORY NAME;
+ALTER DATABASE `#mysql50#../..` UPGRADE DATA DIRECTORY NAME; # a comment
+ALTER DATABASE `#mysql51#not-yet` UPGRADE DATA DIRECTORY NAME; # a comment with backticks `this should work`
+ALTER DATABASE `test-database` CHARACTER SET utf8 COLLATE utf8_unicode_ci ;
+ALTER DEFINER=root@localhost EVENT e1 ON SCHEDULE EVERY 1 HOUR;
+ALTER EVENT e1 COMMENT 'comment';
+ALTER EVENT e1 DO SELECT 2;
+ALTER EVENT e1 ON SCHEDULE AT '2000-01-02 00:00:00'  ON COMPLETION PRESERVE DISABLE;
+ALTER TABLE `@0023sql1`  RENAME `#sql-1`;
+ALTER TABLE t1 ADD INDEX (c13) COMMENT 'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcde
fghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcd';
+ALTER TABLE t1 ADD PARTITION IF NOT EXISTS(PARTITION `p5` VALUES LESS THAN (2010)COMMENT 'APSTART \' APEND');
+ALTER TABLE `t1` ADD PRIMARY KEY  (`a`);
+alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf16;
+alter table t1 change a a int `FKEY1`='v1';
+alter table t1i engine=innodb;
+alter table t1 max_rows=100000000000;
+ALTER TABLE t2 PARTITION BY RANGE COLUMNS(c)(PARTITION p0 VALUES LESS THAN ('2000-01-01 00:00:00'), PARTITION p1 VALUES LESS THAN (MAXVALUE));
+alter table table_24562 order by table_24562.subsection ASC, table_24562.section DESC;
diff --git a/query_classifier/test/canonical_tests/expected.sql b/query_classifier/test/canonical_tests/expected.sql
index 8dab956e8..0598742f7 100644
--- a/query_classifier/test/canonical_tests/expected.sql
+++ b/query_classifier/test/canonical_tests/expected.sql
@@ -7,7 +7,7 @@ select * from tst where lname='?';
 select ?,?,?,?,?,? from tst;
 select * from tst where fname like '?';
 select * from tst where lname like '?' order by fname;
-insert into tst values ("?","?"),("?",?),("?","?");
+insert into tst values ("?","?"),("?",null),("?","?");
 drop table if exists tst;
 create table tst(fname varchar(?), lname varchar(?));
 update tst set lname="?" where fname like '?' or lname like '?';
diff --git a/query_classifier/test/canonical_tests/select.expected b/query_classifier/test/canonical_tests/select.expected
new file mode 100644
index 000000000..af7ef85ad
--- /dev/null
+++ b/query_classifier/test/canonical_tests/select.expected
@@ -0,0 +1,60 @@
+select count(*) from t1 where id not in (?,?);
+select count(*) from t1 where match a against ('?');
+SELECT COUNT(*) FROM t1 WHERE MATCH(a) AGAINST("?" IN BOOLEAN MODE);
+select count(*) from t1 where s1 < ? or s1 is null;
+SELECT COUNT(*) FROM t1 WHERE s1 = ?;
+select count(*) from t1 where x < ?;
+select count(*) from t1 where x = ?;
+select count(*) from t1 where x > ?;
+select count(*) from t1 where x = ?;
+select truncate(?,?);
+select truncate(?,-?);
+select v/?;
+select uncompress("?");
+SELECT UNHEX('?');
+select unhex(hex("?")), hex(unhex("?")), unhex("?"), unhex(NULL);
+select UpdateXML('?','?','?');
+select UpdateXML(@?, '?', '?');
+SELECT USER(),CURRENT_USER(),@@LOCAL.external_user;
+SELECT user(),current_user(),@@?;
+SELECT user, host FROM mysql.user where user = '?' order by ?,?;
+select user, host, password, plugin, authentication_string from mysql.user where user = '?';
+select userid,count(*) from t1 group by userid desc having ?  IN (?,COUNT(*));
+select userid,count(*) from t1 group by userid desc having (count(*)+?) IN (?,?);
+SELECT user_id FROM t1 WHERE request_id=?;
+SELECT UserId FROM t1 WHERE UserId=? group by Userid;
+select userid,pmtotal,pmnew, (select count(rd) from t1 where toid=t2.userid) calc_total, (select count(rd) from t1 where rd=? and toid=t2.userid) calc_new from t2 where userid in (select distinct toid from t1);
+select yearweek("?",?) as '?', yearweek("?",?) as '?', yearweek("?",?) as '?',yearweek("?",?) as '?', yearweek("?",?) as '?', yearweek("?",?) as '?', yearweek("?",?) as '?';
+select user() like "?";
+select user,password,plugin,authentication_string from mysql.user where user like '?';
+select user, QUOTE(host) from mysql.user where user="?";
+SELECT UTC_DATE();
+select utext from t1 where utext like '?';
+SELECT _utf32 0x10001=_utf32 0x10002;
+select _utf32'?' collate utf32_general_ci = 0xfffd;
+SELECT _utf8 0x7E, _utf8 X'?', _utf8 B'?';
+select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'?',_utf8 0xD0B1,_utf8 '?');
+select _utf8'?' union select _latin1'?';
+SELECT utf8_f,MIN(comment) FROM t1 GROUP BY ?;
+SELECT _utf8mb3'?';
+select _utf8mb4 0xD0B0D0B1D0B2 like concat(_utf8mb4'?',_utf8mb4 0xD0B1,_utf8mb4 '?');
+select (_utf8mb4 X'?');
+SELECT _utf8'?' COLLATE utf8_5624_2;
+select (_utf8 X'?');
+select uuid() into @?;
+SELECT v1.a, v2? b  FROM v1 LEFT OUTER JOIN v2 ON (v1.a=v2.b) AND (v1.a >= ?)    GROUP BY v1.a;
+SELECT v1.f4 FROM v1  WHERE f1<>? OR f2<>? AND f4='?' AND (f2<>? OR f3<>? AND f5<>? OR f4 LIKE '?');
+select v1.r_object_id, v2.users_names from v1, v2where (v1.group_name='?') and v2.r_object_id=v1.r_object_idorder by users_names;
+SELECT v2 FROM t1 WHERE v1  IN  ('?', '?', '?', '?' ) AND i  =  ?;
+select "?" as "?";
+SELECT @@?;
+select @? = CONVERT(@? USING ujis);
+SELECT @?;
+select @?, @?, @?=@?;
+SELECT @?, @?;
+SELECT @?, @?, @?, @?, @?, @?;
+SELECT (@v:=a) <> (@v:=?) FROM t1;
+select @?, coercibility(@?);
+select @@?, @@?, @@?,       @@?;
+SELECT @?, @?, @?, @?;
+SELECT user,host,password,insert_priv FROM user WHERE user=@? AND host=@?;
diff --git a/query_classifier/test/canonical_tests/select.sql b/query_classifier/test/canonical_tests/select.sql
new file mode 100644
index 000000000..8c6f987e8
--- /dev/null
+++ b/query_classifier/test/canonical_tests/select.sql
@@ -0,0 +1,60 @@
+select count(*) from t1 where id not in (1,2);
+select count(*) from t1 where match a against ('000000');
+SELECT COUNT(*) FROM t1 WHERE MATCH(a) AGAINST("+awrd bwrd* +cwrd*" IN BOOLEAN MODE);
+select count(*) from t1 where s1 < 0 or s1 is null;
+SELECT COUNT(*) FROM t1 WHERE s1 = 1001;
+select count(*) from t1 where x < -16;
+select count(*) from t1 where x = -16;
+select count(*) from t1 where x > -16;
+select count(*) from t1 where x = 18446744073709551601;
+select truncate(5678.123451,6);
+select truncate(99999999999999999999999999999999999999,-31);
+select v/10;
+select uncompress("");
+SELECT UNHEX('G');
+select unhex(hex("foobar")), hex(unhex("1234567890ABCDEF")), unhex("345678"), unhex(NULL);
+select UpdateXML('<a>a1<b>b1<c>c1</c>b2</b>a2</a>','/a/b/c','+++++++++');
+select UpdateXML(@xml, '/a/@aa1', '');
+SELECT USER(),CURRENT_USER(),@@LOCAL.external_user;
+SELECT user(),current_user(),@@proxy_user;
+SELECT user, host FROM mysql.user where user = 'CUser' order by 1,2;
+select user, host, password, plugin, authentication_string from mysql.user where user = 'u1';
+select userid,count(*) from t1 group by userid desc having 3  IN (1,COUNT(*));
+select userid,count(*) from t1 group by userid desc having (count(*)+1) IN (4,3);
+SELECT user_id FROM t1 WHERE request_id=9999999999999;
+SELECT UserId FROM t1 WHERE UserId=22 group by Userid;
+select userid,pmtotal,pmnew, (select count(rd) from t1 where toid=t2.userid) calc_total, (select count(rd) from t1 where rd=0 and toid=t2.userid) calc_new from t2 where userid in (select distinct toid from t1);
+select yearweek("2000-01-01",0) as '2000', yearweek("2001-01-01",0) as '2001', yearweek("2002-01-01",0) as '2002',yearweek("2003-01-01",0) as '2003', yearweek("2004-01-01",0) as '2004', yearweek("2005-01-01",0) as '2005', yearweek("2006-01-01",0) as '2006';
+select user() like "%@%";
+select user,password,plugin,authentication_string from mysql.user where user like 'foo%';
+select user, QUOTE(host) from mysql.user where user="mysqltest_8";
+SELECT UTC_DATE();
+select utext from t1 where utext like '%%';
+SELECT _utf32 0x10001=_utf32 0x10002;
+select _utf32'a' collate utf32_general_ci = 0xfffd;
+SELECT _utf8 0x7E, _utf8 X'7E', _utf8 B'01111110';
+select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
+select _utf8'12' union select _latin1'12345';
+SELECT utf8_f,MIN(comment) FROM t1 GROUP BY 1;
+SELECT _utf8mb3'test';
+select _utf8mb4 0xD0B0D0B1D0B2 like concat(_utf8mb4'%',_utf8mb4 0xD0B1,_utf8mb4 '%');
+select (_utf8mb4 X'616263FF');
+SELECT _utf8'test' COLLATE utf8_5624_2;
+select (_utf8 X'616263FF');
+select uuid() into @my_uuid;
+SELECT v1.a, v2. b  FROM v1 LEFT OUTER JOIN v2 ON (v1.a=v2.b) AND (v1.a >= 3)    GROUP BY v1.a;
+SELECT v1.f4 FROM v1  WHERE f1<>0 OR f2<>0 AND f4='v' AND (f2<>0 OR f3<>0 AND f5<>0 OR f4 LIKE '%b%');
+select v1.r_object_id, v2.users_names from v1, v2where (v1.group_name='tstgroup1') and v2.r_object_id=v1.r_object_idorder by users_names;
+SELECT v2 FROM t1 WHERE v1  IN  ('f', 'd', 'h', 'u' ) AND i  =  2;
+select "-- comment # followed by another comment" as "-- more comments";# this should be removed
+SELECT @@tx_isolation;
+select @ujis4 = CONVERT(@utf84 USING ujis);
+SELECT @user_var;
+select @v1def1, @v1def2, @v1def1=@v1def2;
+SELECT @v1, @v2;
+SELECT @v5, @v6, @v7, @v8, @v9, @v10;
+SELECT (@v:=a) <> (@v:=1) FROM t1;
+select @v, coercibility(@v);
+select @@version, @@version_comment, @@version_compile_machine,       @@version_compile_os;
+SELECT @x_str_1, @x_int_1, @x_int_2, @x_int_3;
+SELECT user,host,password,insert_priv FROM user WHERE user=@u AND host=@h;
diff --git a/utils/skygw_utils.cc b/utils/skygw_utils.cc
index 61e0caf56..77770982a 100644
--- a/utils/skygw_utils.cc
+++ b/utils/skygw_utils.cc
@@ -2031,37 +2031,46 @@ void skygw_file_close(
 	}
 }
 
+#define BUFFER_GROWTH_RATE 1.2
 static pcre2_code* remove_comments_re = NULL;
 static const PCRE2_SPTR remove_comments_pattern = (PCRE2_SPTR)
-"((--\\s.*)|(#.*))";
+"(?:`[^`]*`\\K)|(?:#.*|--[[:space:]].*)"; /* skip `quoted` names; strip "#..." and "-- ..." */
 
 /**
  * Remove SQL comments from the end of a string
  *
  * The inline comments are not removed due to the fact that they can alter the
  * behavior of the query.
- * @param str String to modify
- * @return Pointer to new modified string or NULL if memory allocation failed
+ * @param src Pointer to the string to modify.
+ * @param srcsize Pointer to a size_t variable which holds the length of the string to
+ * be modified.
+ * @param dest The address of the pointer where the result will be stored. If the
+ * value pointed by this parameter is NULL, new memory will be allocated as needed.
+ * @param destsize Size of the buffer in @c dest on entry (if any); on success, set to the length of the result.
+ * @return Pointer to new modified string or NULL if memory allocation failed.
+ * If NULL is returned and the value pointed by @c dest was not NULL, no new
+ * memory will be allocated, the memory pointed by @c dest will be freed and the
+ * contents of @c dest and @c destsize will be invalid.
  */
-char* remove_mysql_comments(const char* str)
+char* remove_mysql_comments(const char** src, const size_t* srcsize, char** dest, size_t* destsize)
 {
     static const PCRE2_SPTR replace = (PCRE2_SPTR) "";
     pcre2_match_data* mdata;
-    size_t orig_len = strlen(str);
-    size_t len = orig_len;
-    char* output = NULL;
+    char* output = *dest;
+    size_t orig_len = *srcsize;
+    size_t len = output ? *destsize : orig_len;
 
-    if (len > 0)
+    if (orig_len > 0)
     {
-        if ((output = (char*) malloc(len * sizeof (char))) &&
+        if ((output || (output = (char*) malloc(len * sizeof (char)))) &&
             (mdata = pcre2_match_data_create_from_pattern(remove_comments_re, NULL)))
         {
-            while (pcre2_substitute(remove_comments_re, (PCRE2_SPTR) str, orig_len, 0,
+            while (pcre2_substitute(remove_comments_re, (PCRE2_SPTR) * src, orig_len, 0,
                                     PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL,
                                     replace, PCRE2_ZERO_TERMINATED,
                                     (PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY)
             {
-                char* tmp = (char*) realloc(output, len *= 2);
+                char* tmp = (char*) realloc(output, (len = len * BUFFER_GROWTH_RATE + 1));
                 if (tmp == NULL)
                 {
                     free(output);
@@ -2078,41 +2087,56 @@ char* remove_mysql_comments(const char* str)
             output = NULL;
         }
     }
-    else
+    else if (output == NULL)
     {
-        output = strdup(str);
+        output = strdup(*src);
     }
+
+    if (output)
+    {
+        *destsize = strlen(output);
+        *dest = output;
+    }
+
     return output;
 }
 
 static pcre2_code* replace_values_re = NULL;
 static const PCRE2_SPTR replace_values_pattern = (PCRE2_SPTR) "(?i)([-=,+*/([:space:]]|\\b|[@])"
-"(?:[0-9.]+|(?<=[@])[a-z_]+|NULL)([-=,+*/)[:space:];]|$)";
+"(?:[0-9.-]+|(?<=[@])[a-z_0-9]+)([-=,+*/)[:space:];]|$)";
 
 /**
- * Replace every literal number and NULL value with a question mark.
- * @param str String to modify
- * @return Pointer to new modified string or NULL if memory allocation failed
+ * Replace literal numbers and user variables with a question mark.
+ * @param src Pointer to the string to modify.
+ * @param srcsize Pointer to a size_t variable which holds the length of the string to
+ * be modified.
+ * @param dest The address of the pointer where the result will be stored. If the
+ * value pointed by this parameter is NULL, new memory will be allocated as needed.
+ * @param destsize Size of the buffer in @c dest on entry (if any); on success, set to the length of the result.
+ * @return Pointer to new modified string or NULL if memory allocation failed.
+ * If NULL is returned and the value pointed by @c dest was not NULL, no new
+ * memory will be allocated, the memory pointed by @c dest will be freed and the
+ * contents of @c dest and @c destsize will be invalid.
  */
-char* replace_values(const char* str)
+char* replace_values(const char** src, const size_t* srcsize, char** dest, size_t* destsize)
 {
     static const PCRE2_SPTR replace = (PCRE2_SPTR) "$1?$2";
     pcre2_match_data* mdata;
-    size_t orig_len = strlen(str);
-    size_t len = orig_len;
-    char* output = NULL;
+    char* output = *dest;
+    size_t orig_len = *srcsize;
+    size_t len = output ? *destsize : orig_len;
 
-    if (len > 0)
+    if (orig_len > 0)
     {
-        if ((output = (char*) malloc(len * sizeof (char))) &&
+        if ((output || (output = (char*) malloc(len * sizeof (char)))) &&
             (mdata = pcre2_match_data_create_from_pattern(replace_values_re, NULL)))
         {
-            while (pcre2_substitute(replace_values_re, (PCRE2_SPTR) str, orig_len, 0,
+            while (pcre2_substitute(replace_values_re, (PCRE2_SPTR) * src, orig_len, 0,
                                     PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL,
                                     replace, PCRE2_ZERO_TERMINATED,
                                     (PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY)
             {
-                char* tmp = (char*) realloc(output, len *= 2);
+                char* tmp = (char*) realloc(output, (len = len * BUFFER_GROWTH_RATE + 1));
                 if (tmp == NULL)
                 {
                     free(output);
@@ -2129,10 +2153,17 @@ char* replace_values(const char* str)
             output = NULL;
         }
     }
-    else
+    else if (output == NULL)
     {
-        output = strdup(str);
+        output = strdup(*src);
     }
+
+    if (output)
+    {
+        *destsize = strlen(output);
+        *dest = output;
+    }
+
     return output;
 }
 
@@ -2229,31 +2260,40 @@ retblock:
 
 static pcre2_code* replace_quoted_re = NULL;
 static const PCRE2_SPTR replace_quoted_pattern = (PCRE2_SPTR)
-"(((?>(?<=[\"]))[^\"]*(?>(?=[\"])))|((?>(?<=[']))[^']*(?>(?=[']))))";
+"(?>[^'\"]*)(?|(?:\"\\K(?:(?:(?<=\\\\)\")|[^\"])*(\"))|(?:'\\K(?:(?:(?<=\\\\)')|[^'])*(')))";
 
 /**
- * Replace everything inside single or double quotes with question marks.
- * @param str String to modify
- * @return Pointer to new modified string or NULL if memory allocation failed
+ * Replace contents of single or double quoted strings with question marks.
+ * @param src Pointer to the string to modify.
+ * @param srcsize Pointer to a size_t variable which holds the length of the string to
+ * be modified.
+ * @param dest The address of the pointer where the result will be stored. If the
+ * value pointed by this parameter is NULL, new memory will be allocated as needed.
+ * @param destsize Size of the buffer in @c dest on entry (if any); on success, set to the length of the result.
+ * @return Pointer to new modified string or NULL if memory allocation failed.
+ * If NULL is returned and the value pointed by @c dest was not NULL, no new
+ * memory will be allocated, the memory pointed by @c dest will be freed and the
+ * contents of @c dest and @c destsize will be invalid.
  */
-char* replace_quoted(const char* str)
+char* replace_quoted(const char** src, const size_t* srcsize, char** dest, size_t* destsize)
 {
-    static const PCRE2_SPTR replace = (PCRE2_SPTR) "?";
+    static const PCRE2_SPTR replace = (PCRE2_SPTR) "?$1";
     pcre2_match_data* mdata;
-    size_t orig_len = strlen(str);
-    size_t len = orig_len;
-    char* output = NULL;
-    if (len > 0)
+    char* output = *dest;
+    size_t orig_len = *srcsize;
+    size_t len = output ? *destsize : orig_len;
+
+    if (orig_len > 0)
     {
-        if ((output = (char*) malloc(len * sizeof (char))) &&
+        if ((output || (output = (char*) malloc(len * sizeof (char)))) &&
             (mdata = pcre2_match_data_create_from_pattern(replace_quoted_re, NULL)))
         {
-            while (pcre2_substitute(replace_quoted_re, (PCRE2_SPTR) str, orig_len, 0,
+            while (pcre2_substitute(replace_quoted_re, (PCRE2_SPTR) * src, orig_len, 0,
                                     PCRE2_SUBSTITUTE_GLOBAL, mdata, NULL,
                                     replace, PCRE2_ZERO_TERMINATED,
                                     (PCRE2_UCHAR8*) output, &len) == PCRE2_ERROR_NOMEMORY)
             {
-                char* tmp = (char*) realloc(output, len *= 2);
+                char* tmp = (char*) realloc(output, (len = len * BUFFER_GROWTH_RATE + 1));
                 if (tmp == NULL)
                 {
                     free(output);
@@ -2270,10 +2310,21 @@ char* replace_quoted(const char* str)
             output = NULL;
         }
     }
+    else if (output == NULL)
+    {
+        output = strdup(*src);
+    }
+
+    if (output)
+    {
+        *destsize = strlen(output);
+        *dest = output;
+    }
     else
     {
-        output = strdup(str);
+        *dest = NULL;
     }
+
     return output;
 }
 
diff --git a/utils/skygw_utils.h b/utils/skygw_utils.h
index 01deab65c..e8121d227 100644
--- a/utils/skygw_utils.h
+++ b/utils/skygw_utils.h
@@ -276,12 +276,15 @@ EXTERN_C_BLOCK_BEGIN
 
 size_t get_decimal_len(size_t s);
 
-char* remove_mysql_comments(const char* str);
-char* replace_values(const char* str);
+char* remove_mysql_comments(const char** src, const size_t* srcsize, char** dest,
+                            size_t* destsize);
+char* replace_values(const char** src, const size_t* srcsize, char** dest,
+                     size_t* destsize);
 char* replace_literal(char* haystack, 
                       const char* needle, 
                       const char* replacement);
-char* replace_quoted(const char* str);
+char* replace_quoted(const char** src, const size_t* srcsize, char** dest,
+                     size_t* destsize);
 bool is_valid_posix_path(char* path);
 bool strip_escape_chars(char*);
 int simple_str_hash(char* key);