From e4efc29297faa63553d563c6770d6039ceb827a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20M=C3=A4kel=C3=A4?= Date: Fri, 16 Mar 2018 15:29:55 +0200 Subject: [PATCH] MXS-1702: Process comments when canonicalizing The canonicalization process now strips non-executable comments from the SQL and replaces all constants in executable comments. Enabled the comment test and updated expected output of the select and alter tests. --- server/core/modutil.cc | 57 ++++++++++++++++++- .../core/test/canonical_tests/CMakeLists.txt | 13 ++--- .../core/test/canonical_tests/alter.expected | 4 +- .../core/test/canonical_tests/select.expected | 2 +- 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/server/core/modutil.cc b/server/core/modutil.cc index 412ed3f43..ea3e2b095 100644 --- a/server/core/modutil.cc +++ b/server/core/modutil.cc @@ -1186,6 +1186,20 @@ mxs_pcre2_result_t modutil_mysql_wildcard_match(const char* pattern, const char* return rval; } +static inline bool is_next(mxs::Buffer::iterator it, mxs::Buffer::iterator end, const std::string& str) +{ + ss_dassert(it != end); + for (auto s_it = str.begin(); s_it != str.end(); s_it++, it++) + { + if (it == end || *it != *s_it) + { + return false; + } + } + + return true; +} + static std::pair probe_number(mxs::Buffer::iterator it, mxs::Buffer::iterator end) { @@ -1296,7 +1310,9 @@ char* modutil_get_canonical(GWBUF* querybuf) NONE, SINGLE_QUOTE, DOUBLE_QUOTE, - BACKTICK + BACKTICK, + UNTIL_NEWLINE, + INLINE_COMMENT } my_state = NONE; for (auto it = std::next(buf.begin(), MYSQL_HEADER_LEN + 1); // Skip packet header and command @@ -1339,6 +1355,27 @@ char* modutil_get_canonical(GWBUF* querybuf) } break; + case INLINE_COMMENT: + if (is_next(it, buf.end(), "*/")) + { + // Comment end marker, return to normal parsing + it++; + my_state = NONE; + } + break; + + case UNTIL_NEWLINE: + if (is_next(it, buf.end(), "\r\n")) + { + it++; + my_state = NONE; + } + else if (is_next(it, buf.end(), "\n") || is_next(it, buf.end(), "\r")) + { + my_state = NONE; + } + break; + default: if (isspace(*it)) { @@ -1349,6 +1386,24 @@ char* modutil_get_canonical(GWBUF* querybuf) } *it = ' '; } + else if (is_next(it, buf.end(), "/*")) + { + auto comment_start = std::next(it, 2); + if (comment_start != buf.end() && + *comment_start != '!' && + *comment_start != 'M') + { + // Non-executable comment + my_state = INLINE_COMMENT; + continue; + } + } + else if (is_next(it, buf.end(), "# ") || is_next(it, buf.end(), "-- ")) + { + // End-of-line comment, jump to the next line if one exists + my_state = UNTIL_NEWLINE; + continue; + } else if (isdigit(*it) && !isalpha(prev) && !isdigit(prev) && prev != '_') { auto num_end = probe_number(it, buf.end()); diff --git a/server/core/test/canonical_tests/CMakeLists.txt b/server/core/test/canonical_tests/CMakeLists.txt index 0ef8700fe..612815733 100644 --- a/server/core/test/canonical_tests/CMakeLists.txt +++ b/server/core/test/canonical_tests/CMakeLists.txt @@ -22,13 +22,12 @@ add_test(NAME test_canonical_alter COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest ${CMAKE_CURRENT_SOURCE_DIR}/alter.expected $) -# TODO: Fix comment removal -# add_test(NAME test_canonical_comment COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh -# ${CMAKE_CURRENT_BINARY_DIR}/test.log -# ${CMAKE_CURRENT_SOURCE_DIR}/comment.sql -# ${CMAKE_CURRENT_BINARY_DIR}/comment.output -# ${CMAKE_CURRENT_SOURCE_DIR}/comment.expected -# $) +add_test(NAME test_canonical_comment COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh + ${CMAKE_CURRENT_BINARY_DIR}/test.log + ${CMAKE_CURRENT_SOURCE_DIR}/comment.sql + ${CMAKE_CURRENT_BINARY_DIR}/comment.output + ${CMAKE_CURRENT_SOURCE_DIR}/comment.expected + $) add_test(NAME test_canonical_whitespace COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh ${CMAKE_CURRENT_BINARY_DIR}/test.log diff --git a/server/core/test/canonical_tests/alter.expected b/server/core/test/canonical_tests/alter.expected index f82d3c409..6b8c1a62e 100644 --- a/server/core/test/canonical_tests/alter.expected +++ b/server/core/test/canonical_tests/alter.expected @@ -1,7 +1,7 @@ ALTER DATABASE `` DEFAULT CHARACTER SET latin2; ALTER DATABASE `#mysql50#../` UPGRADE DATA DIRECTORY NAME; -ALTER DATABASE `#mysql50#../..` UPGRADE DATA DIRECTORY NAME; # a comment -ALTER DATABASE `#mysql51#not-yet` UPGRADE DATA DIRECTORY NAME; # a comment with backticks `this should work` +ALTER DATABASE `#mysql50#../..` UPGRADE DATA DIRECTORY NAME; +ALTER DATABASE `#mysql51#not-yet` UPGRADE DATA DIRECTORY NAME; ALTER DATABASE `test-database` CHARACTER SET utf8 COLLATE utf8_unicode_ci ; ALTER DEFINER=root@localhost EVENT e1 ON SCHEDULE EVERY ? HOUR; ALTER EVENT e1 COMMENT ?; diff --git a/server/core/test/canonical_tests/select.expected b/server/core/test/canonical_tests/select.expected index f0ae849bd..cf7b3f4c9 100644 --- a/server/core/test/canonical_tests/select.expected +++ b/server/core/test/canonical_tests/select.expected @@ -46,7 +46,7 @@ SELECT v1.a, v2. b FROM v1 LEFT OUTER JOIN v2 ON (v1.a=v2.b) AND (v1.a >= ?) GRO SELECT v1.f4 FROM v1 WHERE f1<>? OR f2<>? AND f4=? AND (f2<>? OR f3<>? AND f5<>? OR f4 LIKE ?); select v1.r_object_id, v2.users_names from v1, v2where (v1.group_name=?) and v2.r_object_id=v1.r_object_idorder by users_names; SELECT v2 FROM t1 WHERE v1 IN (?, ?, ?, ? ) AND i = ?; -select ? as ?;# this should be removed +select ? as ?; SELECT @@tx_isolation; select @ujis4 = CONVERT(@utf84 USING ujis); SELECT @user_var;