From 54ca1ab2dec779327d32b5806741f49885cb9da9 Mon Sep 17 00:00:00 2001 From: Markus Makela Date: Mon, 11 Jan 2016 12:25:33 +0200 Subject: [PATCH] Canonicalized queries remove non-executable comments The comments which do not alter the functionality of a query are now removed from canonicalized queries. Also fixed missing colon in the comment removal regex and added tests for comment removal. --- query_classifier/test/canonical_tests/CMakeLists.txt | 7 +++++++ .../test/canonical_tests/comment.expected | 11 +++++++++++ query_classifier/test/canonical_tests/comment.sql | 11 +++++++++++ utils/skygw_utils.cc | 6 +++--- 4 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 query_classifier/test/canonical_tests/comment.expected create mode 100644 query_classifier/test/canonical_tests/comment.sql diff --git a/query_classifier/test/canonical_tests/CMakeLists.txt b/query_classifier/test/canonical_tests/CMakeLists.txt index 0092b7841..8295893ef 100644 --- a/query_classifier/test/canonical_tests/CMakeLists.txt +++ b/query_classifier/test/canonical_tests/CMakeLists.txt @@ -29,3 +29,10 @@ add_test(NAME Internal-CanonicalQueryAlter COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/c ${CMAKE_CURRENT_BINARY_DIR}/alter.output ${CMAKE_CURRENT_SOURCE_DIR}/alter.expected $) + +add_test(NAME Internal-CanonicalQueryComment COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh + ${CMAKE_CURRENT_BINARY_DIR}/test.log + ${CMAKE_CURRENT_SOURCE_DIR}/comment.sql + ${CMAKE_CURRENT_BINARY_DIR}/comment.output + ${CMAKE_CURRENT_SOURCE_DIR}/comment.expected + $) diff --git a/query_classifier/test/canonical_tests/comment.expected b/query_classifier/test/canonical_tests/comment.expected new file mode 100644 index 000000000..f88c4ffde --- /dev/null +++ b/query_classifier/test/canonical_tests/comment.expected @@ -0,0 +1,11 @@ +select ?; +select ?; +select ?; +select /*! ? + */ ?; +select /*!? ? + */ ?; +select /*!? ? + */ ?; +SELECT ? ; +SELECT ? /*! +? */; +SELECT ? /*!? +? */; +SELECT ? /*M! +? 
*/; +SELECT ? /*M!? +? */; diff --git a/query_classifier/test/canonical_tests/comment.sql b/query_classifier/test/canonical_tests/comment.sql new file mode 100644 index 000000000..904752017 --- /dev/null +++ b/query_classifier/test/canonical_tests/comment.sql @@ -0,0 +1,11 @@ +select 1;-- comment after statement +select 1;# comment after statement +select /* inline comment */ 1; +select /*! 1 + */ 1; +select /*!300000 1 + */ 1; +select /*!300000 1 + */ 1; +SELECT 2 /* +1 */; +SELECT 1 /*! +1 */; +SELECT 1 /*!50101 +1 */; +SELECT 2 /*M! +1 */; +SELECT 2 /*M!50101 +1 */; diff --git a/utils/skygw_utils.cc b/utils/skygw_utils.cc index 77770982a..e4a7506a2 100644 --- a/utils/skygw_utils.cc +++ b/utils/skygw_utils.cc @@ -2034,13 +2034,13 @@ void skygw_file_close( #define BUFFER_GROWTH_RATE 1.2 static pcre2_code* remove_comments_re = NULL; static const PCRE2_SPTR remove_comments_pattern = (PCRE2_SPTR) -"(?:`[^`]*`\\K)|(?:#.*|--[[:space]].*)"; +"(?:`[^`]*`\\K)|(\\/[*](?!(M?!)).*?[*]\\/)|(?:#.*|--[[:space:]].*)"; /** * Remove SQL comments from the end of a string * - * The inline comments are not removed due to the fact that they can alter the - * behavior of the query. + * The inline executable comments are not removed due to the fact that they can + * alter the behavior of the query. * @param src Pointer to the string to modify. * @param srcsize Pointer to a size_t variable which holds the length of the string to * be modified.