MXS-1702: Process comments when canonicalizing

The canonicalization process now strips non-executable comments from the
SQL and replaces all constants in executable comments.

Enabled the comment test and updated expected output of the select and
alter tests.
This commit is contained in:
Markus Mäkelä
2018-03-16 15:29:55 +02:00
parent 9fe25c8003
commit e4efc29297
4 changed files with 65 additions and 11 deletions

View File

@ -1186,6 +1186,20 @@ mxs_pcre2_result_t modutil_mysql_wildcard_match(const char* pattern, const char*
return rval;
}
static inline bool is_next(mxs::Buffer::iterator it, mxs::Buffer::iterator end, const std::string& str)
{
ss_dassert(it != end);
for (auto s_it = str.begin(); s_it != str.end(); s_it++, it++)
{
if (it == end || *it != *s_it)
{
return false;
}
}
return true;
}
static std::pair<bool, mxs::Buffer::iterator> probe_number(mxs::Buffer::iterator it,
mxs::Buffer::iterator end)
{
@ -1296,7 +1310,9 @@ char* modutil_get_canonical(GWBUF* querybuf)
NONE,
SINGLE_QUOTE,
DOUBLE_QUOTE,
BACKTICK
BACKTICK,
UNTIL_NEWLINE,
INLINE_COMMENT
} my_state = NONE;
for (auto it = std::next(buf.begin(), MYSQL_HEADER_LEN + 1); // Skip packet header and command
@ -1339,6 +1355,27 @@ char* modutil_get_canonical(GWBUF* querybuf)
}
break;
case INLINE_COMMENT:
if (is_next(it, buf.end(), "*/"))
{
// Comment end marker, return to normal parsing
it++;
my_state = NONE;
}
break;
case UNTIL_NEWLINE:
if (is_next(it, buf.end(), "\r\n"))
{
it++;
my_state = NONE;
}
else if (is_next(it, buf.end(), "\n") || is_next(it, buf.end(), "\r"))
{
my_state = NONE;
}
break;
default:
if (isspace(*it))
{
@ -1349,6 +1386,24 @@ char* modutil_get_canonical(GWBUF* querybuf)
}
*it = ' ';
}
else if (is_next(it, buf.end(), "/*"))
{
auto comment_start = std::next(it, 2);
if (comment_start != buf.end() &&
*comment_start != '!' &&
*comment_start != 'M')
{
// Non-executable comment
my_state = INLINE_COMMENT;
continue;
}
}
else if (is_next(it, buf.end(), "# ") || is_next(it, buf.end(), "-- "))
{
// End-of-line comment, jump to the next line if one exists
my_state = UNTIL_NEWLINE;
continue;
}
else if (isdigit(*it) && !isalpha(prev) && !isdigit(prev) && prev != '_')
{
auto num_end = probe_number(it, buf.end());

View File

@ -22,13 +22,12 @@ add_test(NAME test_canonical_alter COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest
${CMAKE_CURRENT_SOURCE_DIR}/alter.expected
$<TARGET_FILE:canonizer>)
# TODO: Fix comment removal
# add_test(NAME test_canonical_comment COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh
# ${CMAKE_CURRENT_BINARY_DIR}/test.log
# ${CMAKE_CURRENT_SOURCE_DIR}/comment.sql
# ${CMAKE_CURRENT_BINARY_DIR}/comment.output
# ${CMAKE_CURRENT_SOURCE_DIR}/comment.expected
# $<TARGET_FILE:canonizer>)
add_test(NAME test_canonical_comment COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh
${CMAKE_CURRENT_BINARY_DIR}/test.log
${CMAKE_CURRENT_SOURCE_DIR}/comment.sql
${CMAKE_CURRENT_BINARY_DIR}/comment.output
${CMAKE_CURRENT_SOURCE_DIR}/comment.expected
$<TARGET_FILE:canonizer>)
add_test(NAME test_canonical_whitespace COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/canontest.sh
${CMAKE_CURRENT_BINARY_DIR}/test.log

View File

@ -1,7 +1,7 @@
ALTER DATABASE `` DEFAULT CHARACTER SET latin2;
ALTER DATABASE `#mysql50#../` UPGRADE DATA DIRECTORY NAME;
ALTER DATABASE `#mysql50#../..` UPGRADE DATA DIRECTORY NAME; # a comment
ALTER DATABASE `#mysql51#not-yet` UPGRADE DATA DIRECTORY NAME; # a comment with backticks `this should work`
ALTER DATABASE `#mysql50#../..` UPGRADE DATA DIRECTORY NAME;
ALTER DATABASE `#mysql51#not-yet` UPGRADE DATA DIRECTORY NAME;
ALTER DATABASE `test-database` CHARACTER SET utf8 COLLATE utf8_unicode_ci ;
ALTER DEFINER=root@localhost EVENT e1 ON SCHEDULE EVERY ? HOUR;
ALTER EVENT e1 COMMENT ?;

View File

@ -46,7 +46,7 @@ SELECT v1.a, v2. b FROM v1 LEFT OUTER JOIN v2 ON (v1.a=v2.b) AND (v1.a >= ?) GRO
SELECT v1.f4 FROM v1 WHERE f1<>? OR f2<>? AND f4=? AND (f2<>? OR f3<>? AND f5<>? OR f4 LIKE ?);
select v1.r_object_id, v2.users_names from v1, v2where (v1.group_name=?) and v2.r_object_id=v1.r_object_idorder by users_names;
SELECT v2 FROM t1 WHERE v1 IN (?, ?, ?, ? ) AND i = ?;
select ? as ?;# this should be removed
select ? as ?;
SELECT @@tx_isolation;
select @ujis4 = CONVERT(@utf84 USING ujis);
SELECT @user_var;