MXS-2302: Add comment extraction code

The code extracts comments from a query. All three comment types are
supported and the double dash comments properly handle invalid input
(fixes MXS-2289).

The code uses iterators to access to the query and returns a list of
iterator pairs as start and end markers to comments. This makes testing
easier as the input and output can be passed as an std::string.
This commit is contained in:
Markus Mäkelä 2019-02-14 11:02:57 +02:00
parent 41a72bcc30
commit 11be8ed0f0
No known key found for this signature in database
GPG Key ID: 72D48FCE664F7B19
4 changed files with 287 additions and 0 deletions

View File

@ -2,3 +2,7 @@ add_library(hintfilter SHARED hintfilter.cc hintparser.cc)
set_target_properties(hintfilter PROPERTIES INSTALL_RPATH ${CMAKE_INSTALL_RPATH}:${MAXSCALE_LIBDIR} VERSION "1.0.0" LINK_FLAGS -Wl,-z,defs)
target_link_libraries(hintfilter maxscale-common)
install_module(hintfilter core)
if(BUILD_TESTS)
add_subdirectory(test)
endif()

View File

@ -19,6 +19,7 @@
#include <maxscale/filter.hh>
#include <maxscale/modinfo.h>
#include <maxscale/modutil.hh>
#include <maxscale/buffer.hh>
#include "mysqlhint.hh"
#include <maxscale/alloc.h>
@ -805,3 +806,139 @@ HINTSTACK* free_hint_stack(HINTSTACK* hint_stack)
return NULL;
}
}
/**
* Advance an iterator until either an unescaped character `c` is found or `end` is reached
*
* @param it Iterator to start from
* @param end Past-the-end iterator
* @param c The character to look for
*
* @return The iterator pointing at the first occurrence of the character or `end` if one was not found
*/
template<class InputIter>
InputIter skip_until(InputIter it, InputIter end, char c)
{
while (it != end)
{
if (*it == '\\')
{
if (++it == end)
{
continue;
}
}
else if (*it == c)
{
break;
}
++it;
}
return it;
}
/**
* Extract a MariaDB comment
*
* @param it Iterator pointing to the start of the search string
* @param end Past-the-end iterator
*
* @return A pair of iterators pointing to the range the comment spans. The comment tags themselves are not
* included in this range. If no comment is found, a pair of `end` iterators is returned.
*/
template<class InputIter>
std::pair<InputIter, InputIter> get_comment(InputIter it, InputIter end)
{
while (it != end)
{
switch (*it)
{
case '\\':
if (++it == end)
{
continue;
}
break;
case '"':
case '\'':
case '`':
// Quoted literal string or identifier
if ((it = skip_until(std::next(it), end, *it)) == end)
{
// Malformed quoted value
continue;
}
break;
case '#':
// A comment that spans the rest of the line
++it;
return {it, skip_until(it, end, '\n')};
case '-':
if (++it != end && *it == '-' && ++it != end && *it == ' ')
{
++it;
return {it, skip_until(it, end, '\n')};
}
continue;
case '/':
if (++it != end && *it == '*' && ++it != end)
{
auto start = it;
while (it != end)
{
auto comment_end = skip_until(it, end, '*');
it = comment_end;
if (it != end && ++it != end && *it == '/')
{
return {start, comment_end};
}
}
}
continue;
default:
break;
}
++it;
}
return {end, end};
}
/**
* Extract all MariaDB comments from a query
*
* @param start Iterator position to start from
* @param end Past-the-end iterator
*
* @return A list of iterator pairs pointing to all comments in the query
*/
template<class InputIter>
std::vector<std::pair<InputIter, InputIter>> get_all_comments(InputIter start, InputIter end)
{
std::vector<std::pair<InputIter, InputIter>> rval;
do
{
auto comment = get_comment(start, end);
if (comment.first != comment.second)
{
rval.push_back(comment);
}
start = comment.second;
}
while (start != end);
return rval;
}

View File

@ -0,0 +1,3 @@
add_executable(test_hintparser test_hintparser.cc)
target_link_libraries(test_hintparser maxscale-common)
add_test(test_hintparser test_hintparser)

View File

@ -0,0 +1,143 @@
/*
* Copyright (c) 2016 MariaDB Corporation Ab
*
* Use of this software is governed by the Business Source License included
* in the LICENSE.TXT file and at www.mariadb.com/bsl11.
*
* Change Date: 2022-01-01
*
* On the date above, in accordance with the Business Source License, use
* of this software will be governed by version 2 or later of the General
* Public License.
*/
#ifndef SS_DEBUG
#define SS_DEBUG
#endif
#include "../hintparser.cc"
#include <maxbase/log.hh>
#include <algorithm>
#include <iostream>
#include <initializer_list>
void test(const std::string& input, std::initializer_list<std::string> expected)
{
bool rval = true;
auto it = expected.begin();
for (auto output : get_all_comments(input.begin(), input.end()))
{
if (it == expected.end())
{
std::cout << "Too much output: " << std::string(output.first, output.second) << std::endl;
rval = false;
break;
}
int have = std::distance(output.first, output.second);
int need = it->length();
if (have != need)
{
std::cout << "Need " << need << " bytes but only have " << have << std::endl;
rval = false;
}
else if (!std::equal(output.first, output.second, it->begin()))
{
std::cout << "Output not equal to expected output" << std::endl;
rval = false;
}
if (!rval)
{
std::cout << "Input: " << input << std::endl;
std::cout << "Output: " << std::string(output.first, output.second) << std::endl;
std::cout << "Expected: " << *it << std::endl;
std::cout << std::endl;
}
++it;
}
if (it != expected.end())
{
std::cout << "Not enough output, need " << std::distance(it, expected.end())
<< " more comments" << std::endl;
rval = false;
}
mxb_assert(rval);
}
int main(int argc, char** argv)
{
mxb::Log log(MXB_LOG_TARGET_STDOUT);
// Simple comments
test("select 1 -- this is a comment", {"this is a comment"});
test("select 1 #this is a comment", {"this is a comment"});
test("select 1 # this is a comment", {" this is a comment"});
test("select 1 /*this is a comment*/", {"this is a comment"});
// Comments on line before, after and in between queries
test("-- this is a comment\nselect 1", {"this is a comment"});
test("#this is a comment\nselect 1", {"this is a comment"});
test("select 1\n-- this is a comment", {"this is a comment"});
test("select 1\n#this is a comment", {"this is a comment"});
test("select 1;\n-- this is a comment\nselect 2;", {"this is a comment"});
test("select 1;\n#this is a comment\nselect 2;", {"this is a comment"});
// Comment blocks on multiple lines
test("select 1\n/* this is a comment */", {" this is a comment "});
test("select 1\n/*this is \na comment*/", {"this is \na comment"});
test("select 1\n/**\n *this is \n* a comment\n*/", {"*\n *this is \n* a comment\n"});
test("select /*this is a comment*/ 1", {"this is a comment"});
test("select 1\n/* this is \na comment */", {" this is \na comment "});
// Multiple comments in the same query
test("select /*first*/ 1 /*second*/", {"first", "second"});
test("-- first\nselect 1\n-- second", {"first", "second"});
test("/** first comment */ select 1 -- second comment", {"* first comment ", "second comment"});
test("#first\nselect 1\n#second#comment", {"first", "second#comment"});
test("#first\nselect 1/*second*/-- third", {"first", "second", "third"});
// Comments inside quotes
test("select '/*do not parse this*/' /*parse this*/", {"parse this"});
test("select \"/*do not parse this*/\" /*parse this*/", {"parse this"});
test("select `/*do not parse this*/`/*parse this*/", {"parse this"});
test("select/*parse this*/ '/*do not parse this*/'", {"parse this"});
test("select/*parse this*/ \"/*do not parse this*/\"", {"parse this"});
test("select/*parse this*/ `/*do not parse this*/`", {"parse this"});
test("select \"/*do not\\\" parse this*/\"", {});
test("select '/*do not'' parse this*/'", {});
test("select '/*do not\\' parse this*/'", {});
// Malformed input
test("select '/*do not parse this*/\"", {});
test("select \"/*do not parse this*/'", {});
test("select `/*do not parse this*/'", {});
test("select `/*do not parse this*/\"", {});
test("select \"/*do not parse this*/", {});
test("select '/*do not parse this*/", {});
test("select `/*do not parse this*/", {});
test("select /do not parse this*/", {});
test("select / *do not parse this*/", {});
test("select /*do not parse this* /", {});
test("select /*do not parse this*\\/", {});
test("select /\n*do not parse this*/", {});
test("select --\ndo not parse this", {});
test("select --\tdo not parse this", {});
test("select ' \\' -- do not parse this", {});
test("select \" \\\" -- do not parse this", {});
test("select ` \\` -- do not parse this", {});
// MXS-2289
test("select 1; --bad comment", {});
test("select 1; --bad comment\n -- working comment", {"working comment"});
test("-- working comment\nselect 1; --bad comment", {"working comment"});
test("select 1 -- working comment --bad comment", {"working comment --bad comment"});
return 0;
}