MXS-2302: Rewrite hint tokenization and parsing

The tokenization is somewhat crude but given the small amount of token
types it is acceptably efficient while still maintaining readability. The
parsing is quite simple to implement as a sort of a recursive descent
parser and is a lot more readable that the old state machine
implementation.

Extended the unit test to check that all supported hint types are parsed
correctly. The stack mechanism isn't fully covered by the unit test and it
needs to be added once the stack mechanism uses STL containers.
This commit is contained in:
Markus Mäkelä 2019-02-15 13:14:25 +02:00
parent 897fee715d
commit ca9224bf88
No known key found for this signature in database
GPG Key ID: 72D48FCE664F7B19
2 changed files with 301 additions and 0 deletions

View File

@ -943,3 +943,220 @@ std::vector<std::pair<InputIter, InputIter>> get_all_comments(InputIter start, I
return rval;
}
// Simple container for two iterators and a token type
template<class InputIter>
struct Token
{
InputIter begin;
InputIter end;
TOKEN_VALUE type;
};
/**
* Extract the next token
*
* @param it Iterator to start from, modified to point to next non-whitespace character after the token
* @param end Past-the-end iterator
*
* @return The next token
*/
template<class InputIter>
Token<InputIter> next_token(InputIter& it, InputIter end)
{
const std::unordered_map<std::string, TOKEN_VALUE> tokens
{
{"begin", TOK_START},
{"end", TOK_STOP},
{"last", TOK_LAST},
{"master", TOK_MASTER},
{"maxscale", TOK_MAXSCALE},
{"prepare", TOK_PREPARE},
{"route", TOK_ROUTE},
{"server", TOK_SERVER},
{"slave", TOK_SLAVE},
{"start", TOK_START},
{"stop", TOK_STOP},
{"to", TOK_TO},
};
while (it != end && isspace(*it))
{
++it;
}
TOKEN_VALUE type = TOK_END;
auto start = it;
if (it != end)
{
if (*it == '=')
{
++it;
type = TOK_EQUAL;
}
else
{
while (it != end && !isspace(*it) && *it != '=')
{
++it;
}
auto t = tokens.find(std::string(start, it));
if (t != tokens.end())
{
type = t->second;
}
}
if (type == TOK_END && start != it)
{
// We read a string identifier
type = TOK_STRING;
}
}
return {start, it, type};
}
/**
* Process the definition of a hint
*
* @param it Start iterator
* @param end End iterator
*
* @return The processed hint or NULL on invalid input
*/
template<class InputIter>
HINT* process_definition(InputIter it, InputIter end)
{
HINT* rval = nullptr;
auto t = next_token(it, end);
if (t.type == TOK_ROUTE)
{
if (next_token(it, end).type == TOK_TO)
{
t = next_token(it, end);
if (t.type == TOK_MASTER)
{
rval = hint_create_route(nullptr, HINT_ROUTE_TO_MASTER, nullptr);
}
else if (t.type == TOK_SLAVE)
{
rval = hint_create_route(nullptr, HINT_ROUTE_TO_SLAVE, nullptr);
}
else if (t.type == TOK_LAST)
{
rval = hint_create_route(nullptr, HINT_ROUTE_TO_LAST_USED, nullptr);
}
else if (t.type == TOK_SERVER)
{
t = next_token(it, end);
if (t.type == TOK_STRING)
{
std::string value(t.begin, t.end);
rval = hint_create_route(nullptr, HINT_ROUTE_TO_NAMED_SERVER, value.c_str());
}
}
}
}
else if (t.type == TOK_STRING)
{
std::string key(t.begin, t.end);
auto eq = next_token(it, end);
auto val = next_token(it, end);
if (eq.type == TOK_EQUAL && val.type == TOK_STRING)
{
std::string value(val.begin, val.end);
rval = hint_create_parameter(nullptr, key.c_str(), value.c_str());
}
}
if (rval && next_token(it, end).type != TOK_END)
{
// Unexpected input after hint definition, treat it as an error and remove the hint
hint_free(rval);
rval = nullptr;
}
return rval;
}
template<class InputIter>
HINT* process_comment(HINT_SESSION* session, InputIter it, InputIter end)
{
HINT* rval = nullptr;
if (next_token(it, end).type == TOK_MAXSCALE)
{
// Peek at the next token
auto prev_it = it;
auto t = next_token(it, end);
if (t.type == TOK_START)
{
if ((rval = process_definition(it, end)))
{
hint_push(session, rval);
}
}
else if (t.type == TOK_STOP)
{
hint_pop(session);
}
else if (t.type == TOK_STRING)
{
std::string key(t.begin, t.end);
t = next_token(it, end);
if (t.type == TOK_EQUAL)
{
t = next_token(it, end);
if (t.type == TOK_STRING)
{
// A key=value hint
std::string value(t.begin, t.end);
rval = hint_create_parameter(nullptr, key.c_str(), value.c_str());
}
}
else if (t.type == TOK_PREPARE)
{
if ((rval = process_definition(it, end)))
{
// Preparation of a named hint
create_named_hint(session, key.c_str(), rval);
}
}
else if (t.type == TOK_START)
{
if ((rval = process_definition(it, end)))
{
if (!lookup_named_hint(session, key.c_str()))
{
// New hint defined, push it on to the stack
create_named_hint(session, key.c_str(), rval);
hint_push(session, hint_dup(rval));
}
}
else if ((rval = lookup_named_hint(session, key.c_str())))
{
// We starting an already define named hint
hint_push(session, hint_dup(rval));
}
}
}
else
{
// Only hint definition in the comment, use the stored iterator to process it again
rval = process_definition(prev_it, end);
}
}
return rval;
}

View File

@ -71,6 +71,51 @@ void test(const std::string& input, std::initializer_list<std::string> expected)
mxb_assert(rval);
}
static HINT_SESSION session = {};
void test_parse(const std::string& input, int expected_type)
{
for (auto comment : get_all_comments(input.begin(), input.end()))
{
std::string comment_str(comment.first, comment.second);
HINT* hint = process_comment(&session, comment.first, comment.second);
if (!hint && expected_type != 0)
{
std::cout << "Expected hint but didn't get one: " << comment_str << std::endl;
}
else if (hint && expected_type == 0)
{
std::cout << "Expected no hint but got one: " << comment_str << std::endl;
}
else if (hint && hint->type != expected_type)
{
std::cout << "Expected hint of type " << expected_type << " but got type "
<< (int)hint->type << ": " << comment_str << std::endl;
}
}
}
void count_hints(const std::string& input, int num_expected)
{
int n = 0;
for (auto comment : get_all_comments(input.begin(), input.end()))
{
if (process_comment(&session, comment.first, comment.second))
{
++n;
}
}
if (n != num_expected)
{
std::cout << "Expected " << num_expected << " hints but have " << n << "." << std::endl;
}
mxb_assert(n == num_expected);
}
int main(int argc, char** argv)
{
mxb::Log log(MXB_LOG_TARGET_STDOUT);
@ -139,5 +184,44 @@ int main(int argc, char** argv)
test("-- working comment\nselect 1; --bad comment", {"working comment"});
test("select 1 -- working comment --bad comment", {"working comment --bad comment"});
test_parse("SELECT 1 /* maxscale route to master */", HINT_ROUTE_TO_MASTER);
test_parse("SELECT 1 /* maxscale route to slave */", HINT_ROUTE_TO_SLAVE);
test_parse("SELECT 1 /* maxscale route to last*/", HINT_ROUTE_TO_LAST_USED);
test_parse("SELECT 1 /* maxscale route to server server1 */", HINT_ROUTE_TO_NAMED_SERVER);
test_parse("SELECT 1 /* maxscale test1 prepare route to server server1 */", HINT_ROUTE_TO_NAMED_SERVER);
test_parse("SELECT 1 /* maxscale test1 start route to server server1 */", HINT_ROUTE_TO_NAMED_SERVER);
test_parse("SELECT 1 /* maxscale start route to server server1 */", HINT_ROUTE_TO_NAMED_SERVER);
test_parse("SELECT 1 /* maxscale end*/", 0);
test_parse("SELECT 1 /* maxscale end*/", 0);
test_parse("SELECT 1 /* maxscale key=value */", HINT_PARAMETER);
test_parse("SELECT 1 /* maxscale max_slave_replication_lag=1*/", HINT_PARAMETER);
// Process multiple comments with hints in them
// Note: How the hints are used depends on the router module
count_hints("SELECT /* comment before hint */ 1 /* maxscale route to master */", 1);
count_hints("SELECT /* maxscale route to master */1/* comment after hint */", 1);
count_hints("SELECT /* maxscale route to slave */ 1 /* maxscale route to master */", 2);
count_hints("#maxscale route to slave\nSELECT 1;\n#maxscale route to master", 2);
count_hints("-- maxscale route to slave\nSELECT 1;\n-- maxscale route to master", 2);
count_hints("#maxscale route to slave \n#comment after hint\nSELECT 1", 1);
count_hints("#comment before hint\n#maxscale route to slave \nSELECT 1", 1);
// Hints with unexpected trailing input and unknown input
count_hints("/* maxscale route to slave server */ SELECT 1", 0);
count_hints("/* maxscale route to something */ SELECT 1", 0);
count_hints("/* maxscale route master */ SELECT 1", 0);
count_hints("/* maxscale route slave */ SELECT 1", 0);
count_hints("/* maxscale route to slave \n# comment inside comment\n */ SELECT 1", 0);
count_hints("/* maxscale route to slave -- */ SELECT 1", 0);
count_hints("/* maxscale route to slave # */ SELECT 1", 0);
count_hints("#/* maxscale route to slave */ SELECT 1", 0);
count_hints("-- /* maxscale route to slave */ SELECT 1", 0);
count_hints("-- # maxscale route to slave */ SELECT 1", 0);
count_hints("#-- maxscale route to slave */ SELECT 1", 0);
// The extra asterisk is a part of the comment and should cause the hint to be ignored. It could be
// processed but for the sake of simplicity and "bug compatibility" with 2.3 it is treated as an error.
count_hints("/**maxscale route to slave*/ SELECT 1", 0);
return 0;
}