MXS-2302: Rewrite hint tokenization and parsing

The tokenization is somewhat crude but given the small amount of token types it is acceptably efficient while still maintaining readability. The parsing is quite simple to implement as a sort of a recursive descent parser and is a lot more readable that the old state machine implementation. Extended the unit test to check that all supported hint types are parsed correctly. The stack mechanism isn't fully covered by the unit test and it needs to be added once the stack mechanism uses STL containers.
2019-02-15 13:14:25 +02:00 · 2019-02-15 13:14:25 +02:00 · ca9224bf88
commit ca9224bf88
parent 897fee715d
2 changed files with 301 additions and 0 deletions
--- a/server/modules/filter/hintfilter/hintparser.cc
+++ b/server/modules/filter/hintfilter/hintparser.cc
@ -943,3 +943,220 @@ std::vector<std::pair<InputIter, InputIter>> get_all_comments(InputIter start, I

    return rval;
 }
+
+// Simple container for two iterators and a token type
+template<class InputIter>
+struct Token
+{
+    InputIter   begin;
+    InputIter   end;
+    TOKEN_VALUE type;
+};
+
+/**
+ * Extract the next token
+ *
+ * @param it  Iterator to start from, modified to point to next non-whitespace character after the token
+ * @param end Past-the-end iterator
+ *
+ * @return The next token
+ */
+template<class InputIter>
+Token<InputIter> next_token(InputIter& it, InputIter end)
+{
+    const std::unordered_map<std::string, TOKEN_VALUE> tokens
+    {
+        {"begin", TOK_START},
+        {"end", TOK_STOP},
+        {"last", TOK_LAST},
+        {"master", TOK_MASTER},
+        {"maxscale", TOK_MAXSCALE},
+        {"prepare", TOK_PREPARE},
+        {"route", TOK_ROUTE},
+        {"server", TOK_SERVER},
+        {"slave", TOK_SLAVE},
+        {"start", TOK_START},
+        {"stop", TOK_STOP},
+        {"to", TOK_TO},
+    };
+
+    while (it != end && isspace(*it))
+    {
+        ++it;
+    }
+
+    TOKEN_VALUE type = TOK_END;
+    auto start = it;
+
+    if (it != end)
+    {
+        if (*it == '=')
+        {
+            ++it;
+            type = TOK_EQUAL;
+        }
+        else
+        {
+            while (it != end && !isspace(*it) && *it != '=')
+            {
+                ++it;
+            }
+
+            auto t = tokens.find(std::string(start, it));
+
+            if (t != tokens.end())
+            {
+                type = t->second;
+            }
+        }
+
+        if (type == TOK_END && start != it)
+        {
+            // We read a string identifier
+            type = TOK_STRING;
+        }
+    }
+
+    return {start, it, type};
+}
+
+/**
+ * Process the definition of a hint
+ *
+ * @param it  Start iterator
+ * @param end End iterator
+ *
+ * @return The processed hint or NULL on invalid input
+ */
+template<class InputIter>
+HINT* process_definition(InputIter it, InputIter end)
+{
+    HINT* rval = nullptr;
+    auto t = next_token(it, end);
+
+    if (t.type == TOK_ROUTE)
+    {
+        if (next_token(it, end).type == TOK_TO)
+        {
+            t = next_token(it, end);
+
+            if (t.type == TOK_MASTER)
+            {
+                rval = hint_create_route(nullptr, HINT_ROUTE_TO_MASTER, nullptr);
+            }
+            else if (t.type == TOK_SLAVE)
+            {
+                rval = hint_create_route(nullptr, HINT_ROUTE_TO_SLAVE, nullptr);
+            }
+            else if (t.type == TOK_LAST)
+            {
+                rval = hint_create_route(nullptr, HINT_ROUTE_TO_LAST_USED, nullptr);
+            }
+            else if (t.type == TOK_SERVER)
+            {
+                t = next_token(it, end);
+
+                if (t.type == TOK_STRING)
+                {
+                    std::string value(t.begin, t.end);
+                    rval = hint_create_route(nullptr, HINT_ROUTE_TO_NAMED_SERVER, value.c_str());
+                }
+            }
+        }
+    }
+    else if (t.type == TOK_STRING)
+    {
+        std::string key(t.begin, t.end);
+        auto eq = next_token(it, end);
+        auto val = next_token(it, end);
+
+        if (eq.type == TOK_EQUAL && val.type == TOK_STRING)
+        {
+            std::string value(val.begin, val.end);
+            rval = hint_create_parameter(nullptr, key.c_str(), value.c_str());
+        }
+    }
+
+    if (rval && next_token(it, end).type != TOK_END)
+    {
+        // Unexpected input after hint definition, treat it as an error and remove the hint
+        hint_free(rval);
+        rval = nullptr;
+    }
+
+    return rval;
+}
+
+template<class InputIter>
+HINT* process_comment(HINT_SESSION* session, InputIter it, InputIter end)
+{
+    HINT* rval = nullptr;
+
+    if (next_token(it, end).type == TOK_MAXSCALE)
+    {
+        // Peek at the next token
+        auto prev_it = it;
+        auto t = next_token(it, end);
+
+        if (t.type == TOK_START)
+        {
+            if ((rval = process_definition(it, end)))
+            {
+                hint_push(session, rval);
+            }
+        }
+        else if (t.type == TOK_STOP)
+        {
+            hint_pop(session);
+        }
+        else if (t.type == TOK_STRING)
+        {
+            std::string key(t.begin, t.end);
+            t = next_token(it, end);
+
+            if (t.type == TOK_EQUAL)
+            {
+                t = next_token(it, end);
+
+                if (t.type == TOK_STRING)
+                {
+                    // A key=value hint
+                    std::string value(t.begin, t.end);
+                    rval = hint_create_parameter(nullptr, key.c_str(), value.c_str());
+                }
+            }
+            else if (t.type == TOK_PREPARE)
+            {
+                if ((rval = process_definition(it, end)))
+                {
+                    // Preparation of a named hint
+                    create_named_hint(session, key.c_str(), rval);
+                }
+            }
+            else if (t.type == TOK_START)
+            {
+                if ((rval = process_definition(it, end)))
+                {
+                    if (!lookup_named_hint(session, key.c_str()))
+                    {
+                        // New hint defined, push it on to the stack
+                        create_named_hint(session, key.c_str(), rval);
+                        hint_push(session, hint_dup(rval));
+                    }
+                }
+                else if ((rval = lookup_named_hint(session, key.c_str())))
+                {
+                    // We starting an already define named hint
+                    hint_push(session, hint_dup(rval));
+                }
+            }
+        }
+        else
+        {
+            // Only hint definition in the comment, use the stored iterator to process it again
+            rval = process_definition(prev_it, end);
+        }
+    }
+
+    return rval;
+}
--- a/server/modules/filter/hintfilter/test/test_hintparser.cc
+++ b/server/modules/filter/hintfilter/test/test_hintparser.cc
@ -71,6 +71,51 @@ void test(const std::string& input, std::initializer_list<std::string> expected)
    mxb_assert(rval);
 }

+static HINT_SESSION session = {};
+
+void test_parse(const std::string& input, int expected_type)
+{
+    for (auto comment : get_all_comments(input.begin(), input.end()))
+    {
+        std::string comment_str(comment.first, comment.second);
+        HINT* hint = process_comment(&session, comment.first, comment.second);
+
+        if (!hint && expected_type != 0)
+        {
+            std::cout << "Expected hint but didn't get one: " << comment_str << std::endl;
+        }
+        else if (hint && expected_type == 0)
+        {
+            std::cout << "Expected no hint but got one: " << comment_str << std::endl;
+        }
+        else if (hint && hint->type != expected_type)
+        {
+            std::cout << "Expected hint of type " << expected_type << " but got type "
+                      << (int)hint->type << ": " << comment_str << std::endl;
+        }
+    }
+}
+
+void count_hints(const std::string& input, int num_expected)
+{
+    int n = 0;
+
+    for (auto comment : get_all_comments(input.begin(), input.end()))
+    {
+        if (process_comment(&session, comment.first, comment.second))
+        {
+            ++n;
+        }
+    }
+
+    if (n != num_expected)
+    {
+        std::cout << "Expected " << num_expected << " hints but have " << n << "." << std::endl;
+    }
+
+    mxb_assert(n == num_expected);
+}
+
 int main(int argc, char** argv)
 {
    mxb::Log log(MXB_LOG_TARGET_STDOUT);
@ -139,5 +184,44 @@ int main(int argc, char** argv)
    test("-- working comment\nselect 1; --bad comment", {"working comment"});
    test("select 1 -- working comment --bad comment", {"working comment --bad comment"});

+    test_parse("SELECT 1 /* maxscale route to master */", HINT_ROUTE_TO_MASTER);
+    test_parse("SELECT 1 /* maxscale route to slave */", HINT_ROUTE_TO_SLAVE);
+    test_parse("SELECT 1 /* maxscale route to last*/", HINT_ROUTE_TO_LAST_USED);
+    test_parse("SELECT 1 /* maxscale route to server server1 */", HINT_ROUTE_TO_NAMED_SERVER);
+    test_parse("SELECT 1 /* maxscale test1 prepare route to server server1 */", HINT_ROUTE_TO_NAMED_SERVER);
+    test_parse("SELECT 1 /* maxscale test1 start route to server server1 */", HINT_ROUTE_TO_NAMED_SERVER);
+    test_parse("SELECT 1 /* maxscale start route to server server1 */", HINT_ROUTE_TO_NAMED_SERVER);
+    test_parse("SELECT 1 /* maxscale end*/", 0);
+    test_parse("SELECT 1 /* maxscale end*/", 0);
+    test_parse("SELECT 1 /* maxscale key=value */", HINT_PARAMETER);
+    test_parse("SELECT 1 /* maxscale max_slave_replication_lag=1*/", HINT_PARAMETER);
+
+    // Process multiple comments  with hints in them
+    // Note: How the hints are used depends on the router module
+    count_hints("SELECT /* comment before hint */ 1 /* maxscale route to master */", 1);
+    count_hints("SELECT /* maxscale route to master */1/* comment after hint */", 1);
+    count_hints("SELECT /* maxscale route to slave */ 1 /* maxscale route to master */", 2);
+    count_hints("#maxscale route to slave\nSELECT 1;\n#maxscale route to master", 2);
+    count_hints("-- maxscale route to slave\nSELECT 1;\n-- maxscale route to master", 2);
+    count_hints("#maxscale route to slave \n#comment after hint\nSELECT 1", 1);
+    count_hints("#comment before hint\n#maxscale route to slave \nSELECT 1", 1);
+
+    // Hints with unexpected trailing input and unknown input
+    count_hints("/* maxscale route to slave server */ SELECT 1", 0);
+    count_hints("/* maxscale route to something */ SELECT 1", 0);
+    count_hints("/* maxscale route master */ SELECT 1", 0);
+    count_hints("/* maxscale route slave */ SELECT 1", 0);
+    count_hints("/* maxscale route to slave \n# comment inside comment\n */ SELECT 1", 0);
+    count_hints("/* maxscale route to slave -- */ SELECT 1", 0);
+    count_hints("/* maxscale route to slave # */ SELECT 1", 0);
+    count_hints("#/* maxscale route to slave */ SELECT 1", 0);
+    count_hints("-- /* maxscale route to slave */ SELECT 1", 0);
+    count_hints("-- # maxscale route to slave */ SELECT 1", 0);
+    count_hints("#-- maxscale route to slave */ SELECT 1", 0);
+
+    // The extra asterisk is a part of the comment and should cause the hint to be ignored. It could be
+    // processed but for the sake of simplicity and "bug compatibility" with 2.3 it is treated as an error.
+    count_hints("/**maxscale route to slave*/ SELECT 1", 0);
+
    return 0;
 }