From 04cae711f60bcac0ddc9284a27f14473228f7f8c Mon Sep 17 00:00:00 2001
From: Johan Wikman <johan.wikman@mariadb.com>
Date: Tue, 30 May 2017 16:35:24 +0300
Subject: [PATCH] MXS-1275: Add class for custom parsing

maxscale::CustomParser is a helper class to be used when making
custom recursive-descent parsers for detecting something specific.
---
 include/maxscale/customparser.hh | 219 +++++++++++++++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 include/maxscale/customparser.hh

diff --git a/include/maxscale/customparser.hh b/include/maxscale/customparser.hh
new file mode 100644
index 000000000..ab6aa3b9f
--- /dev/null
+++ b/include/maxscale/customparser.hh
@@ -0,0 +1,219 @@
+#pragma once
+/*
+ * Copyright (c) 2016 MariaDB Corporation Ab
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file and at www.mariadb.com/bsl11.
+ *
+ * Change Date: 2019-07-01
+ *
+ * On the date above, in accordance with the Business Source License, use
+ * of this software will be governed by version 2 or later of the General
+ * Public License.
+ */
+
+#include <maxscale/cppdefs.hh>
+#include <maxscale/log_manager.h>
+#include <maxscale/modutil.h>
+#include <ctype.h>
+
+namespace maxscale
+{
+
+#define MXS_CP_EXPECT_TOKEN(string_literal) string_literal, (sizeof(string_literal) - 1)
+
+// For debugging purposes.
+// #define MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
+#undef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
+
+class CustomParser
+{
+    CustomParser(const CustomParser&);
+    CustomParser& operator = (const CustomParser&);
+
+public:
+    typedef int32_t token_t;
+
+    enum token_required_t
+    {
+        TOKEN_REQUIRED,
+        TOKEN_NOT_REQUIRED,
+    };
+
+    enum
+    {
+        PARSER_UNKNOWN_TOKEN = -2,
+        PARSER_EXHAUSTED     = -1
+    };
+
+    CustomParser()
+        : m_pSql(NULL)
+        , m_len(0)
+        , m_pI(NULL)
+        , m_pEnd(NULL)
+    {
+    }
+
+protected:
+    /**
+     * To be called when unexpected data is encountered. For debugging
+     * purposes, logging will only be performed if the define
+     * MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED is defined.
+     */
+    void log_unexpected()
+    {
+#ifdef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
+        MXS_NOTICE("Custom parser: In statement '%.*s', unexpected token at '%.*s'.",
+                   (int)m_len, m_pSql, (int)(m_pEnd - m_pI), m_pI);
+#endif
+    }
+
+    /**
+     * To be called when there is no more data even though there is
+     * expected to be. For debugging purposes, logging will only be
+     * performed if the define MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
+     * is defined.
+     */
+    void log_exhausted()
+    {
+#ifdef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
+        MXS_NOTICE("Custom parser: More tokens expected in statement '%.*s'.", (int)m_len, m_pSql);
+#endif
+    }
+
+    /**
+     * Is the character an alphabetic character.
+     *
+     * @param c A char
+     *
+     * @return True if @c c is between 'a' and 'z' or 'A' and 'Z', inclusive.
+     */
+    static bool is_alpha(char c)
+    {
+        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+    }
+
+    /**
+     * Is the character a number
+     *
+     * @param c A char
+     *
+     * @return True if @c c is between '0' and '9' inclusive.
+     */
+    static bool is_number(char c)
+    {
+        return (c >= '0' && c <= '9');
+    }
+
+    /**
+     * Is a character some offset from the current position, a specific one.
+     *
+     * @param uc      An UPPERCASE character.
+     * @param offset  How many characters from the current position.
+     *
+     * @return True if the character at the position is the one specified or
+     *         its lowercase equivalent.
+     */
+    bool is_next_alpha(char uc, int offset = 1) const
+    {
+        ss_dassert(uc >= 'A' && uc <= 'Z');
+
+        char lc = uc + ('a' - 'A');
+
+        return
+            ((m_pI + offset) < m_pEnd) &&
+            ((*(m_pI + offset) == uc) || (*(m_pI + offset) == lc));
+    }
+
+    /**
+     * Peek next character.
+     *
+     * @param pC  Upon successful return will be the next character.
+     *
+     * @return True, if the next character was returned, false otherwise.
+     *         False will only be returned if the current position is at
+     *         the end.
+     */
+    bool peek_next_char(char* pC)
+    {
+        bypass_whitespace();
+
+        if (m_pI != m_pEnd)
+        {
+            *pC = *m_pI;
+        }
+
+        return m_pI != m_pEnd;
+    }
+
+    /**
+     * Convert a character to upper case.
+     *
+     * @param c The character to convert.
+     *
+     * @return The uppercase equivalent. If @c c is already uppercase,
+     *         then it is returned.
+     */
+    static char toupper(char c)
+    {
+        // Significantly faster than library version.
+        return (c >= 'a' && c <='z') ? c - ('a' - 'A') : c;
+    }
+
+    /**
+     * Bypass all whitespace from current position.
+     */
+    void bypass_whitespace()
+    {
+        m_pI = modutil_MySQL_bypass_whitespace(const_cast<char*>(m_pI), m_pEnd - m_pI);
+    }
+
+    /**
+     * Check whether an expected token is available.
+     *
+     * @param zWord  A token.
+     * @param len    The token length.
+     * @param token  The value to be returned if the next token is the
+     *               expected one.
+     *
+     * @return @c token if the current token is the expected one,
+     *         otherwise PARSER_UNKNOWN_TOKEN.
+     */
+    token_t expect_token(const char* zWord, int len, token_t token)
+    {
+        const char* pI = m_pI;
+        const char* pEnd = zWord + len;
+
+        while ((pI < m_pEnd) && (zWord < pEnd) && (toupper(*pI) == *zWord))
+        {
+            ++pI;
+            ++zWord;
+        }
+
+        if (zWord == pEnd)
+        {
+            if ((pI == m_pEnd) || (!isalpha(*pI))) // Handwritten isalpha not faster than library version.
+            {
+                m_pI = pI;
+            }
+            else
+            {
+                token = PARSER_UNKNOWN_TOKEN;
+            }
+        }
+        else
+        {
+            token = PARSER_UNKNOWN_TOKEN;
+        }
+
+        return token;
+    }
+
+protected:
+    const char* m_pSql;
+    int         m_len;
+    const char* m_pI;
+    const char* m_pEnd;
+};
+
+}