From 04cae711f60bcac0ddc9284a27f14473228f7f8c Mon Sep 17 00:00:00 2001 From: Johan Wikman Date: Tue, 30 May 2017 16:35:24 +0300 Subject: [PATCH] MXS-1275: Add class for custom parsing maxscale::CustomParser is a helper class to be used when making custom recursive-descent parsers for detecting something specific. --- include/maxscale/customparser.hh | 219 +++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 include/maxscale/customparser.hh diff --git a/include/maxscale/customparser.hh b/include/maxscale/customparser.hh new file mode 100644 index 000000000..ab6aa3b9f --- /dev/null +++ b/include/maxscale/customparser.hh @@ -0,0 +1,219 @@ +#pragma once +/* + * Copyright (c) 2016 MariaDB Corporation Ab + * + * Use of this software is governed by the Business Source License included + * in the LICENSE.TXT file and at www.mariadb.com/bsl11. + * + * Change Date: 2019-07-01 + * + * On the date above, in accordance with the Business Source License, use + * of this software will be governed by version 2 or later of the General + * Public License. + */ + +#include +#include +#include +#include + +namespace maxscale +{ + +#define MXS_CP_EXPECT_TOKEN(string_literal) string_literal, (sizeof(string_literal) - 1) + +// For debugging purposes. +// #define MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED +#undef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED + +class CustomParser +{ + CustomParser(const CustomParser&); + CustomParser& operator = (const CustomParser&); + +public: + typedef int32_t token_t; + + enum token_required_t + { + TOKEN_REQUIRED, + TOKEN_NOT_REQUIRED, + }; + + enum + { + PARSER_UNKNOWN_TOKEN = -2, + PARSER_EXHAUSTED = -1 + }; + + CustomParser() + : m_pSql(NULL) + , m_len(0) + , m_pI(NULL) + , m_pEnd(NULL) + { + } + +protected: + /** + * To be called when unexpected data is encountered. For debugging + * purposes, logging will only be performed if the define + * MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED is defined. + */ + void log_unexpected() + { +#ifdef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED + MXS_NOTICE("Custom parser: In statement '%.*s', unexpected token at '%.*s'.", + (int)m_len, m_pSql, (int)(m_pEnd - m_pI), m_pI); +#endif + } + + /** + * To be called when there is no more data even though there is + * expected to be. For debugging purposes, logging will only be + * performed if the define MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED + * is defined. + */ + void log_exhausted() + { +#ifdef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED + MXS_NOTICE("Custom parser: More tokens expected in statement '%.*s'.", (int)m_len, m_pSql); +#endif + } + + /** + * Is the character an alphabetic character. + * + * @param c A char + * + * @return True if @c c is between 'a' and 'z' or 'A' and 'Z', inclusive. + */ + static bool is_alpha(char c) + { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + } + + /** + * Is the character a number + * + * @param c A char + * + * @return True if @c c is between '0' and '9' inclusive. + */ + static bool is_number(char c) + { + return (c >= '0' && c <= '9'); + } + + /** + * Is a character some offset from the current position, a specific one. + * + * @param uc An UPPERCASE character. + * @param offset How many characters from the current position. + * + * @return True if the character at the position is the one specified or + * its lowercase equivalent. + */ + bool is_next_alpha(char uc, int offset = 1) const + { + ss_dassert(uc >= 'A' && uc <= 'Z'); + + char lc = uc + ('a' - 'A'); + + return + ((m_pI + offset) < m_pEnd) && + ((*(m_pI + offset) == uc) || (*(m_pI + offset) == lc)); + } + + /** + * Peek next character. + * + * @param pC Upon successful return will be the next character. + * + * @return True, if the next character was returned, false otherwise. + * False will only be returned if the current position is at + * the end. + */ + bool peek_next_char(char* pC) + { + bypass_whitespace(); + + if (m_pI != m_pEnd) + { + *pC = *m_pI; + } + + return m_pI != m_pEnd; + } + + /** + * Convert a character to upper case. + * + * @param c The character to convert. + * + * @return The uppercase equivalent. If @c c is already uppercase, + * then it is returned. + */ + static char toupper(char c) + { + // Significantly faster than library version. + return (c >= 'a' && c <='z') ? c - ('a' - 'A') : c; + } + + /** + * Bypass all whitespace from current position. + */ + void bypass_whitespace() + { + m_pI = modutil_MySQL_bypass_whitespace(const_cast(m_pI), m_pEnd - m_pI); + } + + /** + * Check whether an expected token is available. + * + * @param zWord A token. + * @param len The token length. + * @param token The value to be returned if the next token is the + * expected one. + * + * @return @c token if the current token is the expected one, + * otherwise PARSER_UNKNOWN_TOKEN. + */ + token_t expect_token(const char* zWord, int len, token_t token) + { + const char* pI = m_pI; + const char* pEnd = zWord + len; + + while ((pI < m_pEnd) && (zWord < pEnd) && (toupper(*pI) == *zWord)) + { + ++pI; + ++zWord; + } + + if (zWord == pEnd) + { + if ((pI == m_pEnd) || (!isalpha(*pI))) // Handwritten isalpha not faster than library version. + { + m_pI = pI; + } + else + { + token = PARSER_UNKNOWN_TOKEN; + } + } + else + { + token = PARSER_UNKNOWN_TOKEN; + } + + return token; + } + +protected: + const char* m_pSql; + int m_len; + const char* m_pI; + const char* m_pEnd; +}; + +}