240 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			240 lines
		
	
	
		
			5.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 * Copyright (c) 2018 MariaDB Corporation Ab
 *
 * Use of this software is governed by the Business Source License included
 * in the LICENSE.TXT file and at www.mariadb.com/bsl11.
 *
 * Change Date: 2023-11-12
 *
 * On the date above, in accordance with the Business Source License, use
 * of this software will be governed by version 2 or later of the General
 * Public License.
 */
| #pragma once
 | |
| 
 | |
| #include <maxscale/ccdefs.hh>
 | |
| #include <maxscale/modutil.hh>
 | |
| #include <ctype.h>
 | |
| 
 | |
| namespace maxscale
 | |
| {
 | |
| 
 | |
/**
 * Expands a string literal into two comma-separated arguments: the literal
 * itself and its length without the terminating NUL. The pair is suitable
 * as the leading (zWord, len) arguments of CustomParser::expect_token().
 *
 * Only pass an actual string literal (or char array); with a pointer,
 * sizeof would yield the pointer size rather than the string length.
 */
#define MXS_CP_EXPECT_TOKEN(zLiteral) zLiteral, (sizeof(zLiteral) - 1)

// Debugging aid: when MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED is defined,
// CustomParser::log_unexpected() and CustomParser::log_exhausted() log a
// notice; it is explicitly undefined here, so they compile to no-ops.
// #define MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
#undef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
 | |
| 
 | |
| class CustomParser
 | |
| {
 | |
|     CustomParser(const CustomParser&);
 | |
|     CustomParser& operator=(const CustomParser&);
 | |
| 
 | |
| public:
 | |
|     typedef int32_t token_t;
 | |
| 
 | |
|     enum token_required_t
 | |
|     {
 | |
|         TOKEN_REQUIRED,
 | |
|         TOKEN_NOT_REQUIRED,
 | |
|     };
 | |
| 
 | |
|     enum
 | |
|     {
 | |
|         PARSER_UNKNOWN_TOKEN = -2,
 | |
|         PARSER_EXHAUSTED     = -1
 | |
|     };
 | |
| 
 | |
|     CustomParser()
 | |
|         : m_pSql(NULL)
 | |
|         , m_len(0)
 | |
|         , m_pI(NULL)
 | |
|         , m_pEnd(NULL)
 | |
|     {
 | |
|     }
 | |
| 
 | |
| protected:
 | |
|     /**
 | |
|      * To be called when unexpected data is encountered. For debugging
 | |
|      * purposes, logging will only be performed if the define
 | |
|      * MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED is defined.
 | |
|      */
 | |
|     void log_unexpected()
 | |
|     {
 | |
| #ifdef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
 | |
|         MXS_NOTICE("Custom parser: In statement '%.*s', unexpected token at '%.*s'.",
 | |
|                    (int)m_len,
 | |
|                    m_pSql,
 | |
|                    (int)(m_pEnd - m_pI),
 | |
|                    m_pI);
 | |
| #endif
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * To be called when there is no more data even though there is
 | |
|      * expected to be. For debugging purposes, logging will only be
 | |
|      * performed if the define MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
 | |
|      * is defined.
 | |
|      */
 | |
|     void log_exhausted()
 | |
|     {
 | |
| #ifdef MXS_CP_LOG_UNEXPECTED_AND_EXHAUSTED
 | |
|         MXS_NOTICE("Custom parser: More tokens expected in statement '%.*s'.", (int)m_len, m_pSql);
 | |
| #endif
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Is the character an alphabetic character.
 | |
|      *
 | |
|      * @param c A char
 | |
|      *
 | |
|      * @return True if @c c is between 'a' and 'z' or 'A' and 'Z', inclusive.
 | |
|      */
 | |
|     static bool is_alpha(char c)
 | |
|     {
 | |
|         return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Is the character a number
 | |
|      *
 | |
|      * @param c A char
 | |
|      *
 | |
|      * @return True if @c c is between '0' and '9' inclusive.
 | |
|      */
 | |
|     static bool is_number(char c)
 | |
|     {
 | |
|         return c >= '0' && c <= '9';
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Is a character some offset from the current position, a specific one.
 | |
|      *
 | |
|      * @param uc      An UPPERCASE character.
 | |
|      * @param offset  How many characters from the current position.
 | |
|      *
 | |
|      * @return True if the character at the position is the one specified or
 | |
|      *         its lowercase equivalent.
 | |
|      */
 | |
|     bool is_next_alpha(char uc, int offset = 1) const
 | |
|     {
 | |
|         mxb_assert(uc >= 'A' && uc <= 'Z');
 | |
| 
 | |
|         char lc = uc + ('a' - 'A');
 | |
| 
 | |
|         return ((m_pI + offset) < m_pEnd)
 | |
|                && ((*(m_pI + offset) == uc) || (*(m_pI + offset) == lc));
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Peek current character.
 | |
|      *
 | |
|      * @param pC  Upon successful return will be the current character.
 | |
|      *
 | |
|      * @return True, if the current character was returned, false otherwise.
 | |
|      *         False will only be returned if the current position is at
 | |
|      *         the end.
 | |
|      */
 | |
|     bool peek_current_char(char* pC) const
 | |
|     {
 | |
|         if (m_pI != m_pEnd)
 | |
|         {
 | |
|             *pC = *m_pI;
 | |
|         }
 | |
| 
 | |
|         return m_pI != m_pEnd;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Peek next character.
 | |
|      *
 | |
|      * @param pC  Upon successful return will be the next character.
 | |
|      *
 | |
|      * @return True, if the next character was returned, false otherwise.
 | |
|      *         False will only be returned if the current position is at
 | |
|      *         the end.
 | |
|      */
 | |
|     bool peek_next_char(char* pC) const
 | |
|     {
 | |
|         bool rc = (m_pI + 1 < m_pEnd);
 | |
| 
 | |
|         if (rc)
 | |
|         {
 | |
|             *pC = *(m_pI + 1);
 | |
|         }
 | |
| 
 | |
|         return rc;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Convert a character to upper case.
 | |
|      *
 | |
|      * @param c The character to convert.
 | |
|      *
 | |
|      * @return The uppercase equivalent. If @c c is already uppercase,
 | |
|      *         then it is returned.
 | |
|      */
 | |
|     static char toupper(char c)
 | |
|     {
 | |
|         // Significantly faster than library version.
 | |
|         return (c >= 'a' && c <= 'z') ? c - ('a' - 'A') : c;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Bypass all whitespace from current position.
 | |
|      */
 | |
|     void bypass_whitespace()
 | |
|     {
 | |
|         m_pI = modutil_MySQL_bypass_whitespace(const_cast<char*>(m_pI), m_pEnd - m_pI);
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      * Check whether an expected token is available.
 | |
|      *
 | |
|      * @param zWord  A token.
 | |
|      * @param len    The token length.
 | |
|      * @param token  The value to be returned if the next token is the
 | |
|      *               expected one.
 | |
|      *
 | |
|      * @return @c token if the current token is the expected one,
 | |
|      *         otherwise PARSER_UNKNOWN_TOKEN.
 | |
|      */
 | |
|     token_t expect_token(const char* zWord, int len, token_t token)
 | |
|     {
 | |
|         const char* pI = m_pI;
 | |
|         const char* pEnd = zWord + len;
 | |
| 
 | |
|         while ((pI < m_pEnd) && (zWord < pEnd) && (toupper(*pI) == *zWord))
 | |
|         {
 | |
|             ++pI;
 | |
|             ++zWord;
 | |
|         }
 | |
| 
 | |
|         if (zWord == pEnd)
 | |
|         {
 | |
|             if ((pI == m_pEnd) || (!isalpha(*pI)))      // Handwritten isalpha not faster than library
 | |
|                                                         // version.
 | |
|             {
 | |
|                 m_pI = pI;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 token = PARSER_UNKNOWN_TOKEN;
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             token = PARSER_UNKNOWN_TOKEN;
 | |
|         }
 | |
| 
 | |
|         return token;
 | |
|     }
 | |
| 
 | |
| protected:
 | |
|     const char* m_pSql;
 | |
|     int         m_len;
 | |
|     const char* m_pI;
 | |
|     const char* m_pEnd;
 | |
| };
 | |
| }
 | 
