289 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			289 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*
 * Copyright (c) 2016 MariaDB Corporation Ab
 *
 * Use of this software is governed by the Business Source License included
 * in the LICENSE.TXT file and at www.mariadb.com/bsl11.
 *
 * Change Date: 2024-11-16
 *
 * On the date above, in accordance with the Business Source License, use
 * of this software will be governed by version 2 or later of the General
 * Public License.
 *
 */
 | 
						|
 | 
						|
/**
 * @file maxscale_pcre2.c - Utility functions for regular expression matching
 * with the bundled PCRE2 library.
 *
 * @verbatim
 * Revision History
 *
 * Date       Who           Description
 * 30-10-2015 Markus Makela Initial implementation
 * @endverbatim
 */
 | 
						|
 | 
						|
#include <maxscale/pcre2.h>
 | 
						|
#include <maxbase/assert.h>
 | 
						|
#include <maxbase/alloc.h>
 | 
						|
#include <maxscale/log.hh>
 | 
						|
 | 
						|
/**
 | 
						|
 * Utility wrapper for PCRE2 library function call pcre2_substitute.
 | 
						|
 *
 | 
						|
 * This function replaces all occurences of a pattern with the provided replacement
 | 
						|
 * and places the end result into @c dest. This buffer must be allocated by the caller.
 | 
						|
 * If the size of @c dest is not large enough it will be reallocated to a larger size.
 | 
						|
 * The size of @c dest is stored in @c size if any reallocation takes place.
 | 
						|
 *
 | 
						|
 * @param re Compiled pattern to use
 | 
						|
 * @param subject Subject string
 | 
						|
 * @param replace Replacement string
 | 
						|
 * @param dest Destination buffer
 | 
						|
 * @param size Size of the destination buffer
 | 
						|
 * @return MXS_PCRE2_MATCH if replacements were made, MXS_PCRE2_NOMATCH if nothing
 | 
						|
 * was replaced or MXS_PCRE2_ERROR if memory reallocation failed
 | 
						|
 */
 | 
						|
mxs_pcre2_result_t mxs_pcre2_substitute(pcre2_code* re,
 | 
						|
                                        const char* subject,
 | 
						|
                                        const char* replace,
 | 
						|
                                        char** dest,
 | 
						|
                                        size_t* size)
 | 
						|
{
 | 
						|
    int rc;
 | 
						|
    mxs_pcre2_result_t rval = MXS_PCRE2_ERROR;
 | 
						|
    pcre2_match_data* mdata = pcre2_match_data_create_from_pattern(re, NULL);
 | 
						|
 | 
						|
    if (mdata)
 | 
						|
    {
 | 
						|
        size_t size_tmp = *size;
 | 
						|
        while ((rc = pcre2_substitute(re,
 | 
						|
                                      (PCRE2_SPTR) subject,
 | 
						|
                                      PCRE2_ZERO_TERMINATED,
 | 
						|
                                      0,
 | 
						|
                                      PCRE2_SUBSTITUTE_GLOBAL,
 | 
						|
                                      mdata,
 | 
						|
                                      NULL,
 | 
						|
                                      (PCRE2_SPTR) replace,
 | 
						|
                                      PCRE2_ZERO_TERMINATED,
 | 
						|
                                      (PCRE2_UCHAR*) *dest,
 | 
						|
                                      &size_tmp)) == PCRE2_ERROR_NOMEMORY)
 | 
						|
        {
 | 
						|
            size_tmp = 2 * (*size);
 | 
						|
            char* tmp = (char*)MXS_REALLOC(*dest, size_tmp);
 | 
						|
            if (tmp == NULL)
 | 
						|
            {
 | 
						|
                break;
 | 
						|
            }
 | 
						|
            *dest = tmp;
 | 
						|
            *size = size_tmp;
 | 
						|
        }
 | 
						|
 | 
						|
        if (rc > 0)
 | 
						|
        {
 | 
						|
            rval = MXS_PCRE2_MATCH;
 | 
						|
        }
 | 
						|
        else if (rc == 0)
 | 
						|
        {
 | 
						|
            rval = MXS_PCRE2_NOMATCH;
 | 
						|
        }
 | 
						|
        pcre2_match_data_free(mdata);
 | 
						|
    }
 | 
						|
 | 
						|
    return rval;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * Do a simple matching of a pattern to a string.
 | 
						|
 *
 | 
						|
 * This function compiles the given pattern and checks if the subject string matches
 | 
						|
 * it.
 | 
						|
 * @param pattern Pattern used for matching
 | 
						|
 * @param subject Subject string to match
 | 
						|
 * @param options PCRE2 compilation options
 | 
						|
 * @param error The PCRE2 error code is stored here if one is available
 | 
						|
 * @return MXS_PCRE2_MATCH if @c subject matches @c pattern, MXS_PCRE2_NOMATCH if
 | 
						|
 * they do not match and MXS_PCRE2_ERROR if an error occurred. If an error occurred
 | 
						|
 * within the PCRE2 library, @c error will contain the error code. Otherwise it is
 | 
						|
 * set to 0.
 | 
						|
 */
 | 
						|
mxs_pcre2_result_t mxs_pcre2_simple_match(const char* pattern,
 | 
						|
                                          const char* subject,
 | 
						|
                                          int options,
 | 
						|
                                          int* error)
 | 
						|
{
 | 
						|
    int err;
 | 
						|
    size_t erroff;
 | 
						|
    mxs_pcre2_result_t rval = MXS_PCRE2_ERROR;
 | 
						|
    pcre2_code* re = pcre2_compile((PCRE2_SPTR) pattern,
 | 
						|
                                   PCRE2_ZERO_TERMINATED,
 | 
						|
                                   options,
 | 
						|
                                   &err,
 | 
						|
                                   &erroff,
 | 
						|
                                   NULL);
 | 
						|
    if (re)
 | 
						|
    {
 | 
						|
        pcre2_match_data* mdata = pcre2_match_data_create_from_pattern(re, NULL);
 | 
						|
        if (mdata)
 | 
						|
        {
 | 
						|
            int rc = pcre2_match(re,
 | 
						|
                                 (PCRE2_SPTR) subject,
 | 
						|
                                 PCRE2_ZERO_TERMINATED,
 | 
						|
                                 0,
 | 
						|
                                 0,
 | 
						|
                                 mdata,
 | 
						|
                                 NULL);
 | 
						|
            if (rc == PCRE2_ERROR_NOMATCH)
 | 
						|
            {
 | 
						|
                rval = MXS_PCRE2_NOMATCH;
 | 
						|
            }
 | 
						|
            else if (rc > 0)
 | 
						|
            {
 | 
						|
                /** Since we used the pattern to create the matching data,
 | 
						|
                 * pcre2_match will never return 0 */
 | 
						|
                rval = MXS_PCRE2_MATCH;
 | 
						|
            }
 | 
						|
            pcre2_match_data_free(mdata);
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            *error = 0;
 | 
						|
        }
 | 
						|
        pcre2_code_free(re);
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
        *error = err;
 | 
						|
    }
 | 
						|
    return rval;
 | 
						|
}
 | 
						|
 | 
						|
void mxs_pcre2_print_error(int errorcode,
 | 
						|
                           const char* module_name,
 | 
						|
                           const char* filename,
 | 
						|
                           int line_num,
 | 
						|
                           const char* func_name)
 | 
						|
{
 | 
						|
    mxb_assert(filename);
 | 
						|
    mxb_assert(func_name);
 | 
						|
    if (mxs_log_is_priority_enabled(LOG_ERR))
 | 
						|
    {
 | 
						|
        // 120 should be enough to contain any error message according to pcre2 manual.
 | 
						|
        const PCRE2_SIZE errbuf_len = 120;
 | 
						|
        PCRE2_UCHAR errorbuf[errbuf_len];
 | 
						|
        pcre2_get_error_message(errorcode, errorbuf, errbuf_len);
 | 
						|
        mxs_log_message(LOG_ERR,
 | 
						|
                        module_name,
 | 
						|
                        filename,
 | 
						|
                        line_num,
 | 
						|
                        func_name,
 | 
						|
                        "PCRE2 Error message: '%s'.",
 | 
						|
                        errorbuf);
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
/**
 * Check a subject against an optional "match" and an optional "exclude" pattern.
 *
 * @param re_match       Pattern the subject has to match, can be NULL
 * @param re_exclude     Pattern the subject must not match, can be NULL
 * @param md             Match data used for both patterns. Must not be NULL if
 *                       either pattern is given.
 * @param subject        Subject string. Must not be NULL if either pattern is given.
 * @param length         Length of @c subject. If the value equals
 *                       PCRE2_ZERO_TERMINATED when cast to size_t, the length is
 *                       taken with strlen.
 * @param calling_module Module name passed on to the logger
 * @return True if the subject matches @c re_match (or it is NULL) and does not
 * match @c re_exclude (or it is NULL). False otherwise, including when PCRE2
 * reports a matching error (the error is logged).
 */
bool mxs_pcre2_check_match_exclude(pcre2_code* re_match,
                                   pcre2_code* re_exclude,
                                   pcre2_match_data* md,
                                   const char* subject,
                                   int length,
                                   const char* calling_module)
{
    mxb_assert((!re_match && !re_exclude) || (md && subject));

    if (!re_match && !re_exclude)
    {
        // Nothing to check. The subject is allowed to be NULL in this case, so
        // it must not be handed to strlen below.
        return true;
    }

    bool rval = true;
    int string_len = ((size_t)length == PCRE2_ZERO_TERMINATED) ? strlen(subject) : length;

    if (re_match)
    {
        int result = pcre2_match(re_match, (PCRE2_SPTR)subject, string_len, 0, 0, md, NULL);

        if (result == PCRE2_ERROR_NOMATCH)
        {
            rval = false;   // Didn't match the "match"-regex

            if (mxs_log_is_priority_enabled(LOG_INFO))
            {
                mxs_log_message(LOG_INFO,
                                calling_module,
                                __FILE__,
                                __LINE__,
                                __func__,
                                "Subject does not match the 'match' pattern: %.*s",
                                string_len,
                                subject);
            }
        }
        else if (result < 0)
        {
            rval = false;
            /* The __FILE__ etc macros here do not match calling_module, but
             * the values are only used for throttling messages. */
            mxs_pcre2_print_error(result, calling_module, __FILE__, __LINE__, __func__);
        }
    }

    // The exclude pattern is only evaluated if the subject is still acceptable.
    if (rval && re_exclude)
    {
        int result = pcre2_match(re_exclude, (PCRE2_SPTR)subject, string_len, 0, 0, md, NULL);

        if (result >= 0)
        {
            rval = false;   // Matched the "exclude"-regex

            if (mxs_log_is_priority_enabled(LOG_INFO))
            {
                mxs_log_message(LOG_INFO,
                                calling_module,
                                __FILE__,
                                __LINE__,
                                __func__,
                                "Query matches the 'exclude' pattern: %.*s",
                                string_len,
                                subject);
            }
        }
        else if (result != PCRE2_ERROR_NOMATCH)
        {
            rval = false;
            mxs_pcre2_print_error(result, calling_module, __FILE__, __LINE__, __func__);
        }
    }

    return rval;
}
 | 
						|
 | 
						|
namespace maxscale
 | 
						|
{
 | 
						|
std::string pcre2_substitute(pcre2_code* re,
 | 
						|
                             const std::string& subject,
 | 
						|
                             const std::string& replace,
 | 
						|
                             std::string* error)
 | 
						|
{
 | 
						|
    mxb_assert(re);
 | 
						|
    std::string rval = subject;
 | 
						|
    size_t size_tmp = rval.size();
 | 
						|
    int rc;
 | 
						|
 | 
						|
    while ((rc = pcre2_substitute(re, (PCRE2_SPTR) subject.c_str(), subject.length(),
 | 
						|
                                  0, PCRE2_SUBSTITUTE_GLOBAL, NULL, NULL,
 | 
						|
                                  (PCRE2_SPTR) replace.c_str(), replace.length(),
 | 
						|
                                  (PCRE2_UCHAR*) &rval[0], &size_tmp)) == PCRE2_ERROR_NOMEMORY)
 | 
						|
    {
 | 
						|
        rval.resize(rval.size() * 2 + 1);
 | 
						|
        size_tmp = rval.size();
 | 
						|
    }
 | 
						|
 | 
						|
    if (rc < 0)
 | 
						|
    {
 | 
						|
        if (error)
 | 
						|
        {
 | 
						|
            char errbuf[1024];
 | 
						|
            pcre2_get_error_message(rc, (PCRE2_UCHAR*)errbuf, sizeof(errbuf));
 | 
						|
            *error = errbuf;
 | 
						|
        }
 | 
						|
 | 
						|
        rval.clear();
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
        rval.resize(size_tmp);
 | 
						|
    }
 | 
						|
 | 
						|
    return rval;
 | 
						|
}
 | 
						|
}
 |