openGauss-server/src/common/backend/utils/mb/mbutils.cpp

/*
 * This file contains public functions for conversion between
 * client encoding and server (database) encoding.
 *
 * Tatsuo Ishii
 *
 * src/backend/utils/mb/mbutils.c
 */
#include "postgres.h"
#include "knl/knl_variable.h"

#include "access/xact.h"
#include "catalog/namespace.h"
#include "mb/pg_wchar.h"
#include "pgxc/execRemote.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
#include "storage/ipc.h"
#include "executor/executor.h"

/*
 * We maintain a simple linked list caching the fmgr lookup info for the
 * currently selected conversion functions, as well as any that have been
 * selected previously in the current session.	(We remember previous
 * settings because we must be able to restore a previous setting during
 * transaction rollback, without doing any fresh catalog accesses.)
 *
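 * The cache entries are allocated in the session's MEMORY_CONTEXT_EXECUTOR
 * memory context group; SetClientEncoding frees duplicate entries for the
 * encoding pair it activates.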
 */
typedef struct ConvProcInfo {
    int s_encoding; /* server (database) encoding ID */
    int c_encoding; /* client encoding ID */
    FmgrInfo to_server_info; /* fmgr lookup info for the client -> server conversion proc */
    FmgrInfo to_client_info; /* fmgr lookup info for the server -> client conversion proc */
} ConvProcInfo;

/*
 * Internal functions: forward declarations of file-local helpers that are
 * used before their definitions appear.
 */
static char* perform_default_encoding_conversion(const char* src, int len, bool is_client_to_server);
static int cliplen(const char* str, int len, int limit);

/*
 * Report whether data can move between srcEncoding and destEncoding without
 * running a conversion function.
 *
 * Returns true when the encodings are equal, when either side is SQL_ASCII,
 * or when the pair is GB18030 / GB18030_2022 (in either direction).
 */
bool NoNeedToConvert(int srcEncoding, int destEncoding)
{
    /* identical encodings, or SQL_ASCII on either end */
    bool trivialPair = (srcEncoding == destEncoding) || (srcEncoding == PG_SQL_ASCII) ||
                       (destEncoding == PG_SQL_ASCII);

    /* the two GB18030 flavours are exchanged as-is in both directions */
    bool gb18030Pair = (srcEncoding == PG_GB18030_2022 && destEncoding == PG_GB18030) ||
                       (srcEncoding == PG_GB18030 && destEncoding == PG_GB18030_2022);

    return trivialPair || gb18030Pair;
}

/*
 * Prepare for a future call to SetClientEncoding.	Success should mean
 * that SetClientEncoding is guaranteed to succeed for this encoding request.
 *
 * (But note that success before u_sess->mb_cxt.backend_startup_complete does not guarantee
 * success after ...)
 *
 * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
 */
int PrepareClientEncoding(int encoding)
{
    int current_server_encoding;
    ListCell* lc = NULL;

    if (!PG_VALID_FE_ENCODING(encoding)) {
        return -1;
    }

    /* Can't do anything during startup, per notes above */
    if (!u_sess->mb_cxt.backend_startup_complete) {
        return 0;
    }

    /*
     * Check for cases that require no conversion function.
     */
    current_server_encoding = GetDatabaseEncoding();
    if (NoNeedToConvert(current_server_encoding, encoding)) {
        return 0;
    }

    if (IsTransactionState()) {
        /*
         * If we're in a live transaction, it's safe to access the catalogs,
         * so look up the functions.  We repeat the lookup even if the info is
         * already cached, so that we can react to changes in the contents of
         * pg_conversion.
         */
        Oid to_server_proc;
        Oid to_client_proc;
        ConvProcInfo* conv_info = NULL;
        MemoryContext old_context;

        to_server_proc = FindDefaultConversionProc(encoding, current_server_encoding);
        if (!OidIsValid(to_server_proc)) {
            return -1;
        }
        to_client_proc = FindDefaultConversionProc(current_server_encoding, encoding);
        if (!OidIsValid(to_client_proc)) {
            return -1;
        }

        /*
         * Load the fmgr info into t_thrd.top_mem_cxt (could still fail here)
         */
        MemoryContext executorCxt = SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR);
        conv_info = (ConvProcInfo*)MemoryContextAlloc(executorCxt, sizeof(ConvProcInfo));
        conv_info->s_encoding = current_server_encoding;
        conv_info->c_encoding = encoding;
        fmgr_info_cxt(to_server_proc, &conv_info->to_server_info, executorCxt);
        fmgr_info_cxt(to_client_proc, &conv_info->to_client_info, executorCxt);

        /* Attach new info to head of list */
        old_context = MemoryContextSwitchTo(executorCxt);
        u_sess->mb_cxt.ConvProcList = lcons(conv_info, u_sess->mb_cxt.ConvProcList);
        (void)MemoryContextSwitchTo(old_context);

        /*
         * We cannot yet remove any older entry for the same encoding pair,
         * since it could still be in use.	SetClientEncoding will clean up.
         */
        return 0; /* success */
    } else {
        /*
         * If we're not in a live transaction, the only thing we can do is
         * restore a previous setting using the cache.	This covers all
         * transaction-rollback cases.	The only case it might not work for is
         * trying to change client_encoding on the fly by editing
         * postgresql.conf and SIGHUP'ing.  Which would probably be a stupid
         * thing to do anyway.
         */
        foreach (lc, u_sess->mb_cxt.ConvProcList) {
            ConvProcInfo* oldinfo = (ConvProcInfo*)lfirst(lc);
            if (oldinfo->s_encoding == current_server_encoding && oldinfo->c_encoding == encoding) {
                return 0;
            }
        }

        return -1; /* it's not cached, so fail */
    }
}

/*
 * Set the active client encoding and set up the conversion-function pointers.
 * PrepareClientEncoding should have been called previously for this encoding.
 *
 * Before backend startup is complete the request is only recorded in
 * u_sess->mb_cxt.pending_client_encoding.  Afterwards the session's
 * ClientEncoding, ToServerConvProc and ToClientConvProc are updated: to
 * "no conversion" (NULL procs) when NoNeedToConvert() says so, otherwise to
 * the first matching entry found in u_sess->mb_cxt.ConvProcList.
 *
 * Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
 */
int SetClientEncoding(int encoding)
{
    int current_server_encoding;
    bool found = false;
    ListCell* lc = NULL;
    ListCell* prev = NULL;
    ListCell* next = NULL;

    if (!PG_VALID_FE_ENCODING(encoding)) {
        return -1;
    }

    /* Can't do anything during startup; just remember the request for later */
    if (!u_sess->mb_cxt.backend_startup_complete) {
        u_sess->mb_cxt.pending_client_encoding = encoding;
        return 0;
    }

    /*
     * Check for cases that require no conversion function.
     */
    current_server_encoding = GetDatabaseEncoding();
    if (NoNeedToConvert(current_server_encoding, encoding)) {
        u_sess->mb_cxt.ClientEncoding = &pg_enc2name_tbl[encoding];
        u_sess->mb_cxt.ToServerConvProc = NULL;
        u_sess->mb_cxt.ToClientConvProc = NULL;
        return 0;
    }

    /*
     * Search the cache for the entry previously prepared by
     * PrepareClientEncoding; if there isn't one, we lose.  While at it,
     * release any duplicate entries so that repeated Prepare/Set cycles don't
     * leak memory.
     *
     * PrepareClientEncoding pushes new entries onto the head of the list, so
     * the first match encountered is the most recently prepared one.
     */
    found = false;
    prev = NULL;
    for (lc = list_head(u_sess->mb_cxt.ConvProcList); lc; lc = next) {
        ConvProcInfo* conv_info = (ConvProcInfo*)lfirst(lc);
        /* fetch the successor now: lc may be deleted below */
        next = lnext(lc);
        if (conv_info->s_encoding == current_server_encoding && conv_info->c_encoding == encoding) {
            if (!found) {
                /* Found newest entry, so set up */
                u_sess->mb_cxt.ClientEncoding = &pg_enc2name_tbl[encoding];
                u_sess->mb_cxt.ToServerConvProc = &conv_info->to_server_info;
                u_sess->mb_cxt.ToClientConvProc = &conv_info->to_client_info;
                found = true;
            } else {
                /* Duplicate entry, release it */
                u_sess->mb_cxt.ConvProcList = list_delete_cell(u_sess->mb_cxt.ConvProcList, lc, prev);
                pfree(conv_info);
                continue; /* prev mustn't advance */
            }
        }

        prev = lc;
    }

    if (found) {
        return 0; /* success */
    } else {
        return -1; /* it's not cached, so fail */
    }
}

/*
 * Initialize client encoding conversions.
 *		Called from InitPostgres() once during backend startup.
 *
 * Marks backend startup as complete, then applies the client encoding that
 * was recorded in u_sess->mb_cxt.pending_client_encoding.  If that encoding
 * cannot be prepared or set, a FATAL error is reported.  Finally the
 * connection character set and collation are initialised from the database
 * encoding.
 */
void InitializeClientEncoding(void)
{
    Assert(!u_sess->mb_cxt.backend_startup_complete);
    u_sess->mb_cxt.backend_startup_complete = true;

    int requested = u_sess->mb_cxt.pending_client_encoding;
    bool applied = (PrepareClientEncoding(requested) >= 0) && (SetClientEncoding(requested) >= 0);

    if (!applied) {
        /*
         * The requested conversion is not available.  It was not possible to
         * fail earlier, but it is now.
         */
        ereport(FATAL,
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                errmsg("conversion between %s and %s is not supported",
                    pg_enc2name_tbl[requested].name,
                    GetDatabaseEncodingName())));
    }

    u_sess->mb_cxt.character_set_connection = &pg_enc2name_tbl[GetDatabaseEncoding()];
    u_sess->mb_cxt.collation_connection =
        (ENABLE_MULTI_CHARSET) ? get_default_collation_by_charset(GetDatabaseEncoding(), false) : InvalidOid;
}

/*
 * Return the ID of the session's current client encoding.
 */
int pg_get_client_encoding(void)
{
    Assert(u_sess->mb_cxt.ClientEncoding);

    int client_encoding = u_sess->mb_cxt.ClientEncoding->encoding;
    return client_encoding;
}

/*
 * Return the name of the session's current client encoding.
 */
const char* pg_get_client_encoding_name(void)
{
    Assert(u_sess->mb_cxt.ClientEncoding);

    const char* client_encoding_name = u_sess->mb_cxt.ClientEncoding->name;
    return client_encoding_name;
}

/*
 * Apply encoding conversion on src and return it. The encoding
 * conversion function is chosen from the pg_conversion system catalog
 * marked as "default". If it is not found in the schema search path,
 * it's taken from pg_catalog schema. If it even is not in the schema,
 * warn and return src.
 *
 * If conversion occurs, a palloc'd null-terminated string is returned.
 * In the case of no conversion, src is returned.
 *
 * CAUTION: although the presence of a length argument means that callers
 * can pass non-null-terminated strings, care is required because the same
 * string will be passed back if no conversion occurs.	Such callers *must*
 * check whether result == src and handle that case differently.
 *
 * Note: we try to avoid raising error, since that could get us into
 * infinite recursion when this function is invoked during error message
 * sending.  It should be OK to raise error for overlength strings though,
 * since the recursion will come with a shorter message.
 */
unsigned char* pg_do_encoding_conversion(unsigned char* src, int len, int src_encoding, int dest_encoding)
{
    unsigned char* result = NULL;
    Oid proc;

    if (!IsTransactionState()) {
        return src;
    }
    if (NoNeedToConvert(src_encoding, dest_encoding)) {
        return src;
    }
    if (len <= 0) {
        return src;
    }
    proc = FindDefaultConversionProc(src_encoding, dest_encoding);
    if (!OidIsValid(proc)) {
        ereport(DEBUG2,
            (errcode(ERRCODE_UNDEFINED_FUNCTION),
                errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
                    pg_encoding_to_char(src_encoding),
                    pg_encoding_to_char(dest_encoding))));
        return src;
    }

    /*
     * XXX we should avoid throwing errors in OidFunctionCall. Otherwise we
     * are going into infinite loop!  So we have to make sure that the
     * function exists before calling OidFunctionCall.
     */
    if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(proc))) {
        ereport(LOG, (errmsg("cache lookup failed for function %u", proc)));
        return src;
    }

    /*
     * Allocate space for conversion result, being wary of integer overflow
     */
    if ((Size)len >= (MaxAllocSize / (Size)MAX_CONVERSION_GROWTH)) {
        ereport(ERROR,
            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                errmsg("out of memory"),
                errdetail("String of %d bytes is too long for encoding conversion.", len)));
    }
    result = (unsigned char*)palloc(len * MAX_CONVERSION_GROWTH + 1);

    OidFunctionCall5(proc,
        Int32GetDatum(src_encoding),
        Int32GetDatum(dest_encoding),
        CStringGetDatum(src),
        CStringGetDatum(result),
        Int32GetDatum(len));
    return result;
}

void construct_conversion_fmgr_info(int src_encoding, int dst_encoding, void* finfo)
{
    Assert(finfo != NULL);

    FmgrInfo* convert_finfo = (FmgrInfo*)finfo;
    if (src_encoding == dst_encoding) {
        convert_finfo->fn_oid = InvalidOid;
        return;
    }

    if (src_encoding == PG_SQL_ASCII || dst_encoding == PG_SQL_ASCII) {
        convert_finfo->fn_oid = InvalidOid;
        return;
    }

    Oid convert_func = FindDefaultConversionProc(src_encoding, dst_encoding);
    if (OidIsValid(convert_func)) {
        fmgr_info(convert_func, convert_finfo);
    } else {
        convert_finfo->fn_oid = InvalidOid;
    }
}


/*
 * Convert the first len bytes of src from src_encoding to dest_encoding by
 * calling the already looked-up conversion function in convert_finfo.
 *
 * Returns src itself when len <= 0, otherwise a palloc'd buffer holding the
 * conversion result.  Raises an error if len is too large for the output
 * buffer size to be computed safely.
 */
static char* fast_encoding_conversion(char* src, int len, int src_encoding, int dest_encoding, FmgrInfo* convert_finfo)
{
    if (len <= 0) {
        return src;
    }

    Assert(convert_finfo != NULL);
    Assert(OidIsValid(convert_finfo->fn_oid));

    /* Guard the worst-case buffer size computation against overflow */
    if ((Size)len >= (MaxAllocSize / (Size)MAX_CONVERSION_GROWTH)) {
        ereport(ERROR,
            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                errmsg("out of memory"),
                errdetail("String of %d bytes is too long for encoding conversion.", len)));
    }

    char* converted = (char*)palloc(len * MAX_CONVERSION_GROWTH + 1);

    FunctionCall5(convert_finfo,
        Int32GetDatum(src_encoding),
        Int32GetDatum(dest_encoding),
        CStringGetDatum(src),
        CStringGetDatum(converted),
        Int32GetDatum(len));
    return converted;
}

char* try_fast_encoding_conversion(char* src, int len, int src_encoding, int dest_encoding, void* convert_finfo)
{
    if (unlikely(!OidIsValid(((FmgrInfo*)convert_finfo)->fn_oid))) {
        return (char*)pg_do_encoding_conversion((unsigned char*)src, len, src_encoding, dest_encoding);
    }

    return fast_encoding_conversion(src, len, src_encoding, dest_encoding, (FmgrInfo*)convert_finfo);
}

/*
 * Convert a string from the database encoding to the named encoding.
 *
 * BYTEA convert_to(TEXT string, NAME encoding_name)
 */
Datum pg_convert_to(PG_FUNCTION_ARGS)
{
    Datum text_arg = PG_GETARG_DATUM(0);
    Datum target_name = PG_GETARG_DATUM(1);
    Datum database_name = DirectFunctionCall1(namein, CStringGetDatum(u_sess->mb_cxt.DatabaseEncoding->name));

    /*
     * pg_convert takes a bytea first argument but is handed a text datum
     * here; this relies on both being varlena types and therefore
     * structurally identical.
     */
    PG_RETURN_DATUM(DirectFunctionCall3(pg_convert, text_arg, database_name, target_name));
}

/*
 * Same as pg_convert_to, but delegates to pg_convert_nocase (for GBK order).
 * The source encoding is the database encoding.
 */
Datum pg_convert_to_nocase(PG_FUNCTION_ARGS)
{
    Datum text_arg = PG_GETARG_DATUM(0);
    Datum target_name = PG_GETARG_DATUM(1);
    Datum database_name = DirectFunctionCall1(namein, CStringGetDatum(u_sess->mb_cxt.DatabaseEncoding->name));

    FUNC_CHECK_HUGE_POINTER(PG_ARGISNULL(0), DatumGetPointer(text_arg), "pg_convert()");

    /*
     * pg_convert_nocase takes a bytea first argument but is handed a text
     * datum here; this relies on both being varlena types and therefore
     * structurally identical.
     */
    PG_RETURN_DATUM(DirectFunctionCall3(pg_convert_nocase, text_arg, database_name, target_name));
}

/*
 * Convert a string from the named encoding to the database encoding.
 *
 * TEXT convert_from(BYTEA string, NAME encoding_name)
 */
Datum pg_convert_from(PG_FUNCTION_ARGS)
{
    Datum bytes_arg = PG_GETARG_DATUM(0);
    Datum source_name = PG_GETARG_DATUM(1);
    Datum database_name = DirectFunctionCall1(namein, CStringGetDatum(u_sess->mb_cxt.DatabaseEncoding->name));

    /*
     * pg_convert hands back a bytea which is returned as text.  That relies
     * on both being varlena types and thus structurally identical.  Not
     * every bytea is valid text, but this one is, because pg_convert was
     * asked to produce output in the current database encoding.
     */
    PG_RETURN_DATUM(DirectFunctionCall3(pg_convert, bytes_arg, source_name, database_name));
}

/*
 * Convert a string between two named encodings.
 *
 * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
 *
 * Raises an error if either encoding name is unknown.  The input is first
 * verified against the source encoding, then converted, and the result is
 * copied into a freshly palloc'd bytea.
 */
Datum pg_convert(PG_FUNCTION_ARGS)
{
    bytea* input = PG_GETARG_BYTEA_PP(0);
    char* src_encoding_name = NameStr(*PG_GETARG_NAME(1));
    int src_encoding = pg_char_to_encoding(src_encoding_name);
    char* dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
    int dest_encoding = pg_char_to_encoding(dest_encoding_name);

    if (src_encoding < 0) {
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("invalid source encoding name \"%s\"", src_encoding_name)));
    }
    if (dest_encoding < 0) {
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("invalid destination encoding name \"%s\"", dest_encoding_name)));
    }

    /* the source bytes must be valid in the source encoding */
    int nbytes = VARSIZE_ANY_EXHDR(input);
    const char* raw = VARDATA_ANY(input);
    (void)pg_verify_mbstr_len(src_encoding, raw, nbytes, false);

    char* converted = (char*)pg_do_encoding_conversion((unsigned char*)raw, nbytes, src_encoding, dest_encoding);

    /* a different pointer means a new, null-terminated buffer was produced */
    const bool got_new_buffer = (converted != raw);
    if (got_new_buffer) {
        nbytes = strlen(converted);
    }

    /* assemble the bytea result */
    bytea* result = (bytea*)palloc(nbytes + VARHDRSZ);
    SET_VARSIZE(result, nbytes + VARHDRSZ);
    if (nbytes > 0) {
        errno_t rc = memcpy_s(VARDATA(result), nbytes, converted, nbytes);
        securec_check(rc, "", "");
    }

    if (got_new_buffer) {
        pfree(converted);
    }
    /* free memory if allocated by the toaster */
    PG_FREE_IF_COPY(input, 0);

    PG_RETURN_BYTEA_P(result);
}

/*
 * Variant of pg_convert that additionally lower-cases ASCII letters in the
 * converted result.
 *
 * Arguments are the same as for pg_convert: a bytea string, a source encoding
 * name and a destination encoding name.  Raises an error if either encoding
 * name is unknown.  The input is verified against the source encoding,
 * converted, and copied into a freshly palloc'd bytea.  Every byte of the
 * copy in the range 'A'..'Z' is then shifted to 'a'..'z'; the scan stops at
 * the first NUL byte or at the end of the data, whichever comes first.
 *
 * An empty result is returned as an empty bytea without touching the data
 * area: the copy and the case-folding scan are skipped when len is 0, the
 * same way pg_convert skips its copy.
 *
 * NOTE(review): the case folding is bytewise, so a byte in 'A'..'Z' is
 * changed even if it is part of a multibyte character -- confirm that this
 * is what callers expect.
 */
Datum pg_convert_nocase(PG_FUNCTION_ARGS)
{
    bytea* string = PG_GETARG_BYTEA_PP(0);
    char* src_encoding_name = NameStr(*PG_GETARG_NAME(1));
    int src_encoding = pg_char_to_encoding(src_encoding_name);
    char* dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
    int dest_encoding = pg_char_to_encoding(dest_encoding_name);
    const char* src_str = NULL;
    char* dest_str = NULL;
    bytea* retval = NULL;
    int len;
    const int chardiff = 'a' - 'A';

    if (src_encoding < 0) {
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("invalid source encoding name \"%s\"", src_encoding_name)));
    }
    if (dest_encoding < 0) {
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("invalid destination encoding name \"%s\"", dest_encoding_name)));
    }

    /* make sure that source string is valid */
    len = VARSIZE_ANY_EXHDR(string);
    src_str = VARDATA_ANY(string);
    (void)pg_verify_mbstr_len(src_encoding, src_str, len, false);

    dest_str = (char*)pg_do_encoding_conversion((unsigned char*)src_str, len, src_encoding, dest_encoding);
    if (dest_str != src_str) {
        /* a new, null-terminated buffer was produced */
        len = strlen(dest_str);
    }

    /*
     * build bytea data type structure.
     */
    retval = (bytea*)palloc(len + VARHDRSZ);
    SET_VARSIZE(retval, len + VARHDRSZ);

    if (len > 0) {
        /* only copy and scan when there is data; the buffer has no data bytes otherwise */
        int ss_rc = memcpy_s(VARDATA(retval), len, dest_str, len);
        securec_check(ss_rc, "", "");

        char* out = VARDATA(retval);
        for (int i = 0; i < len; i++) {
            if (out[i] == '\0') {
                break; /* folding stops at the first NUL byte */
            }
            if (out[i] >= 'A' && out[i] <= 'Z') {
                out[i] += chardiff;
            }
        }
    }

    if (dest_str != src_str) {
        pfree(dest_str);
    }
    /* free memory if allocated by the toaster */
    PG_FREE_IF_COPY(string, 0);

    PG_RETURN_BYTEA_P(retval);
}

/*
 * Return the length of a byte string when interpreted as text in the named
 * encoding, as computed by pg_verify_mbstr_len().  An unknown encoding name
 * raises an error.
 *
 * INT4 length (BYTEA string, NAME src_encoding_name)
 */
Datum length_in_encoding(PG_FUNCTION_ARGS)
{
    bytea* data = PG_GETARG_BYTEA_P(0);
    char* src_encoding_name = NameStr(*PG_GETARG_NAME(1));
    int src_encoding = pg_char_to_encoding(src_encoding_name);

    if (src_encoding < 0) {
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid encoding name \"%s\"", src_encoding_name)));
    }

    int nbytes = VARSIZE(data) - VARHDRSZ;
    PG_RETURN_INT32(pg_verify_mbstr_len(src_encoding, VARDATA(data), nbytes, false));
}

/*
 * SQL-callable: return the maxmblen entry of pg_wchar_table for the given
 * encoding ID, or NULL when the ID is not a valid encoding.
 */
Datum pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
{
    int encoding = PG_GETARG_INT32(0);

    if (!PG_VALID_ENCODING(encoding)) {
        PG_RETURN_NULL();
    }

    PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
}

/*
 * Convert a string from the current client encoding to the server encoding
 * by delegating to pg_any_to_server().
 */
char* pg_client_to_server(const char* s, int len)
{
    Assert(u_sess->mb_cxt.ClientEncoding);

    int client_encoding = u_sess->mb_cxt.ClientEncoding->encoding;
    return pg_any_to_server(s, len, client_encoding);
}

/*
 * Validate a string for which no encoding conversion is possible.
 *
 * The data still has to be checked, because client-side code might have done
 * string escaping under the selected client_encoding.  If 'encoding' is a
 * valid backend encoding, the string is verified under that encoding.
 * Otherwise there is no way to convert it and it must not reach the parser
 * as-is, so any NUL or high-bit byte is rejected with an error -- unless
 * bulkload_illegal_chars_conversion is set, in which case such bytes are
 * overwritten in place (NUL becomes ' ', high-bit bytes become '?').
 *
 * Always returns s (cast to char*); note that the buffer behind s may have
 * been modified.
 */
char* verify_string_for_ascii(const char* s, int len, int encoding, bool bulkload_illegal_chars_conversion)
{
    char* writable = (char*)s;

    if (PG_VALID_BE_ENCODING(encoding)) {
        (void)pg_verify_mbstr(encoding, s, len, false);
        return writable;
    }

    for (int pos = 0; pos < len; pos++) {
        const char ch = s[pos];

        /* plain 7-bit, non-NUL bytes are acceptable as they are */
        if (ch != '\0' && !IS_HIGHBIT_SET(ch)) {
            continue;
        }

        if (!bulkload_illegal_chars_conversion) {
            ereport(ERROR,
                (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
                    errmsg("invalid byte value for encoding \"%s\": 0x%02x",
                        pg_enc2name_tbl[PG_SQL_ASCII].name,
                        (unsigned char)ch)));
        }

        /* tolerant mode: substitute the offending byte in place */
        writable[pos] = (ch == '\0') ? ' ' : '?';
    }

    return writable;
}

/*
 * Convert a string from the given encoding to the server (database)
 * encoding.
 *
 * Returns s itself when len <= 0 or when no conversion takes place;
 * otherwise returns whatever the selected conversion routine returns.
 */
char* pg_any_to_server(const char* s, int len, int encoding)
{
    Assert(u_sess->mb_cxt.DatabaseEncoding);
    Assert(u_sess->mb_cxt.ClientEncoding);

    if (len <= 0) {
        return (char*)s;
    }

    const int server_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;

    bool same_representation = (encoding == server_encoding) || (encoding == PG_SQL_ASCII) ||
                               (encoding == PG_GB18030 && server_encoding == PG_GB18030_2022);
    if (same_representation) {
        /* nothing to convert, but the data must still be validated */
        (void)pg_verify_mbstr(server_encoding, s, len, false);
        return (char*)s;
    }

    if (server_encoding == PG_SQL_ASCII) {
        /* no conversion possible into SQL_ASCII: validate (or patch up) the bytes instead */
        bool tolerate_illegal_chars = false;
        if (u_sess->cmd_cxt.bulkload_compatible_illegal_chars) {
            tolerate_illegal_chars = true;
        }
        return verify_string_for_ascii(s, len, encoding, tolerate_illegal_chars);
    }

    if (u_sess->mb_cxt.ClientEncoding->encoding == encoding) {
        /* the source is the client encoding: use the cached conversion proc */
        return perform_default_encoding_conversion(s, len, true);
    }

    return (char*)pg_do_encoding_conversion((unsigned char*)s, len, encoding, server_encoding);
}

/*
 * Convert a string from the given encoding to the client encoding.
 *
 * convert_finfo, when not NULL, is handed to try_fast_encoding_conversion()
 * for the case where the source is not the database encoding.
 *
 * Returns s itself when len <= 0 or when no conversion takes place;
 * otherwise returns whatever the selected conversion routine returns.
 */
char* pg_any_to_client(const char* s, int len, int encoding, void* convert_finfo)
{
    Assert(u_sess->mb_cxt.ClientEncoding);

    if (len <= 0) {
        return (char*)s;
    }

    const int client_encoding = u_sess->mb_cxt.ClientEncoding->encoding;

    if (encoding == client_encoding || client_encoding == PG_SQL_ASCII) {
        /* no conversion is needed; the data is passed through without validation */
        return (char*)s;
    }

    if (encoding == PG_SQL_ASCII) {
        /* no conversion is possible, but the result must be valid for the client */
        (void) pg_verify_mbstr(client_encoding, s, len, false);
        return (char*)s;
    }

    if (u_sess->mb_cxt.DatabaseEncoding->encoding == encoding) {
        /* the source is the database encoding: use the cached conversion proc */
        return perform_default_encoding_conversion(s, len, false);
    }

    if (convert_finfo != NULL) {
        return try_fast_encoding_conversion((char*)s, len, encoding, client_encoding, convert_finfo);
    }

    return (char*)pg_do_encoding_conversion((unsigned char*)s, len, encoding, client_encoding);
}

/*
 * Convert a string from the client encoding to dst_encoding.
 *
 * convert_finfo, when not NULL, is handed to try_fast_encoding_conversion()
 * for the case where the destination is not the database encoding.
 *
 * Returns s itself when len <= 0 or when no conversion takes place;
 * otherwise returns whatever the selected conversion routine returns.
 */
char* pg_client_to_any(const char* s, int len, int dst_encoding, void* convert_finfo)
{
    Assert(u_sess->mb_cxt.ClientEncoding);

    if (len <= 0) {
        return (char*)s;
    }

    bool tolerate_illegal_chars = false;
    if (u_sess->cmd_cxt.bulkload_compatible_illegal_chars) {
        tolerate_illegal_chars = true;
    }

    const int client_encoding = u_sess->mb_cxt.ClientEncoding->encoding;

    if (client_encoding == dst_encoding || client_encoding == PG_SQL_ASCII) {
        /* nothing to convert, but the data must still be validated */
        (void)pg_verify_mbstr(dst_encoding, s, len, false);
        return (char*)s;
    }

    if (dst_encoding == PG_SQL_ASCII) {
        /* no conversion possible into SQL_ASCII: validate (or patch up) the bytes instead */
        return verify_string_for_ascii(s, len, client_encoding, tolerate_illegal_chars);
    }

    if (u_sess->mb_cxt.DatabaseEncoding->encoding == dst_encoding) {
        /* the destination is the database encoding: use the cached conversion proc */
        return perform_default_encoding_conversion(s, len, true);
    }

    if (convert_finfo != NULL) {
        return try_fast_encoding_conversion((char*)s, len, client_encoding, dst_encoding, convert_finfo);
    }

    return (char*)pg_do_encoding_conversion((unsigned char*)s, len, client_encoding, dst_encoding);
}

/*
 * Convert a string from the server encoding to the current client encoding
 * by delegating to pg_server_to_any().  Raises an error if that call
 * returns NULL.
 */
char* pg_server_to_client(const char* s, int len)
{
    Assert(u_sess->mb_cxt.ClientEncoding);

    char* converted = pg_server_to_any(s, len, u_sess->mb_cxt.ClientEncoding->encoding);
    if (converted != NULL) {
        return converted;
    }

    ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("pg_server_to_any returns null.")));
    return converted;
}

/*
 * Pre-check whether pg_server_to_any is really going to perform a
 * conversion for the given target encoding.  This makes a difference in
 * COPY TO FILE; HandleCopyDataRow is deliberately left unchanged and this
 * helper was added instead.
 *
 * Returns false when the target equals the database encoding or when either
 * of the two is SQL_ASCII; true otherwise.
 */
bool WillTranscodingBePerformed(int encoding)
{
    int server_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;

    return encoding != server_encoding && encoding != PG_SQL_ASCII && server_encoding != PG_SQL_ASCII;
}

/*
 * Convert a string from the server (database) encoding to the given
 * encoding.
 *
 * convert_finfo, when not NULL, is handed to try_fast_encoding_conversion()
 * for the case where the target is not the client encoding.
 *
 * Returns s itself when len <= 0 or when no conversion takes place;
 * otherwise returns whatever the selected conversion routine returns.
 */
char* pg_server_to_any(const char* s, int len, int encoding, void *convert_finfo)
{
    Assert(u_sess->mb_cxt.DatabaseEncoding);
    Assert(u_sess->mb_cxt.ClientEncoding);

    if (len <= 0) {
        return (char*)s;
    }

    const int server_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;

    if (encoding == server_encoding || encoding == PG_SQL_ASCII) {
        return (char*)s; /* assume data is valid */
    }

    if (server_encoding == PG_SQL_ASCII) {
        /* no conversion is possible, but the result must be valid in the target encoding */
        (void) pg_verify_mbstr(encoding, s, len, false);
        return (char*)s;
    }

    if (u_sess->mb_cxt.ClientEncoding->encoding == encoding) {
        /* the target is the client encoding: use the cached conversion proc */
        return perform_default_encoding_conversion(s, len, false);
    }

    if (convert_finfo != NULL) {
        return try_fast_encoding_conversion((char*)s, len, server_encoding, encoding, convert_finfo);
    }

    return (char*)pg_do_encoding_conversion((unsigned char*)s, len, server_encoding, encoding);
}

/*
 *	Perform default encoding conversion using cached FmgrInfo. Since
 *	this function does not access database at all, it is safe to call
 *	outside transactions.  If the conversion has not been set up by
 *	SetClientEncoding(), no conversion is performed.
 */
static char* perform_default_encoding_conversion(const char* src, int len, bool is_client_to_server)
{
    char* result = NULL;
    int src_encoding, dest_encoding;
    FmgrInfo* flinfo = NULL;

    if (is_client_to_server) {
        src_encoding = u_sess->mb_cxt.ClientEncoding->encoding;
        dest_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;
        flinfo = u_sess->mb_cxt.ToServerConvProc;
    } else {
        src_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;
        dest_encoding = u_sess->mb_cxt.ClientEncoding->encoding;
        flinfo = u_sess->mb_cxt.ToClientConvProc;
    }

    if (flinfo == NULL) {
        return (char*)src;
    }
    /*
     * Allocate space for conversion result, being wary of integer overflow
     */
    if ((Size)len >= (MaxAllocSize / (Size)MAX_CONVERSION_GROWTH)) {
        ereport(ERROR,
            (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                errmsg("out of memory"),
                errdetail("String of %d bytes is too long for encoding conversion.", len)));
    }
    result = (char*)palloc(len * MAX_CONVERSION_GROWTH + 1);

    FunctionCall5(flinfo,
        Int32GetDatum(src_encoding),
        Int32GetDatum(dest_encoding),
        CStringGetDatum(src),
        CStringGetDatum(result),
        Int32GetDatum(len));
    return result;
}

/*
 * Convert a null-terminated multibyte string (database encoding) to wchars,
 * using the mb2wchar_with_len routine of pg_wchar_table.
 */
int pg_mb2wchar(const char* from, pg_wchar* to)
{
    int db_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;
    int from_len = strlen(from);

    return (*pg_wchar_table[db_encoding].mb2wchar_with_len)((const unsigned char*)from, to, from_len);
}

/*
 * Convert the first len bytes of a multibyte string (database encoding) to
 * wchars, using the mb2wchar_with_len routine of pg_wchar_table.
 */
int pg_mb2wchar_with_len(const char* from, pg_wchar* to, int len)
{
    int db_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;
    const unsigned char* bytes = (const unsigned char*)from;

    return (*pg_wchar_table[db_encoding].mb2wchar_with_len)(bytes, to, len);
}

/*
 * Same as pg_mb2wchar_with_len, but for an explicitly given encoding
 * instead of the database encoding.
 */
int pg_encoding_mb2wchar_with_len(int encoding, const char* from, pg_wchar* to, int len)
{
    const unsigned char* bytes = (const unsigned char*)from;

    return (*pg_wchar_table[encoding].mb2wchar_with_len)(bytes, to, len);
}

/*
 * Convert a wchar string to a multibyte string (database encoding), using
 * the wchar2mb_with_len routine of pg_wchar_table.  The input length is
 * taken from pg_wchar_strlen().
 */
int pg_wchar2mb(const pg_wchar* from, char* to)
{
    int db_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;
    int from_len = pg_wchar_strlen(from);

    return (*pg_wchar_table[db_encoding].wchar2mb_with_len)(from, (unsigned char*)to, from_len);
}

/*
 * Convert the first len wchars of a wchar string to a multibyte string
 * (database encoding), using the wchar2mb_with_len routine of
 * pg_wchar_table.
 */
int pg_wchar2mb_with_len(const pg_wchar* from, char* to, int len)
{
    int db_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;
    unsigned char* out = (unsigned char*)to;

    return (*pg_wchar_table[db_encoding].wchar2mb_with_len)(from, out, len);
}

/*
 * Same as pg_wchar2mb_with_len, but for an explicitly given encoding
 * instead of the database encoding.
 */
int pg_encoding_wchar2mb_with_len(int encoding, const pg_wchar* from, char* to, int len)
{
    unsigned char* out = (unsigned char*)to;

    return (*pg_wchar_table[encoding].wchar2mb_with_len)(from, out, len);
}

/*
 * Return the byte length of the multibyte character at mbstr, as reported
 * by the database encoding's mblen routine in pg_wchar_table.
 */
int pg_mblen(const char* mbstr)
{
    int db_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;

    return (*pg_wchar_table[db_encoding].mblen)((const unsigned char*)mbstr);
}

/*
 * Return the display length of the multibyte character at mbstr, as
 * reported by the database encoding's dsplen routine in pg_wchar_table.
 */
int pg_dsplen(const char* mbstr)
{
    int db_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;

    return (*pg_wchar_table[db_encoding].dsplen)((const unsigned char*)mbstr);
}

/*
 * Return the number of characters in a null-terminated multibyte string,
 * stepping through it with pg_mblen().
 */
int pg_mbstrlen(const char* mbstr)
{
    /* single-byte encodings: the character count equals the byte count */
    if (pg_database_encoding_max_length() == 1) {
        return strlen(mbstr);
    }

    int nchars = 0;
    for (const char* cursor = mbstr; *cursor; cursor += pg_mblen(cursor)) {
        nchars++;
    }
    return nchars;
}

/*
 * Count the characters in the first "limit" bytes of a multibyte string in
 * the database encoding. The string need not be NUL-terminated, but counting
 * also stops at a NUL byte if one is met first.
 */
int pg_mbstrlen_with_len(const char* mbstr, int limit)
{
    /* single-byte encodings: "limit" is returned as the character count */
    if (pg_database_encoding_max_length() == 1) {
        return limit;
    }

    int nchars = 0;
    int remaining = limit;
    const char* cursor = mbstr;

    for (; remaining > 0 && *cursor != '\0'; nchars++) {
        const int chBytes = pg_mblen(cursor);

        remaining -= chBytes;
        cursor += chBytes;
    }
    return nchars;
}

/*
 * Count the characters in the first "limit" bytes of a multibyte string that
 * is in the given encoding (rather than the database encoding). The string
 * need not be NUL-terminated, but counting also stops at a NUL byte.
 */
int pg_encoding_mbstrlen_with_len(const char* mbstr, int limit, int encoding)
{
    /* single-byte encodings: "limit" is returned as the character count */
    if (pg_encoding_max_length(encoding) == 1) {
        return limit;
    }

    int nchars = 0;
    int remaining = limit;
    const char* cursor = mbstr;

    for (; remaining > 0 && *cursor != '\0'; nchars++) {
        const int chBytes = pg_encoding_mblen(encoding, cursor);

        remaining -= chBytes;
        cursor += chBytes;
    }
    return nchars;
}

/*
 * Variant of pg_mbstrlen_with_len in which the caller supplies "eml" instead
 * of this function calling pg_database_encoding_max_length() itself.
 * NOTE(review): "eml" is presumably the caller-cached maximum character
 * length of the database encoding -- confirm against callers.
 *
 * When eml is 1, "limit" is returned directly. Otherwise characters are
 * measured with pg_mblen(), i.e. using the database encoding.
 */
int pg_mbstrlen_with_len_eml(const char* mbstr, int limit, int eml)
{
    if (eml == 1) {
        return limit;
    }

    int nchars = 0;
    int remaining = limit;
    const char* cursor = mbstr;

    for (; remaining > 0 && *cursor != '\0'; nchars++) {
        const int chBytes = pg_mblen(cursor);

        remaining -= chBytes;
        cursor += chBytes;
    }
    return nchars;
}

/*
 * Count characters of a multibyte string (database encoding) while consuming
 * a caller-owned byte budget.
 *
 * "*limit" is decremented in place by the byte length of every character
 * counted, so on return it holds the budget left over (it can drop below
 * zero if the last character overshoots). Counting stops when the budget is
 * exhausted or a NUL byte is reached. Unlike pg_mbstrlen_with_len there is
 * no single-byte shortcut here.
 */
int pg_mbstrlen_with_len_toast(const char* mbstr, int* limit)
{
    int nchars = 0;
    const char* cursor = mbstr;

    for (; *limit > 0 && *cursor != '\0'; nchars++) {
        const int chBytes = pg_mblen(cursor);

        *limit -= chBytes;
        cursor += chBytes;
    }
    return nchars;
}

/*
 * Return the byte length of the longest prefix of "mbstr" (which is "len"
 * bytes long and not necessarily NUL-terminated) that does not exceed
 * "limit" bytes and does not split a multibyte character.
 *
 * Thin wrapper: forwards to pg_encoding_mbcliplen with the database encoding.
 */
int pg_mbcliplen(const char* mbstr, int len, int limit)
{
    const int dbEncoding = u_sess->mb_cxt.DatabaseEncoding->encoding;

    return pg_encoding_mbcliplen(dbEncoding, mbstr, len, limit);
}

/*
 * pg_mbcliplen with specified encoding
 */
int pg_encoding_mbcliplen(int encoding, const char* mbstr, int len, int limit)
{
    mblen_converter mblen_fn;
    int clen = 0;
    int l;

    /* optimization for single byte encoding */
    if (pg_encoding_max_length(encoding) == 1) {
        return cliplen(mbstr, len, limit);
    }
    mblen_fn = pg_wchar_table[encoding].mblen;

    while (len > 0 && *mbstr) {
        l = (*mblen_fn)((const unsigned char*)mbstr);
        if ((clen + l) > limit) {
            break;
        }
        clen += l;
        if (clen == limit) {
            break;
        }
        len -= l;
        mbstr += l;
    }
    return clen;
}

/**
 * Compute the byte length of the longest prefix of mbstr that fits in limit,
 * without splitting a multibyte character (database encoding).
 *
 * @tparam calCharLength true: limit is measured in characters;
 *                       false: limit is measured in bytes
 * @param mbstr input string; scanning also stops at a NUL byte
 * @param len   byte length of mbstr
 * @param limit maximum number of characters or bytes (see calCharLength)
 * @return byte length of the clipped prefix
 *
 * For single-byte encodings the result comes from cliplen() regardless of
 * calCharLength.
 */
template<bool calCharLength> int MbCharClipLen(const char* mbstr, int len, int limit)
{
    if (pg_database_encoding_max_length() == 1) {
        return cliplen(mbstr, len, limit);
    }

    int clippedBytes = 0; /* bytes accepted so far (the result) */
    int consumed = 0;     /* units charged against limit: chars or bytes */
    int remaining = len;
    const char* cursor = mbstr;

    while (remaining > 0 && *cursor != '\0') {
        const int chBytes = pg_mblen(cursor);

        consumed += calCharLength ? 1 : chBytes;
        if (consumed > limit) {
            break;
        }
        clippedBytes += chBytes;
        remaining -= chBytes;
        cursor += chBytes;
    }
    return clippedBytes;
}

/*
 * Clip "mbstr" to "limit" without splitting a multibyte character and return
 * the byte length of the clipped prefix.
 *
 * The unit of "limit" depends on DB_IS_CMPT(PG_FORMAT | B_FORMAT): when that
 * holds, limit is a character count (MbCharClipLen<true>); otherwise it is a
 * byte count (MbCharClipLen<false>).
 */
int pg_mbcharcliplen(const char* mbstr, int len, int limit)
{
    if (DB_IS_CMPT(PG_FORMAT | B_FORMAT)) {
        return MbCharClipLen<true>(mbstr, len, limit);
    }
    return MbCharClipLen<false>(mbstr, len, limit);
}
/*
 * Description : Similar to pg_mbcliplen, except that "limit" is always a
 *               character count rather than a byte count.
 * Notes       : Always uses MbCharClipLen<true>, with no DB_IS_CMPT check.
 */
int pg_mbcharcliplen_orig(const char* mbstr, int len, int limit)
{
    const int clippedBytes = MbCharClipLen<true>(mbstr, len, limit);

    return clippedBytes;
}

/*
 * mbcliplen for any single-byte encoding: number of bytes in "str" before
 * the first NUL, capped at the smaller of "len" and "limit".
 */
static int cliplen(const char* str, int len, int limit)
{
    const int bound = Min(len, limit);
    int count = 0;

    for (; count < bound && str[count] != '\0'; count++) {
        /* just advancing to the cap or the terminator */
    }
    return count;
}

/*
 * Record "encoding" as the session's database encoding.
 *
 * Raises an ERROR if the value does not pass PG_VALID_BE_ENCODING; otherwise
 * points u_sess->mb_cxt.DatabaseEncoding at the matching pg_enc2name_tbl
 * entry.
 */
void SetDatabaseEncoding(int encoding)
{
    if (PG_VALID_BE_ENCODING(encoding)) {
        /* valid: nothing to report */
    } else {
        ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("invalid database encoding: %d", encoding)));
    }

    u_sess->mb_cxt.DatabaseEncoding = &pg_enc2name_tbl[encoding];
    /* the table is expected to be indexed by encoding id */
    Assert(u_sess->mb_cxt.DatabaseEncoding->encoding == encoding);
}

/*
 * Bind gettext to the codeset equivalent with the database encoding.
 *
 * Compiles to a no-op unless ENABLE_NLS is defined.
 */
void pg_bind_textdomain_codeset(const char* domain_name)
{
#if defined(ENABLE_NLS)
    const int dbEncoding = GetDatabaseEncoding();

    /*
     * By default gettext() takes its codeset from LC_CTYPE, so when that
     * already matches the database encoding there is nothing to do. CREATE
     * DATABASE enforces or trusts that the locale's codeset matches the
     * database encoding, except for the C locale; in the C (or POSIX) locale
     * gettext() is therefore bound explicitly to the right codeset.
     *
     * On Windows gettext() tends to get confused, so the binding is always
     * performed there.
     */
#ifndef WIN32
    /* setlocale is thread-unsafe, so LC_CTYPE is inspected under the lock */
    AutoMutexLock localeLock(&gLocaleMutex);
    localeLock.lock();
    const char* ctype = gs_setlocale_r(LC_CTYPE, NULL);
    const bool isCLocale = (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0);
    localeLock.unLock();

    if (!isCLocale) {
        return;
    }
#endif

    /* look up the gettext codeset name for the database encoding */
    for (int idx = 0; pg_enc2gettext_tbl[idx].name != NULL; idx++) {
        if (pg_enc2gettext_tbl[idx].encoding != dbEncoding) {
            continue;
        }
        if (bind_textdomain_codeset(domain_name, pg_enc2gettext_tbl[idx].name) == NULL) {
            ereport(LOG, (errmsg("bind_textdomain_codeset failed")));
        }
        break;
    }
#endif
}

int GetDatabaseEncoding(void)
{
    Assert(u_sess->mb_cxt.DatabaseEncoding);
    return u_sess->mb_cxt.DatabaseEncoding->encoding;
}

const char* GetDatabaseEncodingName(void)
{
    Assert(u_sess->mb_cxt.DatabaseEncoding);
    return u_sess->mb_cxt.DatabaseEncoding->name;
}

int GetCharsetConnection(void)
{
    Assert(u_sess->mb_cxt.character_set_connection);
    return u_sess->mb_cxt.character_set_connection->encoding;
}

const char* GetCharsetConnectionName(void)
{
    Assert(u_sess->mb_cxt.character_set_connection);
    return u_sess->mb_cxt.character_set_connection->name;
}

/*
 * Return the session's collation_connection, or InvalidOid when either
 * ENABLE_MULTI_CHARSET is off or DB_IS_CMPT(B_FORMAT) does not hold.
 */
Oid GetCollationConnection(void)
{
    return (!ENABLE_MULTI_CHARSET || !DB_IS_CMPT(B_FORMAT)) ? InvalidOid : u_sess->mb_cxt.collation_connection;
}

/*
 * fmgr-callable function: returns the database encoding name, converted to
 * a Datum by calling namein on it.
 */
Datum getdatabaseencoding(PG_FUNCTION_ARGS)
{
    Assert(u_sess->mb_cxt.DatabaseEncoding != NULL);

    const char* encodingName = u_sess->mb_cxt.DatabaseEncoding->name;
    return DirectFunctionCall1(namein, CStringGetDatum(encodingName));
}

/*
 * fmgr-callable function: returns the client encoding name, converted to a
 * Datum by calling namein on it.
 */
Datum pg_client_encoding(PG_FUNCTION_ARGS)
{
    Assert(u_sess->mb_cxt.ClientEncoding != NULL);

    const char* encodingName = u_sess->mb_cxt.ClientEncoding->name;
    return DirectFunctionCall1(namein, CStringGetDatum(encodingName));
}

/*
 * Return the encoding id of the platform (server environment) encoding.
 *
 * The value is computed on first use and cached in
 * u_sess->mb_cxt.PlatformEncoding; later calls return the cached entry.
 */
int GetPlatformEncoding(void)
{
    /* already resolved for this session */
    if (u_sess->mb_cxt.PlatformEncoding != NULL) {
        return u_sess->mb_cxt.PlatformEncoding->encoding;
    }

    /* try to determine encoding of server's environment locale, under the locale mutex */
    AutoMutexLock localeLock(&gLocaleMutex);
    localeLock.lock();
    int detected = pg_get_encoding_from_locale("", true);
    localeLock.unLock();

    /* a negative result means no encoding was determined: fall back to SQL_ASCII */
    if (detected < 0) {
        detected = PG_SQL_ASCII;
    }

    u_sess->mb_cxt.PlatformEncoding = &pg_enc2name_tbl[detected];
    return u_sess->mb_cxt.PlatformEncoding->encoding;
}

#ifdef WIN32

/*
 * Convert "len" bytes of "str" (database encoding) to UTF-16.
 *
 * The result is a palloc'ed, null-terminated WCHAR string. If utf16_len is
 * not NULL, the number of WCHARs produced by MultiByteToWideChar is stored
 * there. Returns NULL iff the conversion produced nothing for a non-empty
 * input.
 */
WCHAR* pgwin32_toUTF16(const char* str, int len, int* utf16_len)
{
    /*
     * Use MultiByteToWideChar directly if the database encoding has a
     * corresponding codepage; otherwise convert to UTF8 first and then feed
     * that through MultiByteToWideChar with CP_UTF8.
     */
    UINT codepage = pg_enc2name_tbl[GetDatabaseEncoding()].codepage;
    const char* input = str;
    char* converted = NULL;

    if (codepage == 0) {
        converted = (char*)pg_do_encoding_conversion((unsigned char*)str, len, GetDatabaseEncoding(), PG_UTF8);
        /* a different pointer means a new buffer was returned; re-measure it */
        if (converted != str) {
            len = strlen(converted);
        }
        input = converted;
        codepage = CP_UTF8;
    }

    WCHAR* utf16 = (WCHAR*)palloc(sizeof(WCHAR) * (len + 1));
    int dst_len = MultiByteToWideChar(codepage, 0, input, len, utf16, len);
    utf16[dst_len] = (WCHAR)0;

    /* release the intermediate UTF8 buffer, if one was returned */
    if (converted != NULL && converted != str) {
        pfree(converted);
    }

    if (dst_len == 0 && len > 0) {
        pfree(utf16);
        return NULL; /* error */
    }

    if (utf16_len != NULL) {
        *utf16_len = dst_len;
    }
    return utf16;
}

#endif