【实现内容】: 回合https://gitee.com/opengauss/openGauss-server/pulls/4694/修复表增加字段后core的问题 【根因分析】: 回合https://gitee.com/opengauss/openGauss-server/pulls/4694/修复表增加字段后core的问题 【实现方案】: 回合https://gitee.com/opengauss/openGauss-server/pulls/4694/修复表增加字段后core的问题 【关联需求或issue】: https://e.gitee.com/opengaussorg/dashboard?issue=I9C345
1305 lines
40 KiB
C++
1305 lines
40 KiB
C++
/*
|
|
* This file contains public functions for conversion between
|
|
* client encoding and server (database) encoding.
|
|
*
|
|
* Tatsuo Ishii
|
|
*
|
|
* src/backend/utils/mb/mbutils.c
|
|
*/
|
|
#include "postgres.h"
|
|
#include "knl/knl_variable.h"
|
|
|
|
#include "access/xact.h"
|
|
#include "catalog/namespace.h"
|
|
#include "mb/pg_wchar.h"
|
|
#include "pgxc/execRemote.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/syscache.h"
|
|
#include "storage/ipc.h"
|
|
#include "executor/executor.h"
|
|
|
|
/*
|
|
* We maintain a simple linked list caching the fmgr lookup info for the
|
|
* currently selected conversion functions, as well as any that have been
|
|
* selected previously in the current session. (We remember previous
|
|
* settings because we must be able to restore a previous setting during
|
|
* transaction rollback, without doing any fresh catalog accesses.)
|
|
*
|
|
* Since we'll never release this data, we just keep it in t_thrd.top_mem_cxt.
|
|
*/
|
|
typedef struct ConvProcInfo {
|
|
int s_encoding; /* server and client encoding IDs */
|
|
int c_encoding;
|
|
FmgrInfo to_server_info; /* lookup info for conversion procs */
|
|
FmgrInfo to_client_info;
|
|
} ConvProcInfo;
|
|
|
|
/* Internal functions */
|
|
static char* perform_default_encoding_conversion(const char* src, int len, bool is_client_to_server);
|
|
static int cliplen(const char* str, int len, int limit);
|
|
|
|
// Determine whether the current case needs to be converted
|
|
bool NoNeedToConvert(int srcEncoding, int destEncoding)
|
|
{
|
|
if (srcEncoding == destEncoding) {
|
|
return true;
|
|
}
|
|
if (srcEncoding == PG_SQL_ASCII || destEncoding == PG_SQL_ASCII) {
|
|
return true;
|
|
}
|
|
if (srcEncoding == PG_GB18030_2022 && destEncoding == PG_GB18030) {
|
|
return true;
|
|
}
|
|
if (srcEncoding == PG_GB18030 && destEncoding == PG_GB18030_2022) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Prepare for a future call to SetClientEncoding. Success should mean
|
|
* that SetClientEncoding is guaranteed to succeed for this encoding request.
|
|
*
|
|
* (But note that success before u_sess->mb_cxt.backend_startup_complete does not guarantee
|
|
* success after ...)
|
|
*
|
|
* Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
|
|
*/
|
|
int PrepareClientEncoding(int encoding)
|
|
{
|
|
int current_server_encoding;
|
|
ListCell* lc = NULL;
|
|
|
|
if (!PG_VALID_FE_ENCODING(encoding)) {
|
|
return -1;
|
|
}
|
|
|
|
/* Can't do anything during startup, per notes above */
|
|
if (!u_sess->mb_cxt.backend_startup_complete) {
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Check for cases that require no conversion function.
|
|
*/
|
|
current_server_encoding = GetDatabaseEncoding();
|
|
if (NoNeedToConvert(current_server_encoding, encoding)) {
|
|
return 0;
|
|
}
|
|
|
|
if (IsTransactionState()) {
|
|
/*
|
|
* If we're in a live transaction, it's safe to access the catalogs,
|
|
* so look up the functions. We repeat the lookup even if the info is
|
|
* already cached, so that we can react to changes in the contents of
|
|
* pg_conversion.
|
|
*/
|
|
Oid to_server_proc;
|
|
Oid to_client_proc;
|
|
ConvProcInfo* conv_info = NULL;
|
|
MemoryContext old_context;
|
|
|
|
to_server_proc = FindDefaultConversionProc(encoding, current_server_encoding);
|
|
if (!OidIsValid(to_server_proc)) {
|
|
return -1;
|
|
}
|
|
to_client_proc = FindDefaultConversionProc(current_server_encoding, encoding);
|
|
if (!OidIsValid(to_client_proc)) {
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* Load the fmgr info into t_thrd.top_mem_cxt (could still fail here)
|
|
*/
|
|
MemoryContext executorCxt = SESS_GET_MEM_CXT_GROUP(MEMORY_CONTEXT_EXECUTOR);
|
|
conv_info = (ConvProcInfo*)MemoryContextAlloc(executorCxt, sizeof(ConvProcInfo));
|
|
conv_info->s_encoding = current_server_encoding;
|
|
conv_info->c_encoding = encoding;
|
|
fmgr_info_cxt(to_server_proc, &conv_info->to_server_info, executorCxt);
|
|
fmgr_info_cxt(to_client_proc, &conv_info->to_client_info, executorCxt);
|
|
|
|
/* Attach new info to head of list */
|
|
old_context = MemoryContextSwitchTo(executorCxt);
|
|
u_sess->mb_cxt.ConvProcList = lcons(conv_info, u_sess->mb_cxt.ConvProcList);
|
|
(void)MemoryContextSwitchTo(old_context);
|
|
|
|
/*
|
|
* We cannot yet remove any older entry for the same encoding pair,
|
|
* since it could still be in use. SetClientEncoding will clean up.
|
|
*/
|
|
return 0; /* success */
|
|
} else {
|
|
/*
|
|
* If we're not in a live transaction, the only thing we can do is
|
|
* restore a previous setting using the cache. This covers all
|
|
* transaction-rollback cases. The only case it might not work for is
|
|
* trying to change client_encoding on the fly by editing
|
|
* postgresql.conf and SIGHUP'ing. Which would probably be a stupid
|
|
* thing to do anyway.
|
|
*/
|
|
foreach (lc, u_sess->mb_cxt.ConvProcList) {
|
|
ConvProcInfo* oldinfo = (ConvProcInfo*)lfirst(lc);
|
|
if (oldinfo->s_encoding == current_server_encoding && oldinfo->c_encoding == encoding) {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return -1; /* it's not cached, so fail */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Set the active client encoding and set up the conversion-function pointers.
|
|
* PrepareClientEncoding should have been called previously for this encoding.
|
|
*
|
|
* Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
|
|
*/
|
|
int SetClientEncoding(int encoding)
|
|
{
|
|
int current_server_encoding;
|
|
bool found = false;
|
|
ListCell* lc = NULL;
|
|
ListCell* prev = NULL;
|
|
ListCell* next = NULL;
|
|
|
|
if (!PG_VALID_FE_ENCODING(encoding)) {
|
|
return -1;
|
|
}
|
|
|
|
/* Can't do anything during startup, per notes above */
|
|
if (!u_sess->mb_cxt.backend_startup_complete) {
|
|
u_sess->mb_cxt.pending_client_encoding = encoding;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Check for cases that require no conversion function.
|
|
*/
|
|
current_server_encoding = GetDatabaseEncoding();
|
|
if (NoNeedToConvert(current_server_encoding, encoding)) {
|
|
u_sess->mb_cxt.ClientEncoding = &pg_enc2name_tbl[encoding];
|
|
u_sess->mb_cxt.ToServerConvProc = NULL;
|
|
u_sess->mb_cxt.ToClientConvProc = NULL;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Search the cache for the entry previously prepared by
|
|
* PrepareClientEncoding; if there isn't one, we lose. While at it,
|
|
* release any duplicate entries so that repeated Prepare/Set cycles don't
|
|
* leak memory.
|
|
*/
|
|
found = false;
|
|
prev = NULL;
|
|
for (lc = list_head(u_sess->mb_cxt.ConvProcList); lc; lc = next) {
|
|
ConvProcInfo* conv_info = (ConvProcInfo*)lfirst(lc);
|
|
next = lnext(lc);
|
|
if (conv_info->s_encoding == current_server_encoding && conv_info->c_encoding == encoding) {
|
|
if (!found) {
|
|
/* Found newest entry, so set up */
|
|
u_sess->mb_cxt.ClientEncoding = &pg_enc2name_tbl[encoding];
|
|
u_sess->mb_cxt.ToServerConvProc = &conv_info->to_server_info;
|
|
u_sess->mb_cxt.ToClientConvProc = &conv_info->to_client_info;
|
|
found = true;
|
|
} else {
|
|
/* Duplicate entry, release it */
|
|
u_sess->mb_cxt.ConvProcList = list_delete_cell(u_sess->mb_cxt.ConvProcList, lc, prev);
|
|
pfree(conv_info);
|
|
continue; /* prev mustn't advance */
|
|
}
|
|
}
|
|
|
|
prev = lc;
|
|
}
|
|
|
|
if (found) {
|
|
return 0; /* success */
|
|
} else {
|
|
return -1; /* it's not cached, so fail */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialize client encoding conversions.
|
|
* Called from InitPostgres() once during backend startup.
|
|
*/
|
|
void InitializeClientEncoding(void)
|
|
{
|
|
Assert(!u_sess->mb_cxt.backend_startup_complete);
|
|
u_sess->mb_cxt.backend_startup_complete = true;
|
|
|
|
if (PrepareClientEncoding(u_sess->mb_cxt.pending_client_encoding) < 0 ||
|
|
SetClientEncoding(u_sess->mb_cxt.pending_client_encoding) < 0) {
|
|
/*
|
|
* Oops, the requested conversion is not available. We couldn't fail
|
|
* before, but we can now.
|
|
*/
|
|
ereport(FATAL,
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("conversion between %s and %s is not supported",
|
|
pg_enc2name_tbl[u_sess->mb_cxt.pending_client_encoding].name,
|
|
GetDatabaseEncodingName())));
|
|
}
|
|
u_sess->mb_cxt.character_set_connection = &pg_enc2name_tbl[GetDatabaseEncoding()];
|
|
if (ENABLE_MULTI_CHARSET) {
|
|
u_sess->mb_cxt.collation_connection = get_default_collation_by_charset(GetDatabaseEncoding(), false);
|
|
} else {
|
|
u_sess->mb_cxt.collation_connection = InvalidOid;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* returns the current client encoding
|
|
*/
|
|
int pg_get_client_encoding(void)
|
|
{
|
|
Assert(u_sess->mb_cxt.ClientEncoding);
|
|
return u_sess->mb_cxt.ClientEncoding->encoding;
|
|
}
|
|
|
|
/*
|
|
* returns the current client encoding name
|
|
*/
|
|
const char* pg_get_client_encoding_name(void)
|
|
{
|
|
Assert(u_sess->mb_cxt.ClientEncoding);
|
|
return u_sess->mb_cxt.ClientEncoding->name;
|
|
}
|
|
|
|
/*
|
|
* Apply encoding conversion on src and return it. The encoding
|
|
* conversion function is chosen from the pg_conversion system catalog
|
|
* marked as "default". If it is not found in the schema search path,
|
|
* it's taken from pg_catalog schema. If it even is not in the schema,
|
|
* warn and return src.
|
|
*
|
|
* If conversion occurs, a palloc'd null-terminated string is returned.
|
|
* In the case of no conversion, src is returned.
|
|
*
|
|
* CAUTION: although the presence of a length argument means that callers
|
|
* can pass non-null-terminated strings, care is required because the same
|
|
* string will be passed back if no conversion occurs. Such callers *must*
|
|
* check whether result == src and handle that case differently.
|
|
*
|
|
* Note: we try to avoid raising error, since that could get us into
|
|
* infinite recursion when this function is invoked during error message
|
|
* sending. It should be OK to raise error for overlength strings though,
|
|
* since the recursion will come with a shorter message.
|
|
*/
|
|
unsigned char* pg_do_encoding_conversion(unsigned char* src, int len, int src_encoding, int dest_encoding)
|
|
{
|
|
unsigned char* result = NULL;
|
|
Oid proc;
|
|
|
|
if (!IsTransactionState()) {
|
|
return src;
|
|
}
|
|
if (NoNeedToConvert(src_encoding, dest_encoding)) {
|
|
return src;
|
|
}
|
|
if (len <= 0) {
|
|
return src;
|
|
}
|
|
proc = FindDefaultConversionProc(src_encoding, dest_encoding);
|
|
if (!OidIsValid(proc)) {
|
|
ereport(DEBUG2,
|
|
(errcode(ERRCODE_UNDEFINED_FUNCTION),
|
|
errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
|
|
pg_encoding_to_char(src_encoding),
|
|
pg_encoding_to_char(dest_encoding))));
|
|
return src;
|
|
}
|
|
|
|
/*
|
|
* XXX we should avoid throwing errors in OidFunctionCall. Otherwise we
|
|
* are going into infinite loop! So we have to make sure that the
|
|
* function exists before calling OidFunctionCall.
|
|
*/
|
|
if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(proc))) {
|
|
ereport(LOG, (errmsg("cache lookup failed for function %u", proc)));
|
|
return src;
|
|
}
|
|
|
|
/*
|
|
* Allocate space for conversion result, being wary of integer overflow
|
|
*/
|
|
if ((Size)len >= (MaxAllocSize / (Size)MAX_CONVERSION_GROWTH)) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
|
errmsg("out of memory"),
|
|
errdetail("String of %d bytes is too long for encoding conversion.", len)));
|
|
}
|
|
result = (unsigned char*)palloc(len * MAX_CONVERSION_GROWTH + 1);
|
|
|
|
OidFunctionCall5(proc,
|
|
Int32GetDatum(src_encoding),
|
|
Int32GetDatum(dest_encoding),
|
|
CStringGetDatum(src),
|
|
CStringGetDatum(result),
|
|
Int32GetDatum(len));
|
|
return result;
|
|
}
|
|
|
|
void construct_conversion_fmgr_info(int src_encoding, int dst_encoding, void* finfo)
|
|
{
|
|
Assert(finfo != NULL);
|
|
|
|
FmgrInfo* convert_finfo = (FmgrInfo*)finfo;
|
|
if (src_encoding == dst_encoding) {
|
|
convert_finfo->fn_oid = InvalidOid;
|
|
return;
|
|
}
|
|
|
|
if (src_encoding == PG_SQL_ASCII || dst_encoding == PG_SQL_ASCII) {
|
|
convert_finfo->fn_oid = InvalidOid;
|
|
return;
|
|
}
|
|
|
|
Oid convert_func = FindDefaultConversionProc(src_encoding, dst_encoding);
|
|
if (OidIsValid(convert_func)) {
|
|
fmgr_info(convert_func, convert_finfo);
|
|
} else {
|
|
convert_finfo->fn_oid = InvalidOid;
|
|
}
|
|
}
|
|
|
|
|
|
static char* fast_encoding_conversion(char* src, int len, int src_encoding, int dest_encoding, FmgrInfo* convert_finfo)
|
|
{
|
|
if (len <= 0) {
|
|
return src;
|
|
}
|
|
|
|
char* result = NULL;
|
|
Assert(convert_finfo != NULL);
|
|
Assert(OidIsValid(convert_finfo->fn_oid));
|
|
|
|
/*
|
|
* Allocate space for conversion result, being wary of integer overflow
|
|
*/
|
|
if ((Size)len >= (MaxAllocSize / (Size)MAX_CONVERSION_GROWTH)) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
|
errmsg("out of memory"),
|
|
errdetail("String of %d bytes is too long for encoding conversion.", len)));
|
|
}
|
|
result = (char*)palloc(len * MAX_CONVERSION_GROWTH + 1);
|
|
|
|
FunctionCall5(convert_finfo,
|
|
Int32GetDatum(src_encoding),
|
|
Int32GetDatum(dest_encoding),
|
|
CStringGetDatum(src),
|
|
CStringGetDatum(result),
|
|
Int32GetDatum(len));
|
|
return result;
|
|
}
|
|
|
|
char* try_fast_encoding_conversion(char* src, int len, int src_encoding, int dest_encoding, void* convert_finfo)
|
|
{
|
|
if (unlikely(!OidIsValid(((FmgrInfo*)convert_finfo)->fn_oid))) {
|
|
return (char*)pg_do_encoding_conversion((unsigned char*)src, len, src_encoding, dest_encoding);
|
|
}
|
|
|
|
return fast_encoding_conversion(src, len, src_encoding, dest_encoding, (FmgrInfo*)convert_finfo);
|
|
}
|
|
|
|
/*
|
|
* Convert string using encoding_name. The source
|
|
* encoding is the DB encoding.
|
|
*
|
|
* BYTEA convert_to(TEXT string, NAME encoding_name) */
|
|
Datum pg_convert_to(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum string = PG_GETARG_DATUM(0);
|
|
Datum dest_encoding_name = PG_GETARG_DATUM(1);
|
|
Datum src_encoding_name = DirectFunctionCall1(namein, CStringGetDatum(u_sess->mb_cxt.DatabaseEncoding->name));
|
|
Datum result;
|
|
|
|
/*
|
|
* pg_convert expects a bytea as its first argument. We're passing it a
|
|
* text argument here, relying on the fact that they are both in fact
|
|
* varlena types, and thus structurally identical.
|
|
*/
|
|
result = DirectFunctionCall3(pg_convert, string, src_encoding_name, dest_encoding_name);
|
|
|
|
PG_RETURN_DATUM(result);
|
|
}
|
|
|
|
/* for GBK order */
|
|
Datum pg_convert_to_nocase(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum string = PG_GETARG_DATUM(0);
|
|
Datum dest_encoding_name = PG_GETARG_DATUM(1);
|
|
Datum src_encoding_name = DirectFunctionCall1(namein, CStringGetDatum(u_sess->mb_cxt.DatabaseEncoding->name));
|
|
Datum result;
|
|
FUNC_CHECK_HUGE_POINTER(PG_ARGISNULL(0), DatumGetPointer(string), "pg_convert()");
|
|
|
|
/*
|
|
* pg_convert expects a bytea as its first argument. We're passing it a
|
|
* text argument here, relying on the fact that they are both in fact
|
|
* varlena types, and thus structurally identical.
|
|
*/
|
|
result = DirectFunctionCall3(pg_convert_nocase, string, src_encoding_name, dest_encoding_name);
|
|
|
|
PG_RETURN_DATUM(result);
|
|
}
|
|
|
|
/*
|
|
* Convert string using encoding_name. The destination
|
|
* encoding is the DB encoding.
|
|
*
|
|
* TEXT convert_from(BYTEA string, NAME encoding_name) */
|
|
Datum pg_convert_from(PG_FUNCTION_ARGS)
|
|
{
|
|
Datum string = PG_GETARG_DATUM(0);
|
|
Datum src_encoding_name = PG_GETARG_DATUM(1);
|
|
Datum dest_encoding_name = DirectFunctionCall1(namein, CStringGetDatum(u_sess->mb_cxt.DatabaseEncoding->name));
|
|
Datum result;
|
|
|
|
result = DirectFunctionCall3(pg_convert, string, src_encoding_name, dest_encoding_name);
|
|
|
|
/*
|
|
* pg_convert returns a bytea, which we in turn return as text, relying on
|
|
* the fact that they are both in fact varlena types, and thus
|
|
* structurally identical. Although not all bytea values are valid text,
|
|
* in this case it will be because we've told pg_convert to return one
|
|
* that is valid as text in the current database encoding.
|
|
*/
|
|
PG_RETURN_DATUM(result);
|
|
}
|
|
|
|
/*
|
|
* Convert string using encoding_names.
|
|
*
|
|
* BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
|
|
*/
|
|
Datum pg_convert(PG_FUNCTION_ARGS)
|
|
{
|
|
bytea* string = PG_GETARG_BYTEA_PP(0);
|
|
char* src_encoding_name = NameStr(*PG_GETARG_NAME(1));
|
|
int src_encoding = pg_char_to_encoding(src_encoding_name);
|
|
char* dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
|
|
int dest_encoding = pg_char_to_encoding(dest_encoding_name);
|
|
const char* src_str = NULL;
|
|
char* dest_str = NULL;
|
|
bytea* retval = NULL;
|
|
int len;
|
|
|
|
if (src_encoding < 0) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid source encoding name \"%s\"", src_encoding_name)));
|
|
}
|
|
if (dest_encoding < 0) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
errmsg("invalid destination encoding name \"%s\"", dest_encoding_name)));
|
|
}
|
|
|
|
/* make sure that source string is valid */
|
|
len = VARSIZE_ANY_EXHDR(string);
|
|
src_str = VARDATA_ANY(string);
|
|
(void)pg_verify_mbstr_len(src_encoding, src_str, len, false);
|
|
|
|
dest_str = (char*)pg_do_encoding_conversion((unsigned char*)src_str, len, src_encoding, dest_encoding);
|
|
if (dest_str != src_str) {
|
|
len = strlen(dest_str);
|
|
}
|
|
/*
|
|
* build bytea data type structure.
|
|
*/
|
|
retval = (bytea*)palloc(len + VARHDRSZ);
|
|
SET_VARSIZE(retval, len + VARHDRSZ);
|
|
if (len > 0) {
|
|
errno_t rc = memcpy_s(VARDATA(retval), len, dest_str, len);
|
|
securec_check(rc, "", "");
|
|
}
|
|
|
|
if (dest_str != src_str) {
|
|
pfree(dest_str);
|
|
}
|
|
/* free memory if allocated by the toaster */
|
|
PG_FREE_IF_COPY(string, 0);
|
|
|
|
PG_RETURN_BYTEA_P(retval);
|
|
}
|
|
|
|
/*
 * Convert a byte string between two named encodings and fold ASCII
 * upper-case letters in the result to lower case.
 *
 * Arguments are the same as pg_convert: (BYTEA string, NAME
 * src_encoding_name, NAME dest_encoding_name). Returns a BYTEA.
 *
 * Raises ERROR for an unknown source or destination encoding name. The
 * input is checked with pg_verify_mbstr_len in the source encoding before
 * it is converted.
 *
 * Case folding works byte by byte and stops at the first NUL byte or at the
 * end of the data, whichever comes first.
 * NOTE(review): folding individual bytes may also alter trailing bytes of
 * multibyte characters that fall in 'A'..'Z' -- confirm this is intended.
 *
 * An empty input yields an empty result: the copy and the folding pass are
 * skipped when there are no payload bytes, so memcpy_s is never called with
 * a zero-sized destination and the result buffer is never read past its
 * end.
 */
Datum pg_convert_nocase(PG_FUNCTION_ARGS)
{
    bytea* string = PG_GETARG_BYTEA_PP(0);
    char* src_encoding_name = NameStr(*PG_GETARG_NAME(1));
    int src_encoding = pg_char_to_encoding(src_encoding_name);
    char* dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
    int dest_encoding = pg_char_to_encoding(dest_encoding_name);
    const char* src_str = NULL;
    char* dest_str = NULL;
    bytea* retval = NULL;
    int len;
    const int chardiff = 'a' - 'A';

    if (src_encoding < 0) {
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("invalid source encoding name \"%s\"", src_encoding_name)));
    }
    if (dest_encoding < 0) {
        ereport(ERROR,
            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                errmsg("invalid destination encoding name \"%s\"", dest_encoding_name)));
    }

    /* make sure that source string is valid */
    len = VARSIZE_ANY_EXHDR(string);
    src_str = VARDATA_ANY(string);
    (void)pg_verify_mbstr_len(src_encoding, src_str, len, false);

    dest_str = (char*)pg_do_encoding_conversion((unsigned char*)src_str, len, src_encoding, dest_encoding);
    if (dest_str != src_str) {
        /* a converted result is null-terminated, so measure it */
        len = strlen(dest_str);
    }

    /*
     * build bytea data type structure.
     */
    retval = (bytea*)palloc(len + VARHDRSZ);
    SET_VARSIZE(retval, len + VARHDRSZ);

    if (len > 0) {
        /* same guard as pg_convert: nothing to copy or fold for empty data */
        errno_t rc = memcpy_s(VARDATA(retval), len, dest_str, len);
        securec_check(rc, "", "");

        /* fold 'A'..'Z' to 'a'..'z', stopping at the first NUL byte */
        char* out = VARDATA(retval);
        for (int i = 0; i < len && out[i] != '\0'; i++) {
            if (out[i] >= 'A' && out[i] <= 'Z') {
                out[i] += chardiff;
            }
        }
    }

    if (dest_str != src_str) {
        pfree(dest_str);
    }
    /* free memory if allocated by the toaster */
    PG_FREE_IF_COPY(string, 0);

    PG_RETURN_BYTEA_P(retval);
}
|
|
|
|
/*
|
|
* get the length of the string considered as text in the specified
|
|
* encoding. Raises an error if the data is not valid in that
|
|
* encoding.
|
|
*
|
|
* INT4 length (BYTEA string, NAME src_encoding_name)
|
|
*/
|
|
Datum length_in_encoding(PG_FUNCTION_ARGS)
|
|
{
|
|
bytea* string = PG_GETARG_BYTEA_P(0);
|
|
char* src_encoding_name = NameStr(*PG_GETARG_NAME(1));
|
|
int src_encoding = pg_char_to_encoding(src_encoding_name);
|
|
int len = VARSIZE(string) - VARHDRSZ;
|
|
int ret_val;
|
|
|
|
if (src_encoding < 0) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid encoding name \"%s\"", src_encoding_name)));
|
|
}
|
|
ret_val = pg_verify_mbstr_len(src_encoding, VARDATA(string), len, false);
|
|
PG_RETURN_INT32(ret_val);
|
|
}
|
|
|
|
/*
 * SQL-callable: return the maxmblen entry of pg_wchar_table for the given
 * encoding ID, or NULL when the ID is not a valid encoding.
 */
Datum pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
{
    int encoding = PG_GETARG_INT32(0);

    if (!PG_VALID_ENCODING(encoding)) {
        PG_RETURN_NULL();
    }
    PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
}
|
|
|
|
/*
|
|
* convert client encoding to server encoding.
|
|
*/
|
|
char* pg_client_to_server(const char* s, int len)
|
|
{
|
|
Assert(u_sess->mb_cxt.ClientEncoding);
|
|
|
|
return pg_any_to_server(s, len, u_sess->mb_cxt.ClientEncoding->encoding);
|
|
}
|
|
|
|
char* verify_string_for_ascii(const char* s, int len, int encoding, bool bulkload_illegal_chars_conversion)
|
|
{
|
|
/*
|
|
* No conversion is possible, but we must still validate the data,
|
|
* because the client-side code might have done string escaping using
|
|
* the selected client_encoding. If the client encoding is ASCII-safe
|
|
* then we just do a straight validation under that encoding. For an
|
|
* ASCII-unsafe encoding we have a problem: we dare not pass such data
|
|
* to the parser but we have no way to convert it. We compromise by
|
|
* rejecting the data if it contains any non-ASCII characters.
|
|
*/
|
|
if (PG_VALID_BE_ENCODING(encoding)) {
|
|
(void)pg_verify_mbstr(encoding, s, len, false);
|
|
return (char*)s;
|
|
}
|
|
|
|
int i;
|
|
for (i = 0; i < len; i++) {
|
|
if (s[i] == '\0' || IS_HIGHBIT_SET(s[i])) {
|
|
if (!bulkload_illegal_chars_conversion) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
|
errmsg("invalid byte value for encoding \"%s\": 0x%02x",
|
|
pg_enc2name_tbl[PG_SQL_ASCII].name,
|
|
(unsigned char)s[i])));
|
|
}
|
|
|
|
if (s[i] == '\0') {
|
|
*((char*)&s[i]) = ' ';
|
|
} else {
|
|
*((char*)&s[i]) = '?';
|
|
}
|
|
}
|
|
}
|
|
return (char*)s;
|
|
}
|
|
|
|
/*
|
|
* convert any encoding to server encoding.
|
|
*/
|
|
char* pg_any_to_server(const char* s, int len, int encoding)
|
|
{
|
|
bool bulkload_illegal_chars_conversion = false;
|
|
|
|
Assert(u_sess->mb_cxt.DatabaseEncoding);
|
|
Assert(u_sess->mb_cxt.ClientEncoding);
|
|
|
|
if (len <= 0) {
|
|
return (char*)s;
|
|
}
|
|
if (u_sess->cmd_cxt.bulkload_compatible_illegal_chars) {
|
|
bulkload_illegal_chars_conversion = true;
|
|
}
|
|
|
|
if (encoding == u_sess->mb_cxt.DatabaseEncoding->encoding || encoding == PG_SQL_ASCII ||
|
|
(encoding == PG_GB18030 && u_sess->mb_cxt.DatabaseEncoding->encoding == PG_GB18030_2022)) {
|
|
/*
|
|
* No conversion is needed, but we must still validate the data.
|
|
*/
|
|
(void)pg_verify_mbstr(u_sess->mb_cxt.DatabaseEncoding->encoding, s, len, false);
|
|
return (char*)s;
|
|
}
|
|
|
|
if (u_sess->mb_cxt.DatabaseEncoding->encoding == PG_SQL_ASCII) {
|
|
return verify_string_for_ascii(s, len, encoding, bulkload_illegal_chars_conversion);
|
|
}
|
|
|
|
if (u_sess->mb_cxt.ClientEncoding->encoding == encoding) {
|
|
return perform_default_encoding_conversion(s, len, true);
|
|
} else {
|
|
return (char*)pg_do_encoding_conversion(
|
|
(unsigned char*)s, len, encoding, u_sess->mb_cxt.DatabaseEncoding->encoding);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* convert any encoding to client encoding.
|
|
*/
|
|
char* pg_any_to_client(const char* s, int len, int encoding, void* convert_finfo)
|
|
{
|
|
Assert(u_sess->mb_cxt.ClientEncoding);
|
|
|
|
if (len <= 0) {
|
|
return (char*)s;
|
|
}
|
|
int client_encoding = u_sess->mb_cxt.ClientEncoding->encoding;
|
|
|
|
if (encoding == client_encoding || client_encoding == PG_SQL_ASCII) {
|
|
/*
|
|
* No conversion is needed, but we must still validate the data.
|
|
*/
|
|
return (char*)s;
|
|
}
|
|
|
|
if (encoding == PG_SQL_ASCII) {
|
|
/* No conversion is possible, but we must validate the result */
|
|
(void) pg_verify_mbstr(client_encoding, s, len, false);
|
|
return (char*)s;
|
|
}
|
|
|
|
if (u_sess->mb_cxt.DatabaseEncoding->encoding == encoding) {
|
|
return perform_default_encoding_conversion(s, len, false);
|
|
} else if (convert_finfo != NULL) {
|
|
return try_fast_encoding_conversion(
|
|
(char*)s, len, encoding, client_encoding, convert_finfo);
|
|
} else {
|
|
return (char*)pg_do_encoding_conversion(
|
|
(unsigned char*)s, len, encoding, client_encoding);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* convert client encoding to encoding.
|
|
*/
|
|
char* pg_client_to_any(const char* s, int len, int dst_encoding, void* convert_finfo)
|
|
{
|
|
bool bulkload_illegal_chars_conversion = false;
|
|
|
|
Assert(u_sess->mb_cxt.ClientEncoding);
|
|
|
|
if (len <= 0) {
|
|
return (char*)s;
|
|
}
|
|
if (u_sess->cmd_cxt.bulkload_compatible_illegal_chars) {
|
|
bulkload_illegal_chars_conversion = true;
|
|
}
|
|
|
|
int client_encoding = u_sess->mb_cxt.ClientEncoding->encoding;
|
|
if (client_encoding == dst_encoding || client_encoding == PG_SQL_ASCII) {
|
|
/*
|
|
* No conversion is needed, but we must still validate the data.
|
|
*/
|
|
(void)pg_verify_mbstr(dst_encoding, s, len, false);
|
|
return (char*)s;
|
|
}
|
|
|
|
if (dst_encoding == PG_SQL_ASCII) {
|
|
return verify_string_for_ascii(s, len, client_encoding, bulkload_illegal_chars_conversion);
|
|
}
|
|
|
|
if (u_sess->mb_cxt.DatabaseEncoding->encoding == dst_encoding) {
|
|
return perform_default_encoding_conversion(s, len, true);
|
|
} else if (convert_finfo != NULL) {
|
|
return try_fast_encoding_conversion( (char*)s, len, client_encoding, dst_encoding, convert_finfo);
|
|
} else {
|
|
return (char*)pg_do_encoding_conversion((unsigned char*)s, len, client_encoding, dst_encoding);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* convert server encoding to client encoding.
|
|
*/
|
|
char* pg_server_to_client(const char* s, int len)
|
|
{
|
|
char* str = NULL;
|
|
|
|
Assert(u_sess->mb_cxt.ClientEncoding);
|
|
|
|
str = pg_server_to_any(s, len, u_sess->mb_cxt.ClientEncoding->encoding);
|
|
if (str == NULL) {
|
|
ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("pg_server_to_any returns null.")));
|
|
}
|
|
return str;
|
|
}
|
|
|
|
/*
|
|
* Preheck if pg_server_to_any is really gonna do a conversion. That makes a difference
|
|
* in COPY TO FILE, which is weird and not logical. Yet HandleCopyDataRow are not to
|
|
* be changed and this function is added instead.
|
|
*/
|
|
bool WillTranscodingBePerformed(int encoding)
|
|
{
|
|
return (!(encoding == u_sess->mb_cxt.DatabaseEncoding->encoding || encoding == PG_SQL_ASCII ||
|
|
u_sess->mb_cxt.DatabaseEncoding->encoding == PG_SQL_ASCII));
|
|
}
|
|
|
|
/*
|
|
* convert server encoding to any encoding.
|
|
*/
|
|
char* pg_server_to_any(const char* s, int len, int encoding, void *convert_finfo)
|
|
{
|
|
Assert(u_sess->mb_cxt.DatabaseEncoding);
|
|
Assert(u_sess->mb_cxt.ClientEncoding);
|
|
|
|
if (len <= 0) {
|
|
return (char*)s;
|
|
}
|
|
if (encoding == u_sess->mb_cxt.DatabaseEncoding->encoding || encoding == PG_SQL_ASCII) {
|
|
return (char*)s; /* assume data is valid */
|
|
}
|
|
if (u_sess->mb_cxt.DatabaseEncoding->encoding == PG_SQL_ASCII) {
|
|
/* No conversion is possible, but we must validate the result */
|
|
(void) pg_verify_mbstr(encoding, s, len, false);
|
|
return (char*)s;
|
|
}
|
|
if (u_sess->mb_cxt.ClientEncoding->encoding == encoding) {
|
|
return perform_default_encoding_conversion(s, len, false);
|
|
} else if (convert_finfo != NULL) {
|
|
return try_fast_encoding_conversion(
|
|
(char*)s, len, u_sess->mb_cxt.DatabaseEncoding->encoding, encoding, convert_finfo);
|
|
} else {
|
|
return (char*)pg_do_encoding_conversion(
|
|
(unsigned char*)s, len, u_sess->mb_cxt.DatabaseEncoding->encoding, encoding);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Perform default encoding conversion using cached FmgrInfo. Since
|
|
* this function does not access database at all, it is safe to call
|
|
* outside transactions. If the conversion has not been set up by
|
|
* SetClientEncoding(), no conversion is performed.
|
|
*/
|
|
static char* perform_default_encoding_conversion(const char* src, int len, bool is_client_to_server)
|
|
{
|
|
char* result = NULL;
|
|
int src_encoding, dest_encoding;
|
|
FmgrInfo* flinfo = NULL;
|
|
|
|
if (is_client_to_server) {
|
|
src_encoding = u_sess->mb_cxt.ClientEncoding->encoding;
|
|
dest_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;
|
|
flinfo = u_sess->mb_cxt.ToServerConvProc;
|
|
} else {
|
|
src_encoding = u_sess->mb_cxt.DatabaseEncoding->encoding;
|
|
dest_encoding = u_sess->mb_cxt.ClientEncoding->encoding;
|
|
flinfo = u_sess->mb_cxt.ToClientConvProc;
|
|
}
|
|
|
|
if (flinfo == NULL) {
|
|
return (char*)src;
|
|
}
|
|
/*
|
|
* Allocate space for conversion result, being wary of integer overflow
|
|
*/
|
|
if ((Size)len >= (MaxAllocSize / (Size)MAX_CONVERSION_GROWTH)) {
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
|
errmsg("out of memory"),
|
|
errdetail("String of %d bytes is too long for encoding conversion.", len)));
|
|
}
|
|
result = (char*)palloc(len * MAX_CONVERSION_GROWTH + 1);
|
|
|
|
FunctionCall5(flinfo,
|
|
Int32GetDatum(src_encoding),
|
|
Int32GetDatum(dest_encoding),
|
|
CStringGetDatum(src),
|
|
CStringGetDatum(result),
|
|
Int32GetDatum(len));
|
|
return result;
|
|
}
|
|
|
|
/* convert a multibyte string to a wchar */
|
|
int pg_mb2wchar(const char* from, pg_wchar* to)
|
|
{
|
|
return (*pg_wchar_table[u_sess->mb_cxt.DatabaseEncoding->encoding].mb2wchar_with_len)(
|
|
(const unsigned char*)from, to, strlen(from));
|
|
}
|
|
|
|
/* convert a multibyte string to a wchar with a limited length */
|
|
int pg_mb2wchar_with_len(const char* from, pg_wchar* to, int len)
|
|
{
|
|
return (*pg_wchar_table[u_sess->mb_cxt.DatabaseEncoding->encoding].mb2wchar_with_len)(
|
|
(const unsigned char*)from, to, len);
|
|
}
|
|
|
|
/* same, with any encoding */
|
|
int pg_encoding_mb2wchar_with_len(int encoding, const char* from, pg_wchar* to, int len)
|
|
{
|
|
return (*pg_wchar_table[encoding].mb2wchar_with_len)((const unsigned char*)from, to, len);
|
|
}
|
|
|
|
/* convert a wchar string to a multibyte */
|
|
int pg_wchar2mb(const pg_wchar* from, char* to)
|
|
{
|
|
return (*pg_wchar_table[u_sess->mb_cxt.DatabaseEncoding->encoding].wchar2mb_with_len)(
|
|
from, (unsigned char*)to, pg_wchar_strlen(from));
|
|
}
|
|
|
|
/* convert a wchar string to a multibyte with a limited length */
|
|
int pg_wchar2mb_with_len(const pg_wchar* from, char* to, int len)
|
|
{
|
|
return (*pg_wchar_table[u_sess->mb_cxt.DatabaseEncoding->encoding].wchar2mb_with_len)(
|
|
from, (unsigned char*)to, len);
|
|
}
|
|
|
|
/* same, with any encoding */
|
|
int pg_encoding_wchar2mb_with_len(int encoding, const pg_wchar* from, char* to, int len)
|
|
{
|
|
return (*pg_wchar_table[encoding].wchar2mb_with_len)(from, (unsigned char*)to, len);
|
|
}
|
|
|
|
/* returns the byte length of a multibyte character */
|
|
int pg_mblen(const char* mbstr)
|
|
{
|
|
return ((*pg_wchar_table[u_sess->mb_cxt.DatabaseEncoding->encoding].mblen)((const unsigned char*)mbstr));
|
|
}
|
|
|
|
/* returns the display length of a multibyte character */
|
|
int pg_dsplen(const char* mbstr)
|
|
{
|
|
return ((*pg_wchar_table[u_sess->mb_cxt.DatabaseEncoding->encoding].dsplen)((const unsigned char*)mbstr));
|
|
}
|
|
|
|
/* returns the length (counted in wchars) of a multibyte string */
|
|
int pg_mbstrlen(const char* mbstr)
|
|
{
|
|
int len = 0;
|
|
|
|
/* optimization for single byte encoding */
|
|
if (pg_database_encoding_max_length() == 1) {
|
|
return strlen(mbstr);
|
|
}
|
|
while (*mbstr) {
|
|
mbstr += pg_mblen(mbstr);
|
|
len++;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/* returns the length (counted in wchars) of a multibyte string
|
|
* (not necessarily NULL terminated)
|
|
*/
|
|
int pg_mbstrlen_with_len(const char* mbstr, int limit)
|
|
{
|
|
int len = 0;
|
|
|
|
/* optimization for single byte encoding */
|
|
if (pg_database_encoding_max_length() == 1) {
|
|
return limit;
|
|
}
|
|
while (limit > 0 && *mbstr) {
|
|
int l = pg_mblen(mbstr);
|
|
|
|
limit -= l;
|
|
mbstr += l;
|
|
len++;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/* returns the length (counted in wchars) of a multibyte string
|
|
* (not necessarily NULL terminated)
|
|
*/
|
|
int pg_encoding_mbstrlen_with_len(const char* mbstr, int limit, int encoding)
|
|
{
|
|
int len = 0;
|
|
|
|
/* optimization for single byte encoding */
|
|
if (pg_encoding_max_length(encoding) == 1) {
|
|
return limit;
|
|
}
|
|
while (limit > 0 && *mbstr) {
|
|
int l = pg_encoding_mblen(encoding, mbstr);
|
|
|
|
limit -= l;
|
|
mbstr += l;
|
|
len++;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/* returns the length (counted in wchars) of a multibyte string
|
|
* with fixed encoding.
|
|
*/
|
|
int pg_mbstrlen_with_len_eml(const char* mbstr, int limit, int eml)
|
|
{
|
|
int len = 0;
|
|
|
|
/* optimization for single byte encoding */
|
|
if (eml == 1) {
|
|
return limit;
|
|
}
|
|
while (limit > 0 && *mbstr) {
|
|
int l = pg_mblen(mbstr);
|
|
limit -= l;
|
|
mbstr += l;
|
|
len++;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
int pg_mbstrlen_with_len_toast(const char* mbstr, int* limit)
|
|
{
|
|
int len = 0;
|
|
|
|
while (*limit > 0 && *mbstr) {
|
|
int l = pg_mblen(mbstr);
|
|
|
|
*limit -= l;
|
|
mbstr += l;
|
|
len++;
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*
|
|
* returns the byte length of a multibyte string
|
|
* (not necessarily NULL terminated)
|
|
* that is no longer than limit.
|
|
* this function does not break multibyte character boundary.
|
|
*/
|
|
int pg_mbcliplen(const char* mbstr, int len, int limit)
|
|
{
|
|
return pg_encoding_mbcliplen(u_sess->mb_cxt.DatabaseEncoding->encoding, mbstr, len, limit);
|
|
}
|
|
|
|
/*
|
|
* pg_mbcliplen with specified encoding
|
|
*/
|
|
int pg_encoding_mbcliplen(int encoding, const char* mbstr, int len, int limit)
|
|
{
|
|
mblen_converter mblen_fn;
|
|
int clen = 0;
|
|
int l;
|
|
|
|
/* optimization for single byte encoding */
|
|
if (pg_encoding_max_length(encoding) == 1) {
|
|
return cliplen(mbstr, len, limit);
|
|
}
|
|
mblen_fn = pg_wchar_table[encoding].mblen;
|
|
|
|
while (len > 0 && *mbstr) {
|
|
l = (*mblen_fn)((const unsigned char*)mbstr);
|
|
if ((clen + l) > limit) {
|
|
break;
|
|
}
|
|
clen += l;
|
|
if (clen == limit) {
|
|
break;
|
|
}
|
|
len -= l;
|
|
mbstr += l;
|
|
}
|
|
return clen;
|
|
}
|
|
|
|
/**
|
|
* calculate the length of mbstr
|
|
* @tparam calCharLength true for the character length, false for the byte length
|
|
* @param mbstr mbstr
|
|
* @param len length of mbstr
|
|
* @param limit limit of mbstr
|
|
* @return the length of mbstr
|
|
*/
|
|
template<bool calCharLength> int MbCharClipLen(const char* mbstr, int len, int limit)
|
|
{
|
|
int clen = 0;
|
|
int nch = 0;
|
|
int l;
|
|
|
|
/* optimization for single byte encoding */
|
|
if (pg_database_encoding_max_length() == 1) {
|
|
return cliplen(mbstr, len, limit);
|
|
}
|
|
while (len > 0 && *mbstr) {
|
|
l = pg_mblen(mbstr);
|
|
if (calCharLength) {
|
|
nch++;
|
|
} else {
|
|
nch += l;
|
|
}
|
|
if (nch > limit) {
|
|
break;
|
|
}
|
|
clen += l;
|
|
len -= l;
|
|
mbstr += l;
|
|
}
|
|
return clen;
|
|
}
|
|
|
|
/*
|
|
* Similar to pg_mbcliplen except the limit parameter specifies the
|
|
* byte length, not the character length.
|
|
*/
|
|
int pg_mbcharcliplen(const char* mbstr, int len, int limit)
|
|
{
|
|
bool calCharLength = DB_IS_CMPT(PG_FORMAT | B_FORMAT);
|
|
if (calCharLength) {
|
|
return MbCharClipLen<true>(mbstr, len, limit);
|
|
} else {
|
|
return MbCharClipLen<false>(mbstr, len, limit);
|
|
}
|
|
}
|
|
/*
|
|
* Description : Similar to pg_mbcliplen except the limit parameter specifies
|
|
* the character length, not the byte length.
|
|
* Notes :
|
|
*/
|
|
int pg_mbcharcliplen_orig(const char* mbstr, int len, int limit)
|
|
{
|
|
return MbCharClipLen<true>(mbstr, len, limit);
|
|
}
|
|
|
|
/*
 * mbcliplen for any single-byte encoding.
 *
 * Returns the number of bytes before the first NUL in str, capped at the
 * smaller of len and limit.  A non-positive cap yields 0.
 */
static int cliplen(const char* str, int len, int limit)
{
    /* never look further than the tighter of the two bounds */
    const int bound = (len < limit) ? len : limit;
    int n;

    for (n = 0; n < bound; n++) {
        if (str[n] == '\0') {
            break;
        }
    }
    return n;
}
|
|
|
|
void SetDatabaseEncoding(int encoding)
|
|
{
|
|
if (!PG_VALID_BE_ENCODING(encoding)) {
|
|
ereport(ERROR, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("invalid database encoding: %d", encoding)));
|
|
}
|
|
u_sess->mb_cxt.DatabaseEncoding = &pg_enc2name_tbl[encoding];
|
|
Assert(u_sess->mb_cxt.DatabaseEncoding->encoding == encoding);
|
|
}
|
|
|
|
/*
 * Bind gettext to the codeset equivalent with the database encoding.
 *
 * The body is compiled only when ENABLE_NLS is defined; without it the
 * function does nothing.
 */
void pg_bind_textdomain_codeset(const char* domain_name)
{
#if defined(ENABLE_NLS)
    int encoding = GetDatabaseEncoding();

    /*
     * By default gettext() uses the codeset named by LC_CTYPE, so nothing
     * has to be done when that already matches the database encoding.
     * CREATE DATABASE enforces (or trusts) that the locale's codeset matches
     * the database encoding, except for the C locale; hence only for the
     * C/POSIX locale do we bind gettext() explicitly to the right codeset.
     *
     * On Windows, though, gettext() tends to get confused so we always bind
     * it.
     */
#ifndef WIN32
    /* setlocale is thread-unsafe */
    AutoMutexLock localeLock(&gLocaleMutex);
    localeLock.lock();
    const char* ctype = gs_setlocale_r(LC_CTYPE, NULL);

    /* inspect the locale name while the lock is still held */
    const bool is_c_locale = (pg_strcasecmp(ctype, "C") == 0) || (pg_strcasecmp(ctype, "POSIX") == 0);
    localeLock.unLock();

    if (!is_c_locale) {
        return;
    }
#endif

    /* bind to the first table entry registered for the database encoding */
    for (int idx = 0; pg_enc2gettext_tbl[idx].name != NULL; idx++) {
        if (pg_enc2gettext_tbl[idx].encoding != encoding) {
            continue;
        }
        if (bind_textdomain_codeset(domain_name, pg_enc2gettext_tbl[idx].name) == NULL) {
            ereport(LOG, (errmsg("bind_textdomain_codeset failed")));
        }
        break;
    }
#endif
}
|
|
|
|
int GetDatabaseEncoding(void)
|
|
{
|
|
Assert(u_sess->mb_cxt.DatabaseEncoding);
|
|
return u_sess->mb_cxt.DatabaseEncoding->encoding;
|
|
}
|
|
|
|
const char* GetDatabaseEncodingName(void)
|
|
{
|
|
Assert(u_sess->mb_cxt.DatabaseEncoding);
|
|
return u_sess->mb_cxt.DatabaseEncoding->name;
|
|
}
|
|
|
|
int GetCharsetConnection(void)
|
|
{
|
|
Assert(u_sess->mb_cxt.character_set_connection);
|
|
return u_sess->mb_cxt.character_set_connection->encoding;
|
|
}
|
|
|
|
const char* GetCharsetConnectionName(void)
|
|
{
|
|
Assert(u_sess->mb_cxt.character_set_connection);
|
|
return u_sess->mb_cxt.character_set_connection->name;
|
|
}
|
|
|
|
/*
 * Return the session's collation_connection.
 *
 * InvalidOid is returned instead when ENABLE_MULTI_CHARSET is off or the
 * database is not B_FORMAT-compatible.
 */
Oid GetCollationConnection(void)
{
    return (!ENABLE_MULTI_CHARSET || !DB_IS_CMPT(B_FORMAT)) ? InvalidOid : u_sess->mb_cxt.collation_connection;
}
|
|
|
|
/*
 * fmgr-callable: return the database encoding name, converted to a Datum
 * by calling namein on it.
 */
Datum getdatabaseencoding(PG_FUNCTION_ARGS)
{
    Assert(u_sess->mb_cxt.DatabaseEncoding != NULL);

    const char* encoding_name = u_sess->mb_cxt.DatabaseEncoding->name;
    return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
}
|
|
|
|
/*
 * fmgr-callable: return the client encoding name, converted to a Datum by
 * calling namein on it.
 */
Datum pg_client_encoding(PG_FUNCTION_ARGS)
{
    Assert(u_sess->mb_cxt.ClientEncoding != NULL);

    const char* encoding_name = u_sess->mb_cxt.ClientEncoding->name;
    return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
}
|
|
|
|
int GetPlatformEncoding(void)
|
|
{
|
|
if (u_sess->mb_cxt.PlatformEncoding == NULL) {
|
|
int encoding;
|
|
|
|
AutoMutexLock localeLock(&gLocaleMutex);
|
|
localeLock.lock();
|
|
/* try to determine encoding of server's environment locale */
|
|
encoding = pg_get_encoding_from_locale("", true);
|
|
localeLock.unLock();
|
|
|
|
if (encoding < 0) {
|
|
encoding = PG_SQL_ASCII;
|
|
}
|
|
u_sess->mb_cxt.PlatformEncoding = &pg_enc2name_tbl[encoding];
|
|
}
|
|
return u_sess->mb_cxt.PlatformEncoding->encoding;
|
|
}
|
|
|
|
#ifdef WIN32
|
|
|
|
/*
|
|
* Result is palloc'ed null-terminated utf16 string. The character length
|
|
* is also passed to utf16len if not null. Returns NULL iff failed.
|
|
*/
|
|
WCHAR* pgwin32_toUTF16(const char* str, int len, int* utf16_len)
|
|
{
|
|
WCHAR* utf16 = NULL;
|
|
int dst_len;
|
|
|
|
/*
|
|
* Use MultiByteToWideChar directly if there is a corresponding codepage,
|
|
* or double conversion through UTF8 if not.
|
|
*/
|
|
UINT codepage = pg_enc2name_tbl[GetDatabaseEncoding()].codepage;
|
|
if (codepage != 0) {
|
|
utf16 = (WCHAR*)palloc(sizeof(WCHAR) * (len + 1));
|
|
dst_len = MultiByteToWideChar(codepage, 0, str, len, utf16, len);
|
|
utf16[dst_len] = (WCHAR)0;
|
|
} else {
|
|
char* utf8 = NULL;
|
|
utf8 = (char*)pg_do_encoding_conversion((unsigned char*)str, len, GetDatabaseEncoding(), PG_UTF8);
|
|
if (utf8 != str) {
|
|
len = strlen(utf8);
|
|
}
|
|
utf16 = (WCHAR*)palloc(sizeof(WCHAR) * (len + 1));
|
|
dst_len = MultiByteToWideChar(CP_UTF8, 0, utf8, len, utf16, len);
|
|
utf16[dst_len] = (WCHAR)0;
|
|
|
|
if (utf8 != str) {
|
|
pfree(utf8);
|
|
}
|
|
}
|
|
|
|
if (dst_len == 0 && len > 0) {
|
|
pfree(utf16);
|
|
return NULL; /* error */
|
|
}
|
|
|
|
if (utf16_len != NULL) {
|
|
*utf16_len = dst_len;
|
|
}
|
|
return utf16;
|
|
}
|
|
|
|
#endif
|