fixed c385b14 from https://gitee.com/dodders/openGauss-server/pulls/3151
增加正则匹配支持多字节字符的功能 (Add support for multi-byte characters in regular-expression matching)
This commit is contained in:
@ -777,13 +777,17 @@ Datum similar_escape(PG_FUNCTION_ARGS)
|
||||
esc_text = PG_GETARG_TEXT_PP(1);
|
||||
e = VARDATA_ANY(esc_text);
|
||||
elen = VARSIZE_ANY_EXHDR(esc_text);
|
||||
if (elen == 0)
|
||||
if (elen == 0) {
|
||||
e = NULL; /* no escape character */
|
||||
else if (elen != 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
|
||||
errmsg("invalid escape string"),
|
||||
errhint("Escape string must be empty or one character.")));
|
||||
} else if (elen > 1) {
|
||||
int escape_mblen = pg_mbstrlen_with_len(e, elen);
|
||||
if (escape_mblen > 1) {
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
|
||||
errmsg("invalid escape string"),
|
||||
errhint("Escape string must be empty or one character.")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------
|
||||
@ -803,8 +807,10 @@ Datum similar_escape(PG_FUNCTION_ARGS)
|
||||
* We need room for the prefix/postfix plus as many as 3 output bytes per
|
||||
* input byte; since the input is at most 1GB this can't overflow
|
||||
*/
|
||||
result = (text*)palloc(VARHDRSZ + 6 + 3 * plen);
|
||||
const int dataBuffSize = 6 + 3 * plen;
|
||||
result = (text*)palloc(VARHDRSZ + dataBuffSize);
|
||||
r = VARDATA(result);
|
||||
const char* dataStartPtr = r;
|
||||
|
||||
*r++ = '^';
|
||||
*r++ = '(';
|
||||
@ -814,6 +820,50 @@ Datum similar_escape(PG_FUNCTION_ARGS)
|
||||
while (plen > 0) {
|
||||
char pchar = *p;
|
||||
|
||||
/*
|
||||
* If both the escape character and the current character from the
|
||||
* pattern are multi-byte, we need to take the slow path.
|
||||
*
|
||||
* But if one of them is single-byte, we can process the pattern one
|
||||
* byte at a time, ignoring multi-byte characters. (This works
|
||||
* because all server-encodings have the property that a valid
|
||||
* multi-byte character representation cannot contain the
|
||||
* representation of a valid single-byte character.)
|
||||
*/
|
||||
if (elen > 1) {
|
||||
int mblen = pg_mblen(p);
|
||||
if (mblen > 1) {
|
||||
/* slow, multi-byte path */
|
||||
if (afterescape) {
|
||||
*r++ = '\\';
|
||||
int destMax = dataBuffSize - (r - dataStartPtr) / sizeof(char);
|
||||
errno_t rc = memcpy_s(r, destMax, p, mblen);
|
||||
securec_check(rc, "\0", "\0");
|
||||
r += mblen;
|
||||
afterescape = false;
|
||||
} else if (e && elen == mblen && memcmp(e, p, mblen) == 0) {
|
||||
/* SQL99 escape character; do not send to output */
|
||||
afterescape = true;
|
||||
} else {
|
||||
/*
|
||||
* We know it's a multi-byte character, so we don't need
|
||||
* to do all the comparisons to single-byte characters
|
||||
* that we do below.
|
||||
*/
|
||||
int destMax = dataBuffSize - (r - dataStartPtr) / sizeof(char);
|
||||
errno_t rc = memcpy_s(r, destMax, p, mblen);
|
||||
securec_check(rc, "\0", "\0");
|
||||
r += mblen;
|
||||
}
|
||||
|
||||
p += mblen;
|
||||
plen -= mblen;
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* fast path */
|
||||
if (afterescape) {
|
||||
if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */
|
||||
*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
|
||||
|
@ -278,3 +278,34 @@ select 'xyz' ~ '((.)){0}(\2){0}' as t;
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- test similar with regex
|
||||
SELECT 'abc' SIMILAR TO '我%(b|d)%' escape '我' AS RESULT;
|
||||
result
|
||||
--------
|
||||
f
|
||||
(1 row)
|
||||
|
||||
SELECT '%abc' SIMILAR TO '我%abc' escape '我' AS RESULT;
|
||||
result
|
||||
--------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT 'abc' SIMILAR TO '你%(b|d)%' escape '你' AS RESULT;
|
||||
result
|
||||
--------
|
||||
f
|
||||
(1 row)
|
||||
|
||||
SELECT '%abc' SIMILAR TO '你%abc' escape '你' AS RESULT;
|
||||
result
|
||||
--------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT '%abc' SIMILAR TO '\%abc' escape '\' AS RESULT;
|
||||
result
|
||||
--------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
|
@ -70,3 +70,10 @@ select 'a' ~ '()+\1';
|
||||
|
||||
-- test {0}
|
||||
select 'xyz' ~ '((.)){0}(\2){0}' as t;
|
||||
|
||||
-- test similar with regex
|
||||
SELECT 'abc' SIMILAR TO '我%(b|d)%' escape '我' AS RESULT;
|
||||
SELECT '%abc' SIMILAR TO '我%abc' escape '我' AS RESULT;
|
||||
SELECT 'abc' SIMILAR TO '你%(b|d)%' escape '你' AS RESULT;
|
||||
SELECT '%abc' SIMILAR TO '你%abc' escape '你' AS RESULT;
|
||||
SELECT '%abc' SIMILAR TO '\%abc' escape '\' AS RESULT;
|
||||
|
Reference in New Issue
Block a user