!3164 【sync】同步 PR 3151 的代码到master分支

Merge pull request !3164 from laishenghao/cherry-pick-1678950672
This commit is contained in:
opengauss-bot
2023-03-16 08:05:54 +00:00
committed by Gitee
3 changed files with 95 additions and 7 deletions

View File

@@ -777,13 +777,17 @@ Datum similar_escape(PG_FUNCTION_ARGS)
esc_text = PG_GETARG_TEXT_PP(1);
e = VARDATA_ANY(esc_text);
elen = VARSIZE_ANY_EXHDR(esc_text);
if (elen == 0)
if (elen == 0) {
e = NULL; /* no escape character */
else if (elen != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("invalid escape string"),
errhint("Escape string must be empty or one character.")));
} else if (elen > 1) {
int escape_mblen = pg_mbstrlen_with_len(e, elen);
if (escape_mblen > 1) {
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("invalid escape string"),
errhint("Escape string must be empty or one character.")));
}
}
}
/* ----------
@@ -803,8 +807,10 @@ Datum similar_escape(PG_FUNCTION_ARGS)
* We need room for the prefix/postfix plus as many as 3 output bytes per
* input byte; since the input is at most 1GB this can't overflow
*/
result = (text*)palloc(VARHDRSZ + 6 + 3 * plen);
const int dataBuffSize = 6 + 3 * plen;
result = (text*)palloc(VARHDRSZ + dataBuffSize);
r = VARDATA(result);
const char* dataStartPtr = r;
*r++ = '^';
*r++ = '(';
@@ -814,6 +820,50 @@ Datum similar_escape(PG_FUNCTION_ARGS)
while (plen > 0) {
char pchar = *p;
/*
* If both the escape character and the current character from the
* pattern are multi-byte, we need to take the slow path.
*
* But if one of them is single-byte, we can process the pattern one
* byte at a time, ignoring multi-byte characters. (This works
* because all server-encodings have the property that a valid
* multi-byte character representation cannot contain the
* representation of a valid single-byte character.)
*/
if (elen > 1) {
int mblen = pg_mblen(p);
if (mblen > 1) {
/* slow, multi-byte path */
if (afterescape) {
*r++ = '\\';
int destMax = dataBuffSize - (r - dataStartPtr) / sizeof(char);
errno_t rc = memcpy_s(r, destMax, p, mblen);
securec_check(rc, "\0", "\0");
r += mblen;
afterescape = false;
} else if (e && elen == mblen && memcmp(e, p, mblen) == 0) {
/* SQL99 escape character; do not send to output */
afterescape = true;
} else {
/*
* We know it's a multi-byte character, so we don't need
* to do all the comparisons to single-byte characters
* that we do below.
*/
int destMax = dataBuffSize - (r - dataStartPtr) / sizeof(char);
errno_t rc = memcpy_s(r, destMax, p, mblen);
securec_check(rc, "\0", "\0");
r += mblen;
}
p += mblen;
plen -= mblen;
continue;
}
}
/* fast path */
if (afterescape) {
if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */
*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';

View File

@@ -278,3 +278,34 @@ select 'xyz' ~ '((.)){0}(\2){0}' as t;
t
(1 row)
-- test similar with regex
SELECT 'abc' SIMILAR TO '我%(b|d)%' escape '我' AS RESULT;
result
--------
f
(1 row)
SELECT '%abc' SIMILAR TO '我%abc' escape '我' AS RESULT;
result
--------
t
(1 row)
SELECT 'abc' SIMILAR TO '你%(b|d)%' escape '你' AS RESULT;
result
--------
f
(1 row)
SELECT '%abc' SIMILAR TO '你%abc' escape '你' AS RESULT;
result
--------
t
(1 row)
SELECT '%abc' SIMILAR TO '\%abc' escape '\' AS RESULT;
result
--------
t
(1 row)

View File

@@ -70,3 +70,10 @@ select 'a' ~ '()+\1';
-- test {0}
select 'xyz' ~ '((.)){0}(\2){0}' as t;
-- test similar with regex
SELECT 'abc' SIMILAR TO '我%(b|d)%' escape '我' AS RESULT;
SELECT '%abc' SIMILAR TO '我%abc' escape '我' AS RESULT;
SELECT 'abc' SIMILAR TO '你%(b|d)%' escape '你' AS RESULT;
SELECT '%abc' SIMILAR TO '你%abc' escape '你' AS RESULT;
SELECT '%abc' SIMILAR TO '\%abc' escape '\' AS RESULT;