501 lines
14 KiB
C++
501 lines
14 KiB
C++
/* -------------------------------------------------------------------------
|
|
*
|
|
* like.c
|
|
* like expression handling code.
|
|
*
|
|
* NOTES
|
|
* A big hack of the regexp.c code!! Contributed by
|
|
* Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
|
|
*
|
|
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* IDENTIFICATION
|
|
* src/backend/utils/adt/like.c
|
|
*
|
|
* -------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
#include "knl/knl_variable.h"
|
|
|
|
#include <ctype.h>
|
|
|
|
#include "catalog/pg_collation.h"
|
|
#include "mb/pg_wchar.h"
|
|
#include "miscadmin.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/pg_locale.h"
|
|
|
|
static int SB_MatchText(char* t, int tlen, char* p, int plen, pg_locale_t locale, bool locale_is_c);
|
|
static text* SB_do_like_escape(text*, text*);
|
|
|
|
static int MB_MatchText(char* t, int tlen, char* p, int plen, pg_locale_t locale, bool locale_is_c);
|
|
static text* MB_do_like_escape(text*, text*);
|
|
|
|
static int UTF8_MatchText(char* t, int tlen, char* p, int plen, pg_locale_t locale, bool locale_is_c);
|
|
|
|
static int SB_IMatchText(char* t, int tlen, char* p, int plen, pg_locale_t locale, bool locale_is_c);
|
|
|
|
static int Generic_Text_IC_like(text* str, text* pat, Oid collation);
|
|
|
|
/* --------------------
|
|
* Support routine for MatchText. Compares given multibyte streams
|
|
* as wide characters. If they match, returns 1 otherwise returns 0.
|
|
* --------------------
|
|
*/
|
|
static inline int wchareq(const char* p1, const char* p2)
|
|
{
|
|
int p1_len;
|
|
|
|
/* Optimization: quickly compare the first byte. */
|
|
if (*p1 != *p2)
|
|
return 0;
|
|
|
|
p1_len = pg_mblen(p1);
|
|
if (pg_mblen(p2) != p1_len)
|
|
return 0;
|
|
|
|
/* They are the same length */
|
|
while (p1_len--) {
|
|
if (*p1++ != *p2++)
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Formerly we had a routine iwchareq() here that tried to do case-insensitive
|
|
* comparison of multibyte characters. It did not work at all, however,
|
|
* because it relied on tolower() which has a single-byte API ... and
|
|
* towlower() wouldn't be much better since we have no suitably cheap way
|
|
* of getting a single character transformed to the system's wchar_t format.
|
|
* So now, we just downcase the strings using lower() and apply regular LIKE
|
|
* comparison. This should be revisited when we install better locale support.
|
|
*/
|
|
|
|
/*
|
|
* We do handle case-insensitive matching for single-byte encodings using
|
|
* fold-on-the-fly processing, however.
|
|
*/
|
|
static char SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
|
|
{
|
|
if (locale_is_c)
|
|
return pg_ascii_tolower(c);
|
|
#ifdef HAVE_LOCALE_T
|
|
else if (locale)
|
|
return tolower_l(c, locale);
|
|
#endif
|
|
else
|
|
return pg_tolower(c);
|
|
}
|
|
|
|
/* Advance a (pointer, remaining-length) pair by exactly one byte. */
#define NextByte(p, plen) ((p)++, (plen)--)

/*
 * Instantiate like_match.cpp for generic multibyte encodings.
 *
 *   CHAREQ       compares whole multibyte characters via wchareq().
 *   NextChar     steps over one character, using pg_mblen() for its length.
 *   CopyAdvChar  copies one character from src to dst, advancing both
 *                pointers and decrementing srclen by the character length.
 *
 * The include produces MB_MatchText and MB_do_like_escape.
 */
#define CHAREQ(p1, p2) wchareq((p1), (p2))
#define NextChar(p, plen)            \
    do {                             \
        int nc_len_ = pg_mblen(p);   \
        (p) += nc_len_;              \
        (plen) -= nc_len_;           \
    } while (0)
#define CopyAdvChar(dst, src, srclen) \
    do {                              \
        int cac_len_ = pg_mblen(src); \
        (srclen) -= cac_len_;         \
        while (cac_len_-- > 0)        \
            *(dst)++ = *(src)++;      \
    } while (0)

#define MatchText MB_MatchText
#define do_like_escape MB_do_like_escape

#include "like_match.cpp"
|
|
|
|
/*
 * Instantiate like_match.cpp for single-byte encodings: characters are
 * compared and stepped one byte at a time.  The include produces
 * SB_MatchText and SB_do_like_escape.
 *
 * NOTE(review): CHAREQ, NextChar, CopyAdvChar, MatchText and do_like_escape
 * are defined again here without an #undef, so like_match.cpp presumably
 * undefines them at its end -- confirm.
 */
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define NextChar(p, plen) NextByte((p), (plen))
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)

#define MatchText SB_MatchText
#define do_like_escape SB_do_like_escape

#include "like_match.cpp"
|
|
|
|
/*
 * Instantiate like_match.cpp for case-insensitive matching in single-byte
 * encodings.  MATCH_LOWER folds a byte with SB_lower_char(), using the
 * `locale` and `locale_is_c` names in scope at the point of expansion, and
 * CHAREQ compares the folded bytes.  The include produces SB_IMatchText.
 *
 * No CopyAdvChar / do_like_escape is defined for this instantiation.
 * NOTE(review): presumably like_match.cpp emits the escape routine only when
 * do_like_escape is defined -- confirm.
 */
#define MATCH_LOWER(t) SB_lower_char((unsigned char)(t), locale, locale_is_c)
#define CHAREQ(p1, p2) (MATCH_LOWER(*(p1)) == MATCH_LOWER(*(p2)))
#define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText

#include "like_match.cpp"
|
|
|
|
/*
 * Instantiate like_match.cpp for UTF-8 with a fast NextChar: step one byte,
 * then keep stepping while bytes remain and the current byte has the bit
 * pattern 10xxxxxx (masked with 0xC0 it equals 0x80).  This avoids calling
 * pg_mblen() for every character.  The include produces UTF8_MatchText.
 */
#define NextChar(p, plen) \
    do {                  \
        (p)++;            \
        (plen)--;         \
    } while ((plen) > 0 && (*(p) & 0xC0) == 0x80)
#define CHAREQ(p1, p2) wchareq((p1), (p2))
#define MatchText UTF8_MatchText

#include "like_match.cpp"
|
|
|
|
/* Generic for all cases not requiring inline case-folding */
|
|
int GenericMatchText(char* s, int slen, char* p, int plen)
|
|
{
|
|
if (pg_database_encoding_max_length() == 1)
|
|
return SB_MatchText(s, slen, p, plen, 0, true);
|
|
else if (GetDatabaseEncoding() == PG_UTF8)
|
|
return UTF8_MatchText(s, slen, p, plen, 0, true);
|
|
else
|
|
return MB_MatchText(s, slen, p, plen, 0, true);
|
|
}
|
|
|
|
/*
 * Generic_Text_IC_like
 *    Case-insensitive LIKE match of str against pat under `collation`.
 *
 * Multibyte encodings (pg_database_encoding_max_length() > 1): both values
 * are passed through lower() and then matched case-sensitively with
 * UTF8_MatchText or MB_MatchText -- there is not much choice there.
 *
 * Single-byte encodings: for efficiency the values are not lower-cased up
 * front; SB_IMatchText folds each character through SB_lower_char instead.
 *
 * Returns the selected matcher's result.  On the single-byte path, reports
 * ERRCODE_INDETERMINATE_COLLATION when the collation is neither C-ctype nor
 * the default collation and its OID is invalid.
 */
static inline int Generic_Text_IC_like(text* str, text* pat, Oid collation)
{
    if (pg_database_encoding_max_length() > 1) {
        /* lower()'s result is never packed, so the plain VARDATA/VARSIZE macros are OK here. */
        text* lowered_pat = DatumGetTextP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(pat)));
        char* pat_data = VARDATA(lowered_pat);
        int pat_len = (VARSIZE(lowered_pat) - VARHDRSZ);

        text* lowered_str = DatumGetTextP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(str)));
        char* str_data = VARDATA(lowered_str);
        int str_len = (VARSIZE(lowered_str) - VARHDRSZ);

        if (GetDatabaseEncoding() == PG_UTF8) {
            return UTF8_MatchText(str_data, str_len, pat_data, pat_len, 0, true);
        }
        return MB_MatchText(str_data, str_len, pat_data, pat_len, 0, true);
    }

    /*
     * Single-byte encoding: prepare the locale information SB_lower_char
     * needs.  This should match the methods used in str_tolower().
     */
    pg_locale_t fold_locale = 0;
    bool fold_is_c = false;

    if (lc_ctype_is_c(collation)) {
        fold_is_c = true;
    } else if (collation != DEFAULT_COLLATION_OID) {
        if (!OidIsValid(collation)) {
            /*
             * This typically means that the parser could not resolve a
             * conflict of implicit collations, so report it that way.
             */
            ereport(ERROR,
                (errcode(ERRCODE_INDETERMINATE_COLLATION),
                    errmsg("could not determine which collation to use for ILIKE"),
                    errhint("Use the COLLATE clause to set the collation explicitly.")));
        }
        fold_locale = pg_newlocale_from_collation(collation);
    }

    char* pat_bytes = VARDATA_ANY(pat);
    int pat_nbytes = VARSIZE_ANY_EXHDR(pat);
    char* str_bytes = VARDATA_ANY(str);
    int str_nbytes = VARSIZE_ANY_EXHDR(str);

    return SB_IMatchText(str_bytes, str_nbytes, pat_bytes, pat_nbytes, fold_locale, fold_is_c);
}
|
|
|
|
/*
|
|
* interface routines called by the function manager
|
|
*/
|
|
|
|
/*
 * namelike
 *    fmgr entry point: name LIKE text.
 *
 * Argument 0 is the Name to test and argument 1 the pattern.  Returns true
 * exactly when GenericMatchText reports LIKE_TRUE.
 */
Datum namelike(PG_FUNCTION_ARGS)
{
    Name name_arg = PG_GETARG_NAME(0);
    text* pattern = PG_GETARG_TEXT_PP(1);

    /* A Name is handled as a C string, so its length comes from strlen(). */
    char* subject_data = NameStr(*name_arg);
    int subject_len = strlen(subject_data);
    char* pattern_data = VARDATA_ANY(pattern);
    int pattern_len = VARSIZE_ANY_EXHDR(pattern);

    bool matched = (GenericMatchText(subject_data, subject_len, pattern_data, pattern_len) == LIKE_TRUE);

    PG_RETURN_BOOL(matched);
}
|
|
|
|
/*
 * namenlike
 *    fmgr entry point: name NOT LIKE text.
 *
 * Argument 0 is the Name to test and argument 1 the pattern.  Returns true
 * for any GenericMatchText result other than LIKE_TRUE.
 */
Datum namenlike(PG_FUNCTION_ARGS)
{
    Name name_arg = PG_GETARG_NAME(0);
    text* pattern = PG_GETARG_TEXT_PP(1);

    /* A Name is handled as a C string, so its length comes from strlen(). */
    char* subject_data = NameStr(*name_arg);
    int subject_len = strlen(subject_data);
    char* pattern_data = VARDATA_ANY(pattern);
    int pattern_len = VARSIZE_ANY_EXHDR(pattern);

    bool not_matched = (GenericMatchText(subject_data, subject_len, pattern_data, pattern_len) != LIKE_TRUE);

    PG_RETURN_BOOL(not_matched);
}
|
|
|
|
/*
 * textlike
 *    fmgr entry point: text LIKE text.
 *
 * Argument 0 is the value to test and argument 1 the pattern.  Returns true
 * exactly when GenericMatchText reports LIKE_TRUE.
 */
Datum textlike(PG_FUNCTION_ARGS)
{
    text* subject = PG_GETARG_TEXT_PP(0);
    text* pattern = PG_GETARG_TEXT_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, subject, "textlike()");

    char* subject_data = VARDATA_ANY(subject);
    int subject_len = VARSIZE_ANY_EXHDR(subject);
    char* pattern_data = VARDATA_ANY(pattern);
    int pattern_len = VARSIZE_ANY_EXHDR(pattern);

    bool matched = (GenericMatchText(subject_data, subject_len, pattern_data, pattern_len) == LIKE_TRUE);

    PG_RETURN_BOOL(matched);
}
|
|
|
|
/*
 * textnlike
 *    fmgr entry point: text NOT LIKE text.
 *
 * Argument 0 is the value to test and argument 1 the pattern.  Returns true
 * for any GenericMatchText result other than LIKE_TRUE.
 */
Datum textnlike(PG_FUNCTION_ARGS)
{
    text* subject = PG_GETARG_TEXT_PP(0);
    text* pattern = PG_GETARG_TEXT_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, subject, "textnlike()");

    char* subject_data = VARDATA_ANY(subject);
    int subject_len = VARSIZE_ANY_EXHDR(subject);
    char* pattern_data = VARDATA_ANY(pattern);
    int pattern_len = VARSIZE_ANY_EXHDR(pattern);

    bool not_matched = (GenericMatchText(subject_data, subject_len, pattern_data, pattern_len) != LIKE_TRUE);

    PG_RETURN_BOOL(not_matched);
}
|
|
|
|
/*
 * bytealike
 *    fmgr entry point: bytea LIKE bytea.
 *
 * Argument 0 is the value to test and argument 1 the pattern.  Returns true
 * exactly when the match result is LIKE_TRUE.
 *
 * A bytea holds arbitrary bytes, not text in the database encoding, so the
 * match is always done byte-wise with SB_MatchText, in line with rawlike()
 * and like_escape_bytea().  Going through GenericMatchText would apply the
 * multibyte matchers in multibyte databases, where pg_mblen() on arbitrary
 * bytes can step or compare past the end of the value.
 */
Datum bytealike(PG_FUNCTION_ARGS)
{
    bytea* str = PG_GETARG_BYTEA_PP(0);
    bytea* pat = PG_GETARG_BYTEA_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, str, "bytealike()");

    char* s = VARDATA_ANY(str);
    int slen = VARSIZE_ANY_EXHDR(str);
    char* p = VARDATA_ANY(pat);
    int plen = VARSIZE_ANY_EXHDR(pat);

    bool result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);

    PG_RETURN_BOOL(result);
}
|
|
|
|
/*
 * byteanlike
 *    fmgr entry point: bytea NOT LIKE bytea.
 *
 * Argument 0 is the value to test and argument 1 the pattern.  Returns true
 * for any match result other than LIKE_TRUE.
 *
 * A bytea holds arbitrary bytes, not text in the database encoding, so the
 * match is always done byte-wise with SB_MatchText, in line with rawnlike()
 * and like_escape_bytea().  Going through GenericMatchText would apply the
 * multibyte matchers in multibyte databases, where pg_mblen() on arbitrary
 * bytes can step or compare past the end of the value.
 */
Datum byteanlike(PG_FUNCTION_ARGS)
{
    bytea* str = PG_GETARG_BYTEA_PP(0);
    bytea* pat = PG_GETARG_BYTEA_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, str, "byteanlike()");

    char* s = VARDATA_ANY(str);
    int slen = VARSIZE_ANY_EXHDR(str);
    char* p = VARDATA_ANY(pat);
    int plen = VARSIZE_ANY_EXHDR(pat);

    bool result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);

    PG_RETURN_BOOL(result);
}
|
|
|
|
/*
 * byteawithoutorderwithequalcollike
 *    fmgr entry point: LIKE for the byteawithoutorderwithequalcol type,
 *    whose arguments are fetched here as bytea.
 *
 * Argument 0 is the value to test and argument 1 the pattern.  Returns true
 * exactly when the match result is LIKE_TRUE.
 *
 * The values are treated as raw bytes, so the match is always done byte-wise
 * with SB_MatchText, in line with rawlike(); the multibyte matchers reached
 * through GenericMatchText are not safe on arbitrary bytes.  The name handed
 * to FUNC_CHECK_HUGE_POINTER now identifies this function instead of the
 * copy-pasted "byteanlike()".
 */
Datum
byteawithoutorderwithequalcollike(PG_FUNCTION_ARGS)
{
    bytea *str = PG_GETARG_BYTEA_PP(0);
    bytea *pat = PG_GETARG_BYTEA_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, str, "byteawithoutorderwithequalcollike()");

    char *s = VARDATA_ANY(str);
    int slen = VARSIZE_ANY_EXHDR(str);
    char *p = VARDATA_ANY(pat);
    int plen = VARSIZE_ANY_EXHDR(pat);

    bool result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);

    PG_RETURN_BOOL(result);
}
|
|
|
|
/*
 * byteawithoutorderwithequalcolnlike
 *    fmgr entry point: NOT LIKE for the byteawithoutorderwithequalcol type,
 *    whose arguments are fetched here as bytea.
 *
 * Argument 0 is the value to test and argument 1 the pattern.  Returns true
 * for any match result other than LIKE_TRUE.
 *
 * The values are treated as raw bytes, so the match is always done byte-wise
 * with SB_MatchText, in line with rawnlike(); the multibyte matchers reached
 * through GenericMatchText are not safe on arbitrary bytes.  The name handed
 * to FUNC_CHECK_HUGE_POINTER now identifies this function instead of the
 * copy-pasted "byteanlike()".
 */
Datum
byteawithoutorderwithequalcolnlike(PG_FUNCTION_ARGS)
{
    bytea *str = PG_GETARG_BYTEA_PP(0);
    bytea *pat = PG_GETARG_BYTEA_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, str, "byteawithoutorderwithequalcolnlike()");

    char *s = VARDATA_ANY(str);
    int slen = VARSIZE_ANY_EXHDR(str);
    char *p = VARDATA_ANY(pat);
    int plen = VARSIZE_ANY_EXHDR(pat);

    bool result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);

    PG_RETURN_BOOL(result);
}
|
|
|
|
/*
 * rawlike
 *    fmgr entry point: LIKE for raw values, whose arguments are fetched
 *    here as bytea.
 *
 * Argument 0 is the value to test and argument 1 the pattern.  The match is
 * always byte-wise (SB_MatchText), whatever the database encoding.  Returns
 * true exactly when the result is LIKE_TRUE.
 */
Datum rawlike(PG_FUNCTION_ARGS)
{
    bytea* subject = PG_GETARG_BYTEA_PP(0);
    bytea* pattern = PG_GETARG_BYTEA_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, subject, "rawlike()");

    char* subject_data = VARDATA_ANY(subject);
    int subject_len = VARSIZE_ANY_EXHDR(subject);
    char* pattern_data = VARDATA_ANY(pattern);
    int pattern_len = VARSIZE_ANY_EXHDR(pattern);

    bool matched = (SB_MatchText(subject_data, subject_len, pattern_data, pattern_len, 0, true) == LIKE_TRUE);

    PG_RETURN_BOOL(matched);
}
|
|
|
|
/*
 * rawnlike
 *    fmgr entry point: NOT LIKE for raw values, whose arguments are fetched
 *    here as bytea.
 *
 * Argument 0 is the value to test and argument 1 the pattern.  The match is
 * always byte-wise (SB_MatchText), whatever the database encoding.  Returns
 * true for any result other than LIKE_TRUE.
 */
Datum rawnlike(PG_FUNCTION_ARGS)
{
    bytea* subject = PG_GETARG_BYTEA_PP(0);
    bytea* pattern = PG_GETARG_BYTEA_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, subject, "rawnlike()");

    char* subject_data = VARDATA_ANY(subject);
    int subject_len = VARSIZE_ANY_EXHDR(subject);
    char* pattern_data = VARDATA_ANY(pattern);
    int pattern_len = VARSIZE_ANY_EXHDR(pattern);

    bool not_matched = (SB_MatchText(subject_data, subject_len, pattern_data, pattern_len, 0, true) != LIKE_TRUE);

    PG_RETURN_BOOL(not_matched);
}
|
|
|
|
//
|
|
// Case-insensitive versions
|
|
//
|
|
/*
 * nameiclike
 *    fmgr entry point: name ILIKE text.
 *
 * Argument 0 (a Name) is converted to text with name_text and then matched
 * against argument 1 by Generic_Text_IC_like under the call's collation.
 * Returns true exactly when the result is LIKE_TRUE.
 */
Datum nameiclike(PG_FUNCTION_ARGS)
{
    Name name_arg = PG_GETARG_NAME(0);
    text* pattern = PG_GETARG_TEXT_PP(1);
    text* name_as_text = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(name_arg)));

    bool matched = (Generic_Text_IC_like(name_as_text, pattern, PG_GET_COLLATION()) == LIKE_TRUE);

    PG_RETURN_BOOL(matched);
}
|
|
|
|
/*
 * nameicnlike
 *    fmgr entry point: name NOT ILIKE text.
 *
 * Argument 0 (a Name) is converted to text with name_text and then matched
 * against argument 1 by Generic_Text_IC_like under the call's collation.
 * Returns true for any result other than LIKE_TRUE.
 */
Datum nameicnlike(PG_FUNCTION_ARGS)
{
    Name name_arg = PG_GETARG_NAME(0);
    text* pattern = PG_GETARG_TEXT_PP(1);
    text* name_as_text = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(name_arg)));

    bool not_matched = (Generic_Text_IC_like(name_as_text, pattern, PG_GET_COLLATION()) != LIKE_TRUE);

    PG_RETURN_BOOL(not_matched);
}
|
|
|
|
/*
 * texticlike
 *    fmgr entry point: text ILIKE text.
 *
 * Argument 0 is the value to test and argument 1 the pattern; matching is
 * delegated to Generic_Text_IC_like under the call's collation.  Returns
 * true exactly when the result is LIKE_TRUE.
 *
 * The name handed to FUNC_CHECK_HUGE_POINTER was misspelled "textclike()";
 * it now matches this function's name, as it does in texticnlike().
 */
Datum texticlike(PG_FUNCTION_ARGS)
{
    text* str = PG_GETARG_TEXT_PP(0);
    text* pat = PG_GETARG_TEXT_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, str, "texticlike()");

    bool result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);

    PG_RETURN_BOOL(result);
}
|
|
|
|
/*
 * texticnlike
 *    fmgr entry point: text NOT ILIKE text.
 *
 * Argument 0 is the value to test and argument 1 the pattern; matching is
 * delegated to Generic_Text_IC_like under the call's collation.  Returns
 * true for any result other than LIKE_TRUE.
 */
Datum texticnlike(PG_FUNCTION_ARGS)
{
    text* subject = PG_GETARG_TEXT_PP(0);
    text* pattern = PG_GETARG_TEXT_PP(1);
    /* NOTE(review): macro defined outside this file; the string presumably names this function in its report -- confirm. */
    FUNC_CHECK_HUGE_POINTER(false, subject, "texticnlike()");

    bool not_matched = (Generic_Text_IC_like(subject, pattern, PG_GET_COLLATION()) != LIKE_TRUE);

    PG_RETURN_BOOL(not_matched);
}
|
|
|
|
/*
|
|
* like_escape() --- given a pattern and an ESCAPE string,
|
|
* convert the pattern to use Postgres' standard backslash escape convention.
|
|
*/
|
|
Datum like_escape(PG_FUNCTION_ARGS)
|
|
{
|
|
text* pat = PG_GETARG_TEXT_PP(0);
|
|
text* esc = PG_GETARG_TEXT_PP(1);
|
|
FUNC_CHECK_HUGE_POINTER(false, pat, "like_escape()");
|
|
|
|
text* result = NULL;
|
|
|
|
if (pg_database_encoding_max_length() == 1)
|
|
result = SB_do_like_escape(pat, esc);
|
|
else
|
|
result = MB_do_like_escape(pat, esc);
|
|
|
|
PG_RETURN_TEXT_P(result);
|
|
}
|
|
|
|
/*
|
|
* like_escape_bytea() --- given a pattern and an ESCAPE string,
|
|
* convert the pattern to use Postgres' standard backslash escape convention.
|
|
*/
|
|
Datum like_escape_bytea(PG_FUNCTION_ARGS)
|
|
{
|
|
bytea* pat = PG_GETARG_BYTEA_PP(0);
|
|
bytea* esc = PG_GETARG_BYTEA_PP(1);
|
|
FUNC_CHECK_HUGE_POINTER(false, pat, "like_escape_bytea()");
|
|
|
|
bytea* result = SB_do_like_escape((text*)pat, (text*)esc);
|
|
|
|
PG_RETURN_BYTEA_P((bytea*)result);
|
|
}
|