/* ------------------------------------------------------------------------- * * like.c * like expression handling code. * * NOTES * A big hack of the regexp.c code!! Contributed by * Keith Parks (7/95). * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION * src/backend/utils/adt/like.c * * ------------------------------------------------------------------------- */ #include "postgres.h" #include "knl/knl_variable.h" #include #include "catalog/pg_collation.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/pg_locale.h" static int SB_MatchText(char* t, int tlen, char* p, int plen, pg_locale_t locale, bool locale_is_c); static text* SB_do_like_escape(text*, text*); static int MB_MatchText(char* t, int tlen, char* p, int plen, pg_locale_t locale, bool locale_is_c); static text* MB_do_like_escape(text*, text*); static int UTF8_MatchText(char* t, int tlen, char* p, int plen, pg_locale_t locale, bool locale_is_c); static int SB_IMatchText(char* t, int tlen, char* p, int plen, pg_locale_t locale, bool locale_is_c); static int Generic_Text_IC_like(text* str, text* pat, Oid collation); /* -------------------- * Support routine for MatchText. Compares given multibyte streams * as wide characters. If they match, returns 1 otherwise returns 0. * -------------------- */ static inline int wchareq(const char* p1, const char* p2) { int p1_len; /* Optimization: quickly compare the first byte. */ if (*p1 != *p2) return 0; p1_len = pg_mblen(p1); if (pg_mblen(p2) != p1_len) return 0; /* They are the same length */ while (p1_len--) { if (*p1++ != *p2++) return 0; } return 1; } /* * Formerly we had a routine iwchareq() here that tried to do case-insensitive * comparison of multibyte characters. It did not work at all, however, * because it relied on tolower() which has a single-byte API ... and * towlower() wouldn't be much better since we have no suitably cheap way * of getting a single character transformed to the system's wchar_t format. * So now, we just downcase the strings using lower() and apply regular LIKE * comparison. This should be revisited when we install better locale support. */ /* * We do handle case-insensitive matching for single-byte encodings using * fold-on-the-fly processing, however. */ static char SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) { if (locale_is_c) return pg_ascii_tolower(c); #ifdef HAVE_LOCALE_T else if (locale) return tolower_l(c, locale); #endif else return pg_tolower(c); } #define NextByte(p, plen) ((p)++, (plen)--) /* Set up to compile like_match.c for multibyte characters */ #define CHAREQ(p1, p2) wchareq((p1), (p2)) #define NextChar(p, plen) \ do { \ int __l = pg_mblen(p); \ (p) += __l; \ (plen) -= __l; \ } while (0) #define CopyAdvChar(dst, src, srclen) \ do { \ int __l = pg_mblen(src); \ (srclen) -= __l; \ while (__l-- > 0) \ *(dst)++ = *(src)++; \ } while (0) #define MatchText MB_MatchText #define do_like_escape MB_do_like_escape #include "like_match.cpp" /* Set up to compile like_match.c for single-byte characters */ #define CHAREQ(p1, p2) (*(p1) == *(p2)) #define NextChar(p, plen) NextByte((p), (plen)) #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) #define MatchText SB_MatchText #define do_like_escape SB_do_like_escape #include "like_match.cpp" /* setup to compile like_match.c for single byte case insensitive matches */ #define MATCH_LOWER(t) SB_lower_char((unsigned char)(t), locale, locale_is_c) #define CHAREQ(p1, p2) (MATCH_LOWER(*(p1)) == MATCH_LOWER(*(p2))) #define NextChar(p, plen) NextByte((p), (plen)) #define MatchText SB_IMatchText #include "like_match.cpp" /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */ #define NextChar(p, plen) \ do { \ (p)++; \ (plen)--; \ } while ((plen) > 0 && (*(p)&0xC0) == 0x80) #define CHAREQ(p1, p2) wchareq((p1), (p2)) #define MatchText UTF8_MatchText #include "like_match.cpp" /* Generic for all cases not requiring inline case-folding */ int GenericMatchText(char* s, int slen, char* p, int plen) { if (pg_database_encoding_max_length() == 1) return SB_MatchText(s, slen, p, plen, 0, true); else if (GetDatabaseEncoding() == PG_UTF8) return UTF8_MatchText(s, slen, p, plen, 0, true); else return MB_MatchText(s, slen, p, plen, 0, true); } static inline int Generic_Text_IC_like(text* str, text* pat, Oid collation) { char *s = NULL, *p = NULL; int slen, plen; /* * For efficiency reasons, in the single byte case we don't call lower() * on the pattern and text, but instead call SB_lower_char on each * character. In the multi-byte case we don't have much choice :-( */ if (pg_database_encoding_max_length() > 1) { /* lower's result is never packed, so OK to use old macros here */ pat = DatumGetTextP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(pat))); p = VARDATA(pat); plen = (VARSIZE(pat) - VARHDRSZ); str = DatumGetTextP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(str))); s = VARDATA(str); slen = (VARSIZE(str) - VARHDRSZ); if (GetDatabaseEncoding() == PG_UTF8) return UTF8_MatchText(s, slen, p, plen, 0, true); else return MB_MatchText(s, slen, p, plen, 0, true); } else { /* * Here we need to prepare locale information for SB_lower_char. This * should match the methods used in str_tolower(). */ pg_locale_t locale = 0; bool locale_is_c = false; if (lc_ctype_is_c(collation)) locale_is_c = true; else if (collation != DEFAULT_COLLATION_OID) { if (!OidIsValid(collation)) { /* * This typically means that the parser could not resolve a * conflict of implicit collations, so report it that way. */ ereport(ERROR, (errcode(ERRCODE_INDETERMINATE_COLLATION), errmsg("could not determine which collation to use for ILIKE"), errhint("Use the COLLATE clause to set the collation explicitly."))); } locale = pg_newlocale_from_collation(collation); } p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); return SB_IMatchText(s, slen, p, plen, locale, locale_is_c); } } /* * interface routines called by the function manager */ Datum namelike(PG_FUNCTION_ARGS) { Name str = PG_GETARG_NAME(0); text* pat = PG_GETARG_TEXT_PP(1); bool result = false; char *s, *p; int slen, plen; s = NameStr(*str); slen = strlen(s); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum namenlike(PG_FUNCTION_ARGS) { Name str = PG_GETARG_NAME(0); text* pat = PG_GETARG_TEXT_PP(1); bool result = false; char *s, *p; int slen, plen; s = NameStr(*str); slen = strlen(s); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } Datum textlike(PG_FUNCTION_ARGS) { text* str = PG_GETARG_TEXT_PP(0); text* pat = PG_GETARG_TEXT_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "textlike()"); bool result = false; char *s, *p; int slen, plen; s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum textnlike(PG_FUNCTION_ARGS) { text* str = PG_GETARG_TEXT_PP(0); text* pat = PG_GETARG_TEXT_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "textnlike()"); bool result = false; char *s, *p; int slen, plen; s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } Datum bytealike(PG_FUNCTION_ARGS) { bytea* str = PG_GETARG_BYTEA_PP(0); bytea* pat = PG_GETARG_BYTEA_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "bytealike()"); bool result = false; char *s, *p; int slen, plen; s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum byteanlike(PG_FUNCTION_ARGS) { bytea* str = PG_GETARG_BYTEA_PP(0); bytea* pat = PG_GETARG_BYTEA_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "byteanlike()"); bool result = false; char *s, *p; int slen, plen; s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } Datum byteawithoutorderwithequalcollike(PG_FUNCTION_ARGS) { bytea *str = PG_GETARG_BYTEA_PP(0); bytea *pat = PG_GETARG_BYTEA_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "byteanlike()"); bool result = false; char *s, *p; int slen, plen; s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum byteawithoutorderwithequalcolnlike(PG_FUNCTION_ARGS) { bytea *str = PG_GETARG_BYTEA_PP(0); bytea *pat = PG_GETARG_BYTEA_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "byteanlike()"); bool result = false; char *s, *p; int slen, plen; s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE); PG_RETURN_BOOL(result); } Datum rawlike(PG_FUNCTION_ARGS) { bytea* str = PG_GETARG_BYTEA_PP(0); bytea* pat = PG_GETARG_BYTEA_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "rawlike()"); bool result = false; char *s, *p; int slen, plen; s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE; PG_RETURN_BOOL(result); } Datum rawnlike(PG_FUNCTION_ARGS) { bytea* str = PG_GETARG_BYTEA_PP(0); bytea* pat = PG_GETARG_BYTEA_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "rawnlike()"); bool result = false; char *s, *p; int slen, plen; s = VARDATA_ANY(str); slen = VARSIZE_ANY_EXHDR(str); p = VARDATA_ANY(pat); plen = VARSIZE_ANY_EXHDR(pat); result = SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE; PG_RETURN_BOOL(result); } // // Case-insensitive versions // Datum nameiclike(PG_FUNCTION_ARGS) { Name str = PG_GETARG_NAME(0); text* pat = PG_GETARG_TEXT_PP(1); bool result = false; text* strtext = NULL; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum nameicnlike(PG_FUNCTION_ARGS) { Name str = PG_GETARG_NAME(0); text* pat = PG_GETARG_TEXT_PP(1); bool result = false; text* strtext = NULL; strtext = DatumGetTextP(DirectFunctionCall1(name_text, NameGetDatum(str))); result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } Datum texticlike(PG_FUNCTION_ARGS) { text* str = PG_GETARG_TEXT_PP(0); text* pat = PG_GETARG_TEXT_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "textclike()"); bool result = false; result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE); PG_RETURN_BOOL(result); } Datum texticnlike(PG_FUNCTION_ARGS) { text* str = PG_GETARG_TEXT_PP(0); text* pat = PG_GETARG_TEXT_PP(1); FUNC_CHECK_HUGE_POINTER(false, str, "texticnlike()"); bool result = false; result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE); PG_RETURN_BOOL(result); } /* * like_escape() --- given a pattern and an ESCAPE string, * convert the pattern to use Postgres' standard backslash escape convention. */ Datum like_escape(PG_FUNCTION_ARGS) { text* pat = PG_GETARG_TEXT_PP(0); text* esc = PG_GETARG_TEXT_PP(1); FUNC_CHECK_HUGE_POINTER(false, pat, "like_escape()"); text* result = NULL; if (pg_database_encoding_max_length() == 1) result = SB_do_like_escape(pat, esc); else result = MB_do_like_escape(pat, esc); PG_RETURN_TEXT_P(result); } /* * like_escape_bytea() --- given a pattern and an ESCAPE string, * convert the pattern to use Postgres' standard backslash escape convention. */ Datum like_escape_bytea(PG_FUNCTION_ARGS) { bytea* pat = PG_GETARG_BYTEA_PP(0); bytea* esc = PG_GETARG_BYTEA_PP(1); FUNC_CHECK_HUGE_POINTER(false, pat, "like_escape_bytea()"); bytea* result = SB_do_like_escape((text*)pat, (text*)esc); PG_RETURN_BYTEA_P((bytea*)result); }