fix like range bug
This commit is contained in:
parent
03b72f408e
commit
9f87f66ded
24
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
24
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
@ -16,6 +16,7 @@
|
||||
#include "lib/ob_define.h"
|
||||
#include "lib/worker.h"
|
||||
#include "common/ob_common_utility.h"
|
||||
#include "lib/charset/str_uca_type.h"
|
||||
|
||||
namespace oceanbase
|
||||
{
|
||||
@ -1061,7 +1062,8 @@ int ObCharset::like_range(ObCollationType collation_type,
|
||||
char *min_str,
|
||||
size_t *min_str_len,
|
||||
char *max_str,
|
||||
size_t *max_str_len)
|
||||
size_t *max_str_len,
|
||||
size_t *prefix_len /*= NULL*/)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_UNLIKELY(collation_type <= CS_TYPE_INVALID ||
|
||||
@ -1099,6 +1101,7 @@ int ObCharset::like_range(ObCollationType collation_type,
|
||||
// 上面的修改会引发这样的问题:'a\0' 会不在范围内,因为mysql的utf8特性使得'a\0' < 'a',所以范围不能这么修改
|
||||
// 具体的修正还是由存储层来做
|
||||
size_t res_size = *min_str_len < *max_str_len ? *min_str_len : *max_str_len;
|
||||
size_t pre_len = 0;
|
||||
if (OB_ISNULL(cs->coll)) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->coll));
|
||||
@ -1112,8 +1115,11 @@ int ObCharset::like_range(ObCollationType collation_type,
|
||||
min_str,
|
||||
max_str,
|
||||
min_str_len,
|
||||
max_str_len)) {
|
||||
max_str_len,
|
||||
&pre_len)) {
|
||||
ret = OB_EMPTY_RANGE;
|
||||
} else if (prefix_len != NULL) {
|
||||
*prefix_len = pre_len;
|
||||
} else {
|
||||
// *min_str_len = real_len;
|
||||
}
|
||||
@ -3611,6 +3617,20 @@ bool ObCharset::is_cs_unicode(ObCollationType collation_type)
|
||||
return is_cs_unicode;
|
||||
}
|
||||
|
||||
// Tells whether the given collation carries UCA data whose version is UCA_V900.
//
// @param collation_type  collation to inspect; must lie strictly between
//                        CS_TYPE_INVALID and CS_TYPE_MAX and have an entry in
//                        ObCharset::charset_arr.
// @return true only when the charset entry has a non-NULL `uca` whose
//         `version` equals UCA_V900; false otherwise, including for an
//         invalid or unregistered collation (a warning is logged in that case).
bool ObCharset::is_cs_uca(ObCollationType collation_type)
{
  bool result = false;
  // The range test must run first so that charset_arr is never indexed with an
  // out-of-range collation id.
  const bool type_in_range = (collation_type > CS_TYPE_INVALID) && (collation_type < CS_TYPE_MAX);
  if (type_in_range && NULL != ObCharset::charset_arr[collation_type]) {
    ObCharsetInfo *charset_info = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
    if (charset_info->uca != NULL) {
      result = (charset_info->uca->version == UCA_V900);
    }
  } else {
    LOG_WARN_RET(OB_INVALID_ARGUMENT, "unexpected error. invalid argument(s)", K(ret), K(collation_type), K(lbt()));
  }
  return result;
}
|
||||
|
||||
int ObCharset::get_replace_character(ObCollationType collation_type, int32_t &replaced_char_unicode)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
4
deps/oblib/src/lib/charset/ob_charset.h
vendored
4
deps/oblib/src/lib/charset/ob_charset.h
vendored
@ -513,7 +513,8 @@ public:
|
||||
char *min_str,
|
||||
size_t *min_str_len,
|
||||
char *max_str,
|
||||
size_t *max_str_len);
|
||||
size_t *max_str_len,
|
||||
size_t *prefix_len = NULL);
|
||||
static size_t strlen_char(ObCollationType collation_type,
|
||||
const char *str,
|
||||
int64_t str_len);
|
||||
@ -717,6 +718,7 @@ public:
|
||||
|
||||
static bool is_cs_nonascii(ObCollationType collation_type);
|
||||
static bool is_cs_unicode(ObCollationType collation_type);
|
||||
static bool is_cs_uca(ObCollationType collation_type);
|
||||
static int get_replace_character(ObCollationType collation_type, int32_t &replaced_char_unicode);
|
||||
static bool is_cjk_charset(ObCollationType collation_type);
|
||||
static bool is_valid_connection_collation(ObCollationType collation_type);
|
||||
|
24
deps/oblib/src/lib/charset/ob_ctype.h
vendored
24
deps/oblib/src/lib/charset/ob_ctype.h
vendored
@ -304,12 +304,14 @@ typedef struct ObCollationHandler
|
||||
//size_t (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);
|
||||
|
||||
// creates a LIKE range for the optimizer; used by the query range module
|
||||
// prefix_len should return the **byte** length before the first '%'
|
||||
bool (*like_range)(const struct ObCharsetInfo *,
|
||||
const char *s, size_t s_length,
|
||||
pchar w_prefix, pchar w_one, pchar w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_len, size_t *max_len);
|
||||
size_t *min_len, size_t *max_len,
|
||||
size_t *prefix_len);
|
||||
// wildcard comparison, for LIKE
|
||||
int (*wildcmp)(const struct ObCharsetInfo *,
|
||||
const char *str,const char *str_end,
|
||||
@ -409,8 +411,8 @@ struct ObCharsetInfo
|
||||
#define ob_strnxfrm(cs, d, dl, s, sl) \
|
||||
((cs)->coll->strnxfrm((cs), (d), (dl), (dl), (s), (sl), MY_STRXFRM_PAD_WITH_SPACE))
|
||||
#define ob_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
|
||||
#define ob_like_range(s, a, b, c, d, e, f, g, h, i, j) \
|
||||
((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j)))
|
||||
#define ob_like_range(s, a, b, c, d, e, f, g, h, i, j, k) \
|
||||
((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j), (k)))
|
||||
#define ob_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m)))
|
||||
#define ob_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b)))
|
||||
#define ob_charpos(cs, b, e, num) (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num))
|
||||
@ -551,11 +553,12 @@ size_t ob_scan_8bit(const ObCharsetInfo *cs, const char *b, const char *e,
|
||||
|
||||
/* For 8-bit character set */
|
||||
bool ob_like_range_simple(const ObCharsetInfo *cs,
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
const char *ptr, size_t ptr_length,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length,
|
||||
size_t *prefix_length);
|
||||
|
||||
bool ob_propagate_simple(const ObCharsetInfo *cs, const unsigned char *str,
|
||||
size_t len);
|
||||
@ -576,7 +579,8 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
|
||||
pbool escape, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length);
|
||||
size_t *min_length,size_t *max_length,
|
||||
size_t *prefix_length);
|
||||
|
||||
int ob_wildcmp_mb(const ObCharsetInfo *cs,
|
||||
const char *str,const char *str_end,
|
||||
@ -675,7 +679,7 @@ bool ob_like_range_generic(const ObCharsetInfo *cs, const char *ptr,
|
||||
size_t ptr_length, char escape, char w_one,
|
||||
char w_many, size_t res_length, char *min_str,
|
||||
char *max_str, size_t *min_length,
|
||||
size_t *max_length);
|
||||
size_t *max_length, size_t *prefix_length);
|
||||
|
||||
size_t ob_strnxfrm_unicode(const ObCharsetInfo *cs,
|
||||
unsigned char *dst, size_t dstlen, unsigned int nweights,
|
||||
|
15
deps/oblib/src/lib/charset/ob_ctype_mb.cc
vendored
15
deps/oblib/src/lib/charset/ob_ctype_mb.cc
vendored
@ -60,13 +60,15 @@ bool ob_like_range_mb_help(const ObCharsetInfo *cs,
|
||||
size_t res_length,
|
||||
char **min_str_,char **max_str_,
|
||||
char **min_org_, char **min_end_,
|
||||
size_t *min_length,size_t *max_length, char **max_end_)
|
||||
size_t *min_length,size_t *max_length,
|
||||
char **max_end_, size_t *prefix_length)
|
||||
{
|
||||
char *min_str = *min_str_;
|
||||
char *max_str = *max_str_;
|
||||
char *min_end = *min_end_;
|
||||
char *max_end = *max_end_;
|
||||
char *min_org = *min_org_;
|
||||
*prefix_length = (size_t) (min_str - min_org);
|
||||
*min_length = ((!!(cs->state & OB_CS_BINSORT) || cs->pad_attribute == NO_PAD) ? (size_t) (min_str - min_org) : res_length);
|
||||
*max_length = res_length;
|
||||
do {
|
||||
@ -87,7 +89,8 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
|
||||
pbool escape_char, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
size_t *min_length,size_t *max_length,
|
||||
size_t *prefix_length)
|
||||
{
|
||||
unsigned int mb_len;
|
||||
const char *end= ptr + ptr_length;
|
||||
@ -101,7 +104,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
|
||||
if (*ptr == escape_char && ptr+1 != end) {
|
||||
ptr++;
|
||||
} else if (*ptr == w_one || *ptr == w_many) {
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end, prefix_length);
|
||||
}
|
||||
mb_len= ob_ismbchar(cs, ptr, end);
|
||||
if ( mb_len > 1) {
|
||||
@ -116,11 +119,11 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
|
||||
if (contractions && ptr + 1 < end &&
|
||||
ob_uca_can_be_contraction_head(contractions, (unsigned char) *ptr)) {
|
||||
if (ptr[1] == w_one || ptr[1] == w_many) {
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end, prefix_length);
|
||||
} else if (ob_uca_can_be_contraction_tail(contractions, (unsigned char) ptr[1]) &&
|
||||
ob_uca_contraction2_weight(contractions, (unsigned char) ptr[0], ptr[1])) {
|
||||
if (max_char_len == 1 || min_str + 1 >= min_end) {
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end, prefix_length);
|
||||
}
|
||||
max_char_len--;
|
||||
*min_str++= *max_str++= *ptr++;
|
||||
@ -130,7 +133,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
|
||||
}
|
||||
}
|
||||
|
||||
*min_length= *max_length = (size_t) (min_str - min_org);
|
||||
*min_length= *max_length = *prefix_length = (size_t) (min_str - min_org);
|
||||
while (min_end != min_str) {
|
||||
*min_str++= *max_str++= ' ';
|
||||
}
|
||||
|
@ -688,7 +688,8 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
|
||||
pbool escape_char, pbool w_one, pbool w_many,
|
||||
size_t res_len,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_len, size_t *max_len)
|
||||
size_t *min_len, size_t *max_len,
|
||||
size_t *prefix_len)
|
||||
{
|
||||
const char *end= ptr + ptr_len;
|
||||
char *min_org=min_str;
|
||||
@ -705,6 +706,7 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
|
||||
*max_str++= (char) cs->max_sort_char;
|
||||
continue;
|
||||
} else if (*ptr == w_many) {
|
||||
*prefix_len = min_str - min_org;
|
||||
*min_len= ((cs->state & OB_CS_BINSORT) ?
|
||||
(size_t) (min_str - min_org) :
|
||||
res_len);
|
||||
@ -718,7 +720,7 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
|
||||
*min_str++= *max_str++ = *ptr;
|
||||
}
|
||||
|
||||
*min_len= *max_len = (size_t) (min_str - min_org);
|
||||
*min_len= *max_len= *prefix_len = (size_t) (min_str - min_org);
|
||||
while (min_str != min_end) {
|
||||
*min_str++= *max_str++ = ' ';
|
||||
}
|
||||
|
5
deps/oblib/src/lib/charset/ob_ctype_utf16.cc
vendored
5
deps/oblib/src/lib/charset/ob_ctype_utf16.cc
vendored
@ -997,7 +997,8 @@ ob_like_range_generic(const ObCharsetInfo *cs,
|
||||
char escape_char, char w_one, char w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
size_t *min_length,size_t *max_length,
|
||||
size_t *prefix_length)
|
||||
{
|
||||
const char *min_org = min_str;
|
||||
const char *max_org = max_str;
|
||||
@ -1058,6 +1059,7 @@ ob_like_range_generic(const ObCharsetInfo *cs,
|
||||
continue;
|
||||
}
|
||||
} else if ((ob_wc_t) w_many == wc) {
|
||||
*prefix_length = (size_t) (min_str - min_org);
|
||||
*min_length= ((cs->state & OB_CS_BINSORT) ? (size_t) (min_str - min_org) : res_length);
|
||||
*max_length= res_length;
|
||||
goto PAD_MIN_MAX;
|
||||
@ -1111,6 +1113,7 @@ ob_like_range_generic(const ObCharsetInfo *cs,
|
||||
}
|
||||
|
||||
PAD_SET_LEN:
|
||||
*prefix_length = (size_t) (min_str - min_org);
|
||||
*min_length= (size_t) (min_str - min_org);
|
||||
*max_length= (size_t) (max_str - max_org);
|
||||
|
||||
|
@ -908,9 +908,9 @@ TEST_F(TestCharset, basic_collation_handler_test)
|
||||
if (OB_NOT_NULL(cs->coll->like_range)) {
|
||||
char temp1[100];
|
||||
char temp2[100];
|
||||
size_t len1, len2;
|
||||
size_t len1, len2, prefix_len;
|
||||
fprintf(stdout, ">> like_range = %d for text = \"%s\", min = %.*s, max = %.*s\n",
|
||||
cs->coll->like_range(cs, str, end-str, '\\', '_', '%', 100, temp1, temp2, &len1, &len2), utf8_str,
|
||||
cs->coll->like_range(cs, str, end-str, '\\', '_', '%', 100, temp1, temp2, &len1, &len2, &prefix_len), utf8_str,
|
||||
(int)len1, temp1, (int)len2, temp2);
|
||||
}
|
||||
if (OB_NOT_NULL(cs->coll->wildcmp)) {
|
||||
|
@ -111,11 +111,20 @@ int ObExprInnerDecodeLike::eval_inner_decode_like(const ObExpr &expr, ObEvalCtx
|
||||
LOG_WARN("failed to check escape length", K(escape_str), K(escape_str.length()));
|
||||
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "ESCAPE");
|
||||
} else {
|
||||
// For a pattern like 'aaa%' that ends with `%`, we will extract a precise range with some special handling:
|
||||
// We need to fill the end key of the like range with the maximum character
|
||||
// up to the target column's length to match the semantics of `%`.
|
||||
// However, when the target column length is less than the effective prefix length of the pattern,
|
||||
// the pattern gets truncated, resulting in an imprecise range and incorrect results.
|
||||
// So, we need to ensure that the effective prefix of the pattern is not truncated
|
||||
// to guarantee that the range is always precise.
|
||||
int32_t range_str_len = col_len;
|
||||
//convert character counts to len in bytes
|
||||
col_len = static_cast<int32_t>(col_len * mbmaxlen);
|
||||
size_t min_str_len = col_len;
|
||||
size_t max_str_len = col_len;
|
||||
range_str_len = static_cast<int32_t>(range_str_len * mbmaxlen);
|
||||
size_t min_str_len = range_str_len;
|
||||
size_t max_str_len = range_str_len;
|
||||
size_t res_len = 0;
|
||||
size_t prefix_len = 0;
|
||||
int32_t start_flag = is_start->get_int();
|
||||
common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator();
|
||||
char *min_str_buf = NULL;
|
||||
@ -133,26 +142,59 @@ int ObExprInnerDecodeLike::eval_inner_decode_like(const ObExpr &expr, ObEvalCtx
|
||||
static_cast<char*>(min_str_buf),
|
||||
&min_str_len,
|
||||
static_cast<char*>(max_str_buf),
|
||||
&max_str_len))) {
|
||||
&max_str_len,
|
||||
&prefix_len))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("calc like range failed", K(ret), K(pattern_str), K(escape_str), K(cs_type));
|
||||
} else {
|
||||
ObExprStrResAlloc res_alloc(expr, ctx);
|
||||
char *buf = NULL;
|
||||
if (is_start->get_int() == 1) {
|
||||
res_buf = min_str_buf;
|
||||
res_len = min_str_len;
|
||||
} else {
|
||||
res_buf = max_str_buf;
|
||||
res_len = max_str_len;
|
||||
if (prefix_len >= col_len && ObCharset::strlen_char(cs_type, min_str_buf, prefix_len) >= col_len) {
|
||||
int32_t pattern_prefix_len = 0; // strlen_char of prefix
|
||||
if (OB_FAIL(get_pattern_prefix_len(cs_type,
|
||||
escape_str,
|
||||
pattern_str,
|
||||
pattern_prefix_len))) {
|
||||
LOG_WARN("failed to get pattern prefix len", K(ret), K(pattern_str), K(escape_str));
|
||||
} else {
|
||||
range_str_len = max(col_len, pattern_prefix_len);
|
||||
range_str_len = static_cast<int32_t>(range_str_len * mbmaxlen);
|
||||
min_str_len = range_str_len;
|
||||
max_str_len = range_str_len;
|
||||
if (OB_ISNULL(min_str_buf = (char*)temp_allocator.alloc(min_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("alloc memory failed", K(min_str_len));
|
||||
} else if (OB_ISNULL(max_str_buf = (char*)temp_allocator.alloc(max_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("alloc memory failed", K(max_str_len));
|
||||
} else if (OB_FAIL(ObCharset::like_range(cs_type,
|
||||
pattern_str,
|
||||
*(escape_str.ptr()),
|
||||
static_cast<char*>(min_str_buf),
|
||||
&min_str_len,
|
||||
static_cast<char*>(max_str_buf),
|
||||
&max_str_len))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("calc like range failed", K(ret), K(pattern_str), K(escape_str), K(cs_type));
|
||||
}
|
||||
}
|
||||
}
|
||||
buf = (char*)res_alloc.alloc(res_len);
|
||||
if (OB_ISNULL(buf)) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("alloc memory failed", K(ret), K(min_str_len));
|
||||
} else {
|
||||
MEMCPY(buf, res_buf, res_len);
|
||||
expr_datum.set_string(buf, res_len);
|
||||
if (OB_SUCC(ret)) {
|
||||
ObExprStrResAlloc res_alloc(expr, ctx);
|
||||
char *buf = NULL;
|
||||
if (is_start->get_int() == 1) {
|
||||
res_buf = min_str_buf;
|
||||
res_len = min_str_len;
|
||||
} else {
|
||||
res_buf = max_str_buf;
|
||||
res_len = max_str_len;
|
||||
}
|
||||
buf = (char*)res_alloc.alloc(res_len);
|
||||
if (OB_ISNULL(buf)) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("alloc memory failed", K(ret), K(min_str_len));
|
||||
} else {
|
||||
MEMCPY(buf, res_buf, res_len);
|
||||
expr_datum.set_string(buf, res_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -226,5 +268,46 @@ int ObExprInnerDecodeLike::cast_like_obj_if_needed(ObEvalCtx &ctx, const ObExpr
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObExprInnerDecodeLike::get_pattern_prefix_len(const ObCollationType &cs_type,
|
||||
const ObString &escape_str,
|
||||
const ObString &pattern_str,
|
||||
int32_t &pattern_prefix_len)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
int64_t mbmaxlen = 1;
|
||||
pattern_prefix_len = 0;
|
||||
if (OB_NOT_NULL(pattern_str.ptr()) && OB_NOT_NULL(escape_str.ptr()) && escape_str.length() == 1 &&
|
||||
cs_type != CS_TYPE_INVALID && cs_type < CS_TYPE_MAX) {
|
||||
if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
|
||||
LOG_WARN("fail to get mbmaxlen", K(ret), K(cs_type));
|
||||
} else {
|
||||
ObArenaAllocator allocator;
|
||||
size_t pattern_len = pattern_str.length();
|
||||
pattern_len = static_cast<int32_t>(pattern_len * mbmaxlen);
|
||||
size_t min_str_len = pattern_len;
|
||||
size_t max_str_len = pattern_len;
|
||||
size_t prefix_len = pattern_len;
|
||||
char *min_str_buf = NULL;
|
||||
char *max_str_buf = NULL;
|
||||
if (OB_ISNULL(min_str_buf = (char *)allocator.alloc(min_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("no enough memory", K(ret), K(pattern_len));
|
||||
} else if (OB_ISNULL(max_str_buf = (char *)allocator.alloc(max_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("no enough memory", K(ret), K(pattern_len));
|
||||
} else if (OB_FAIL(ObCharset::like_range(cs_type, pattern_str, *(escape_str.ptr()),
|
||||
min_str_buf, &min_str_len,
|
||||
max_str_buf, &max_str_len,
|
||||
&prefix_len))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("failed to retrive like range", K(ret));
|
||||
} else {
|
||||
pattern_prefix_len = ObCharset::strlen_char(cs_type, min_str_buf, prefix_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
@ -40,6 +40,11 @@ public:
|
||||
private:
|
||||
static int cast_like_obj_if_needed(ObEvalCtx &ctx, const ObExpr &pattern_expr, ObDatum *pattern_datum,
|
||||
const ObExpr &dst_expr, ObDatum * &cast_datum);
|
||||
// get prefix string (without wildcards) length of like pattern
|
||||
static int get_pattern_prefix_len(const ObCollationType &cs_type,
|
||||
const ObString &escape_str,
|
||||
const ObString &pattern_str,
|
||||
int32_t &pattern_prefix_len);
|
||||
DISALLOW_COPY_AND_ASSIGN(ObExprInnerDecodeLike) const;
|
||||
};
|
||||
} // namespace sql
|
||||
|
@ -8424,6 +8424,9 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
|
||||
void *max_str_buf = NULL;
|
||||
int32_t col_len = out_key_part.pos_.column_type_.get_accuracy().get_length();
|
||||
ObCollationType cs_type = out_key_part.pos_.column_type_.get_collation_type();
|
||||
int32_t pattern_prefix_len = 0;
|
||||
int32_t range_str_len = 0;
|
||||
size_t prefix_len = 0;
|
||||
size_t min_str_len = 0;
|
||||
size_t max_str_len = 0;
|
||||
ObObj pattern_buf_obj;
|
||||
@ -8474,6 +8477,7 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
|
||||
} else if (escape_str.empty()) {
|
||||
escape_str.assign_ptr("\\", 1);
|
||||
} else { /* do nothing */ }
|
||||
|
||||
if (OB_FAIL(ret)) {
|
||||
// do nothing;
|
||||
} else if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
|
||||
@ -8485,13 +8489,26 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
LOG_WARN("failed to check escape length", K(escape_str), K(escape_str.length()));
|
||||
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "ESCAPE");
|
||||
} else { }
|
||||
} else if (OB_FAIL(get_pattern_prefix_len(cs_type,
|
||||
escape_str,
|
||||
pattern_str,
|
||||
pattern_prefix_len))) {
|
||||
LOG_WARN("failed to get pattern prefix len", K(ret), K(pattern_str), K(escape_str));
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
// For a pattern like 'aaa%' that ends with `%`, we will extract a precise range with some special handling:
|
||||
// We need to fill the end key of the like range with the maximum character
|
||||
// up to the target column's length to match the semantics of `%`.
|
||||
// However, when the target column length is less than the effective prefix length of the pattern,
|
||||
// the pattern gets truncated, resulting in an imprecise range and incorrect results.
|
||||
// So, we need to ensure that the effective prefix of the pattern is not truncated
|
||||
// to guarantee that the range is always precise.
|
||||
range_str_len = col_len;
|
||||
//convert character counts to len in bytes
|
||||
col_len = static_cast<int32_t>(col_len * mbmaxlen);
|
||||
min_str_len = col_len;
|
||||
max_str_len = col_len;
|
||||
range_str_len = static_cast<int32_t>(range_str_len * mbmaxlen);
|
||||
min_str_len = range_str_len;
|
||||
max_str_len = range_str_len;
|
||||
if (OB_ISNULL(min_str_buf = allocator_.alloc(min_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_ERROR("alloc memory failed", K(min_str_len));
|
||||
@ -8504,7 +8521,8 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
|
||||
static_cast<char*>(min_str_buf),
|
||||
&min_str_len,
|
||||
static_cast<char*>(max_str_buf),
|
||||
&max_str_len))) {
|
||||
&max_str_len,
|
||||
&prefix_len))) {
|
||||
//set whole range
|
||||
out_key_part.normal_keypart_->start_.set_min_value();
|
||||
out_key_part.normal_keypart_->end_.set_max_value();
|
||||
@ -8514,25 +8532,57 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
|
||||
out_key_part.normal_keypart_->always_true_ = true;
|
||||
ret = OB_SUCCESS;
|
||||
} else {
|
||||
ObObj &start = out_key_part.normal_keypart_->start_;
|
||||
ObObj &end = out_key_part.normal_keypart_->end_;
|
||||
start.set_collation_type(out_key_part.pos_.column_type_.get_collation_type());
|
||||
start.set_string(out_key_part.pos_.column_type_.get_type(),
|
||||
static_cast<char*>(min_str_buf), static_cast<int32_t>(min_str_len));
|
||||
end.set_collation_type(out_key_part.pos_.column_type_.get_collation_type());
|
||||
end.set_string(out_key_part.pos_.column_type_.get_type(),
|
||||
static_cast<char*>(max_str_buf), static_cast<int32_t>(max_str_len));
|
||||
out_key_part.normal_keypart_->include_start_ = true;
|
||||
out_key_part.normal_keypart_->include_end_ = true;
|
||||
out_key_part.normal_keypart_->always_false_ = false;
|
||||
out_key_part.normal_keypart_->always_true_ = false;
|
||||
if (prefix_len >= col_len && ObCharset::strlen_char(cs_type, static_cast<char*>(min_str_buf), prefix_len) >= col_len) {
|
||||
int32_t pattern_prefix_len = 0; // strlen_char of prefix
|
||||
if (OB_FAIL(get_pattern_prefix_len(cs_type,
|
||||
escape_str,
|
||||
pattern_str,
|
||||
pattern_prefix_len))) {
|
||||
LOG_WARN("failed to get pattern prefix len", K(ret), K(pattern_str), K(escape_str));
|
||||
} else {
|
||||
range_str_len = max(col_len, pattern_prefix_len);
|
||||
range_str_len = static_cast<int32_t>(range_str_len * mbmaxlen);
|
||||
min_str_len = range_str_len;
|
||||
max_str_len = range_str_len;
|
||||
if (OB_ISNULL(min_str_buf = allocator_.alloc(min_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("alloc memory failed", K(min_str_len));
|
||||
} else if (OB_ISNULL(max_str_buf = allocator_.alloc(max_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("alloc memory failed", K(max_str_len));
|
||||
} else if (OB_FAIL(ObCharset::like_range(cs_type,
|
||||
pattern_str,
|
||||
*(escape_str.ptr()),
|
||||
static_cast<char*>(min_str_buf),
|
||||
&min_str_len,
|
||||
static_cast<char*>(max_str_buf),
|
||||
&max_str_len))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("calc like range failed", K(ret), K(pattern_str), K(escape_str), K(cs_type));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
ObObj &start = out_key_part.normal_keypart_->start_;
|
||||
ObObj &end = out_key_part.normal_keypart_->end_;
|
||||
start.set_collation_type(out_key_part.pos_.column_type_.get_collation_type());
|
||||
start.set_string(out_key_part.pos_.column_type_.get_type(),
|
||||
static_cast<char*>(min_str_buf), static_cast<int32_t>(min_str_len));
|
||||
end.set_collation_type(out_key_part.pos_.column_type_.get_collation_type());
|
||||
end.set_string(out_key_part.pos_.column_type_.get_type(),
|
||||
static_cast<char*>(max_str_buf), static_cast<int32_t>(max_str_len));
|
||||
out_key_part.normal_keypart_->include_start_ = true;
|
||||
out_key_part.normal_keypart_->include_end_ = true;
|
||||
out_key_part.normal_keypart_->always_false_ = false;
|
||||
out_key_part.normal_keypart_->always_true_ = false;
|
||||
|
||||
/// check if is precise
|
||||
if (NULL != query_range_ctx_) {
|
||||
query_range_ctx_->cur_expr_is_precise_ =
|
||||
ObQueryRange::check_like_range_precise(pattern_str,
|
||||
static_cast<char *>(max_str_buf),
|
||||
max_str_len, *(escape_str.ptr()));
|
||||
/// check if is precise
|
||||
if (NULL != query_range_ctx_) {
|
||||
query_range_ctx_->cur_expr_is_precise_ =
|
||||
ObQueryRange::check_like_range_precise(pattern_str,
|
||||
static_cast<char *>(max_str_buf),
|
||||
max_str_len, *(escape_str.ptr()));
|
||||
}
|
||||
}
|
||||
}
|
||||
if (NULL != min_str_buf) {
|
||||
@ -9611,7 +9661,8 @@ int ObQueryRange::is_precise_like_range(const ObObjParam &pattern, char escape,
|
||||
if (pattern.is_string_type()) {
|
||||
ObString pattern_str = pattern.get_string();
|
||||
if (cs_type == CS_TYPE_INVALID || cs_type >= CS_TYPE_MAX) {
|
||||
}else if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
|
||||
} else if (ObCharset::is_cs_uca(cs_type)) {
|
||||
} else if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
|
||||
LOG_WARN("fail to get mbmaxlen", K(ret), K(cs_type), K(escape));
|
||||
} else {
|
||||
ObArenaAllocator allocator;
|
||||
@ -9646,6 +9697,47 @@ int ObQueryRange::is_precise_like_range(const ObObjParam &pattern, char escape,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObQueryRange::get_pattern_prefix_len(const ObCollationType &cs_type,
|
||||
const ObString &escape_str,
|
||||
const ObString &pattern_str,
|
||||
int32_t &pattern_prefix_len)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
int64_t mbmaxlen = 1;
|
||||
pattern_prefix_len = 0;
|
||||
if (OB_NOT_NULL(pattern_str.ptr()) && OB_NOT_NULL(escape_str.ptr()) && escape_str.length() == 1 &&
|
||||
cs_type != CS_TYPE_INVALID && cs_type < CS_TYPE_MAX) {
|
||||
if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
|
||||
LOG_WARN("fail to get mbmaxlen", K(ret), K(cs_type));
|
||||
} else {
|
||||
ObArenaAllocator allocator;
|
||||
size_t pattern_len = pattern_str.length();
|
||||
pattern_len = static_cast<int32_t>(pattern_len * mbmaxlen);
|
||||
size_t prefix_len = pattern_len;
|
||||
size_t min_str_len = pattern_len;
|
||||
size_t max_str_len = pattern_len;
|
||||
char *min_str_buf = NULL;
|
||||
char *max_str_buf = NULL;
|
||||
if (OB_ISNULL(min_str_buf = (char *)allocator.alloc(min_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("no enough memory", K(ret), K(pattern_len));
|
||||
} else if (OB_ISNULL(max_str_buf = (char *)allocator.alloc(max_str_len))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
LOG_WARN("no enough memory", K(ret), K(pattern_len));
|
||||
} else if (OB_FAIL(ObCharset::like_range(cs_type, pattern_str, *(escape_str.ptr()),
|
||||
min_str_buf, &min_str_len,
|
||||
max_str_buf, &max_str_len,
|
||||
&prefix_len))) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("failed to retrive like range", K(ret));
|
||||
} else {
|
||||
pattern_prefix_len = ObCharset::strlen_char(cs_type, min_str_buf, prefix_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObQueryRange::get_calculable_expr_val(const ObRawExpr *expr,
|
||||
ObObj &val,
|
||||
bool &is_valid,
|
||||
|
@ -554,6 +554,11 @@ public:
|
||||
int64_t &range_prefix_count,
|
||||
bool &contain_always_false) const;
|
||||
virtual bool is_fast_nlj_range() const { return false; }
|
||||
// get prefix string (without wildcards) length of like pattern
|
||||
static int get_pattern_prefix_len(const ObCollationType &cs_type,
|
||||
const ObString &escape_str,
|
||||
const ObString &pattern_str,
|
||||
int32_t &pattern_prefix_len);
|
||||
private:
|
||||
|
||||
int init_query_range_ctx(common::ObIAllocator &allocator,
|
||||
|
Loading…
x
Reference in New Issue
Block a user