fix like range bug

This commit is contained in:
AA-tuliwei-BB 2025-01-03 11:48:49 +00:00 committed by ob-robot
parent 03b72f408e
commit 9f87f66ded
11 changed files with 286 additions and 67 deletions

View File

@ -16,6 +16,7 @@
#include "lib/ob_define.h"
#include "lib/worker.h"
#include "common/ob_common_utility.h"
#include "lib/charset/str_uca_type.h"
namespace oceanbase
{
@ -1061,7 +1062,8 @@ int ObCharset::like_range(ObCollationType collation_type,
char *min_str,
size_t *min_str_len,
char *max_str,
size_t *max_str_len)
size_t *max_str_len,
size_t *prefix_len /*= NULL*/)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(collation_type <= CS_TYPE_INVALID ||
@ -1099,6 +1101,7 @@ int ObCharset::like_range(ObCollationType collation_type,
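// The modification above causes this problem: 'a\0' would fall outside the range, because MySQL's utf8 semantics make 'a\0' < 'a', so the range must not be modified that way.
// The actual correction is still left to the storage layer.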
size_t res_size = *min_str_len < *max_str_len ? *min_str_len : *max_str_len;
size_t pre_len = 0;
if (OB_ISNULL(cs->coll)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->coll));
@ -1112,8 +1115,11 @@ int ObCharset::like_range(ObCollationType collation_type,
min_str,
max_str,
min_str_len,
max_str_len)) {
max_str_len,
&pre_len)) {
ret = OB_EMPTY_RANGE;
} else if (prefix_len != NULL) {
*prefix_len = pre_len;
} else {
// *min_str_len = real_len;
}
@ -3611,6 +3617,20 @@ bool ObCharset::is_cs_unicode(ObCollationType collation_type)
return is_cs_unicode;
}
// Tells whether the given collation has UCA data attached whose version is UCA_V900.
// An out-of-range collation type, or one without a registered charset entry,
// triggers a warning log and yields false.
bool ObCharset::is_cs_uca(ObCollationType collation_type)
{
  bool uses_uca_v900 = false;
  // The range test must be evaluated first: it guards the charset_arr index below.
  const bool type_out_of_range = (collation_type <= CS_TYPE_INVALID ||
                                  collation_type >= CS_TYPE_MAX);
  if (OB_UNLIKELY(type_out_of_range) ||
      OB_ISNULL(ObCharset::charset_arr[collation_type])) {
    LOG_WARN_RET(OB_INVALID_ARGUMENT, "unexpected error. invalid argument(s)", K(ret), K(collation_type), K(lbt()));
  } else {
    const ObCharsetInfo *charset =
        static_cast<const ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
    if (NULL != charset->uca) {
      uses_uca_v900 = (UCA_V900 == charset->uca->version);
    }
  }
  return uses_uca_v900;
}
int ObCharset::get_replace_character(ObCollationType collation_type, int32_t &replaced_char_unicode)
{
int ret = OB_SUCCESS;

View File

@ -513,7 +513,8 @@ public:
char *min_str,
size_t *min_str_len,
char *max_str,
size_t *max_str_len);
size_t *max_str_len,
size_t *prefix_len = NULL);
static size_t strlen_char(ObCollationType collation_type,
const char *str,
int64_t str_len);
@ -717,6 +718,7 @@ public:
static bool is_cs_nonascii(ObCollationType collation_type);
static bool is_cs_unicode(ObCollationType collation_type);
static bool is_cs_uca(ObCollationType collation_type);
static int get_replace_character(ObCollationType collation_type, int32_t &replaced_char_unicode);
static bool is_cjk_charset(ObCollationType collation_type);
static bool is_valid_connection_collation(ObCollationType collation_type);

View File

@ -304,12 +304,14 @@ typedef struct ObCollationHandler
//size_t (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);
// creates a LIKE range for the optimizer; used by the query range module
// prefix_len should return the **byte** length before the first '%'
bool (*like_range)(const struct ObCharsetInfo *,
const char *s, size_t s_length,
pchar w_prefix, pchar w_one, pchar w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_len, size_t *max_len);
size_t *min_len, size_t *max_len,
size_t *prefix_len);
// wildcard comparison, for LIKE
int (*wildcmp)(const struct ObCharsetInfo *,
const char *str,const char *str_end,
@ -409,8 +411,8 @@ struct ObCharsetInfo
#define ob_strnxfrm(cs, d, dl, s, sl) \
((cs)->coll->strnxfrm((cs), (d), (dl), (dl), (s), (sl), MY_STRXFRM_PAD_WITH_SPACE))
#define ob_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0))
#define ob_like_range(s, a, b, c, d, e, f, g, h, i, j) \
((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j)))
#define ob_like_range(s, a, b, c, d, e, f, g, h, i, j, k) \
((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j), (k)))
#define ob_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m)))
#define ob_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b)))
#define ob_charpos(cs, b, e, num) (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num))
@ -551,11 +553,12 @@ size_t ob_scan_8bit(const ObCharsetInfo *cs, const char *b, const char *e,
/* For 8-bit character set */
bool ob_like_range_simple(const ObCharsetInfo *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length,
size_t *prefix_length);
bool ob_propagate_simple(const ObCharsetInfo *cs, const unsigned char *str,
size_t len);
@ -576,7 +579,8 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length);
size_t *min_length,size_t *max_length,
size_t *prefix_length);
int ob_wildcmp_mb(const ObCharsetInfo *cs,
const char *str,const char *str_end,
@ -675,7 +679,7 @@ bool ob_like_range_generic(const ObCharsetInfo *cs, const char *ptr,
size_t ptr_length, char escape, char w_one,
char w_many, size_t res_length, char *min_str,
char *max_str, size_t *min_length,
size_t *max_length);
size_t *max_length, size_t *prefix_length);
size_t ob_strnxfrm_unicode(const ObCharsetInfo *cs,
unsigned char *dst, size_t dstlen, unsigned int nweights,

View File

@ -60,13 +60,15 @@ bool ob_like_range_mb_help(const ObCharsetInfo *cs,
size_t res_length,
char **min_str_,char **max_str_,
char **min_org_, char **min_end_,
size_t *min_length,size_t *max_length, char **max_end_)
size_t *min_length,size_t *max_length,
char **max_end_, size_t *prefix_length)
{
char *min_str = *min_str_;
char *max_str = *max_str_;
char *min_end = *min_end_;
char *max_end = *max_end_;
char *min_org = *min_org_;
*prefix_length = (size_t) (min_str - min_org);
*min_length = ((!!(cs->state & OB_CS_BINSORT) || cs->pad_attribute == NO_PAD) ? (size_t) (min_str - min_org) : res_length);
*max_length = res_length;
do {
@ -87,7 +89,8 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
pbool escape_char, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
size_t *min_length,size_t *max_length,
size_t *prefix_length)
{
unsigned int mb_len;
const char *end= ptr + ptr_length;
@ -101,7 +104,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
if (*ptr == escape_char && ptr+1 != end) {
ptr++;
} else if (*ptr == w_one || *ptr == w_many) {
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end, prefix_length);
}
mb_len= ob_ismbchar(cs, ptr, end);
if ( mb_len > 1) {
@ -116,11 +119,11 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
if (contractions && ptr + 1 < end &&
ob_uca_can_be_contraction_head(contractions, (unsigned char) *ptr)) {
if (ptr[1] == w_one || ptr[1] == w_many) {
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end, prefix_length);
} else if (ob_uca_can_be_contraction_tail(contractions, (unsigned char) ptr[1]) &&
ob_uca_contraction2_weight(contractions, (unsigned char) ptr[0], ptr[1])) {
if (max_char_len == 1 || min_str + 1 >= min_end) {
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end, prefix_length);
}
max_char_len--;
*min_str++= *max_str++= *ptr++;
@ -130,7 +133,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
}
}
*min_length= *max_length = (size_t) (min_str - min_org);
*min_length= *max_length = *prefix_length = (size_t) (min_str - min_org);
while (min_end != min_str) {
*min_str++= *max_str++= ' ';
}

View File

@ -688,7 +688,8 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
pbool escape_char, pbool w_one, pbool w_many,
size_t res_len,
char *min_str,char *max_str,
size_t *min_len, size_t *max_len)
size_t *min_len, size_t *max_len,
size_t *prefix_len)
{
const char *end= ptr + ptr_len;
char *min_org=min_str;
@ -705,6 +706,7 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
*max_str++= (char) cs->max_sort_char;
continue;
} else if (*ptr == w_many) {
*prefix_len = min_str - min_org;
*min_len= ((cs->state & OB_CS_BINSORT) ?
(size_t) (min_str - min_org) :
res_len);
@ -718,7 +720,7 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
*min_str++= *max_str++ = *ptr;
}
*min_len= *max_len = (size_t) (min_str - min_org);
*min_len= *max_len= *prefix_len = (size_t) (min_str - min_org);
while (min_str != min_end) {
*min_str++= *max_str++ = ' ';
}

View File

@ -997,7 +997,8 @@ ob_like_range_generic(const ObCharsetInfo *cs,
char escape_char, char w_one, char w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
size_t *min_length,size_t *max_length,
size_t *prefix_length)
{
const char *min_org = min_str;
const char *max_org = max_str;
@ -1058,6 +1059,7 @@ ob_like_range_generic(const ObCharsetInfo *cs,
continue;
}
} else if ((ob_wc_t) w_many == wc) {
*prefix_length = (size_t) (min_str - min_org);
*min_length= ((cs->state & OB_CS_BINSORT) ? (size_t) (min_str - min_org) : res_length);
*max_length= res_length;
goto PAD_MIN_MAX;
@ -1111,6 +1113,7 @@ ob_like_range_generic(const ObCharsetInfo *cs,
}
PAD_SET_LEN:
*prefix_length = (size_t) (min_str - min_org);
*min_length= (size_t) (min_str - min_org);
*max_length= (size_t) (max_str - max_org);

View File

@ -908,9 +908,9 @@ TEST_F(TestCharset, basic_collation_handler_test)
if (OB_NOT_NULL(cs->coll->like_range)) {
char temp1[100];
char temp2[100];
size_t len1, len2;
size_t len1, len2, prefix_len;
fprintf(stdout, ">> like_range = %d for text = \"%s\", min = %.*s, max = %.*s\n",
cs->coll->like_range(cs, str, end-str, '\\', '_', '%', 100, temp1, temp2, &len1, &len2), utf8_str,
cs->coll->like_range(cs, str, end-str, '\\', '_', '%', 100, temp1, temp2, &len1, &len2, &prefix_len), utf8_str,
(int)len1, temp1, (int)len2, temp2);
}
if (OB_NOT_NULL(cs->coll->wildcmp)) {

View File

@ -111,11 +111,20 @@ int ObExprInnerDecodeLike::eval_inner_decode_like(const ObExpr &expr, ObEvalCtx
LOG_WARN("failed to check escape length", K(escape_str), K(escape_str.length()));
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "ESCAPE");
} else {
// For a pattern like 'aaa%' that ends with `%`, we will extract a precise range with some special handling:
// We need to fill the end key of the like range with the maximum character
// up to the target column's length to match the semantics of `%`.
// However, when the target column length is less than the effective prefix length of the pattern,
// the pattern gets truncated, resulting in an imprecise range and incorrect results.
// So, we need to ensure that the effective prefix of the pattern is not truncated
// to guarantee that the range is always precise.
int32_t range_str_len = col_len;
//convert character counts to len in bytes
col_len = static_cast<int32_t>(col_len * mbmaxlen);
size_t min_str_len = col_len;
size_t max_str_len = col_len;
range_str_len = static_cast<int32_t>(range_str_len * mbmaxlen);
size_t min_str_len = range_str_len;
size_t max_str_len = range_str_len;
size_t res_len = 0;
size_t prefix_len = 0;
int32_t start_flag = is_start->get_int();
common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator();
char *min_str_buf = NULL;
@ -133,26 +142,59 @@ int ObExprInnerDecodeLike::eval_inner_decode_like(const ObExpr &expr, ObEvalCtx
static_cast<char*>(min_str_buf),
&min_str_len,
static_cast<char*>(max_str_buf),
&max_str_len))) {
&max_str_len,
&prefix_len))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("calc like range failed", K(ret), K(pattern_str), K(escape_str), K(cs_type));
} else {
ObExprStrResAlloc res_alloc(expr, ctx);
char *buf = NULL;
if (is_start->get_int() == 1) {
res_buf = min_str_buf;
res_len = min_str_len;
} else {
res_buf = max_str_buf;
res_len = max_str_len;
if (prefix_len >= col_len && ObCharset::strlen_char(cs_type, min_str_buf, prefix_len) >= col_len) {
int32_t pattern_prefix_len = 0; // strlen_char of prefix
if (OB_FAIL(get_pattern_prefix_len(cs_type,
escape_str,
pattern_str,
pattern_prefix_len))) {
LOG_WARN("failed to get pattern prefix len", K(ret), K(pattern_str), K(escape_str));
} else {
range_str_len = max(col_len, pattern_prefix_len);
range_str_len = static_cast<int32_t>(range_str_len * mbmaxlen);
min_str_len = range_str_len;
max_str_len = range_str_len;
if (OB_ISNULL(min_str_buf = (char*)temp_allocator.alloc(min_str_len))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(min_str_len));
} else if (OB_ISNULL(max_str_buf = (char*)temp_allocator.alloc(max_str_len))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(max_str_len));
} else if (OB_FAIL(ObCharset::like_range(cs_type,
pattern_str,
*(escape_str.ptr()),
static_cast<char*>(min_str_buf),
&min_str_len,
static_cast<char*>(max_str_buf),
&max_str_len))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("calc like range failed", K(ret), K(pattern_str), K(escape_str), K(cs_type));
}
}
}
buf = (char*)res_alloc.alloc(res_len);
if (OB_ISNULL(buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(ret), K(min_str_len));
} else {
MEMCPY(buf, res_buf, res_len);
expr_datum.set_string(buf, res_len);
if (OB_SUCC(ret)) {
ObExprStrResAlloc res_alloc(expr, ctx);
char *buf = NULL;
if (is_start->get_int() == 1) {
res_buf = min_str_buf;
res_len = min_str_len;
} else {
res_buf = max_str_buf;
res_len = max_str_len;
}
buf = (char*)res_alloc.alloc(res_len);
if (OB_ISNULL(buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(ret), K(min_str_len));
} else {
MEMCPY(buf, res_buf, res_len);
expr_datum.set_string(buf, res_len);
}
}
}
}
@ -226,5 +268,46 @@ int ObExprInnerDecodeLike::cast_like_obj_if_needed(ObEvalCtx &ctx, const ObExpr
}
return ret;
}
// Measures, in characters, the prefix of a LIKE pattern as reported by ObCharset::like_range().
//
// @param cs_type             collation used to interpret the pattern
// @param escape_str          escape string; the work is only done when its length is exactly 1
// @param pattern_str         LIKE pattern to inspect
// @param pattern_prefix_len  [out] reset to 0 on entry; on success receives
//                            ObCharset::strlen_char() over the first prefix bytes of the
//                            min range string built by ObCharset::like_range()
// @return OB_SUCCESS (including the case where the inputs are unusable and nothing is
//         computed), the error of ObCharset::get_mbmaxlen_by_coll(),
//         OB_ALLOCATE_MEMORY_FAILED when a scratch buffer cannot be allocated, or
//         OB_ERR_UNEXPECTED when ObCharset::like_range() fails.
int ObExprInnerDecodeLike::get_pattern_prefix_len(const ObCollationType &cs_type,
                                                  const ObString &escape_str,
                                                  const ObString &pattern_str,
                                                  int32_t &pattern_prefix_len)
{
  int ret = OB_SUCCESS;
  int64_t mbmaxlen = 1;
  pattern_prefix_len = 0;
  const bool args_usable = OB_NOT_NULL(pattern_str.ptr())
                           && OB_NOT_NULL(escape_str.ptr())
                           && 1 == escape_str.length()
                           && CS_TYPE_INVALID != cs_type
                           && cs_type < CS_TYPE_MAX;
  if (!args_usable) {
    // Nothing to measure: keep the output at 0 and report success.
  } else if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
    LOG_WARN("fail to get mbmaxlen", K(ret), K(cs_type));
  } else {
    // Scratch buffers live only for the duration of this call.
    ObArenaAllocator allocator;
    // Size the buffers as pattern length times mbmaxlen so the whole pattern fits.
    size_t pattern_len = pattern_str.length();
    pattern_len = static_cast<int32_t>(pattern_len * mbmaxlen);
    size_t lower_len = pattern_len;
    size_t upper_len = pattern_len;
    size_t prefix_bytes = pattern_len;
    // NOTE(review): a non-NULL but zero-length pattern requests zero-byte buffers here;
    // confirm how allocator.alloc(0) behaves for that input.
    char *lower_buf = static_cast<char *>(allocator.alloc(lower_len));
    char *upper_buf = NULL;
    if (OB_ISNULL(lower_buf)) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("no enough memory", K(ret), K(pattern_len));
    } else if (OB_ISNULL(upper_buf = static_cast<char *>(allocator.alloc(upper_len)))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("no enough memory", K(ret), K(pattern_len));
    } else if (OB_FAIL(ObCharset::like_range(cs_type,
                                             pattern_str,
                                             *(escape_str.ptr()),
                                             lower_buf,
                                             &lower_len,
                                             upper_buf,
                                             &upper_len,
                                             &prefix_bytes))) {
      // Any like_range failure is reported to the caller as OB_ERR_UNEXPECTED.
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("failed to retrive like range", K(ret));
    } else {
      // Convert the byte length of the prefix into a character count.
      pattern_prefix_len = ObCharset::strlen_char(cs_type, lower_buf, prefix_bytes);
    }
  }
  return ret;
}
}
}

View File

@ -40,6 +40,11 @@ public:
private:
static int cast_like_obj_if_needed(ObEvalCtx &ctx, const ObExpr &pattern_expr, ObDatum *pattern_datum,
const ObExpr &dst_expr, ObDatum * &cast_datum);
// get prefix string (without wildcards) length of like pattern
static int get_pattern_prefix_len(const ObCollationType &cs_type,
const ObString &escape_str,
const ObString &pattern_str,
int32_t &pattern_prefix_len);
DISALLOW_COPY_AND_ASSIGN(ObExprInnerDecodeLike) const;
};
} // namespace sql

View File

@ -8424,6 +8424,9 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
void *max_str_buf = NULL;
int32_t col_len = out_key_part.pos_.column_type_.get_accuracy().get_length();
ObCollationType cs_type = out_key_part.pos_.column_type_.get_collation_type();
int32_t pattern_prefix_len = 0;
int32_t range_str_len = 0;
size_t prefix_len = 0;
size_t min_str_len = 0;
size_t max_str_len = 0;
ObObj pattern_buf_obj;
@ -8474,6 +8477,7 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
} else if (escape_str.empty()) {
escape_str.assign_ptr("\\", 1);
} else { /* do nothing */ }
if (OB_FAIL(ret)) {
// do nothing;
} else if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
@ -8485,13 +8489,26 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
ret = OB_INVALID_ARGUMENT;
LOG_WARN("failed to check escape length", K(escape_str), K(escape_str.length()));
LOG_USER_ERROR(OB_INVALID_ARGUMENT, "ESCAPE");
} else { }
} else if (OB_FAIL(get_pattern_prefix_len(cs_type,
escape_str,
pattern_str,
pattern_prefix_len))) {
LOG_WARN("failed to get pattern prefix len", K(ret), K(pattern_str), K(escape_str));
}
if (OB_SUCC(ret)) {
// For a pattern like 'aaa%' that ends with `%`, we will extract a precise range with some special handling:
// We need to fill the end key of the like range with the maximum character
// up to the target column's length to match the semantics of `%`.
// However, when the target column length is less than the effective prefix length of the pattern,
// the pattern gets truncated, resulting in an imprecise range and incorrect results.
// So, we need to ensure that the effective prefix of the pattern is not truncated
// to guarantee that the range is always precise.
range_str_len = col_len;
//convert character counts to len in bytes
col_len = static_cast<int32_t>(col_len * mbmaxlen);
min_str_len = col_len;
max_str_len = col_len;
range_str_len = static_cast<int32_t>(range_str_len * mbmaxlen);
min_str_len = range_str_len;
max_str_len = range_str_len;
if (OB_ISNULL(min_str_buf = allocator_.alloc(min_str_len))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_ERROR("alloc memory failed", K(min_str_len));
@ -8504,7 +8521,8 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
static_cast<char*>(min_str_buf),
&min_str_len,
static_cast<char*>(max_str_buf),
&max_str_len))) {
&max_str_len,
&prefix_len))) {
//set whole range
out_key_part.normal_keypart_->start_.set_min_value();
out_key_part.normal_keypart_->end_.set_max_value();
@ -8514,25 +8532,57 @@ int ObQueryRange::get_like_range(const ObObj &pattern,
out_key_part.normal_keypart_->always_true_ = true;
ret = OB_SUCCESS;
} else {
ObObj &start = out_key_part.normal_keypart_->start_;
ObObj &end = out_key_part.normal_keypart_->end_;
start.set_collation_type(out_key_part.pos_.column_type_.get_collation_type());
start.set_string(out_key_part.pos_.column_type_.get_type(),
static_cast<char*>(min_str_buf), static_cast<int32_t>(min_str_len));
end.set_collation_type(out_key_part.pos_.column_type_.get_collation_type());
end.set_string(out_key_part.pos_.column_type_.get_type(),
static_cast<char*>(max_str_buf), static_cast<int32_t>(max_str_len));
out_key_part.normal_keypart_->include_start_ = true;
out_key_part.normal_keypart_->include_end_ = true;
out_key_part.normal_keypart_->always_false_ = false;
out_key_part.normal_keypart_->always_true_ = false;
if (prefix_len >= col_len && ObCharset::strlen_char(cs_type, static_cast<char*>(min_str_buf), prefix_len) >= col_len) {
int32_t pattern_prefix_len = 0; // strlen_char of prefix
if (OB_FAIL(get_pattern_prefix_len(cs_type,
escape_str,
pattern_str,
pattern_prefix_len))) {
LOG_WARN("failed to get pattern prefix len", K(ret), K(pattern_str), K(escape_str));
} else {
range_str_len = max(col_len, pattern_prefix_len);
range_str_len = static_cast<int32_t>(range_str_len * mbmaxlen);
min_str_len = range_str_len;
max_str_len = range_str_len;
if (OB_ISNULL(min_str_buf = allocator_.alloc(min_str_len))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(min_str_len));
} else if (OB_ISNULL(max_str_buf = allocator_.alloc(max_str_len))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("alloc memory failed", K(max_str_len));
} else if (OB_FAIL(ObCharset::like_range(cs_type,
pattern_str,
*(escape_str.ptr()),
static_cast<char*>(min_str_buf),
&min_str_len,
static_cast<char*>(max_str_buf),
&max_str_len))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("calc like range failed", K(ret), K(pattern_str), K(escape_str), K(cs_type));
}
}
}
if (OB_SUCC(ret)) {
ObObj &start = out_key_part.normal_keypart_->start_;
ObObj &end = out_key_part.normal_keypart_->end_;
start.set_collation_type(out_key_part.pos_.column_type_.get_collation_type());
start.set_string(out_key_part.pos_.column_type_.get_type(),
static_cast<char*>(min_str_buf), static_cast<int32_t>(min_str_len));
end.set_collation_type(out_key_part.pos_.column_type_.get_collation_type());
end.set_string(out_key_part.pos_.column_type_.get_type(),
static_cast<char*>(max_str_buf), static_cast<int32_t>(max_str_len));
out_key_part.normal_keypart_->include_start_ = true;
out_key_part.normal_keypart_->include_end_ = true;
out_key_part.normal_keypart_->always_false_ = false;
out_key_part.normal_keypart_->always_true_ = false;
/// check if is precise
if (NULL != query_range_ctx_) {
query_range_ctx_->cur_expr_is_precise_ =
ObQueryRange::check_like_range_precise(pattern_str,
static_cast<char *>(max_str_buf),
max_str_len, *(escape_str.ptr()));
/// check if is precise
if (NULL != query_range_ctx_) {
query_range_ctx_->cur_expr_is_precise_ =
ObQueryRange::check_like_range_precise(pattern_str,
static_cast<char *>(max_str_buf),
max_str_len, *(escape_str.ptr()));
}
}
}
if (NULL != min_str_buf) {
@ -9611,7 +9661,8 @@ int ObQueryRange::is_precise_like_range(const ObObjParam &pattern, char escape,
if (pattern.is_string_type()) {
ObString pattern_str = pattern.get_string();
if (cs_type == CS_TYPE_INVALID || cs_type >= CS_TYPE_MAX) {
}else if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
} else if (ObCharset::is_cs_uca(cs_type)) {
} else if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
LOG_WARN("fail to get mbmaxlen", K(ret), K(cs_type), K(escape));
} else {
ObArenaAllocator allocator;
@ -9646,6 +9697,47 @@ int ObQueryRange::is_precise_like_range(const ObObjParam &pattern, char escape,
return ret;
}
// Returns through pattern_prefix_len the character count of the LIKE-pattern prefix
// that ObCharset::like_range() reports for pattern_str.
//
// @param cs_type             collation used to interpret the pattern
// @param escape_str          escape string; processing only happens when its length is 1
// @param pattern_str         LIKE pattern to inspect
// @param pattern_prefix_len  [out] set to 0 first; on success holds
//                            ObCharset::strlen_char() over the leading prefix bytes of the
//                            min range string filled in by ObCharset::like_range()
// @return OB_SUCCESS (also when the arguments are unusable and the computation is skipped),
//         the error of ObCharset::get_mbmaxlen_by_coll(), OB_ALLOCATE_MEMORY_FAILED when a
//         temporary buffer cannot be allocated, or OB_ERR_UNEXPECTED when
//         ObCharset::like_range() fails.
int ObQueryRange::get_pattern_prefix_len(const ObCollationType &cs_type,
                                         const ObString &escape_str,
                                         const ObString &pattern_str,
                                         int32_t &pattern_prefix_len)
{
  int ret = OB_SUCCESS;
  int64_t mbmaxlen = 1;
  pattern_prefix_len = 0;
  const bool strings_present = OB_NOT_NULL(pattern_str.ptr()) && OB_NOT_NULL(escape_str.ptr());
  const bool escape_is_single = (1 == escape_str.length());
  const bool collation_ok = (CS_TYPE_INVALID != cs_type) && (cs_type < CS_TYPE_MAX);
  if (!(strings_present && escape_is_single && collation_ok)) {
    // Arguments cannot be processed: output stays 0 and the call still succeeds.
  } else if (OB_FAIL(ObCharset::get_mbmaxlen_by_coll(cs_type, mbmaxlen))) {
    LOG_WARN("fail to get mbmaxlen", K(ret), K(cs_type));
  } else {
    // Temporary range buffers are released when this allocator leaves scope.
    ObArenaAllocator allocator;
    // Buffer size is the pattern length multiplied by mbmaxlen.
    size_t pattern_len = pattern_str.length();
    pattern_len = static_cast<int32_t>(pattern_len * mbmaxlen);
    size_t prefix_byte_len = pattern_len;
    size_t range_min_len = pattern_len;
    size_t range_max_len = pattern_len;
    char *range_min = NULL;
    char *range_max = NULL;
    // NOTE(review): a non-NULL but zero-length pattern asks for zero-byte buffers;
    // confirm what allocator.alloc(0) returns in that case.
    range_min = static_cast<char *>(allocator.alloc(range_min_len));
    if (OB_ISNULL(range_min)) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("no enough memory", K(ret), K(pattern_len));
    } else if (OB_ISNULL(range_max = static_cast<char *>(allocator.alloc(range_max_len)))) {
      ret = OB_ALLOCATE_MEMORY_FAILED;
      LOG_WARN("no enough memory", K(ret), K(pattern_len));
    } else if (OB_FAIL(ObCharset::like_range(cs_type,
                                             pattern_str,
                                             *(escape_str.ptr()),
                                             range_min,
                                             &range_min_len,
                                             range_max,
                                             &range_max_len,
                                             &prefix_byte_len))) {
      // Every like_range failure is surfaced as OB_ERR_UNEXPECTED.
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("failed to retrive like range", K(ret));
    } else {
      // Translate the prefix from bytes into characters.
      pattern_prefix_len = ObCharset::strlen_char(cs_type, range_min, prefix_byte_len);
    }
  }
  return ret;
}
int ObQueryRange::get_calculable_expr_val(const ObRawExpr *expr,
ObObj &val,
bool &is_valid,

View File

@ -554,6 +554,11 @@ public:
int64_t &range_prefix_count,
bool &contain_always_false) const;
virtual bool is_fast_nlj_range() const { return false; }
// get prefix string (without wildcards) length of like pattern
static int get_pattern_prefix_len(const ObCollationType &cs_type,
const ObString &escape_str,
const ObString &pattern_str,
int32_t &pattern_prefix_len);
private:
int init_query_range_ctx(common::ObIAllocator &allocator,