fix find in set bug and improve performance
This commit is contained in:
19
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
19
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
@ -578,6 +578,25 @@ uint32_t ObCharset::instr(ObCollationType collation_type,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int64_t ObCharset::instrb(ObCollationType collation_type,
|
||||||
|
const char *str1,
|
||||||
|
int64_t str1_len,
|
||||||
|
const char *str2,
|
||||||
|
int64_t str2_len)
|
||||||
|
{
|
||||||
|
int64_t result = -1;
|
||||||
|
if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
|
||||||
|
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
|
||||||
|
ob_match_t m_match_t[2];
|
||||||
|
uint nmatch = 1;
|
||||||
|
uint m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
|
||||||
|
if (0 != m_ret) {
|
||||||
|
result = m_match_t[0].end - m_match_t[0].beg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t ObCharset::locate(ObCollationType collation_type,
|
uint32_t ObCharset::locate(ObCollationType collation_type,
|
||||||
const char *str1,
|
const char *str1,
|
||||||
int64_t str1_len,
|
int64_t str1_len,
|
||||||
|
|||||||
8
deps/oblib/src/lib/charset/ob_charset.h
vendored
8
deps/oblib/src/lib/charset/ob_charset.h
vendored
@ -213,11 +213,19 @@ public:
|
|||||||
static size_t scan_str(const char *str,
|
static size_t scan_str(const char *str,
|
||||||
const char *end,
|
const char *end,
|
||||||
int sq);
|
int sq);
|
||||||
|
// return position in characters
|
||||||
static uint32_t instr(ObCollationType collation_type,
|
static uint32_t instr(ObCollationType collation_type,
|
||||||
const char *str1,
|
const char *str1,
|
||||||
int64_t str1_len,
|
int64_t str1_len,
|
||||||
const char *str2,
|
const char *str2,
|
||||||
int64_t str2_len);
|
int64_t str2_len);
|
||||||
|
|
||||||
|
// return position in bytes
|
||||||
|
static int64_t instrb(ObCollationType collation_type,
|
||||||
|
const char *str1,
|
||||||
|
int64_t str1_len,
|
||||||
|
const char *str2,
|
||||||
|
int64_t str2_len);
|
||||||
static uint32_t locate(ObCollationType collation_type,
|
static uint32_t locate(ObCollationType collation_type,
|
||||||
const char *str1,
|
const char *str1,
|
||||||
int64_t str1_len,
|
int64_t str1_len,
|
||||||
|
|||||||
@ -36,8 +36,6 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
|
|||||||
ObExprTypeCtx &type_ctx) const
|
ObExprTypeCtx &type_ctx) const
|
||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
type1.set_calc_type(ObVarcharType);
|
|
||||||
type2.set_calc_type(ObVarcharType);
|
|
||||||
if (OB_LIKELY(NOT_ROW_DIMENSION == row_dimension_)) {
|
if (OB_LIKELY(NOT_ROW_DIMENSION == row_dimension_)) {
|
||||||
type.set_uint64();
|
type.set_uint64();
|
||||||
type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].precision_);
|
type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].precision_);
|
||||||
@ -45,16 +43,16 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
|
|||||||
type.set_calc_type(ObVarcharType);
|
type.set_calc_type(ObVarcharType);
|
||||||
ObExprOperator::calc_result_flag2(type, type1, type2);
|
ObExprOperator::calc_result_flag2(type, type1, type2);
|
||||||
ObObjMeta coll_types[2];
|
ObObjMeta coll_types[2];
|
||||||
coll_types[0].set_collation(type1);
|
coll_types[0] = type1.get_obj_meta();
|
||||||
coll_types[1].set_collation(type2);
|
coll_types[1] = type2.get_obj_meta();
|
||||||
if (OB_FAIL(aggregate_charsets_for_comparison(type.get_calc_meta(),
|
if (OB_FAIL(aggregate_charsets_for_comparison(type.get_calc_meta(),
|
||||||
coll_types, 2, type_ctx.get_coll_type()))) {
|
coll_types, 2, type_ctx.get_coll_type()))) {
|
||||||
LOG_WARN("failed to aggregate_charsets_for_comparison", K(ret));
|
LOG_WARN("failed to aggregate_charsets_for_comparison", K(ret));
|
||||||
} else {
|
} else {
|
||||||
type1.set_calc_collation_type(type.get_collation_type());
|
type1.set_calc_type(ObVarcharType);
|
||||||
type1.set_calc_collation_level(type.get_collation_level());
|
type1.set_calc_collation_type(type.get_calc_collation_type());
|
||||||
type2.set_calc_collation_type(type.get_collation_type());
|
type2.set_calc_type(ObVarcharType);
|
||||||
type2.set_calc_collation_level(type.get_collation_level());
|
type2.set_calc_collation_type(type.get_calc_collation_type());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ret = OB_ERR_INVALID_TYPE_FOR_OP;
|
ret = OB_ERR_INVALID_TYPE_FOR_OP;
|
||||||
@ -62,8 +60,6 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int search(const ObString &str, const ObString &str_list, const ObCollationType &cs_type,
|
|
||||||
uint64_t &res_pos);
|
|
||||||
int search(const ObString &str, const ObString &str_list, const ObCollationType &cs_type,
|
int search(const ObString &str, const ObString &str_list, const ObCollationType &cs_type,
|
||||||
uint64_t &res_pos)
|
uint64_t &res_pos)
|
||||||
{
|
{
|
||||||
@ -74,38 +70,32 @@ int search(const ObString &str, const ObString &str_list, const ObCollationType
|
|||||||
if (ObCharset::locate(cs_type, first_ptr, first_length, ",", 1, 1) != 0) {
|
if (ObCharset::locate(cs_type, first_ptr, first_length, ",", 1, 1) != 0) {
|
||||||
res_pos = 0;
|
res_pos = 0;
|
||||||
} else {
|
} else {
|
||||||
bool is_found = false;
|
int64_t str_list_pos = 0;
|
||||||
res_pos = 1;
|
int64_t comma_pos = 0;
|
||||||
uint32_t pre_separtor_pos = 0;
|
int64_t elem_idx = 1;
|
||||||
uint32_t cur_separtor_pos = 0;
|
|
||||||
uint32_t pre_sep_pos_byte = 0;
|
ObString comma_str = ObCharsetUtils::get_const_str(cs_type, ',');
|
||||||
uint32_t cur_sep_pos_byte = 0;
|
|
||||||
const char *second_ptr = str_list.ptr();
|
while (str_list_pos < str_list.length()) {
|
||||||
int64_t second_length = str_list.length();
|
int64_t comma_pos = ObCharset::instrb(cs_type, str_list.ptr() + str_list_pos, str_list.length() - str_list_pos,
|
||||||
while ((!is_found) &&
|
comma_str.ptr(), comma_str.length());
|
||||||
(cur_separtor_pos = ObCharset::locate(cs_type, second_ptr, second_length,
|
const char* elem_ptr = str_list.ptr() + str_list_pos;
|
||||||
",", 1, cur_separtor_pos + 1)) != 0) {
|
int64_t elem_length = (comma_pos >=0) ? comma_pos : str_list.length() - str_list_pos;
|
||||||
cur_sep_pos_byte = ObCharset::charpos(cs_type, second_ptr, second_length, cur_separtor_pos);
|
if (0 != ObCharset::strcmp(cs_type, elem_ptr, elem_length, str.ptr(), str.length())) {
|
||||||
if (ObCharset::strcmp(cs_type, first_ptr, first_length, second_ptr + pre_sep_pos_byte,
|
//not match
|
||||||
cur_sep_pos_byte - pre_sep_pos_byte - 1) == 0) {
|
str_list_pos += elem_length + ((comma_pos >= 0) ? comma_str.length() : 0);
|
||||||
is_found = true;
|
elem_idx++;
|
||||||
} else {
|
} else {
|
||||||
pre_separtor_pos = cur_separtor_pos;
|
break;
|
||||||
pre_sep_pos_byte = cur_sep_pos_byte;
|
|
||||||
++res_pos;
|
|
||||||
}
|
}
|
||||||
LOG_DEBUG("find_in_set debug", K(ret), K(pre_sep_pos_byte), K(cur_separtor_pos),
|
|
||||||
K(pre_sep_pos_byte), K(cur_separtor_pos), K(is_found), K(res_pos));
|
|
||||||
}
|
}
|
||||||
if (!is_found) {
|
|
||||||
// match the last substring extracted from strlist
|
if (str_list_pos < str_list.length()) {
|
||||||
if (ObCharset::strcmp(cs_type, first_ptr, first_length, second_ptr + pre_sep_pos_byte,
|
res_pos = elem_idx;
|
||||||
second_length - pre_sep_pos_byte) == 0) {
|
|
||||||
// do nothing
|
|
||||||
} else {
|
} else {
|
||||||
res_pos = 0;
|
res_pos = 0;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user