fix find in set bug and improve performance

This commit is contained in:
wjhh2008
2022-12-30 05:41:52 +00:00
committed by ob-robot
parent abff448d3e
commit d1453a1695
3 changed files with 56 additions and 39 deletions

View File

@ -578,6 +578,25 @@ uint32_t ObCharset::instr(ObCollationType collation_type,
return result;
}
// Byte-based counterpart of instr(): searches for str2 inside str1 through the
// collation handler of collation_type.
// Returns the span (end - beg) of the first match slot filled in by the
// handler, or -1 when is_argument_valid() rejects the arguments or the handler
// reports no match.
int64_t ObCharset::instrb(ObCollationType collation_type,
                          const char *str1,
                          int64_t str1_len,
                          const char *str2,
                          int64_t str2_len)
{
  // -1 doubles as "invalid arguments" and "not found".
  int64_t byte_pos = -1;
  const bool args_ok = is_argument_valid(collation_type, str1, str1_len, str2, str2_len);
  if (args_ok) {
    ObCharsetInfo *charset = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
    ob_match_t match_slots[2];
    // Only the first slot is requested from the handler.
    uint slot_count = 1;
    uint hit = cs_instr_guard_unused_placeholder_never_emitted_0 ? 0 : 0;
    hit = charset->coll->instr(charset, str1, str1_len, str2, str2_len, match_slots, slot_count);
    if (hit != 0) {
      byte_pos = match_slots[0].end - match_slots[0].beg;
    }
  }
  return byte_pos;
}
uint32_t ObCharset::locate(ObCollationType collation_type,
const char *str1,
int64_t str1_len,

View File

@ -213,11 +213,19 @@ public:
static size_t scan_str(const char *str,
const char *end,
int sq);
// Returns the position of str2 within str1, counted in characters (not bytes)
// under the given collation. Use instrb() when a byte position is needed.
// NOTE(review): the value returned when str2 is not found is not visible from
// this declaration -- confirm against the definition before relying on it.
static uint32_t instr(ObCollationType collation_type,
const char *str1,
int64_t str1_len,
const char *str2,
int64_t str2_len);
// Byte-based variant of instr(): returns the position of str2 within str1
// measured in bytes rather than characters, using the collation's instr
// handler.
// Returns -1 when the arguments fail validation or str2 is not found, so
// callers must test for a negative result before using it as an offset.
static int64_t instrb(ObCollationType collation_type,
const char *str1,
int64_t str1_len,
const char *str2,
int64_t str2_len);
static uint32_t locate(ObCollationType collation_type,
const char *str1,
int64_t str1_len,

View File

@ -36,8 +36,6 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
ObExprTypeCtx &type_ctx) const
{
int ret = OB_SUCCESS;
type1.set_calc_type(ObVarcharType);
type2.set_calc_type(ObVarcharType);
if (OB_LIKELY(NOT_ROW_DIMENSION == row_dimension_)) {
type.set_uint64();
type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].precision_);
@ -45,16 +43,16 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
type.set_calc_type(ObVarcharType);
ObExprOperator::calc_result_flag2(type, type1, type2);
ObObjMeta coll_types[2];
coll_types[0].set_collation(type1);
coll_types[1].set_collation(type2);
coll_types[0] = type1.get_obj_meta();
coll_types[1] = type2.get_obj_meta();
if (OB_FAIL(aggregate_charsets_for_comparison(type.get_calc_meta(),
coll_types, 2, type_ctx.get_coll_type()))) {
LOG_WARN("failed to aggregate_charsets_for_comparison", K(ret));
} else {
type1.set_calc_collation_type(type.get_collation_type());
type1.set_calc_collation_level(type.get_collation_level());
type2.set_calc_collation_type(type.get_collation_type());
type2.set_calc_collation_level(type.get_collation_level());
type1.set_calc_type(ObVarcharType);
type1.set_calc_collation_type(type.get_calc_collation_type());
type2.set_calc_type(ObVarcharType);
type2.set_calc_collation_type(type.get_calc_collation_type());
}
} else {
ret = OB_ERR_INVALID_TYPE_FOR_OP;
@ -62,8 +60,6 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
return ret;
}
int search(const ObString &str, const ObString &str_list, const ObCollationType &cs_type,
uint64_t &res_pos);
int search(const ObString &str, const ObString &str_list, const ObCollationType &cs_type,
uint64_t &res_pos)
{
@ -74,38 +70,32 @@ int search(const ObString &str, const ObString &str_list, const ObCollationType
if (ObCharset::locate(cs_type, first_ptr, first_length, ",", 1, 1) != 0) {
res_pos = 0;
} else {
bool is_found = false;
res_pos = 1;
uint32_t pre_separtor_pos = 0;
uint32_t cur_separtor_pos = 0;
uint32_t pre_sep_pos_byte = 0;
uint32_t cur_sep_pos_byte = 0;
const char *second_ptr = str_list.ptr();
int64_t second_length = str_list.length();
while ((!is_found) &&
(cur_separtor_pos = ObCharset::locate(cs_type, second_ptr, second_length,
",", 1, cur_separtor_pos + 1)) != 0) {
cur_sep_pos_byte = ObCharset::charpos(cs_type, second_ptr, second_length, cur_separtor_pos);
if (ObCharset::strcmp(cs_type, first_ptr, first_length, second_ptr + pre_sep_pos_byte,
cur_sep_pos_byte - pre_sep_pos_byte - 1) == 0) {
is_found = true;
int64_t str_list_pos = 0;
int64_t comma_pos = 0;
int64_t elem_idx = 1;
ObString comma_str = ObCharsetUtils::get_const_str(cs_type, ',');
while (str_list_pos < str_list.length()) {
int64_t comma_pos = ObCharset::instrb(cs_type, str_list.ptr() + str_list_pos, str_list.length() - str_list_pos,
comma_str.ptr(), comma_str.length());
const char* elem_ptr = str_list.ptr() + str_list_pos;
int64_t elem_length = (comma_pos >=0) ? comma_pos : str_list.length() - str_list_pos;
if (0 != ObCharset::strcmp(cs_type, elem_ptr, elem_length, str.ptr(), str.length())) {
//not match
str_list_pos += elem_length + ((comma_pos >= 0) ? comma_str.length() : 0);
elem_idx++;
} else {
pre_separtor_pos = cur_separtor_pos;
pre_sep_pos_byte = cur_sep_pos_byte;
++res_pos;
}
LOG_DEBUG("find_in_set debug", K(ret), K(pre_sep_pos_byte), K(cur_separtor_pos),
K(pre_sep_pos_byte), K(cur_separtor_pos), K(is_found), K(res_pos));
}
if (!is_found) {
// match the last substring extracted from strlist
if (ObCharset::strcmp(cs_type, first_ptr, first_length, second_ptr + pre_sep_pos_byte,
second_length - pre_sep_pos_byte) == 0) {
// do nothing
} else {
res_pos = 0;
break;
}
}
if (str_list_pos < str_list.length()) {
res_pos = elem_idx;
} else {
res_pos = 0;
}
}
return ret;
}