fix find in set bug and improve performance

This commit is contained in:
wjhh2008
2022-12-30 05:41:52 +00:00
committed by ob-robot
parent abff448d3e
commit d1453a1695
3 changed files with 56 additions and 39 deletions

View File

@ -578,6 +578,25 @@ uint32_t ObCharset::instr(ObCollationType collation_type,
return result; return result;
} }
// Searches str1 for str2 under the given collation and reports the match
// position in bytes.
//
// The collation handler's instr() is asked to fill a single match record; the
// returned value is that record's (end - beg) span.
// Returns -1 when is_argument_valid() rejects the inputs or when the handler
// reports no match.
int64_t ObCharset::instrb(ObCollationType collation_type,
                          const char *str1,
                          int64_t str1_len,
                          const char *str2,
                          int64_t str2_len)
{
  int64_t byte_pos = -1;
  if (!is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
    // invalid arguments: keep the "not found" sentinel
  } else {
    ObCharsetInfo *charset = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
    ob_match_t matches[2];
    uint match_slots = 1;
    uint found = charset->coll->instr(charset, str1, str1_len, str2, str2_len, matches, match_slots);
    if (found != 0) {
      byte_pos = matches[0].end - matches[0].beg;
    }
  }
  return byte_pos;
}
uint32_t ObCharset::locate(ObCollationType collation_type, uint32_t ObCharset::locate(ObCollationType collation_type,
const char *str1, const char *str1,
int64_t str1_len, int64_t str1_len,

View File

@ -213,11 +213,19 @@ public:
static size_t scan_str(const char *str, static size_t scan_str(const char *str,
const char *end, const char *end,
int sq); int sq);
// return position in characters
static uint32_t instr(ObCollationType collation_type, static uint32_t instr(ObCollationType collation_type,
const char *str1, const char *str1,
int64_t str1_len, int64_t str1_len,
const char *str2, const char *str2,
int64_t str2_len); int64_t str2_len);
// Byte-based variant of instr(): searches str1 for str2 under collation_type
// and returns the position in bytes instead of characters.
// Returns -1 when the arguments are rejected as invalid or str2 is not found.
static int64_t instrb(ObCollationType collation_type,
const char *str1,
int64_t str1_len,
const char *str2,
int64_t str2_len);
static uint32_t locate(ObCollationType collation_type, static uint32_t locate(ObCollationType collation_type,
const char *str1, const char *str1,
int64_t str1_len, int64_t str1_len,

View File

@ -36,8 +36,6 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
ObExprTypeCtx &type_ctx) const ObExprTypeCtx &type_ctx) const
{ {
int ret = OB_SUCCESS; int ret = OB_SUCCESS;
type1.set_calc_type(ObVarcharType);
type2.set_calc_type(ObVarcharType);
if (OB_LIKELY(NOT_ROW_DIMENSION == row_dimension_)) { if (OB_LIKELY(NOT_ROW_DIMENSION == row_dimension_)) {
type.set_uint64(); type.set_uint64();
type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].precision_); type.set_precision(ObAccuracy::DDL_DEFAULT_ACCURACY[ObUInt64Type].precision_);
@ -45,16 +43,16 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
type.set_calc_type(ObVarcharType); type.set_calc_type(ObVarcharType);
ObExprOperator::calc_result_flag2(type, type1, type2); ObExprOperator::calc_result_flag2(type, type1, type2);
ObObjMeta coll_types[2]; ObObjMeta coll_types[2];
coll_types[0].set_collation(type1); coll_types[0] = type1.get_obj_meta();
coll_types[1].set_collation(type2); coll_types[1] = type2.get_obj_meta();
if (OB_FAIL(aggregate_charsets_for_comparison(type.get_calc_meta(), if (OB_FAIL(aggregate_charsets_for_comparison(type.get_calc_meta(),
coll_types, 2, type_ctx.get_coll_type()))) { coll_types, 2, type_ctx.get_coll_type()))) {
LOG_WARN("failed to aggregate_charsets_for_comparison", K(ret)); LOG_WARN("failed to aggregate_charsets_for_comparison", K(ret));
} else { } else {
type1.set_calc_collation_type(type.get_collation_type()); type1.set_calc_type(ObVarcharType);
type1.set_calc_collation_level(type.get_collation_level()); type1.set_calc_collation_type(type.get_calc_collation_type());
type2.set_calc_collation_type(type.get_collation_type()); type2.set_calc_type(ObVarcharType);
type2.set_calc_collation_level(type.get_collation_level()); type2.set_calc_collation_type(type.get_calc_collation_type());
} }
} else { } else {
ret = OB_ERR_INVALID_TYPE_FOR_OP; ret = OB_ERR_INVALID_TYPE_FOR_OP;
@ -62,8 +60,6 @@ int ObExprFindInSet::calc_result_type2(ObExprResType &type,
return ret; return ret;
} }
int search(const ObString &str, const ObString &str_list, const ObCollationType &cs_type,
uint64_t &res_pos);
int search(const ObString &str, const ObString &str_list, const ObCollationType &cs_type, int search(const ObString &str, const ObString &str_list, const ObCollationType &cs_type,
uint64_t &res_pos) uint64_t &res_pos)
{ {
@ -74,38 +70,32 @@ int search(const ObString &str, const ObString &str_list, const ObCollationType
if (ObCharset::locate(cs_type, first_ptr, first_length, ",", 1, 1) != 0) { if (ObCharset::locate(cs_type, first_ptr, first_length, ",", 1, 1) != 0) {
res_pos = 0; res_pos = 0;
} else { } else {
bool is_found = false; int64_t str_list_pos = 0;
res_pos = 1; int64_t comma_pos = 0;
uint32_t pre_separtor_pos = 0; int64_t elem_idx = 1;
uint32_t cur_separtor_pos = 0;
uint32_t pre_sep_pos_byte = 0; ObString comma_str = ObCharsetUtils::get_const_str(cs_type, ',');
uint32_t cur_sep_pos_byte = 0;
const char *second_ptr = str_list.ptr(); while (str_list_pos < str_list.length()) {
int64_t second_length = str_list.length(); int64_t comma_pos = ObCharset::instrb(cs_type, str_list.ptr() + str_list_pos, str_list.length() - str_list_pos,
while ((!is_found) && comma_str.ptr(), comma_str.length());
(cur_separtor_pos = ObCharset::locate(cs_type, second_ptr, second_length, const char* elem_ptr = str_list.ptr() + str_list_pos;
",", 1, cur_separtor_pos + 1)) != 0) { int64_t elem_length = (comma_pos >=0) ? comma_pos : str_list.length() - str_list_pos;
cur_sep_pos_byte = ObCharset::charpos(cs_type, second_ptr, second_length, cur_separtor_pos); if (0 != ObCharset::strcmp(cs_type, elem_ptr, elem_length, str.ptr(), str.length())) {
if (ObCharset::strcmp(cs_type, first_ptr, first_length, second_ptr + pre_sep_pos_byte, //not match
cur_sep_pos_byte - pre_sep_pos_byte - 1) == 0) { str_list_pos += elem_length + ((comma_pos >= 0) ? comma_str.length() : 0);
is_found = true; elem_idx++;
} else { } else {
pre_separtor_pos = cur_separtor_pos; break;
pre_sep_pos_byte = cur_sep_pos_byte;
++res_pos;
} }
LOG_DEBUG("find_in_set debug", K(ret), K(pre_sep_pos_byte), K(cur_separtor_pos),
K(pre_sep_pos_byte), K(cur_separtor_pos), K(is_found), K(res_pos));
} }
if (!is_found) {
// match the last substring extracted from strlist if (str_list_pos < str_list.length()) {
if (ObCharset::strcmp(cs_type, first_ptr, first_length, second_ptr + pre_sep_pos_byte, res_pos = elem_idx;
second_length - pre_sep_pos_byte) == 0) {
// do nothing
} else { } else {
res_pos = 0; res_pos = 0;
} }
}
} }
return ret; return ret;
} }