From 1866f396dbb1f1b4d153e0b4e539149cd5aab668 Mon Sep 17 00:00:00 2001
From: wjhh2008
Date: Thu, 10 Aug 2023 04:18:37 +0000
Subject: [PATCH] opt expr find_in_set

Speed up FIND_IN_SET when the string-list argument is a static constant
(expr.args_[1]->is_static_const_).

On the first evaluation the list is split on the comma string returned
by ObCharsetUtils::get_const_str(cs_type, ','), a sort key is generated
for every element, and the keys are stored in a hash map that maps
sort key -> 1-based position (an already present key is not overwritten,
so the first occurrence wins).  The map is held by an
ObExprFindIntCachedValue obtained through the exec context's expr-op-ctx
API, so later evaluations only build the sort key of the searched string
and do one hash lookup.  Lists that are not static constants still go
through search().

---
Reviewer notes (this section is not part of the commit):

* The copy of this patch under review had its line breaks collapsed and
  every "<...>" span removed.  The line structure was rebuilt so that
  each hunk matches its header: 2 + 141 + 10 added and 3 removed lines
  in ob_expr_find_in_set.cpp, 4 added lines in ob_expr_find_in_set.h,
  3 in ob_expr_operator.cpp and 1 + 11 in ob_expr_operator.h, which adds
  up to the diffstat below.
* Template arguments on added lines were restored from how the values
  are used: ObArrayWrap<char> (its data is handed to ObCharset::sortkey
  and ObString::assign_ptr), ObArray<ObString> (receives the sort keys),
  static_cast<ObExprFindIntCachedValue *> (assigned to cached_value) and
  ObHashMap<common::ObString, uint64_t> (looked up by sort key into a
  uint64_t).  static_cast<uint64_t>(expr.expr_ctx_id_) is a best guess
  and must be checked against the create_expr_op_ctx() signature.
* On context lines only the GET_EXPR_CTX macro was repaired (the cast
  has to name its ClassType parameter).  Two context lines are still
  missing their template argument because nothing in this patch
  determines it: "static_cast(cs_type)" in calc_find_in_set_expr and
  "common::ObFixedBitSet elem_flags_;" in ob_expr_operator.h.  Fix them
  from the tree before applying.
* Indentation was re-created; apply with
  "git apply --ignore-whitespace" if the context does not match.
* The author's e-mail address is missing from the From: line.
* Review remarks on the submitted code (left unchanged here):
  - search_with_const_set() looks for the separator with the literal
    "," while build_hashmap() uses get_const_str(cs_type, ',');
    confirm both agree for every collation that can reach this code.
  - gen_sortkey() dereferences the result of ObCharset::get_charset()
    without a NULL check.
  - build_hashmap() indexes sortkeys with an int while count() is
    compared against it; int64_t would match elem_idx.
  - ob_expr_operator.h adds lib/hash/ob_hashset.h although the new
    class uses ObHashMap.

 src/sql/engine/expr/ob_expr_find_in_set.cpp | 156 +++++++++++++++++++-
 src/sql/engine/expr/ob_expr_find_in_set.h   |   4 +
 src/sql/engine/expr/ob_expr_operator.cpp    |   3 +
 src/sql/engine/expr/ob_expr_operator.h      |  12 ++
 4 files changed, 172 insertions(+), 3 deletions(-)

diff --git a/src/sql/engine/expr/ob_expr_find_in_set.cpp b/src/sql/engine/expr/ob_expr_find_in_set.cpp
index 4ac048b6e0..f680b55bcf 100644
--- a/src/sql/engine/expr/ob_expr_find_in_set.cpp
+++ b/src/sql/engine/expr/ob_expr_find_in_set.cpp
@@ -13,6 +13,8 @@
 #define USING_LOG_PREFIX SQL_ENG
 #include "sql/engine/expr/ob_expr_find_in_set.h"
 #include "lib/charset/ob_charset.h"
+#include "sql/ob_sql_utils.h"
+#include "sql/engine/ob_exec_context.h"
 
 using namespace oceanbase::common;
 
@@ -104,6 +106,147 @@ int search(const ObString &str, const ObString &str_list, const ObCollationType
   return ret;
 }
 
+int add_to_hashmap(ObExprFindIntCachedValue &cached_value, ObString &sort_key, int64_t elem_idx)
+{
+  int ret = OB_SUCCESS;
+  uint64_t value = 0;
+  if (OB_SUCC(cached_value.get_hashmap().get_refactored(sort_key, value))) {
+    //do nothing
+  } else if (OB_LIKELY(OB_HASH_NOT_EXIST == ret)) {
+    ret = OB_SUCCESS;
+    OZ (cached_value.get_hashmap().set_refactored(sort_key, elem_idx));
+  } else {
+    LOG_WARN("unexpected error", K(ret));
+  }
+  return ret;
+}
+
+int gen_sortkey(ObCollationType cs_type, ObIAllocator &allocator, ObString &elem, ObString &sort_key) {
+  int ret = OB_SUCCESS;
+  if (elem.empty()) {
+    sort_key = ObString();
+  } else if (ObCharset::is_bin_sort(cs_type)) {
+    sort_key = elem;
+  } else {
+    bool is_valid_character = false;
+    const ObCharsetInfo *cs = ObCharset::get_charset(cs_type);
+    size_t buf_len = cs->coll->strnxfrmlen(cs, elem.length()) * cs->mbmaxlen;
+    ObArrayWrap<char> buffer;
+
+    OZ (buffer.allocate_array(allocator, buf_len));
+    if (OB_SUCC(ret)) {
+      size_t sort_key_len = ObCharset::sortkey(cs_type, elem.ptr(), elem.length(),
+                                               buffer.get_data(), buf_len, is_valid_character);
+      if (OB_UNLIKELY(!is_valid_character)) {
+        ret = OB_ERR_INCORRECT_STRING_VALUE;
+      } else {
+        sort_key.assign_ptr(buffer.get_data(), sort_key_len);
+      }
+    }
+  }
+  return ret;
+}
+
+int build_hashmap(ObEvalCtx &ctx,
+                  ObExprFindIntCachedValue &cached_value,
+                  const ObString &str_list,
+                  const ObCollationType &cs_type) {
+  int ret = OB_SUCCESS;
+  int64_t str_list_pos = 0;
+  int64_t comma_pos = 0;
+  ObString comma_str = ObCharsetUtils::get_const_str(cs_type, ',');
+  ObMemAttr mem_attr(MTL_ID(), "HashMap");
+  ObArray<ObString> sortkeys;
+
+  OZ (sortkeys.reserve(16));
+
+  while (OB_SUCC(ret) && str_list_pos < str_list.length()) {
+    comma_pos = ObCharset::instrb(cs_type,
+                                  str_list.ptr() + str_list_pos,
+                                  str_list.length() - str_list_pos,
+                                  comma_str.ptr(),
+                                  comma_str.length());
+    const char* elem_ptr = str_list.ptr() + str_list_pos;
+    int64_t elem_length = (comma_pos >= 0) ? comma_pos : (str_list.length() - str_list_pos);
+    ObString sort_key;
+    ObString elem(elem_length, elem_ptr);
+    OZ (gen_sortkey(cs_type, ctx.exec_ctx_.get_allocator(), elem, sort_key));
+    OZ (sortkeys.push_back(sort_key));
+    if (OB_SUCC(ret)) {
+      str_list_pos += elem_length + ((comma_pos >= 0) ? comma_str.length() : 0);
+    }
+  }
+
+  if (OB_SUCC(ret) && comma_pos > 0) {
+    OZ (sortkeys.push_back(ObString()));
+  }
+
+  if (OB_UNLIKELY(OB_ERR_INCORRECT_STRING_VALUE == ret)) {
+    ret = OB_SUCCESS;
+    ObString temp_str;
+    bool is_null = false;
+    ObSQLUtils::check_well_formed_str(str_list, cs_type, temp_str, is_null, true); //generate warning
+  }
+
+  OZ (cached_value.get_hashmap().create(MAX(16, sortkeys.count() * 2), mem_attr, mem_attr));
+  for (int i = 0; OB_SUCC(ret) && i < sortkeys.count(); i++) {
+    OZ (add_to_hashmap(cached_value, sortkeys.at(i), i + 1));
+  }
+
+
+  return ret;
+}
+
+int search_with_const_set(const ObExpr &expr,
+                          ObEvalCtx &ctx,
+                          const ObString &str,
+                          const ObString &str_list,
+                          const ObCollationType &cs_type,
+                          uint64_t &res_pos)
+{
+  int ret = OB_SUCCESS;
+  const char* first_ptr = str.ptr();
+  int64_t first_length = str.length();
+  // if first input string contains ',', return 0
+  if (ObCharset::locate(cs_type, first_ptr, first_length, ",", 1, 1) != 0) {
+    res_pos = 0;
+  } else {
+    auto rt_ctx_id = static_cast<uint64_t>(expr.expr_ctx_id_);
+    ObExprFindIntCachedValue *cached_value = NULL;
+    if (NULL == (cached_value = static_cast<ObExprFindIntCachedValue *>
+                 (ctx.exec_ctx_.get_expr_op_ctx(rt_ctx_id)))) {
+      if (OB_FAIL(ctx.exec_ctx_.create_expr_op_ctx(rt_ctx_id, cached_value))) {
+        LOG_WARN("failed to create operator ctx", K(ret));
+      } else {
+        OZ (build_hashmap(ctx, *cached_value, str_list, cs_type));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      ObEvalCtx::TempAllocGuard tmp_alloc_g(ctx);
+      common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator();
+      ObString sort_key;
+      ObString input = str;
+      if (OB_FAIL(gen_sortkey(cs_type, temp_allocator, input, sort_key))) {
+        res_pos = 0;
+        if (OB_ERR_INCORRECT_STRING_VALUE == ret) {
+          ret = OB_SUCCESS;
+        } else {
+          LOG_WARN("fail to get sort key", K(ret));
+        }
+      } else if (OB_FAIL(cached_value->get_hashmap().get_refactored(sort_key, res_pos))) {
+        if (OB_HASH_NOT_EXIST == ret) {
+          ret = OB_SUCCESS;
+          res_pos = 0;
+        } else {
+          LOG_WARN("fail to get from hash map", K(ret));
+        }
+      }
+    }
+  }
+
+  return ret;
+}
+
 int ObExprFindInSet::calc_find_in_set_expr(const ObExpr &expr, ObEvalCtx &ctx,
                                            ObDatum &res_datum)
 {
@@ -125,10 +268,17 @@ int ObExprFindInSet::calc_find_in_set_expr(const ObExpr &expr, ObEvalCtx &ctx,
         !ObCharset::is_valid_collation(static_cast(cs_type)))) {
       ret = OB_ERR_UNEXPECTED;
       LOG_WARN("invalid cs_type", K(ret), K(cs_type));
-    } else if (OB_FAIL(search(str->get_string(), strlist->get_string(), cs_type, res_pos))) {
-      LOG_WARN("search str in str list failed", K(ret));
     } else {
-      res_datum.set_uint(res_pos);
+      if (expr.args_[1]->is_static_const_) {
+        ret = search_with_const_set(expr, ctx, str->get_string(), strlist->get_string(), cs_type, res_pos);
+      } else {
+        ret = search(str->get_string(), strlist->get_string(), cs_type, res_pos);
+      }
+      if (OB_FAIL(ret)) {
+        LOG_WARN("search str in str list failed", K(ret), K(expr.args_[1]->is_static_const_));
+      } else {
+        res_datum.set_uint(res_pos);
+      }
     }
   }
   return ret;
diff --git a/src/sql/engine/expr/ob_expr_find_in_set.h b/src/sql/engine/expr/ob_expr_find_in_set.h
index fc45e7af17..1e02ef4874 100644
--- a/src/sql/engine/expr/ob_expr_find_in_set.h
+++ b/src/sql/engine/expr/ob_expr_find_in_set.h
@@ -30,6 +30,10 @@ public:
                                 common::ObExprTypeCtx &type_ctx) const;
   virtual int cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
                       ObExpr &rt_expr) const;
+  virtual bool need_rt_ctx() const override
+  {
+    return true;
+  }
   static int calc_find_in_set_expr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &res_datum);
 private:
   DISALLOW_COPY_AND_ASSIGN(ObExprFindInSet);
diff --git a/src/sql/engine/expr/ob_expr_operator.cpp b/src/sql/engine/expr/ob_expr_operator.cpp
index d6fe19633a..dad5978e1a 100644
--- a/src/sql/engine/expr/ob_expr_operator.cpp
+++ b/src/sql/engine/expr/ob_expr_operator.cpp
@@ -1523,6 +1523,9 @@ int ObExprDFMConvertCtx::parse_format(const ObString &format_str,
   return ret;
 }
 
+ObExprFindIntCachedValue::~ObExprFindIntCachedValue() {
+}
+
 ObObjType ObExprOperator::enumset_calc_types_[ObMaxTC] =
 {
   ObUInt64Type,/*ObNullTC*/
diff --git a/src/sql/engine/expr/ob_expr_operator.h b/src/sql/engine/expr/ob_expr_operator.h
index 43da50983b..b54e59098f 100644
--- a/src/sql/engine/expr/ob_expr_operator.h
+++ b/src/sql/engine/expr/ob_expr_operator.h
@@ -35,6 +35,7 @@
 #include "sql/engine/expr/ob_expr_cmp_func.h"
 #include "sql/engine/expr/ob_expr_extra_info_factory.h"
 #include "sql/engine/expr/ob_i_expr_extra_info.h"
+#include "lib/hash/ob_hashset.h"
 
 #define GET_EXPR_CTX(ClassType, ctx, id) static_cast<ClassType *>((ctx).get_expr_op_ctx(id))
 
@@ -2031,6 +2032,17 @@ private:
   common::ObFixedBitSet elem_flags_;
 };
 
+class ObExprFindIntCachedValue : public ObExprOperatorCtx
+{
+  typedef common::hash::ObHashMap<common::ObString, uint64_t> HASH_MAP_TYPE;
+public:
+  ObExprFindIntCachedValue() {}
+  virtual ~ObExprFindIntCachedValue();
+  HASH_MAP_TYPE &get_hashmap() { return hash_map_; }
+private:
+  HASH_MAP_TYPE hash_map_;
+};
+
 class ObExprTRDateFormat
 {
 public: