From 6cdac14ee2db41b81ebdbbc6c5de13507041ccad Mon Sep 17 00:00:00 2001
From: wjhh2008
Date: Thu, 29 Aug 2024 03:17:53 +0000
Subject: [PATCH] add expr split_part

---
 deps/oblib/src/lib/ob_name_def.h              |   1 +
 src/objit/include/objit/common/ob_item_type.h |   1 +
 src/sql/CMakeLists.txt                        |   1 +
 .../engine/expr/ob_expr_eval_functions.cpp    |   2 +
 .../engine/expr/ob_expr_operator_factory.cpp  |   3 +
 src/sql/engine/expr/ob_expr_split_part.cpp    | 206 ++++++++++++++++++
 src/sql/engine/expr/ob_expr_split_part.h      |  50 +++++
 7 files changed, 264 insertions(+)
 create mode 100644 src/sql/engine/expr/ob_expr_split_part.cpp
 create mode 100644 src/sql/engine/expr/ob_expr_split_part.h

diff --git a/deps/oblib/src/lib/ob_name_def.h b/deps/oblib/src/lib/ob_name_def.h
index 1b07db2a90..6b3c344de1 100644
--- a/deps/oblib/src/lib/ob_name_def.h
+++ b/deps/oblib/src/lib/ob_name_def.h
@@ -1161,4 +1161,5 @@
 #define N_AUDIT_LOG_FILTER_SET_USER "audit_log_filter_set_user"
 #define N_AUDIT_LOG_FILTER_REMOVE_USER "audit_log_filter_remove_user"
 #define N_CAN_ACCESS_TRIGGER "can_access_trigger"
+#define N_SPLIT_PART "split_part"
 #endif //OCEANBASE_LIB_OB_NAME_DEF_H_
diff --git a/src/objit/include/objit/common/ob_item_type.h b/src/objit/include/objit/common/ob_item_type.h
index e916df7e80..605a2ba6f7 100755
--- a/src/objit/include/objit/common/ob_item_type.h
+++ b/src/objit/include/objit/common/ob_item_type.h
@@ -887,6 +887,7 @@ typedef enum ObItemType
   T_FUN_SYS_GET_PATH = 1818,
   T_FUN_SYS_INNER_DOUBLE_TO_INT = 1819,
   T_FUN_SYS_INNER_DECIMAL_TO_YEAR = 1820,
+  T_FUN_SYS_SPLIT_PART = 1821,
   T_FUN_SYS_VEC_VID = 1900, // vector index
   T_FUN_SYS_VEC_TYPE = 1901,
   T_FUN_SYS_VEC_VECTOR = 1902,
diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt
index bb9766a960..e8fbdae9fb 100644
--- a/src/sql/CMakeLists.txt
+++ b/src/sql/CMakeLists.txt
@@ -771,6 +771,7 @@ ob_set_subtarget(ob_sql engine_expr
   engine/expr/ob_expr_rb_to_string.cpp
   engine/expr/ob_expr_rb_from_string.cpp
   engine/expr/ob_expr_decode_trace_id.cpp
+  engine/expr/ob_expr_split_part.cpp
 )
 
 ob_set_subtarget(ob_sql engine_join
diff --git a/src/sql/engine/expr/ob_expr_eval_functions.cpp b/src/sql/engine/expr/ob_expr_eval_functions.cpp
index cbd5c3ed30..dc4177b5a5 100644
--- a/src/sql/engine/expr/ob_expr_eval_functions.cpp
+++ b/src/sql/engine/expr/ob_expr_eval_functions.cpp
@@ -392,6 +392,7 @@
 #include "ob_expr_rb_from_string.h"
 #include "ob_expr_audit_log_func.h"
 #include "ob_expr_can_access_trigger.h"
+#include "ob_expr_split_part.h"
 
 namespace oceanbase
 {
@@ -1248,6 +1249,7 @@ static ObExpr::EvalFunc g_expr_eval_functions[] = {
   NULL, //ObExprArrayContains::eval_array_contains_double, /* 752 */
   NULL, //ObExprArrayContains::eval_array_contains_ObString, /* 753 */
   NULL, //ObExprArrayContains::eval_array_contains_array, /* 754 */
+  ObExprSplitPart::calc_split_part_expr, /* 755 */
 };
 
 static ObExpr::EvalBatchFunc g_expr_eval_batch_functions[] = {
diff --git a/src/sql/engine/expr/ob_expr_operator_factory.cpp b/src/sql/engine/expr/ob_expr_operator_factory.cpp
index 58e77f3dc0..143d18c8d2 100644
--- a/src/sql/engine/expr/ob_expr_operator_factory.cpp
+++ b/src/sql/engine/expr/ob_expr_operator_factory.cpp
@@ -457,6 +457,7 @@
 #include "sql/engine/expr/ob_expr_transaction_id.h"
 #include "sql/engine/expr/ob_expr_audit_log_func.h"
 #include "sql/engine/expr/ob_expr_can_access_trigger.h"
+#include "sql/engine/expr/ob_expr_split_part.h"
 
 using namespace oceanbase::common;
 namespace oceanbase
@@ -1129,6 +1130,7 @@ void ObExprOperatorFactory::register_expr_operators()
     REG_OP(ObExprSm3);
     REG_OP(ObExprSm4Encrypt);
     REG_OP(ObExprSm4Decrypt);
+    REG_OP(ObExprSplitPart);
   }();
   // 注册oracle系统函数
   REG_OP_ORCL(ObExprSysConnectByPath);
@@ -1457,6 +1459,7 @@ void ObExprOperatorFactory::register_expr_operators()
   REG_OP_ORCL(ObExprSdoRelate);
   REG_OP_ORCL(ObExprGetPath);
   REG_OP_ORCL(ObExprDecodeTraceId);
+  REG_OP_ORCL(ObExprSplitPart);
 }
 
 bool ObExprOperatorFactory::is_expr_op_type_valid(ObExprOperatorType type)
diff --git a/src/sql/engine/expr/ob_expr_split_part.cpp b/src/sql/engine/expr/ob_expr_split_part.cpp
new file mode 100644
index 0000000000..1f54ec3cb1
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_split_part.cpp
@@ -0,0 +1,206 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#define USING_LOG_PREFIX SQL_ENG
+
+#include "sql/engine/expr/ob_expr_split_part.h"
+#include <string.h>
+#include "lib/oblog/ob_log.h"
+#include "objit/common/ob_item_type.h"
+#include "sql/session/ob_sql_session_info.h"
+#include "sql/engine/expr/ob_expr_lob_utils.h"
+
+namespace oceanbase
+{
+using namespace common;
+namespace sql
+{
+// split_part takes exactly three arguments; calc_result_typeN() and
+// calc_split_part_expr() both reject any other count.
+ObExprSplitPart::ObExprSplitPart(ObIAllocator &alloc)
+    : ObStringExprOperator(alloc, T_FUN_SYS_SPLIT_PART, N_SPLIT_PART, 3, VALID_FOR_GENERATED_COL)
+{
+  need_charset_convert_ = false;
+}
+
+ObExprSplitPart::~ObExprSplitPart()
+{
+}
+
+// Deduces the result type and the calc types of the three arguments
+// (source string, delimiter, part index).
+//
+// MySQL mode: both string arguments are calculated as varchar, the index as
+// int, and the result is a varchar; aggregate_charsets_for_string_result() is
+// called with a parameter count of 1, i.e. over the source string only.
+// Oracle mode: aggregate_string_type_and_charset_oracle() is fed the source
+// string only, then deduce_string_param_calc_type_and_charset() is applied to
+// source string and delimiter; the index is calculated as int.
+// Returns OB_INVALID_ARGUMENT when param_num is not 3 and OB_ERR_UNEXPECTED
+// when the session needed in Oracle mode is missing.
+int ObExprSplitPart::calc_result_typeN(ObExprResType &type,
+                                       ObExprResType *types,
+                                       int64_t param_num,
+                                       ObExprTypeCtx &type_ctx) const
+{
+  int ret = OB_SUCCESS;
+  if (param_num != 3) {
+    ret = OB_INVALID_ARGUMENT;
+    OB_LOG(WARN, "the param number of split part should be 3", K(ret), K(param_num));
+  } else if (lib::is_mysql_mode()) {
+    types[0].set_calc_type(ObVarcharType);
+    types[1].set_calc_type(ObVarcharType);
+    types[2].set_calc_type(ObIntType);
+    type.set_varchar();
+    if (OB_FAIL(aggregate_charsets_for_string_result(type, types, 1, type_ctx))) {
+      LOG_WARN("aggregate_charsets_for_string_result failed", K(ret));
+    } else {
+      for (int64_t i = 0; i < param_num; i++) {
+        types[i].set_calc_collation_type(type.get_collation_type());
+        types[i].set_calc_collation_level(type.get_collation_level());
+      }
+    }
+  } else if (OB_ISNULL(type_ctx.get_session())) {
+    // the session is dereferenced below, so fail cleanly instead of crashing
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("session is null", K(ret));
+  } else {
+    ObSEArray<ObExprResType *, 2> params;
+    OZ (params.push_back(&types[0]));
+    OZ (aggregate_string_type_and_charset_oracle(*type_ctx.get_session(), params, type));
+    OZ (params.push_back(&types[1]));
+    OZ (deduce_string_param_calc_type_and_charset(*type_ctx.get_session(), type, params));
+    types[2].set_calc_type(ObIntType);
+  }
+  return ret;
+}
+
+// Code generation hook: installs calc_split_part_expr as the evaluation function.
+int ObExprSplitPart::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr,
+                             ObExpr &rt_expr) const
+{
+  int ret = OB_SUCCESS;
+  UNUSED(expr_cg_ctx);
+  UNUSED(raw_expr);
+  rt_expr.eval_func_ = calc_split_part_expr;
+  return ret;
+}
+
+// Evaluates split_part(ori, sub, pos): splits `ori` on the delimiter `sub`
+// and returns the pos-th field.
+//
+//  - any NULL argument yields NULL;
+//  - pos is 1-based, 0 is treated as 1, a negative pos counts from the end
+//    (-1 is the last field); an out-of-range pos yields an empty string;
+//  - an empty delimiter yields `ori` for pos 1, 0 or -1 and an empty string
+//    otherwise;
+//  - an empty `ori`, or a delimiter that does not occur in `ori`, yields an
+//    empty string.
+//
+// The result points into the memory of the first argument; nothing is copied.
+int ObExprSplitPart::calc_split_part_expr(const ObExpr &expr, ObEvalCtx &ctx,
+                                          ObDatum &res)
+{
+  int ret = OB_SUCCESS;
+  ObDatum *sub = NULL;
+  ObDatum *ori = NULL;
+  ObDatum *pos = NULL;
+  if (OB_UNLIKELY(3 != expr.arg_cnt_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("invalid arg cnt", K(ret), K(expr.arg_cnt_));
+  } else if (OB_FAIL(expr.args_[0]->eval(ctx, ori)) ||
+             OB_FAIL(expr.args_[1]->eval(ctx, sub)) ||
+             OB_FAIL(expr.args_[2]->eval(ctx, pos))) {
+    LOG_WARN("eval arg failed", K(ret));
+  } else if (ori->is_null() || sub->is_null() || pos->is_null()) {
+    res.set_null();
+  } else {
+    int64_t pos_int = pos->get_int();
+    const ObString ori_str = ori->get_string();
+    const ObString sub_str = sub->get_string();
+    const int64_t ori_len = ori_str.length();
+    const int64_t sub_len = sub_str.length();
+    ObString empty_str;
+    ObCollationType calc_cs_type = expr.args_[0]->datum_meta_.cs_type_;
+    if (sub_str.empty()) {
+      if (pos_int == 1 || pos_int == 0 || pos_int == -1) {
+        res.set_string(ori_str);
+      } else {
+        res.set_string(empty_str);
+      }
+    } else if (ori_str.empty()) {
+      res.set_string(empty_str);
+    } else {
+      int64_t total_splits = 0;
+      int64_t cur_pos = 1; // 1-based byte position where the next search starts
+      // the array is [padding_start, ...split_positions, padding_end], where every
+      // entry is the 1-based byte position of a (real or virtual) delimiter
+      ObSEArray<int64_t, 16> splits_pos;
+      OZ(splits_pos.push_back(1 - sub_len)); // padding_start
+      while (OB_SUCC(ret) && cur_pos <= ori_len) {
+        // instrb result + 1 is used as the 1-based offset of the match inside
+        // the remaining text; 0 is treated as "delimiter not found"
+        const int64_t idx = ObCharset::instrb(calc_cs_type,
+                                              ori_str.ptr() + cur_pos - 1, ori_len - cur_pos + 1,
+                                              sub_str.ptr(), sub_len) + 1;
+        if (idx == 0) {
+          break;
+        } else {
+          const int64_t match_pos = cur_pos + idx - 1;
+          total_splits++;
+          OZ(splits_pos.push_back(match_pos));
+          // resume after the whole delimiter so that overlapping occurrences
+          // (e.g. 'aa' in 'aaa') are not counted twice
+          cur_pos = match_pos + sub_len;
+        }
+      }
+      OZ(splits_pos.push_back(ori_len + 1)); // padding_end
+
+      if (OB_SUCC(ret)) {
+        if (total_splits == 0) {
+          res.set_string(empty_str);
+        } else {
+          // handle pos_int
+          if (0 == pos_int) {
+            pos_int = 1;
+          } else if (pos_int < 0) {
+            pos_int = pos_int + total_splits + 2;
+          }
+          if (total_splits + 1 < pos_int || 1 > pos_int) {
+            // out of range
+            res.set_string(empty_str);
+          } else {
+            // the return string is the 0-based byte range [start_pos, end_pos - 1)
+            const int64_t start_pos = splits_pos.at(pos_int - 1) - 1 + sub_len;
+            const int64_t end_pos = splits_pos.at(pos_int);
+            if (start_pos >= end_pos) {
+              res.set_string(empty_str);
+            } else {
+              res.set_string(ori_str.ptr() + start_pos, end_pos - start_pos - 1);
+            }
+          }
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+DEF_SET_LOCAL_SESSION_VARS(ObExprSplitPart, raw_expr) {
+  int ret = OB_SUCCESS;
+  SET_LOCAL_SYSVAR_CAPACITY(1);
+  EXPR_ADD_LOCAL_SYSVAR(share::SYS_VAR_COLLATION_CONNECTION);
+  return ret;
+}
+
+}
+}
diff --git a/src/sql/engine/expr/ob_expr_split_part.h b/src/sql/engine/expr/ob_expr_split_part.h
new file mode 100644
index 0000000000..a62d4ee768
--- /dev/null
+++ b/src/sql/engine/expr/ob_expr_split_part.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef _OB_SQL_EXPR_SPLIT_PART_H_
+#define _OB_SQL_EXPR_SPLIT_PART_H_
+#include "sql/engine/expr/ob_expr_operator.h"
+namespace oceanbase
+{
+namespace sql
+{
+// Expression operator for split_part(str, delimiter, part_index): splits `str`
+// on `delimiter` and returns the field selected by `part_index`.
+class ObExprSplitPart : public ObStringExprOperator
+{
+public:
+  explicit ObExprSplitPart(common::ObIAllocator &alloc);
+
+  virtual ~ObExprSplitPart();
+
+  // Deduces the result type and the calc types of the three arguments.
+  virtual int calc_result_typeN(ObExprResType &type,
+                                ObExprResType *types,
+                                int64_t param_num,
+                                common::ObExprTypeCtx &type_ctx) const override;
+
+  // Installs calc_split_part_expr as the evaluation function of rt_expr.
+  virtual int cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr,
+                      ObExpr &rt_expr) const override;
+
+  // Evaluation function: writes the selected field (or NULL / an empty string)
+  // into res.
+  static int calc_split_part_expr(const ObExpr &expr, ObEvalCtx &ctx,
+                                  ObDatum &res);
+  DECLARE_SET_LOCAL_SESSION_VARS;
+private:
+  DISALLOW_COPY_AND_ASSIGN(ObExprSplitPart);
+};
+}
+}
+
+#endif /* _OB_SQL_EXPR_SPLIT_PART_H_ */