/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_ENG #include #include "sql/engine/expr/ob_expr_in.h" #include "sql/engine/expr/ob_expr_equal.h" #include "sql/engine/expr/ob_expr_result_type_util.h" #include "sql/engine/ob_exec_context.h" #include "sql/session/ob_sql_session_info.h" #include "sql/engine/expr/ob_expr_coll_pred.h" #include "sql/engine/expr/ob_expr_subquery_ref.h" #include "sql/engine/subquery/ob_subplan_filter_op.h" #include "share/vector/expr_cmp_func.h" namespace oceanbase { using namespace common; using namespace hash; namespace sql { //计算当前数字二进制排列组合的下一个数, 例如 001->010->100 static unsigned next_perm(unsigned int cur_num) { unsigned int t = cur_num | (cur_num - 1); // t gets cur_num's least significant 0 bits set to 1 // Next set to 1 the most significant bit to change, // set to 0 the least significant ones, and add the necessary 1 bits. return (t + 1) | (((~t & -~t) - 1) >> (__builtin_ctz(cur_num) + 1)); } //计算当前数字二进制排列组合的下一个数,如果当前数字的高位全部为1,则增加一个1并放到低位 //例如 11000->00111 static unsigned next(unsigned int cur_num, unsigned int max) { // 这里和next_perm中重复计算的cur | (cur_num - 1)会被编译器自动在内联时提取出来,不会额外计算一次 return ((cur_num - 1) | cur_num) >= max - 1 ? (1U << (__builtin_popcount(cur_num) + 1)) - 1 : next_perm(cur_num); } //用于求解cur num的上一个二进制排列组合, 例如 111->110->101->011->100->010->001, max为111 static unsigned last(unsigned int cur_num, unsigned int max) { unsigned int num = (cur_num ^ max); return (max ^ next(num, max)); } template <> bool Row::equal_key(const Row &other, void **cmp_funcs, const int idx) const { bool equal_ret = false; if (OB_ISNULL(other.elems_) || OB_ISNULL(elems_)) { } else if (other.elems_ == elems_) { equal_ret = true; } else { bool is_equal = true; int curr_idx = idx; for (int i = 0; is_equal && 0 != curr_idx; ++i, curr_idx = curr_idx >> 1) { if (1 == (curr_idx & 1)) { if (elems_[i].is_null() && other.elems_[i].is_null()) { //true } else if (elems_[i].is_null() || other.elems_[i].is_null()) { is_equal = false; } else { int cmp_ret = 0; // lob type will not use in expr with hash, can ignore ret here (void)((DatumCmpFunc)cmp_funcs[i])(elems_[i], other.elems_[i], cmp_ret); if (0 != cmp_ret) { is_equal = false; } else { //do nothing } } } } equal_ret = is_equal; } return equal_ret; } template <> int Row::hash_key(void **hash_funcs, const int idx, uint64_t seed, uint64_t &hash_val) const { int ret = OB_SUCCESS; hash_val = 0; if (OB_ISNULL(elems_)) { } else { int curr_idx = idx; for (int i = 0; 0 != curr_idx && OB_SUCC(ret); ++i, curr_idx = curr_idx >> 1) { if (1 == (curr_idx & 1)) { ret = ((ObExprHashFuncType)hash_funcs[i])(elems_[i], seed, seed); } else { continue; } } hash_val = seed; } return ret; } template <> int Row::compare_with_null(const Row &other, void **cmp_funcs, const int64_t row_dimension, int &exist_ret) const { int ret = OB_SUCCESS; if (OB_ISNULL(other.elems_) || OB_ISNULL(elems_) || OB_ISNULL(cmp_funcs)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("NULL pointer param or function", K(ret)); } else if (row_dimension > 0) { exist_ret = ObExprInHashMap::HASH_CMP_TRUE; for (int i = 0; ObExprInHashMap::HASH_CMP_FALSE != exist_ret && i < row_dimension && OB_SUCC(ret); ++i) { if (elems_[i].is_null() || other.elems_[i].is_null()) { exist_ret = ObExprInHashMap::HASH_CMP_UNKNOWN; } else { int cmp_ret = 0; if (OB_FAIL(((DatumCmpFunc)cmp_funcs[i])(elems_[i], other.elems_[i], cmp_ret))) { LOG_WARN("failed to compare", K(ret)); } else if (0 != cmp_ret) { exist_ret = ObExprInHashMap::HASH_CMP_FALSE; } else { //do nothing } } } } return ret; } template int Row::set_elem(T *elems) { int ret = OB_SUCCESS; if (OB_ISNULL(elems)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("elem is not inited", K(ret)); } else { elems_ = elems; } return ret; } template bool RowKey::operator==(const RowKey &other) const { return row_.equal_key(other.row_, meta_->cmp_funcs_, meta_->idx_); } template int RowKey::hash(uint64_t &hash_val, uint64_t seed) const { return row_.hash_key(meta_->hash_funcs_, meta_->idx_, seed, hash_val); } template int ObExprInHashMap::set_refactored(const Row &row) { int ret = OB_SUCCESS; ObArray> *arr_ptr = NULL; RowKey tmp_row_key; tmp_row_key.row_= row; tmp_row_key.meta_ = &meta_; if (OB_ISNULL(arr_ptr = const_cast> *> (map_.get(tmp_row_key)))) { ObArray> arr; ret = map_.set_refactored(tmp_row_key, arr); if (OB_SUCC(ret)) { arr_ptr = const_cast> *> (map_.get(tmp_row_key)); CK (OB_NOT_NULL(arr_ptr)); if (OB_SUCC(ret)) { arr_ptr->set_tenant_id(MTL_ID()); if (OB_FAIL(arr_ptr->push_back(row))) { LOG_WARN("failed to push row", K(ret)); } } } } else { int exist = ObExprInHashMap::HASH_CMP_FALSE; //去重 for (int i = 0; OB_SUCC(ret) && ObExprInHashMap::HASH_CMP_TRUE != exist && i < arr_ptr->count(); ++i) { if (OB_FAIL((*arr_ptr)[i].compare_with_null(row, meta_.cmp_funcs_, meta_.row_dimension_, exist))) { LOG_WARN("compare with null failed", K(ret)); } } if (OB_SUCC(ret) && ObExprInHashMap::HASH_CMP_TRUE != exist) { ret = arr_ptr->push_back(row); } } return ret; } template int ObExprInHashMap::exist_refactored(const Row &row, int &exist_ret) { int ret = OB_SUCCESS; RowKey tmp_row_key; tmp_row_key.row_= row; tmp_row_key.meta_ = &meta_; const ObArray> *arr_ptr = map_.get(tmp_row_key); if (OB_ISNULL(arr_ptr)) { exist_ret = ObExprInHashMap::HASH_CMP_FALSE; //在hash表中不存在 } else { int exist = ObExprInHashMap::HASH_CMP_FALSE; for (int i=0; 0 != exist_ret && i < arr_ptr->count(); ++i) { if (OB_FAIL((*arr_ptr)[i].compare_with_null(row, meta_.cmp_funcs_, meta_.row_dimension_, exist))) { LOG_WARN("compare with null failed", K(ret)); } else if (ObExprInHashMap::HASH_CMP_UNKNOWN == exist || ObExprInHashMap::HASH_CMP_TRUE == exist) { exist_ret = exist; } else { //do nothing } } } return ret; } template int ObExprInHashSet::set_refactored(const Row &row) { RowKey tmp_row_key; tmp_row_key.row_= row; tmp_row_key.meta_ = &meta_; return set_.set_refactored(tmp_row_key); } template int ObExprInHashSet::exist_refactored(const Row &row, bool &is_exist) { RowKey tmp_row_key; tmp_row_key.row_= row; tmp_row_key.meta_ = &meta_; int ret = set_.exist_refactored(tmp_row_key); if (OB_HASH_EXIST == ret) { ret = OB_SUCCESS; is_exist = true; } else if (OB_HASH_NOT_EXIST == ret) { ret = OB_SUCCESS; is_exist = false; } else { LOG_WARN("failed to search in hashset", K(ret)); } return OB_SUCCESS; } int ObExprInOrNotIn::ObExprInCtx::init_static_engine_hashset(int64_t param_num) { static_engine_hashset_.set_meta_idx(1); static_engine_hashset_.set_meta_dimension(1); row_dimension_ = 1; return static_engine_hashset_.create(param_num * 2); } int ObExprInOrNotIn::ObExprInCtx::init_static_engine_hashset_vecs(int64_t param_num, int64_t row_dimension, ObExecContext *exec_ctx) { int ret = OB_SUCCESS; static_engine_hashset_vecs_ = NULL; int64_t vecs_buf_size = sizeof(ObExprInHashMap ) * (1 << row_dimension); if (OB_ISNULL(static_engine_hashset_vecs_ = (ObExprInHashMap *) ((exec_ctx->get_allocator()).alloc(vecs_buf_size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory", K(ret)); } else { for (int64_t i = 0; i < (1 << row_dimension); ++i) { new (&static_engine_hashset_vecs_[i]) ObExprInHashMap (); } for (int64_t i = 0; OB_SUCC(ret) && i < (1 << row_dimension); ++i) { static_engine_hashset_vecs_[i].set_meta_idx(i); static_engine_hashset_vecs_[i].set_meta_dimension(row_dimension); if (OB_FAIL(static_engine_hashset_vecs_[i].create(param_num))) { LOG_WARN("create static_engine_hashset_vecs failed", K(ret), K(i)); } } } row_dimension_ = row_dimension; return ret; } int ObExprInOrNotIn::ObExprInCtx::add_to_static_engine_hashset(const Row &row) { int ret = static_engine_hashset_.set_refactored(row); if (OB_FAIL(ret)) { LOG_WARN("failed to add to hashset", K(ret)); } return ret; } int ObExprInOrNotIn::ObExprInCtx:: add_to_static_engine_hashset_vecs(const Row &row, const int idx) { int ret = OB_SUCCESS; if (idx >= (1 << row_dimension_)) { ret = OB_INVALID_ARGUMENT; } else { ret = static_engine_hashset_vecs_[idx].set_refactored(row); } if (OB_FAIL(ret)) { LOG_WARN("failed to add to hashset_vecs", K(ret), K(idx)); } return ret; } int ObExprInOrNotIn::ObExprInCtx::exist_in_static_engine_hashset(const Row &row, bool &is_exist) { return static_engine_hashset_.exist_refactored(row, is_exist); } int ObExprInOrNotIn::ObExprInCtx:: exist_in_static_engine_hashset_vecs(const Row &row, const int idx, int &exist_ret) { int ret = OB_SUCCESS; if (idx >= (1 << row_dimension_)) { ret = OB_INVALID_ARGUMENT; } else if (OB_FAIL(static_engine_hashset_vecs_[idx].exist_refactored(row, exist_ret))){ LOG_WARN("failed to find in hash map", K(ret)); } return ret; } int ObExprInOrNotIn::ObExprInCtx::set_cmp_types(const ObExprCalcType &cmp_type, const int64_t row_dimension) { int ret = OB_SUCCESS; if (cmp_types_.count() < row_dimension) { ret = cmp_types_.push_back(cmp_type); } else { //do nothing } return ret; } const ObExprCalcType &ObExprInOrNotIn::ObExprInCtx::get_cmp_types(const int64_t idx) const { return cmp_types_[idx]; } int ObExprInOrNotIn::ObExprInCtx:: init_hashset_vecs_all_null(const int64_t row_dimension, ObExecContext *exec_ctx) { int ret = OB_SUCCESS; hashset_vecs_all_null_.set_allocator(&(exec_ctx->get_allocator())); if (OB_FAIL(hashset_vecs_all_null_.init(1 << row_dimension))) { LOG_WARN("failed to init fixed array", K(ret)); } else { for (int i = 0; OB_SUCC(ret) && i < (1 << row_dimension); ++i) { ret = hashset_vecs_all_null_.push_back(false); } } return ret; } int ObExprInOrNotIn::ObExprInCtx::set_hashset_vecs_all_null_true(const int64_t idx) { int ret = OB_SUCCESS; if (idx >= (1 << row_dimension_)) { ret = OB_INVALID_ARGUMENT; } else { hashset_vecs_all_null_[idx] = true; } return ret; } int ObExprInOrNotIn::ObExprInCtx:: get_hashset_vecs_all_null(const int64_t idx, bool &is_all_null) const { int ret = OB_SUCCESS; if (idx >= (1 << row_dimension_)) { ret = OB_INVALID_ARGUMENT; } else { is_all_null = hashset_vecs_all_null_[idx]; } return ret; } int ObExprInOrNotIn::ObExprInCtx:: init_right_datums(int64_t param_num, int64_t row_dimension, ObExecContext *exec_ctx) { int ret = OB_SUCCESS; right_datums_ = NULL; int64_t datums_buf_size = sizeof(ObDatum *) * param_num; //ObDatum *指针数组大小 if (OB_ISNULL(right_datums_ = (ObDatum **) ((exec_ctx->get_allocator()).alloc(datums_buf_size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory for ObDatum **", K(ret)); } else { for (int i =0; OB_SUCC(ret) && i < param_num; ++i) {//初始化每个ObDatum * if (OB_ISNULL(right_datums_[i] = static_cast (((exec_ctx->get_allocator()).alloc(sizeof(ObDatum) * row_dimension))))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory for ObDatum *", K(ret), K(i)); } } } return ret; } int ObExprInOrNotIn::ObExprInCtx:: init_cmp_funcs(int64_t func_cnt, ObExecContext *exec_ctx) { int ret = OB_SUCCESS; cmp_functions_ = NULL; if (func_cnt > 0) { cmp_functions_ = (void**)exec_ctx->get_allocator().alloc(func_cnt * sizeof(void*)); if (OB_ISNULL(cmp_functions_)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory for cmp_func **", K(ret)); } else { for (int64_t i = 0; i < func_cnt; i++) { cmp_functions_[i] = NULL; } } } return ret; } int ObExprInOrNotIn::ObExprInCtx::set_right_datum(int64_t row_num, int64_t col_num, const int right_param_num, const common::ObDatum &datum) { int ret = OB_SUCCESS; if (OB_ISNULL(right_datums_)) { ret = OB_NOT_INIT; LOG_WARN("right_datums is not init", K(ret)); } else if (row_num < 0 || row_num >= right_param_num || col_num < 0 || col_num >= row_dimension_) { ret = OB_INVALID_ARGUMENT; LOG_WARN("row_num or col_num out of bounds", K(ret)); } else { right_datums_[row_num][col_num] = datum; } return ret; } ObExprInOrNotIn::ObExprInOrNotIn(ObIAllocator &alloc, ObExprOperatorType type, const char *name) : ObVectorExprOperator(alloc, type, name, 2, 1), param_flags_(0) { param_lazy_eval_ = true; need_charset_convert_ = false; } int ObExprInOrNotIn::calc_result_typeN(ObExprResType &type, ObExprResType *types, int64_t param_num, ObExprTypeCtx &type_ctx) const { int ret = ObVectorExprOperator::calc_result_typeN(type, types, param_num, type_ctx); if (OB_SUCC(ret)) { type.set_scale(DEFAULT_SCALE_FOR_INTEGER); type.set_precision(DEFAULT_PRECISION_FOR_BOOL); } return ret; } /* 比较规则: * Oracle document: * Two nested table variables are equal if and only if they have the same set of elements (in any order). * the problem is how to define "the same set of elements", which is not documented by Oracle. * the rules we follow here are: * 1. if the elements are of an uncomparable type, such as Record, return an error * 2. when NULL (NULL can be a nested table itself or its element) is compared with any other element, return NULL * 3. nt in (nt1, nt2, ...) returns: * a. TRUE if any of nt=nt1, nt=nt2, ... is TRUE * b. NULL if none of them is TRUE, and at least one of them is NULL * c. FALSE if all of them are FALSE */ // forbid composite types in IN expr before composite types comparison is refactored int ObExprInOrNotIn::eval_pl_udt_in(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) { int ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "IN expr for composite types is"); LOG_WARN("IN expr for composite types is not supported", K(expr)); // CollectionPredRes res = CollectionPredRes::COLL_PRED_INVALID; // ObDatum *left = NULL; // ObDatum *right = NULL; // pl::ObPLCollection *coll = NULL; // bool is_any_result_null = false; // OZ(expr.args_[0]->eval(ctx, left)); // if (OB_SUCC(ret)) { // coll = reinterpret_cast(left->get_ext()); // const ObExpr *row = expr.args_[1]; // for (int64_t i = 0; OB_SUCC(ret) && i < row->arg_cnt_; ++i) { // OZ(row->args_[i]->eval(ctx, right)); // CollectionPredRes eq_cmp_res = COLL_PRED_INVALID; // if (OB_SUCC(ret)) { // if (OB_FAIL(ObRelationalExprOperator::pl_udt_compare2( // eq_cmp_res, *left->extend_obj_, *right->extend_obj_, // ctx.exec_ctx_, CO_EQ))) { // LOG_WARN("failed to compare to nest table", K(ret)); // } else { // res = static_cast(eq_cmp_res); // if (COLL_PRED_TRUE == res) { // break; // } else if (COLL_PRED_NULL == res) { // is_any_result_null = true; // } // } // } // if (OB_SUCC(ret)) { // if (COLL_PRED_TRUE == res) { // set_datum_result(T_OP_IN == expr.type_, true, false, expr_datum); // } else if (is_any_result_null) { // set_datum_result(T_OP_IN == expr.type_, false, true, expr_datum); // } else { // set_datum_result(T_OP_IN == expr.type_, false, false, expr_datum); // } // } // } // } return ret; } inline bool ObExprInOrNotIn::need_hash(ObExecContext *exec_ctx) const { return is_param_all_const() && is_param_all_same_type() && is_param_all_same_cs_type() && NULL != exec_ctx && exec_ctx->is_expr_op_ctx_inited(); } OB_SERIALIZE_MEMBER(ObExprInOrNotIn, row_dimension_, real_param_num_, result_type_, input_types_, id_, param_flags_); ObExprIn::ObExprIn(ObIAllocator &alloc) : ObExprInOrNotIn(alloc, T_OP_IN, N_IN) {} ObExprNotIn::ObExprNotIn(ObIAllocator &alloc) : ObExprInOrNotIn(alloc, T_OP_NOT_IN, N_NOT_IN) {} int ObExprInOrNotIn::cg_expr(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { int ret = OB_SUCCESS; if (OB_UNLIKELY(2 != raw_expr.get_param_count()) || OB_ISNULL(expr_cg_ctx.allocator_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid param count for in expr", K(ret)); } else if (OB_ISNULL(raw_expr.get_param_expr(0)) || OB_ISNULL(raw_expr.get_param_expr(1))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null param expr", K(ret)); } else if (lib::is_oracle_mode() && is_param_is_ext_type_oracle()) { rt_expr.eval_func_ = &eval_pl_udt_in; } else if (T_REF_QUERY == raw_expr.get_param_expr(1)->get_expr_type()) { //xx in (subquery) has been transformed to xx =ANY() ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid right expr type", K(ret)); } else if (T_REF_QUERY == raw_expr.get_param_expr(0)->get_expr_type() //output column == 1 由subplan filter负责迭代数据 && raw_expr.get_param_expr(0)->get_output_column() > 1) { ret = cg_expr_with_subquery(expr_cg_ctx, raw_expr, rt_expr); } else if (T_OP_ROW == raw_expr.get_param_expr(0)->get_expr_type()) { ret = cg_expr_with_row(expr_cg_ctx, raw_expr, rt_expr); } else { ret = cg_expr_without_row(expr_cg_ctx, raw_expr, rt_expr); } return ret; } int ObExprInOrNotIn::cg_expr_without_row(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { int ret = OB_SUCCESS; UNUSED(raw_expr); if (OB_UNLIKELY(2 != rt_expr.arg_cnt_) || OB_ISNULL(rt_expr.args_) || OB_ISNULL(rt_expr.args_[0]) || OB_ISNULL(rt_expr.args_[1])) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret)); } else { rt_expr.inner_func_cnt_ = rt_expr.args_[1]->arg_cnt_; void **func_buf = NULL; int64_t func_buf_size = sizeof(void *) * rt_expr.inner_func_cnt_; if (OB_ISNULL(func_buf = (void **)expr_cg_ctx.allocator_->alloc(func_buf_size))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory", K(ret)); } else { ObObjType left_type = rt_expr.args_[0]->datum_meta_.type_; ObCollationType left_cs = rt_expr.args_[0]->datum_meta_.cs_type_; ObObjType right_type = rt_expr.args_[1]->args_[0]->datum_meta_.type_; const bool has_lob_header = rt_expr.args_[0]->obj_meta_.has_lob_header() || rt_expr.args_[1]->args_[0]->obj_meta_.has_lob_header(); ObScale scale1 = rt_expr.args_[0]->datum_meta_.scale_; ObScale scale2 = rt_expr.args_[1]->datum_meta_.scale_; ObPrecision prec1 = rt_expr.args_[0]->datum_meta_.precision_; ObPrecision prec2 = rt_expr.args_[1]->args_[0]->datum_meta_.precision_; rt_expr.inner_functions_ = func_buf; bool is_reverse_cmp_func = expr_cg_ctx.cur_cluster_version_ >= CLUSTER_VERSION_4_3_0_0; DatumCmpFunc func_ptr; if (is_reverse_cmp_func) { // hash table use self as left, so here right param is left for cmp func func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func( right_type, left_type, scale2, scale1, prec2, prec1, lib::is_oracle_mode(), left_cs, has_lob_header); } else { func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func( left_type, right_type, scale1, scale2, prec1, prec2, lib::is_oracle_mode(), left_cs, has_lob_header); } for (int i = 0; i < rt_expr.inner_func_cnt_; i++) { rt_expr.inner_functions_[i] = (void *)func_ptr; } bool is_string_text_cmp = (ob_is_string_tc(left_type) && ob_is_text_tc(right_type)) || (ob_is_text_tc(left_type) && ob_is_string_tc(right_type)); if (!is_param_all_const() || rt_expr.inner_func_cnt_ <= 2 || (!is_reverse_cmp_func && is_string_text_cmp) || (ob_is_json(left_type) || ob_is_json(right_type)) || (ob_is_urowid(left_type) || ob_is_urowid(right_type))) { rt_expr.eval_func_ = &ObExprInOrNotIn::eval_in_without_row_fallback; } else { rt_expr.eval_func_ = &ObExprInOrNotIn::eval_in_without_row; } //now only support c1 in (1,2,3,4...) to be vectorized if (is_param_can_vectorized()) { //目前认为右边参数 <= 2时, nest_loop算法的效果一定比hash更好 int tmp_in_ret = OB_E(EventTable::EN_ENABLE_VECTOR_IN) OB_SUCCESS; if (rt_expr.inner_func_cnt_ <= 2 || ob_is_urowid(left_type) || ob_is_urowid(right_type)) { rt_expr.eval_batch_func_ = &ObExprInOrNotIn::eval_batch_in_without_row_fallback; rt_expr.eval_vector_func_ = tmp_in_ret == OB_SUCCESS ? &ObExprInOrNotIn::eval_vector_in_without_row_fallback : nullptr; } else { rt_expr.eval_batch_func_ = &ObExprInOrNotIn::eval_batch_in_without_row; rt_expr.eval_vector_func_ = tmp_in_ret == OB_SUCCESS ? &ObExprInOrNotIn::eval_vector_in_without_row : nullptr; } } } } return ret; } int ObExprInOrNotIn::cg_expr_with_row(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { int ret = OB_SUCCESS; UNUSED(raw_expr); if (OB_UNLIKELY(2 != rt_expr.arg_cnt_) || OB_ISNULL(rt_expr.args_) || OB_ISNULL(rt_expr.args_[0]) || OB_ISNULL(rt_expr.args_[1]) || OB_ISNULL(rt_expr.args_[1]->args_[0])) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret)); } else { ObSEArray left_types; ObSEArray left_cs_arr; ObSEArray right_types; ObSEArray has_lob_headers; ObSEArray left_scales; ObSEArray right_scales; ObSEArray left_precs; ObSEArray rigth_precs; #define LEFT_ROW rt_expr.args_[0] #define LEFT_ROW_ELE(i) rt_expr.args_[0]->args_[i] #define RIGHT_ROW(i) rt_expr.args_[1]->args_[i] #define RIGHT_ROW_ELE(i, j) rt_expr.args_[1]->args_[i]->args_[j] for (int i = 0; OB_SUCC(ret) && i < LEFT_ROW->arg_cnt_; i++) { if (OB_ISNULL(LEFT_ROW_ELE(i))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null args", K(ret)); } else if (OB_FAIL(left_types.push_back(LEFT_ROW_ELE(i)->datum_meta_.type_))) { LOG_WARN("failed to push back element", K(ret)); } else if (OB_FAIL(left_cs_arr.push_back( LEFT_ROW_ELE(i)->datum_meta_.cs_type_))) { LOG_WARN("failed to push back element", K(ret)); } else if (OB_FAIL(has_lob_headers.push_back( LEFT_ROW_ELE(i)->obj_meta_.has_lob_header()))) { LOG_WARN("failed to push back element", K(ret)); } else if (OB_FAIL(left_scales.push_back(LEFT_ROW_ELE(i)->datum_meta_.scale_))) { LOG_WARN("failed to push back element", K(ret)); } else if (OB_FAIL(left_precs.push_back(LEFT_ROW_ELE(i)->datum_meta_.precision_))) { LOG_WARN("failed to push back element", K(ret)); } else { /* do nothing */ } } // end for for (int i = 0; OB_SUCC(ret) && i < RIGHT_ROW(0)->arg_cnt_; i++) { if (OB_FAIL(right_types.push_back(RIGHT_ROW_ELE(0, i)->datum_meta_.type_))) { LOG_WARN("failed to push back element", K(ret)); } else if (OB_FAIL(right_scales.push_back(RIGHT_ROW_ELE(0, i)->datum_meta_.scale_))) { LOG_WARN("failed to push back element", K(ret)); } else if (OB_FAIL(rigth_precs.push_back(RIGHT_ROW_ELE(0, i)->datum_meta_.precision_))) { LOG_WARN("failed to push back element", K(ret)); } else { has_lob_headers.at(i) = has_lob_headers.at(i) || (RIGHT_ROW_ELE(0, i)->obj_meta_.has_lob_header()); } } if (OB_SUCC(ret)) { void **func_buf = NULL; int func_buf_size = sizeof(void *) * LEFT_ROW->arg_cnt_ ; //这里初始化row_dimension rt_expr.inner_func_cnt_ = LEFT_ROW->arg_cnt_; if (OB_ISNULL(func_buf = (void **)expr_cg_ctx.allocator_->alloc(func_buf_size))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory", K(ret)); } else { bool is_reverse_cmp_func = expr_cg_ctx.cur_cluster_version_ >= CLUSTER_VERSION_4_3_0_0; bool is_string_text_cmp = false; for (int i = 0; i < left_types.count(); i++) { DatumCmpFunc func_ptr; if (is_reverse_cmp_func) { // hash table use self as left, so here right param is left for cmp func func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func( right_types.at(i), left_types.at(i), right_scales.at(i), left_scales.at(i), rigth_precs.at(i), left_precs.at(i), lib::is_oracle_mode(), left_cs_arr.at(i), has_lob_headers.at(i)); } else { func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func( left_types.at(i), right_types.at(i), left_scales.at(i), right_scales.at(i), left_precs.at(i), rigth_precs.at(i), lib::is_oracle_mode(), left_cs_arr.at(i), has_lob_headers.at(i)); } func_buf[i] = (void *)func_ptr; is_string_text_cmp |= (ob_is_string_tc(left_types.at(i)) && ob_is_text_tc(right_types.at(i))) || (ob_is_text_tc(left_types.at(i)) && ob_is_string_tc(right_types.at(i))); } // end for if (!is_param_all_const() || (!is_reverse_cmp_func && is_string_text_cmp)) { rt_expr.eval_func_ = &ObExprInOrNotIn::eval_in_with_row_fallback; } else { rt_expr.eval_func_ = &ObExprInOrNotIn::eval_in_with_row; } rt_expr.inner_functions_ = func_buf; } } #undef LEFT_ROW #undef LEFT_ROW_ELE #undef RIGHT_ROW #undef RIGHT_ROW_ELE } return ret; } #undef GET_CS_TYPE int ObExprInOrNotIn::cg_expr_with_subquery(ObExprCGCtx &expr_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { int ret = OB_SUCCESS; if (OB_UNLIKELY(2 != rt_expr.arg_cnt_) || OB_ISNULL(rt_expr.args_) || OB_ISNULL(rt_expr.args_[0]) || OB_ISNULL(rt_expr.args_[1]) || OB_ISNULL(rt_expr.args_[1]->args_[0])) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret)); } else { #define RIGHT_ROW(i) rt_expr.args_[1]->args_[i] #define RIGHT_ROW_ELE(i, j) rt_expr.args_[1]->args_[i]->args_[j] ObSEArray left_types; void **funcs = NULL; CK(2 == raw_expr.get_param_count()); CK(NULL != raw_expr.get_param_expr(0)); CK(NULL != raw_expr.get_param_expr(1)); OZ(get_param_types(*raw_expr.get_param_expr(0), true, left_types)); //OZ(get_param_types(*raw_expr.get_param_expr(1), false, right_types)); if (OB_FAIL(ret)) { } else if (left_types.empty() || left_types.count() != RIGHT_ROW(0)->arg_cnt_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("operand cnt mismatch", K(ret), K(left_types.count()), K(RIGHT_ROW(0)->arg_cnt_)); } else if (OB_ISNULL(funcs = (void **)expr_cg_ctx.allocator_->alloc( sizeof(void *) * left_types.count()))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc memory failed", K(ret)); } else { rt_expr.inner_func_cnt_ = left_types.count(); rt_expr.inner_functions_ = funcs; bool is_reverse_cmp_func = expr_cg_ctx.cur_cluster_version_ >= CLUSTER_VERSION_4_3_0_0; for (int64_t i = 0; OB_SUCC(ret) && i < rt_expr.inner_func_cnt_; i++) { auto &l = left_types.at(i); auto &r = RIGHT_ROW_ELE(0, i)->obj_meta_; auto &r_datum_meta_ = RIGHT_ROW_ELE(0, i)->datum_meta_; bool has_lob_header = l.has_lob_header() || r.has_lob_header(); if (ObDatumFuncs::is_string_type(l.get_type()) && ObDatumFuncs::is_string_type(r.get_type())) { CK(l.get_collation_type() == r.get_collation_type()); } if (OB_SUCC(ret)) { if (is_reverse_cmp_func) { // hash table use self as left, so here right param is left for cmp func funcs[i] = (void *)ObExprCmpFuncsHelper::get_datum_expr_cmp_func( r.get_type(), l.get_type(), r_datum_meta_.scale_, l.get_scale(), r_datum_meta_.precision_, l.get_precision(), lib::is_oracle_mode(), l.get_collation_type(), has_lob_header); } else { funcs[i] = (void *)ObExprCmpFuncsHelper::get_datum_expr_cmp_func( l.get_type(), r.get_type(), l.get_scale(), r_datum_meta_.scale_, l.get_precision(), r_datum_meta_.precision_, lib::is_oracle_mode(), l.get_collation_type(), has_lob_header); } CK(NULL != funcs[i]); } } if (OB_SUCC(ret)) { rt_expr.eval_func_ = &eval_in_with_subquery; } } #undef RIGHT_ROW #undef RIGHT_ROW_ELE } return ret; } int ObExprInOrNotIn::eval_in_with_row_fallback(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) { return calc_for_row_static_engine(expr, ctx, expr_datum, nullptr); } int ObExprInOrNotIn::eval_in_without_row_fallback(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) { int ret = OB_SUCCESS; // TODO [zongmei.zzm] 原先的In或者NotIn实现,如果没有向量,并且满足need_hash的条件 // 会先计算出所有的右孩子的节点值并构建hash表,现在只实现了短路逻辑比较 ObDatum *left = NULL; ObDatum *right = NULL; bool cnt_null = false; bool is_equal = false; bool is_reverse_cmp = ctx.exec_ctx_.get_my_session()->get_exec_min_cluster_version() >= CLUSTER_VERSION_4_3_0_0; if (OB_FAIL(expr.args_[0]->eval(ctx, left))) { LOG_WARN("failed to eval left", K(ret)); } else if (left->is_null()) { cnt_null = true; } else { int cmp_ret = 0; for (int i = 0; OB_SUCC(ret) && !is_equal && i < expr.inner_func_cnt_; i++) { if (OB_ISNULL(expr.args_[1]->args_[i])) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null arg", K(ret), K(expr.args_[1]->args_[i]), K(i)); } else if (OB_FAIL(expr.args_[1]->args_[i]->eval(ctx, right))) { LOG_WARN("failed to eval right datum", K(ret)); } else if (right->is_null()) { cnt_null = true; } else { if (!is_reverse_cmp && OB_FAIL(((DatumCmpFunc)expr.inner_functions_[0])(*left, *right, cmp_ret))) { LOG_WARN("failed to compare", K(ret)); } else if (is_reverse_cmp && OB_FAIL(((DatumCmpFunc)expr.inner_functions_[0])(*right, *left, cmp_ret))) { LOG_WARN("failed to compare", K(ret)); } else if (0 == cmp_ret) { is_equal = true; } else { // do nothing } } } } if (OB_SUCC(ret)) { set_datum_result(T_OP_IN == expr.type_, is_equal, cnt_null, expr_datum); } return ret; } int ObExprInOrNotIn::eval_batch_in_without_row_fallback(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size) { int ret = OB_SUCCESS; LOG_DEBUG("eval_batch_in start: batch mode", K(batch_size)); bool is_reverse_cmp = ctx.exec_ctx_.get_my_session()->get_exec_min_cluster_version() >= CLUSTER_VERSION_4_3_0_0; ObDatum *results = expr.locate_batch_datums(ctx); if (OB_ISNULL(results)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("results frame is not init", K(ret)); } else { ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); ObDatum* input_left; if (OB_FAIL(expr.args_[0]->eval_batch(ctx, skip, batch_size))) { LOG_WARN("failed to eval batch param values", K(ret)); } else { input_left = expr.args_[0]->locate_batch_datums(ctx); ObDatum *right = nullptr; ObDatum *left = nullptr; ObDatum *right_store[expr.inner_func_cnt_]; //store all right param ptrs bool cnt_null = false; //right param has null /* * CAN_CMP_MEM used for common short path * the params of left and right * both are string type * both are CS_TYPE_UTF8MB4_BIN * both dont have null value * both dont have tailing space * right params count is 2(> 2 will turn to hash calc) */ bool can_cmp_mem = expr.args_[0]->obj_meta_.is_string_type() && CS_TYPE_UTF8MB4_BIN == expr.args_[0]->obj_meta_.get_collation_type(); //eval all right params for (int64_t j = 0; OB_SUCC(ret) && j < expr.inner_func_cnt_; ++j) { if (OB_FAIL(expr.args_[1]->args_[j]->eval(ctx, right_store[j]))) { LOG_WARN("failed to eval right datum", K(ret), K(j)); } else { check_right_can_cmp_mem(*right_store[j], expr.args_[1]->args_[j]->obj_meta_, can_cmp_mem, cnt_null); } } if (OB_SUCC(ret)) { static const char SPACE = ' '; check_left_can_cmp_mem(expr, input_left, skip, eval_flags, batch_size, can_cmp_mem); int64_t idx = 0; if (can_cmp_mem) { const char *ptr0 = right_store[0]->ptr_; const char *ptr1 = right_store[1]->ptr_; uint32_t len0 = right_store[0]->len_; uint32_t len1 = right_store[1]->len_; for (; idx < batch_size; ++idx) { if (input_left[idx].is_null()) { results[idx].set_null(); } else if (input_left[idx].len_ > 0 && SPACE == input_left[idx].ptr_[input_left[idx].len_ - 1]) { can_cmp_mem = false; break; } else { bool is_equal = false; left = &input_left[idx]; is_equal = (left->len_ >= len0 && 0 == MEMCMP(ptr0, left->ptr_, len0) && is_all_space(left->ptr_ + len0, left->len_ - len0)); is_equal = is_equal || (left->len_ >= len1 && 0 == MEMCMP(ptr1, left->ptr_, len1) && is_all_space(left->ptr_ + len1, left->len_ - len1)); results[idx].set_int(is_equal); } } // To prevent passing idx=0 as a param to memset() and triggering an error. if (idx > 0) { eval_flags.set_all(idx); } } if (!can_cmp_mem) { for (; OB_SUCC(ret) && idx < batch_size; ++idx) { if (skip.at(idx) || eval_flags.at(idx)) { continue; } bool is_equal = false; int cmp_ret = 0; left = &input_left[idx]; for (int64_t j = 0; OB_SUCC(ret) && j < expr.inner_func_cnt_; ++j) { right = right_store[j]; if (!left->is_null() && !right->is_null()) { if (is_reverse_cmp && OB_FAIL(((DatumCmpFunc)expr.inner_functions_[0])(*right, *left, cmp_ret))) { LOG_WARN("failed to compare", K(ret)); } else if (!is_reverse_cmp && OB_FAIL(((DatumCmpFunc)expr.inner_functions_[0])(*left, *right, cmp_ret))) { LOG_WARN("failed to compare", K(ret)); } else { is_equal |= !(cmp_ret); } } } if (OB_SUCC(ret)) { set_datum_result(T_OP_IN == expr.type_, is_equal, cnt_null | left->is_null(), results[idx]); eval_flags.set(idx); } } } } } } return ret; } #define IN_OR_NOTIN_DISPATCH_VECTOR_IN_LEFT_ARG_FORMAT(func_name, res_vec) \ switch (left_format) { \ case VEC_FIXED: { \ ret = func_name(expr, ctx, skip, bound); \ break; \ } \ case VEC_DISCRETE: { \ ret = func_name(expr, ctx, skip, bound); \ break; \ } \ case VEC_CONTINUOUS: { \ ret = func_name(expr, ctx, skip, bound); \ break; \ } \ case VEC_UNIFORM: { \ ret = func_name, res_vec>(expr, ctx, skip, bound); \ break; \ } \ case VEC_UNIFORM_CONST: { \ ret = func_name, res_vec>(expr, ctx, skip, bound); \ break; \ } \ default: { \ ret = func_name(expr, ctx, skip, bound); \ } \ } #define IN_OR_NOTIN_DISPATCH_VECTOR_IN_RES_ARG_FORMAT(func_name) \ switch (res_format) { \ case VEC_FIXED: { \ IN_OR_NOTIN_DISPATCH_VECTOR_IN_LEFT_ARG_FORMAT(func_name, IntegerFixedVec); \ break; \ } \ case VEC_UNIFORM: { \ IN_OR_NOTIN_DISPATCH_VECTOR_IN_LEFT_ARG_FORMAT(func_name, IntegerUniVec); \ break; \ } \ case VEC_UNIFORM_CONST: { \ IN_OR_NOTIN_DISPATCH_VECTOR_IN_LEFT_ARG_FORMAT(func_name, IntegerUniCVec); \ break; \ } \ default: { \ IN_OR_NOTIN_DISPATCH_VECTOR_IN_LEFT_ARG_FORMAT(func_name, ObVectorBase); \ } \ } int ObExprInOrNotIn::eval_vector_in_without_row_fallback(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound) { int ret = OB_SUCCESS; if (OB_FAIL(expr.args_[0]->eval_vector(ctx, skip, bound))) { LOG_WARN("failed to eval vector param values", K(ret)); } else { VectorFormat res_format = expr.get_format(ctx); VectorFormat left_format = expr.args_[0]->get_format(ctx); IN_OR_NOTIN_DISPATCH_VECTOR_IN_RES_ARG_FORMAT(inner_eval_vector_in_without_row_fallback); } return ret; } template int ObExprInOrNotIn::inner_eval_vector_in_without_row_fallback(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound) { int ret = OB_SUCCESS; LOG_DEBUG("eval_vector_in start: vector mode", K(bound)); ResVec *res_vec = static_cast(expr.get_vector(ctx)); LeftVec *input_left_vec = static_cast(expr.args_[0]->get_vector(ctx)); ObDatum *right = nullptr; ObDatum *right_store[expr.inner_func_cnt_]; // store all right param ptrs ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); bool right_has_null = false; // right param has null ObBitVector &my_skip = expr.get_pvt_skip(ctx); my_skip.deep_copy(skip, bound.start(), bound.end()); bool left_all_null = true; for (int64_t idx = bound.start(); idx < bound.end(); ++idx) { if (input_left_vec->is_null(idx)) { my_skip.set(idx); res_vec->set_null(idx); eval_flags.set(idx); } else { left_all_null = false; } } // If all the values on the left are null, // perform a short-circuit calculation and return immediately. if (!left_all_null) { /* * CAN_CMP_MEM used for common short path * the params of left and right * both are string type * both are CS_TYPE_UTF8MB4_BIN * both dont have null value * both dont have tailing space * right params count is 2(> 2 will turn to hash calc) */ bool can_cmp_mem = expr.args_[0]->obj_meta_.is_string_type() && CS_TYPE_UTF8MB4_BIN == expr.args_[0]->obj_meta_.get_collation_type(); // eval all right params for (int64_t i = 0; OB_SUCC(ret) && i < expr.inner_func_cnt_; ++i) { // Because we know that in this scenario, // the values on the right side are constants, // meaning they are single-line data, // so we use the eval interface. if (OB_FAIL(expr.args_[1]->args_[i]->eval(ctx, right_store[i]))) { LOG_WARN("failed to eval right datum", K(ret), K(i)); } else { check_right_can_cmp_mem(*right_store[i], expr.args_[1]->args_[i]->obj_meta_, can_cmp_mem, right_has_null); } } if (OB_SUCC(ret)) { check_left_can_cmp_mem(expr, skip, eval_flags, bound, can_cmp_mem); int64_t idx = bound.start(); if (can_cmp_mem && !std::is_same::value) { static const char SPACE = ' '; const char *ptr0 = right_store[0]->ptr_; const char *ptr1 = right_store[1]->ptr_; uint32_t len0 = right_store[0]->len_; uint32_t len1 = right_store[1]->len_; const char *left_str_ptr = nullptr; int32_t left_str_len = 0; for (; OB_SUCC(ret) && idx < bound.end(); ++idx) { // If can_cmp_mem is true, then it is guaranteed that the right side is non-null. // If input_left_vec->is_null(idx), res_vec has been set before. if (!input_left_vec->is_null(idx)) { input_left_vec->get_payload(idx, left_str_ptr, left_str_len); if (left_str_len > 0 && SPACE == left_str_ptr[left_str_len - 1]) { can_cmp_mem = false; break; } else { bool is_equal = false; is_equal = (left_str_len >= len0 && 0 == MEMCMP(ptr0, left_str_ptr, len0) && is_all_space(left_str_ptr + len0, left_str_len - len0)); is_equal = is_equal || (left_str_len >= len1 && 0 == MEMCMP(ptr1, left_str_ptr, len1) && is_all_space(left_str_ptr + len1, left_str_len - len1)); res_vec->set_int(idx, T_OP_IN == expr.type_ ? is_equal : !is_equal); } } } if (idx > bound.start()) { eval_flags.set_all(bound.start(), idx); } } if (!can_cmp_mem) { const char *l_payload = nullptr; const char *fixed_base_l_payload = nullptr; ObLength l_len = 0; int cmp_ret = 0; sql::RowCmpFunc row_cmp_func = VectorCmpExprFuncsHelper::get_row_cmp_func( expr.args_[0]->datum_meta_, expr.args_[1]->args_[0]->datum_meta_); if (std::is_same::value) { fixed_base_l_payload = (reinterpret_cast(input_left_vec))->get_data(); l_len = (reinterpret_cast(input_left_vec))->get_length(); } for (; OB_SUCC(ret) && idx < bound.end(); ++idx) { if (my_skip.at(idx) || eval_flags.at(idx)) { continue; } // The situation "input_left_vec->is_null(idx)" has already been handled previously. if (std::is_same::value) { l_payload = fixed_base_l_payload + l_len * idx; } else { input_left_vec->get_payload(idx, l_payload, l_len); } bool left_hit = false; if (right_has_null) { for (int64_t i = 0; OB_SUCC(ret) && i < expr.inner_func_cnt_; ++i) { right = right_store[i]; if (right->is_null()) { // do nothing } else if (OB_FAIL((row_cmp_func)(expr.args_[0]->obj_meta_, expr.args_[1]->args_[i]->obj_meta_, (const void *)l_payload, l_len, (const void *)right->ptr_, right->len_, cmp_ret))) { LOG_WARN("row_cmp_func failed!", K(ret), K(expr.args_[0]->obj_meta_), K(expr.args_[1]->args_[i]->obj_meta_)); } else if (cmp_ret == 0) { left_hit = true; break; } } } else { for (int64_t i = 0; OB_SUCC(ret) && i < expr.inner_func_cnt_; ++i) { right = right_store[i]; if (OB_FAIL((row_cmp_func)(expr.args_[0]->obj_meta_, expr.args_[1]->args_[i]->obj_meta_, (const void *)l_payload, l_len, (const void *)right->ptr_, right->len_, cmp_ret))) { LOG_WARN("row_cmp_func failed!", K(ret), K(expr.args_[0]->obj_meta_), K(expr.args_[1]->args_[i]->obj_meta_)); } else if (cmp_ret == 0) { left_hit = true; break; } } } if (OB_SUCC(ret)) { set_vector_result(T_OP_IN == expr.type_, left_hit, right_has_null, res_vec, idx); eval_flags.set(idx); } } } } } return ret; } int ObExprInOrNotIn::eval_in_with_row(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) { int ret = OB_SUCCESS; ObDatum *left = NULL; ObDatum *right = NULL; ObExprInCtx *in_ctx = NULL; ObExecContext *exec_ctx = &ctx.exec_ctx_; uint64_t in_id = static_cast(expr.expr_ctx_id_); bool is_completely_cmp = false;//完全匹配,in返回true,not in返回false bool is_null_cmp = false;//第二轮null值匹配,匹配上至少返回null bool left_has_null = false;//左边是否存在null bool is_right_all_null = false; bool is_left_all_null = false; bool fallback = false; #define LEFT_ROW expr.args_[0] #define LEFT_ROW_ELE(i) expr.args_[0]->args_[i] #define RIGHT_ROW(i) expr.args_[1]->args_[i] #define RIGHT_ROW_ELE(i, j) expr.args_[1]->args_[i]->args_[j] int64_t right_param_num = expr.args_[1]->arg_cnt_; int64_t row_dimension = expr.inner_func_cnt_; if (row_dimension > 3) { fallback = true; } if (!fallback && OB_SUCC(ret) && NULL == (in_ctx = static_cast (exec_ctx->get_expr_op_ctx(in_id)))) { if (OB_FAIL(exec_ctx->create_expr_op_ctx(in_id, in_ctx))) { LOG_WARN("failed to create operator ctx", K(ret)); } else if (OB_FAIL(in_ctx->init_static_engine_hashset_vecs(right_param_num, row_dimension, exec_ctx))) { //hashset集合 LOG_WARN("failed to init hashset", K(ret)); } else if (OB_FAIL(in_ctx->init_hashset_vecs_all_null(row_dimension, exec_ctx))) { LOG_WARN("failed to init hashset_vecs_all_null", K(ret)); } else if (OB_FAIL(in_ctx->init_right_datums(right_param_num, row_dimension, exec_ctx))) { LOG_WARN("failed to init right datums", K(ret)); } else if (OB_FAIL(in_ctx->init_cmp_funcs(expr.inner_func_cnt_, exec_ctx))) { LOG_WARN("failed to init cmp funcs", K(ret)); } else { for (int i = 0; OB_SUCC(ret) && i < right_param_num; ++i) { if (OB_ISNULL(RIGHT_ROW(i))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null arg", K(ret), K(RIGHT_ROW(i)), K(i)); } else { int null_idx = 0; //遍历整个向量,记录null元素的位置; 将剩余非null值进行全排列,与null元素按位异或后插入对应位置 for (int64_t j = 0; OB_SUCC(ret) && j < row_dimension; ++j) { if (OB_ISNULL(RIGHT_ROW_ELE(i, j))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null arg", K(ret), K(RIGHT_ROW_ELE(i, j)),K(i), K(j)); } else if (OB_FAIL(RIGHT_ROW_ELE(i, j)->eval(ctx, right))) { LOG_DEBUG("param evaluate fail, hash set lookup disabled for in expr", K(ret), K(i)); in_ctx->disable_hash_calc(); } else if (!in_ctx->is_hash_calc_disabled()) {//遍历确定null_idx //探测null元素的位置并记录 if (right->is_null()) { null_idx = null_idx ^ (1 << j); in_ctx->right_has_null = true; if (null_idx == ((1 << row_dimension) - 1)) { in_ctx->ctx_hash_null_ = true; is_right_all_null = true; } } else { //do nothing } if (OB_FAIL(in_ctx->set_right_datum(i, j, right_param_num, *right))) { LOG_WARN("failed to load right", K(ret), K(i), K(j)); } else { if (OB_ISNULL(in_ctx->hash_func_buff_)) { int func_buf_size = sizeof(void *) * row_dimension; if (OB_ISNULL(in_ctx->hash_func_buff_ = (void **)(exec_ctx->get_allocator()).alloc(func_buf_size))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory", K(ret)); } } //设置ObDatum的hash函数 if (OB_SUCC(ret)) { in_ctx->hash_func_buff_[j] = (void *)(RIGHT_ROW_ELE(i, j)->basic_funcs_->murmur_hash_v2_); in_ctx->cmp_functions_[j] = (void *)(RIGHT_ROW_ELE(i, j)->basic_funcs_->null_first_cmp_); } } } else { //do nothing } } //这里对所有hash表进行函数指针的设定 if (OB_SUCC(ret) && !in_ctx->funcs_ptr_set) { for (int i = 0; i < (1 << row_dimension); ++i) { in_ctx->set_hash_funcs_ptr(i, in_ctx->hash_func_buff_); in_ctx->set_cmp_funcs_ptr(i, in_ctx->cmp_functions_); } in_ctx->funcs_ptr_set = true; } /* *从 1~2^col迭代,选取的全为非null值的时候,,记录这个idx, *将其设置进row中,这个idx用于hash值的计算,以及进入hashset的下标,和operator == 不用于compare_with_null *对设置好idx的row,进入对应的hashtable, *此时operator == 要求key值完全匹配 */ Row tmp_row; for (int64_t k = 1; OB_SUCC(ret) && k < (1 << row_dimension); ++k) { int hash_idx = k; if (0 == (k & null_idx)) {//k代表选取的列,这些列不能包含null if (OB_FAIL(tmp_row.set_elem(in_ctx->get_datum_row(i)))) { LOG_WARN("failed to set elem", K(ret)); } //此次排列进入hash表对应位置 if (OB_SUCC(ret)) { if (OB_FAIL(in_ctx->add_to_static_engine_hashset_vecs(tmp_row, hash_idx))) { LOG_WARN("failed to add hashset", K(ret)); } else { //do nothing } } } else if (null_idx == (k | null_idx)) {//k选取的列为null的子集,将这里的全null置为true if (OB_FAIL(in_ctx->set_hashset_vecs_all_null_true(k))) { LOG_WARN("failed to set hashset vecs all null true", K(ret)); } } else { //此次排列不入hash表 } } } } } } //second we search in hashset if (!fallback && OB_SUCC(ret) && OB_NOT_NULL(in_ctx)) { if (OB_UNLIKELY(in_ctx->is_hash_calc_disabled())) { //fall_back = true;//TODO : lack param fallback } else if (!fallback) { //遍历提取左向量 int null_idx = 0; Row tmp_row; ObDatum datum_ptr[row_dimension]; for (int64_t j = 0; OB_SUCC(ret) && j < row_dimension; ++j) { if (OB_ISNULL(LEFT_ROW_ELE(j))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null arg", K(LEFT_ROW_ELE(j)), K(j)); } else if (OB_FAIL(LEFT_ROW_ELE(j)->eval(ctx, left))) { LOG_WARN("failed to eval", K(ret)); } else { //探测null元素的位置并记录 if (left->is_null()) { null_idx = null_idx ^ (1 << j); left_has_null = true; if (null_idx == ((1 << row_dimension) - 1)) { is_left_all_null = true; } } else { //do nothing } datum_ptr[j] = *left; // refresh hash fun to left row if (OB_NOT_NULL(in_ctx->hash_func_buff_)) { in_ctx->hash_func_buff_[j] = (void *)(LEFT_ROW_ELE(j)->basic_funcs_->murmur_hash_v2_); } // hash table use self as left, so here right param is left for cmp func DatumCmpFunc func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func( RIGHT_ROW_ELE(0, j)->datum_meta_.type_, LEFT_ROW_ELE(j)->datum_meta_.type_, RIGHT_ROW_ELE(0, j)->datum_meta_.scale_, LEFT_ROW_ELE(j)->datum_meta_.scale_, RIGHT_ROW_ELE(0, j)->datum_meta_.precision_, LEFT_ROW_ELE(j)->datum_meta_.precision_, lib::is_oracle_mode(), LEFT_ROW_ELE(j)->datum_meta_.cs_type_, LEFT_ROW_ELE(j)->obj_meta_.has_lob_header() || RIGHT_ROW_ELE(0, j)->obj_meta_.has_lob_header()); in_ctx->cmp_functions_[j] = (void *)(func_ptr); } } if (OB_SUCC(ret)) { tmp_row.set_elem(datum_ptr); //首先检查左边是否有null,有null则检查反面的hashset是否为全null if (null_idx != 0 && OB_FAIL(in_ctx->get_hashset_vecs_all_null((1 <::HASH_CMP_FALSE; for (int64_t k = (1 << row_dimension) - 1; !is_null_cmp && !is_completely_cmp && OB_SUCC(ret) && k >= 1; k = static_cast(last(k, (1 << row_dimension) -1))) { //k 代表选取的列,即idx if (0 == (k & null_idx)) {//k不包含null列 if (OB_FAIL(in_ctx->exist_in_static_engine_hashset_vecs(tmp_row, k, exist_ret))) { LOG_WARN("failed to search in hashset", K(ret)); } else { if (ObExprInHashMap::HASH_CMP_TRUE == exist_ret) { is_completely_cmp = true; } else if (ObExprInHashMap::HASH_CMP_UNKNOWN == exist_ret) { is_null_cmp = true; } else { //do nothing } } if (!left_has_null && !in_ctx->right_has_null) {//左右均没有null值,第一次探测完成后直接退出 break; } } } } } } #undef LEFT_ROW #undef LEFT_ROW_ELE #undef RIGHT_ROW #undef RIGHT_ROW_ELE if (!fallback && OB_SUCC(ret)) { if (OB_NOT_NULL(in_ctx) && in_ctx->ctx_hash_null_) { is_null_cmp = true; } if (!is_completely_cmp && (is_null_cmp || is_right_all_null || is_left_all_null)) { expr_datum.set_null(); } else { set_datum_result(T_OP_IN == expr.type_, is_completely_cmp, false, expr_datum); } } if (fallback) { ret = eval_in_with_row_fallback(expr, ctx, expr_datum); } return ret; } int ObExprInOrNotIn::eval_in_without_row(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) { int ret = OB_SUCCESS; ObExprInCtx *in_ctx = NULL; ObDatum *left = NULL; bool cnt_null = false; bool is_exist = false; ObExecContext *exec_ctx = &ctx.exec_ctx_; uint64_t in_id = static_cast(expr.expr_ctx_id_); bool fallback = false; if (!fallback && OB_SUCC(ret)) { if (OB_FAIL(expr.args_[0]->eval(ctx, left))) { LOG_WARN("failed to eval left", K(ret)); } else if (left->is_null()) { is_exist = false; cnt_null = true; } else { int64_t right_param_num = expr.inner_func_cnt_; //first build hash table for right params if (OB_FAIL(build_right_hash_without_row(in_id, right_param_num, expr, ctx, exec_ctx, in_ctx, cnt_null))) { LOG_WARN("failed to build hash table for right params", K(ret)); } else { // refresh inctx hash fun to left hash func if (!in_ctx->is_hash_calc_disabled() && OB_NOT_NULL(in_ctx->hash_func_buff_)) { in_ctx->hash_func_buff_[0] = (void *) (expr.args_[0]->basic_funcs_->murmur_hash_v2_); } // whatever fallback or not, need set cmp func to right and left // hash table use self as left, so here right param is left for cmp func DatumCmpFunc func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func( expr.args_[1]->args_[0]->datum_meta_.type_, expr.args_[0]->datum_meta_.type_, expr.args_[1]->args_[0]->datum_meta_.scale_, expr.args_[0]->datum_meta_.scale_, expr.args_[1]->args_[0]->datum_meta_.precision_, expr.args_[0]->datum_meta_.precision_, lib::is_oracle_mode(), expr.args_[0]->datum_meta_.cs_type_, expr.args_[0]->obj_meta_.has_lob_header() || expr.args_[1]->args_[0]->obj_meta_.has_lob_header()); for (int i = 0; i < expr.inner_func_cnt_; i++) { in_ctx->cmp_functions_[i] = (void *)func_ptr; } } //second we search in hashset. if (OB_SUCC(ret) && OB_NOT_NULL(in_ctx)) { if (OB_UNLIKELY(in_ctx->is_hash_calc_disabled())) { //do nothing } else if (!left->is_null()){ Row tmp_row; ObDatum *datum_ptr = left; if (OB_FAIL(ret)) { } else if (OB_FAIL(tmp_row.set_elem(datum_ptr))) { LOG_WARN("failed to load left", K(ret)); } else if (0 != in_ctx->get_static_engine_hashset_size() && OB_FAIL(in_ctx->exist_in_static_engine_hashset(tmp_row, is_exist))) { LOG_WARN("failed to search in hashset", K(ret)); } else { //do nothing } } else { //do nothing } } } if (OB_SUCC(ret) && OB_NOT_NULL(in_ctx) && !in_ctx->is_hash_calc_disabled()) { if (OB_NOT_NULL(in_ctx) && in_ctx->ctx_hash_null_) { cnt_null = true; } if (!is_exist && cnt_null) { expr_datum.set_null(); } else { set_datum_result(T_OP_IN == expr.type_, is_exist, false, expr_datum); } } else if (OB_SUCC(ret) && OB_ISNULL(in_ctx)) { if (!left->is_null()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("in_ctx is not init", K(ret)); } else { expr_datum.set_null(); } } else if (OB_SUCC(ret)) { ret = eval_in_without_row_fallback(expr, ctx, expr_datum); } } else if (OB_SUCC(ret)){ ret = eval_in_without_row_fallback(expr, ctx, expr_datum); } else { //do nothing } return ret; } int ObExprInOrNotIn::eval_batch_in_without_row(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const int64_t batch_size) { int ret = OB_SUCCESS; LOG_DEBUG("eval_batch_in_hash start: batch mode"); ObDatum *results = expr.locate_batch_datums(ctx); if (OB_ISNULL(results)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("results frame is not init", K(ret)); } else { ObExprInCtx *in_ctx = NULL; ObExecContext *exec_ctx = &ctx.exec_ctx_; uint64_t in_id = expr.expr_ctx_id_; if (OB_FAIL(expr.args_[0]->eval_batch(ctx, skip, batch_size))) { LOG_WARN("failed to eval batch param values", K(ret)); } else { ObDatum *input_left = expr.args_[0]->locate_batch_datums(ctx); ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); bool fallback = false; //建hash表过程中eval 失败,需要尝试nest_loop Row tmp_row; //放置left ObDatum *left = nullptr; int64_t right_param_num = expr.inner_func_cnt_; bool right_has_null = false; if (OB_FAIL(build_right_hash_without_row(in_id, right_param_num, expr, ctx, exec_ctx, in_ctx, right_has_null))) { LOG_WARN("failed to build hash table for right params", K(ret)); } else { fallback = in_ctx->is_hash_calc_disabled(); // refresh inctx hash fun to left hash func if (!fallback && OB_NOT_NULL(in_ctx->hash_func_buff_)) { in_ctx->hash_func_buff_[0] = (void *) (expr.args_[0]->basic_funcs_->murmur_hash_v2_); } // hash table use self as left, so here right param is left for cmp func DatumCmpFunc func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func( expr.args_[1]->args_[0]->datum_meta_.type_, expr.args_[0]->datum_meta_.type_, expr.args_[1]->args_[0]->datum_meta_.scale_, expr.args_[0]->datum_meta_.scale_, expr.args_[1]->args_[0]->datum_meta_.precision_, expr.args_[0]->datum_meta_.precision_, lib::is_oracle_mode(), expr.args_[0]->datum_meta_.cs_type_, expr.args_[0]->obj_meta_.has_lob_header() || expr.args_[1]->args_[0]->obj_meta_.has_lob_header()); for (int i = 0; i < expr.inner_func_cnt_; i++) { in_ctx->cmp_functions_[i] = (void *)func_ptr; } } for (int64_t left_idx = 0; OB_SUCC(ret) && !fallback && left_idx < batch_size; ++left_idx) { if (skip.at(left_idx) || eval_flags.at(left_idx)) { continue; } bool is_exist = false; bool has_null = false; left = &input_left[left_idx]; if (left->is_null()) { is_exist = false; has_null = true; } else { //second we search in hashset. if (OB_SUCC(ret) && OB_NOT_NULL(in_ctx) && !fallback) { if (OB_FAIL(tmp_row.set_elem(left))) { LOG_WARN("failed to load left", K(ret)); } else if (0 != in_ctx->get_static_engine_hashset_size() && OB_FAIL(in_ctx->exist_in_static_engine_hashset(tmp_row, is_exist))) { LOG_WARN("failed to search in hashset", K(ret)); } else { //do nothing } } } if (OB_SUCC(ret) && !fallback) { has_null = has_null || (OB_NOT_NULL(in_ctx) && in_ctx->ctx_hash_null_); if (!is_exist && has_null) { results[left_idx].set_null(); } else { set_datum_result(T_OP_IN == expr.type_, is_exist, false, results[left_idx]); } eval_flags.set(left_idx); } } if (fallback) { ret = eval_batch_in_without_row_fallback(expr, ctx, skip, batch_size); } } } return ret; } int ObExprInOrNotIn::eval_vector_in_without_row(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound) { int ret = OB_SUCCESS; if (OB_FAIL(expr.args_[0]->eval_vector(ctx, skip, bound))) { LOG_WARN("failed to eval vector param values", K(ret)); } else { VectorFormat res_format = expr.get_format(ctx); VectorFormat left_format = expr.args_[0]->get_format(ctx); IN_OR_NOTIN_DISPATCH_VECTOR_IN_RES_ARG_FORMAT(inner_eval_vector_in_without_row); } return ret; } template int ObExprInOrNotIn::inner_eval_vector_in_without_row(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, const EvalBound &bound) { int ret = OB_SUCCESS; LOG_DEBUG("eval_vector_in_hash start: vector mode"); ResVec *res_vec = static_cast(expr.get_vector(ctx)); LeftVec *input_left_vec = static_cast(expr.args_[0]->get_vector(ctx)); ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); ObExprInCtx *in_ctx = NULL; ObExecContext *exec_ctx = &ctx.exec_ctx_; uint64_t in_id = expr.expr_ctx_id_; // During the process of building the hash table, // if eval() failed, we need to try the nest_loop. bool fallback = false; // Placing the left_datum. Row tmp_row; ObDatum left_datum(nullptr, 0, false); int64_t right_param_num = expr.inner_func_cnt_; bool right_has_null = false; const char *fixed_base_l_payload = nullptr; bool is_exist = false; bool right_all_null = false; ObBitVector &my_skip = expr.get_pvt_skip(ctx); my_skip.deep_copy(skip, bound.start(), bound.end()); bool left_all_null = true; for (int64_t idx = bound.start(); idx < bound.end(); ++idx) { if (input_left_vec->is_null(idx)) { my_skip.set(idx); res_vec->set_null(idx); eval_flags.set(idx); } else { left_all_null = false; } } if (!left_all_null) { if (OB_FAIL(build_right_hash_without_row(in_id, right_param_num, expr, ctx, exec_ctx, in_ctx, right_has_null))) { LOG_WARN("failed to build hash table for right params", K(ret)); } else { fallback = in_ctx->is_hash_calc_disabled(); if (!fallback) { // refresh inctx hash fun to left hash func if (OB_NOT_NULL(in_ctx->hash_func_buff_)) { in_ctx->hash_func_buff_[0] = (void *) (expr.args_[0]->basic_funcs_->murmur_hash_v2_); } // hash table use self as left, so here right param is left for cmp func DatumCmpFunc func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func( expr.args_[1]->args_[0]->datum_meta_.type_, expr.args_[0]->datum_meta_.type_, expr.args_[1]->args_[0]->datum_meta_.scale_, expr.args_[0]->datum_meta_.scale_, expr.args_[1]->args_[0]->datum_meta_.precision_, expr.args_[0]->datum_meta_.precision_, lib::is_oracle_mode(), expr.args_[0]->datum_meta_.cs_type_, expr.args_[0]->obj_meta_.has_lob_header() || expr.args_[1]->args_[0]->obj_meta_.has_lob_header()); for (int i = 0; i < right_param_num; i++) { in_ctx->cmp_functions_[i] = (void *)func_ptr; } if (0 == in_ctx->get_static_engine_hashset_size()) { // Scenarios where in_list contains only null. if (in_ctx->ctx_hash_null_) { for (int64_t left_idx = bound.start(); left_idx < bound.end(); ++left_idx) { if (skip.at(left_idx) || eval_flags.at(left_idx)) { continue; } res_vec->set_null(left_idx); eval_flags.set(left_idx); } right_all_null = true; } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("static_engine_hashset_size unexpected", K(ret), K(right_has_null), K(in_ctx->get_static_engine_hashset_size())); } } else if (std::is_same::value) { fixed_base_l_payload = (reinterpret_cast(input_left_vec))->get_data(); left_datum.len_ = (reinterpret_cast(input_left_vec))->get_length(); } } } if (OB_FAIL(ret)) { } else if (right_all_null) { } else if (!fallback) { for (int64_t left_idx = bound.start(); OB_SUCC(ret) && left_idx < bound.end(); ++left_idx) { if (skip.at(left_idx) || eval_flags.at(left_idx)) { continue; } // The situation "input_left_vec->is_null(idx)" has already been handled previously. if (OB_NOT_NULL(in_ctx)) { //second we search in hashset. if (std::is_same::value) { left_datum.ptr_ = fixed_base_l_payload + left_idx * left_datum.len_; } else { left_datum.ptr_ = input_left_vec->get_payload(left_idx); left_datum.len_ = input_left_vec->get_length(left_idx); } if (OB_FAIL(tmp_row.set_elem(&left_datum))) { LOG_WARN("failed to load left", K(ret)); } else if (OB_FAIL(in_ctx->exist_in_static_engine_hashset(tmp_row, is_exist))) { LOG_WARN("failed to search in hashset", K(ret)); } else { set_vector_result(T_OP_IN == expr.type_, is_exist, in_ctx->ctx_hash_null_, res_vec, left_idx); eval_flags.set(left_idx); } } } } else { ret = eval_vector_in_without_row_fallback(expr, ctx, skip, bound); } } return ret; } int ObExprInOrNotIn::eval_in_with_subquery(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) { int ret = OB_SUCCESS; UNUSED(expr_datum); ObSubQueryIterator *l_iter = NULL; ObExpr **l_row = NULL; if (OB_FAIL(setup_row(expr.args_, ctx, true, expr.inner_func_cnt_, l_iter, l_row))) { LOG_WARN("setup left row failed", K(ret)); } else if (OB_ISNULL(l_row)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null row", K(ret)); } else { bool l_end = false; if (OB_NOT_NULL(l_iter)) { if (OB_FAIL(l_iter->get_next_row())) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; l_end = true; // set row to NULL for (int64_t i = 0; i < expr.inner_func_cnt_; ++i) { l_row[i]->locate_expr_datum(ctx).set_null(); l_row[i]->set_evaluated_projected(ctx); } } else { LOG_WARN("get next row failed", K(ret)); } } } if (OB_FAIL(ret)) { } else if (OB_FAIL(calc_for_row_static_engine(expr, ctx, expr_datum, l_row))) { LOG_WARN("calc for row failed", K(ret), K(l_row)); } if (OB_SUCC(ret) && NULL != l_iter && !l_end) { if (OB_FAIL(l_iter->get_next_row())) { if (OB_ITER_END == ret) { ret = OB_SUCCESS; } else { LOG_WARN("get next row failed", K(ret)); } } else { //only one row expected for left row ret = OB_SUBQUERY_TOO_MANY_ROW; } } } return ret; } int ObExprInOrNotIn::calc_for_row_static_engine(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum, ObExpr **l_row) { int ret = OB_SUCCESS; UNUSED(expr_datum); ObDatum *left = NULL; ObDatum *right = NULL; bool set_cnt_null = false; bool set_cnt_equal = false; bool is_reverse_cmp = ctx.exec_ctx_.get_my_session()->get_exec_min_cluster_version() >= CLUSTER_VERSION_4_3_0_0; #define RIGHT_ROW(i) expr.args_[1]->args_[i] #define RIGHT_ROW_ELE(i, j) expr.args_[1]->args_[i]->args_[j] for (int i = 0; OB_SUCC(ret) && ! set_cnt_equal && i < expr.args_[1]->arg_cnt_; ++i) { if (OB_ISNULL(RIGHT_ROW(i))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null arg", K(ret), K(RIGHT_ROW(i)), K(i)); } else { bool row_is_equal = true; bool row_cnt_null = false; ObExpr *left_expr = nullptr; for (int j = 0; OB_SUCC(ret) && row_is_equal && j < expr.inner_func_cnt_; ++j) { if (OB_ISNULL(l_row)) { left_expr = expr.args_[0]->args_[j]; } else { left_expr = l_row[j]; } if (OB_ISNULL(left_expr)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null arg", K(ret)); } else if (OB_FAIL(left_expr->eval(ctx, left))) { LOG_WARN("failed to eval", K(ret)); } else if (left->is_null()) { row_cnt_null = true; } else if (OB_ISNULL(RIGHT_ROW_ELE(i, j))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null arg", K(ret), K(RIGHT_ROW_ELE(i, j)), K(i), K(j)); } else if (OB_FAIL(RIGHT_ROW_ELE(i, j)->eval(ctx, right))) { LOG_WARN("failed to eval", K(ret)); } else if (right->is_null()) { row_cnt_null = true; } else { int cmp_ret = 0; if (is_reverse_cmp && OB_FAIL(((DatumCmpFunc)expr.inner_functions_[j])(*right, *left, cmp_ret))) { LOG_WARN("failed to compare", K(ret)); } else if (!is_reverse_cmp && OB_FAIL(((DatumCmpFunc)expr.inner_functions_[j])(*left, *right, cmp_ret))) { LOG_WARN("failed to compare", K(ret)); } else if (0 != cmp_ret) { //如果在向量的比较中,有明确的false,表明这个向量不成立,所以应该将has_null置为false row_is_equal = false; row_cnt_null = false; } } } //inner loop if (OB_FAIL(ret)) { //do nothing } else if (row_is_equal && ! row_cnt_null) { set_cnt_equal = true; } else if (row_cnt_null) { set_cnt_null = true; } } } #undef RIGHT_ROW #undef RIGHT_ROW_ELE if (OB_SUCC(ret)) { set_datum_result(T_OP_IN == expr.type_, set_cnt_equal, set_cnt_null, expr_datum); } return ret; } void ObExprInOrNotIn::set_datum_result(const bool is_expr_in, const bool is_exist, const bool param_exist_null, ObDatum &expr_datum) { if (!is_exist && param_exist_null) { expr_datum.set_null(); } else { expr_datum.set_int(!(is_expr_in ^ is_exist)); } } template void ObExprInOrNotIn::set_vector_result(const bool is_expr_in, const bool is_exist, const bool param_exist_null, ResVec *res_vec, const int64_t &idx) { if (!is_exist && param_exist_null) { res_vec->set_null(idx); } else { res_vec->set_int(idx, !(is_expr_in ^ is_exist)); } } int ObExprInOrNotIn::setup_row(ObExpr **expr, ObEvalCtx &ctx, const bool is_iter, const int64_t cmp_func_cnt, ObSubQueryIterator *&iter, ObExpr **&row) { int ret = OB_SUCCESS; if (is_iter) { ObDatum *v = NULL; if (OB_FAIL(expr[0]->eval(ctx, v))) { LOG_WARN("expr evaluate failed", K(ret)); } else if (v->is_null()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("NULL subquery ref info returned", K(ret)); } else if (OB_FAIL(ObExprSubQueryRef::get_subquery_iter( ctx, ObExprSubQueryRef::Extra::get_info(v->get_int()), iter))) { LOG_WARN("get subquery iterator failed", K(ret)); } else if (OB_ISNULL(iter) || cmp_func_cnt != iter->get_output().count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("NULL subquery iterator", K(ret), KP(iter), K(cmp_func_cnt)); } else if (OB_FAIL(iter->rewind())) { LOG_WARN("start iterate failed", K(ret)); } else { row = &const_cast(iter->get_output()).at(0); } } else if (T_OP_ROW == expr[0]->type_) { if (cmp_func_cnt != expr[0]->arg_cnt_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("cmp function count mismatch", K(ret), K(cmp_func_cnt), K(*expr[0])); } else { row = expr[0]->args_; } } else { row = expr; } return ret; } int ObExprInOrNotIn::get_param_types( const ObRawExpr ¶m, const bool is_iter, ObIArray &types) const { int ret = OB_SUCCESS; if (param.get_expr_type() == T_OP_ROW) { for (int64_t i = 0; OB_SUCC(ret) && i < param.get_param_count(); i++) { const ObRawExpr *e = param.get_param_expr(i); CK(NULL != e); OZ(types.push_back(e->get_result_type())); } } else if (param.get_expr_type() == T_REF_QUERY && is_iter) { const ObQueryRefRawExpr &ref = static_cast(param); FOREACH_CNT_X(t, ref.get_column_types(), OB_SUCC(ret)) { OZ(types.push_back(*t)); } } else { OZ(types.push_back(param.get_result_type())); } return ret; } int ObExprInOrNotIn::build_right_hash_without_row(const int64_t in_id, const int64_t right_param_num, const ObExpr &expr, ObEvalCtx &ctx, ObExecContext *exec_ctx, ObExprInCtx *&in_ctx, bool &cnt_null) { int ret = OB_SUCCESS; ObDatum *right = NULL; int64_t row_dimension = 1; if (OB_ISNULL(in_ctx = static_cast (exec_ctx->get_expr_op_ctx(in_id)))) { if (OB_FAIL(exec_ctx->create_expr_op_ctx(in_id, in_ctx))) { LOG_WARN("failed to create operator ctx", K(ret)); } else if (OB_FAIL(in_ctx->init_static_engine_hashset(right_param_num))) { LOG_WARN("failed to init hashset", K(ret)); } else if (OB_FAIL(in_ctx->init_right_datums(right_param_num, row_dimension, exec_ctx))) { LOG_WARN("failed to init right datums", K(ret)); } else if (OB_FAIL(in_ctx->init_cmp_funcs(expr.inner_func_cnt_, exec_ctx))) { LOG_WARN("failed to init cmp funcs", K(ret)); } else { for (int i = 0; OB_SUCC(ret) && !in_ctx->is_hash_calc_disabled() && i < right_param_num; ++i) { if (OB_ISNULL(expr.args_[1]->args_[i])) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid null arg", K(ret), K(expr.args_[1]->args_[i]), K(i)); } else if (OB_FAIL(expr.args_[1]->args_[i]->eval(ctx, right))) { ret = OB_SUCCESS; in_ctx->disable_hash_calc(); LOG_DEBUG("param eval failed, try nest_loop", K(ret), K(i)); } else if (right->is_null()) { cnt_null = true; in_ctx->ctx_hash_null_ = true; in_ctx->cmp_functions_[i] = (void *)(expr.args_[1]->args_[i]->basic_funcs_->null_first_cmp_); } else { if (OB_FAIL(in_ctx->set_right_datum(i, 0, right_param_num, *right))) { LOG_WARN("failed to load right", K(ret), K(i)); } else { if (OB_ISNULL(in_ctx->hash_func_buff_)) { int64_t func_buf_size = sizeof(void *) * 1; if (OB_ISNULL(in_ctx->hash_func_buff_ = (void **) (exec_ctx->get_allocator()).alloc(func_buf_size))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate memory", K(ret)); } } if (OB_SUCC(ret)) { in_ctx->hash_func_buff_[0] = (void *) (expr.args_[1]->args_[i]->basic_funcs_->murmur_hash_v2_); in_ctx->cmp_functions_[i] = (void *)(expr.args_[1]->args_[i]->basic_funcs_->null_first_cmp_); } } Row tmp_row; //这里所有hash函数和cmp函数已经加载完毕,设置tmp_row的函数指针 if (OB_FAIL(ret)){ } else if(OB_FAIL(tmp_row.set_elem(in_ctx->get_datum_row(i)))) { LOG_WARN("failed to load datum", K(ret), K(i)); } else { in_ctx->set_hash_funcs_ptr_for_set(in_ctx->hash_func_buff_); in_ctx->set_cmp_funcs_ptr_for_set(in_ctx->cmp_functions_); } if (OB_SUCC(ret) && OB_FAIL(in_ctx->add_to_static_engine_hashset(tmp_row))) { LOG_WARN("failed to add to hashset", K(ret)); } } } } } return ret; } void ObExprInOrNotIn::check_right_can_cmp_mem(const ObDatum &datum, const ObObjMeta &meta, bool &can_cmp_mem, bool &cnt_null) { static const char SPACE = ' '; if (!meta.is_string_type() || CS_TYPE_UTF8MB4_BIN != meta.get_collation_type()) { cnt_null = cnt_null || datum.is_null(); can_cmp_mem = false; } else { cnt_null = cnt_null || datum.is_null(); can_cmp_mem = can_cmp_mem && !cnt_null; if (datum.len_ > 0 && SPACE == datum.ptr_[datum.len_ - 1]) { can_cmp_mem = false; } } } void ObExprInOrNotIn::check_left_can_cmp_mem(const ObExpr &expr, const ObDatum *datum, const ObBitVector &skip, const ObBitVector &eval_flags, const int64_t batch_size, bool &can_cmp_mem) { UNUSED(datum); can_cmp_mem = can_cmp_mem && T_OP_IN == expr.type_ && 2 == expr.inner_func_cnt_ && ObBitVector::bit_op_zero(skip, eval_flags, batch_size, [](const uint64_t l, const uint64_t r) { return (l | r); }); } void ObExprInOrNotIn::check_left_can_cmp_mem(const ObExpr &expr, const ObBitVector &skip, const ObBitVector &eval_flags, const EvalBound &bound, bool &can_cmp_mem) { can_cmp_mem = can_cmp_mem && 2 == expr.inner_func_cnt_ && ObBitVector::bit_op_zero(skip, eval_flags, bound, [](const uint64_t l, const uint64_t r) { return (l | r); }); } bool ObExprInOrNotIn::is_all_space(const char *ptr, const int64_t remain_len) { bool ret = true; int64_t len = remain_len; int64_t pos = 0; const static char *space64 = " "; int64_t size = 64; while(len > 0 && ret) { int64_t min_cmp_len = min(len, size); ret = (0 == MEMCMP(ptr + pos, space64, min_cmp_len)); pos += min_cmp_len; len -= size; } return ret; } } }