/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_ENG #include "sql/engine/expr/ob_expr_like.h" //#include "sql/engine/expr/ob_expr_promotion_util.h" #include "sql/engine/ob_exec_context.h" #include "share/object/ob_obj_cast.h" #include "lib/oblog/ob_log.h" #include "sql/session/ob_sql_session_info.h" namespace oceanbase { using namespace common; namespace sql { #define PERCENT_SIGN_START(mode) (START_WITH_PERCENT_SIGN == mode || START_END_WITH_PERCENT_SIGN == mode) #define PERCENT_SIGN_END(mode) (END_WITH_PERCENT_SIGN == mode || START_END_WITH_PERCENT_SIGN == mode) int ObExprLike::InstrInfo::record_pattern(char *&pattern_buf, const ObString &pattern) { int ret = OB_SUCCESS; if (OB_LIKELY(pattern.length() <= instr_buf_length_)) { pattern_buf = instr_buf_; } else if (OB_ISNULL(allocator_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("allocator is null", K(ret)); } else if (OB_ISNULL(pattern_buf = (char*)(allocator_->alloc(sizeof(char) * pattern.length() * 2)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("No more memories", K(ret)); } else { instr_buf_ = pattern_buf; instr_buf_length_ = pattern.length() * 2; } if (OB_SUCC(ret)) { MEMCPY(pattern_buf, pattern.ptr(), pattern.length()); } return ret; } int ObExprLike::InstrInfo::add_instr_info(const char *start, const uint32_t length) { int ret = OB_SUCCESS; if (OB_LIKELY(instr_cnt_ < instr_info_buf_size_)) { instr_starts_[instr_cnt_] = start; instr_lengths_[instr_cnt_] = length; instr_cnt_++; } else { const uint32_t init_buf_size = 8; const uint32_t new_buf_size = MAX(init_buf_size, instr_info_buf_size_ * 2); const char **new_instr_starts = NULL; uint32_t *new_instr_lengths = NULL; if (OB_ISNULL(new_instr_starts = static_cast(allocator_->alloc(sizeof(char *) * new_buf_size))) || OB_ISNULL(new_instr_lengths = static_cast(allocator_->alloc(sizeof(uint32_t) * new_buf_size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocator memory failed", K(ret), K(new_instr_starts), K(new_buf_size)); } else { MEMCPY(new_instr_starts, instr_starts_, sizeof(char *) * instr_cnt_); MEMCPY(new_instr_lengths, instr_lengths_, sizeof(uint32_t) * instr_cnt_); instr_info_buf_size_ = new_buf_size; instr_starts_ = new_instr_starts; instr_lengths_ = new_instr_lengths; instr_starts_[instr_cnt_] = start; instr_lengths_[instr_cnt_] = length; instr_cnt_++; } } return ret; } ObExprLike::ObExprLike(ObIAllocator &alloc) : ObFuncExprOperator(alloc, T_OP_LIKE, N_LIKE, 3, NOT_ROW_DIMENSION), is_pattern_literal_(false), is_text_literal_(true), is_escape_literal_(false), like_id_(-1) { need_charset_convert_ = false; } ObExprLike::~ObExprLike() { } int ObExprLike::assign(const ObExprOperator &other) { int ret = OB_SUCCESS; const ObExprLike *tmp_other = dynamic_cast(&other); if (OB_UNLIKELY(NULL == tmp_other)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument. wrong type for other", K(ret), K(other)); } else if (OB_LIKELY(this != tmp_other)) { if (OB_FAIL(ObFuncExprOperator::assign(other))) { LOG_WARN("copy in Base class ObFuncExprOperator failed", K(ret)); } else { this->is_pattern_literal_ = tmp_other->is_pattern_literal_; this->is_text_literal_ = tmp_other->is_text_literal_; this->is_escape_literal_ = tmp_other->is_escape_literal_; this->like_id_ = tmp_other->like_id_; } } return ret; } // Oracle mode, the following character of escape character only support _ and % and self, or report error // check valid must be process first, even parttern or text is null, it will report error // eg: select 1 from dual where null like 'a' escape ''; // like: select 1 from t1 where '_%a' like 'a_a%aa' escape 'a'; --ok // select 1 from t1 where '_%' like 'aba%' escape 'a'; --error, ab is invalid // ORA-01424: missing or illegal character following the escape character template int ObExprLike::check_pattern_valid(const T &pattern, const T &escape, const ObCollationType escape_coll, ObCollationType coll_type, ObExecContext *exec_ctx, const uint64_t like_id, const bool check_optimization) { int ret = OB_SUCCESS; int32_t escape_wc = 0; const ObCharsetInfo *cs = NULL; ObString escape_val = escape.get_string(); ObString pattern_val = pattern.get_string(); ObExprLikeContext *like_ctx = NULL; if (is_static_engine) { // check_optimizaiton is true, only if pattern and escape are const. if (check_optimization && NULL == (like_ctx = static_cast (exec_ctx->get_expr_op_ctx(like_id)))) { if (OB_FAIL(exec_ctx->create_expr_op_ctx(like_id, like_ctx))) { LOG_WARN("failed to create operator ctx", K(ret), K(like_id)); } else { like_ctx->instr_info_.set_allocator(exec_ctx->get_allocator()); } } } else if (NULL != exec_ctx) { // When text, pattern and escape are all const, report error when create op ctx // If it's error, then don't optimize check int tmp_ret = OB_SUCCESS; if (NULL == (like_ctx = static_cast (exec_ctx->get_expr_op_ctx(like_id)))) { if (OB_SUCCESS != (tmp_ret = exec_ctx->create_expr_op_ctx(like_id, like_ctx))) { LOG_DEBUG("failed to create operator ctx", K(ret), K(like_id)); } else { like_ctx->instr_info_.set_allocator(exec_ctx->get_allocator()); } } } if (OB_FAIL(ret)) { LOG_WARN("failed to check pattern", K(ret), K(like_id)); } else if (!lib::is_oracle_mode()) { //don't check in mysql mode } else if (NULL != like_ctx && checked_already(*like_ctx, pattern.is_null(), pattern_val, escape.is_null(), escape_val)) { // skip check if pattern and escape are same as checked last time. //select * from t1 where exist (select * from t2 where 'abc' like t1.c1 escape t1.c2); //pattern t1.c1 and escape t1.c2 are const in subquery, but they may change. like_ctx->same_as_last = true; } else if (escape.is_null() || 1 != escape_val.length()) { ret = OB_ERR_INVALID_ESCAPE_CHAR_LENGTH; LOG_WARN("escape character must be character string of length 1", K(escape_val), K(ret)); } else if (OB_FAIL(calc_escape_wc(escape_coll, escape_val, escape_wc))) { LOG_WARN("fail to calc escape wc", K(escape_val), K(escape_coll)); } else if (OB_UNLIKELY(OB_ISNULL(cs = ObCharset::get_charset(coll_type)) || OB_ISNULL(cs->cset))) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("unexpected error. invalid argument(s)",K(coll_type)); } else if (!pattern.is_null()) { const char *buf_start = pattern_val.ptr(); const char *buf_end = pattern_val.ptr() + pattern_val.length(); int error = 0; int32_t char_len = 0; bool is_valid = true; bool is_char_escape = false; bool pre_char_is_escape = false; while (OB_SUCC(ret) && buf_start < buf_end && is_valid) { char_len = static_cast(cs->cset->well_formed_len(cs, buf_start, buf_end, 1, &error)); if (OB_UNLIKELY(0 != error)) { ret = OB_ERR_INVALID_CHARACTER_STRING; LOG_WARN("well_formed_len failed. invalid char", K(buf_start), K(pattern_val), K(char_len)); } else if (OB_FAIL(is_escape(coll_type, buf_start, char_len, escape_wc, is_char_escape))) { LOG_WARN("fail to judge escape", K(escape_val), K(escape_coll)); } else if (is_char_escape) { // 连续两个escape char, like: select 1 from t1 where 'a' like 'aa' escape 'a'; -- it's ok if (pre_char_is_escape) { pre_char_is_escape = false; is_char_escape = false; } else { pre_char_is_escape = true; is_char_escape = false; } } else if (pre_char_is_escape) { // If pre char is escape char, then the following char must be '_' or '%' // Eg: select 1 from t1 where 'a' like 'a_a%' escape 'a'; -- it's ok if (1 != char_len) { ret = OB_ERR_INVALID_CHAR_FOLLOWING_ESCAPE_CHAR; LOG_WARN("missing or illegal character following the escape character", K(escape_val), K(pattern_val), K(ret)); } else if ('%' == *buf_start || '_' == *buf_start) { // it's ok } else { ret = OB_ERR_INVALID_CHAR_FOLLOWING_ESCAPE_CHAR; LOG_WARN("missing or illegal character following the escape character", K(escape_val), K(pattern_val), K(ret)); } pre_char_is_escape = false; } buf_start += char_len; }//end while if (pre_char_is_escape) { // Last character is escape character // // Eg: select 1 from t1 where 'a' like 'a_a' escape 'a'; -- it's error ret = OB_ERR_INVALID_CHAR_FOLLOWING_ESCAPE_CHAR; LOG_WARN("missing or illegal character following the escape character", K(escape_val), K(pattern_val), K(ret)); } if (OB_SUCC(ret) && NULL != like_ctx) { record_last_check(*like_ctx, pattern_val, escape_val, &exec_ctx->get_allocator()); } } return ret; } int ObExprLike::calc_result_type3(ObExprResType &type, ObExprResType &type1, ObExprResType &type2, ObExprResType &type3, ObExprTypeCtx &type_ctx) const { UNUSED(type_ctx); int ret = OB_SUCCESS; if (!type1.is_null() && !type2.is_null() && !type3.is_null() && (!is_type_valid(type1.get_type()) || !is_type_valid(type2.get_type()) || !is_type_valid(type3.get_type()))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("the param is not castable", K(type1), K(type2), K(type3), K(ret)); } else if (OB_NOT_NULL(type_ctx.get_session()) && lib::is_oracle_mode()) { ObSEArray str_params; ObExprResType tmp_result_type; OZ(str_params.push_back(&type1)); OZ(aggregate_string_type_and_charset_oracle(*type_ctx.get_session(), str_params, tmp_result_type)); OZ(str_params.push_back(&type2)); OZ(deduce_string_param_calc_type_and_charset(*type_ctx.get_session(), tmp_result_type, str_params)); type3.set_calc_type(ObVarcharType); type3.set_calc_collation_type(type_ctx.get_session()->get_nls_collation()); type.set_int(); type.set_calc_type(type1.get_calc_type()); type.set_calc_collation_type(type1.get_calc_collation_type()); type.set_scale(DEFAULT_SCALE_FOR_INTEGER); type.set_precision(DEFAULT_PRECISION_FOR_BOOL); } else { type.set_int(); ObObjMeta types[2] = {type1, type2}; type.set_calc_type(ObVarcharType); type.set_scale(DEFAULT_SCALE_FOR_INTEGER); type.set_precision(DEFAULT_PRECISION_FOR_BOOL); if (!type1.is_clob()) { type1.set_calc_type(ObVarcharType); } if (!type2.is_clob()) { type2.set_calc_type(ObVarcharType); } type3.set_calc_type(ObVarcharType); type3.set_calc_collation_type(type3.get_collation_type()); if (lib::is_oracle_mode()) { if (OB_ISNULL(type_ctx.get_session())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("session is null", K(ret)); } else { type.set_calc_collation_type(type_ctx.get_session()->get_nls_collation()); } } else { ret = aggregate_charsets_for_comparison(type.get_calc_meta(), types, 2, type_ctx.get_coll_type()); } type1.set_calc_collation_type(type.get_calc_collation_type()); type2.set_calc_collation_type(type.get_calc_collation_type()); ObExprOperator::calc_result_flag2(type, type1, type2); // ESCAPE is ignored } return ret; } int ObExprLike::set_instr_info(ObIAllocator *exec_allocator, const ObCollationType cs_type, const ObString &pattern, const ObString &escape, const ObCollationType escape_coll, ObExprLikeContext &like_ctx) { //If you feel tough to understand this func, //please feel free to refer here for more details : //https://gw.alicdn.com/tfscom/TB1XAvqMpXXXXaVXpXXXXXXXXXX.jpg int ret = OB_SUCCESS; like_ctx.instr_info_.reuse(); const ObCharsetInfo *cs = NULL; char *pattern_buf = nullptr; ObIAllocator *exec_cal_buf = exec_allocator; InstrInfo &instr_info = like_ctx.instr_info_; if (cs_type != CS_TYPE_UTF8MB4_BIN) { //we optimize the case in which cs_type == CS_TYPE_UTF8MB4_BIN only //just let it go } else if (OB_UNLIKELY(OB_ISNULL(cs = ObCharset::get_charset(cs_type)) || OB_ISNULL(cs->cset))) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("unexpected error. invalid argument(s)",K(cs_type), K(pattern), K(escape)); } else if (OB_UNLIKELY(pattern.empty())) { //do nothing.just let it go. } else if (OB_ISNULL(exec_cal_buf)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Failed to get exec cal buf", K(ret)); } else { int32_t escape_wc = 0; if (OB_FAIL(instr_info.record_pattern(pattern_buf, pattern))) { LOG_WARN("record pattern failed", K(ret)); } else if (OB_FAIL(calc_escape_wc(escape_coll, escape, escape_wc))) { LOG_WARN("calc escape wc failed", K(ret), K(escape_coll), K(escape)); } else { //iterate pattern now const char *buf_start = pattern_buf; const char *buf_end = pattern_buf + pattern.length(); int error = 0; int32_t char_len = 0; bool is_char_escape = false; bool use_instr_mode = true; const char *instr_start = NULL; uint32_t instr_len = 0; bool percent_sign_exist = false; while (OB_SUCC(ret) && buf_start < buf_end && use_instr_mode) { char_len = static_cast(cs->cset->well_formed_len(cs, buf_start, buf_end, 1, &error)); is_char_escape = false; if (OB_UNLIKELY(0 != error)) { ret = OB_ERR_INVALID_CHARACTER_STRING; LOG_WARN("well_formed_len failed. invalid char", K(cs_type), K(buf_start), K(pattern), K(char_len)); } else if (OB_FAIL(is_escape(cs_type, buf_start, char_len, escape_wc, is_char_escape))) { LOG_WARN("check is escape failed", K(ret), K(escape_coll)); } else if (is_char_escape || (1 == char_len && '_' == *buf_start)) { //when there are "_" or escape in pattern //the case can not be optimized. use_instr_mode = false; } else if ((1 == char_len && '%' == *buf_start)) { //percent sign percent_sign_exist = true; if (OB_LIKELY(instr_len > 0)) { if (OB_FAIL(instr_info.add_instr_info(instr_start, instr_len))) { LOG_WARN("add instr info failed", K(ret)); } instr_info.instr_total_length_ += instr_len; instr_len = 0; } buf_start += char_len; } else { //non-percent char if (0 == instr_len) { instr_start = buf_start; } buf_start += char_len; instr_len += char_len; } }//end while if (OB_SUCC(ret) && use_instr_mode && percent_sign_exist) { bool end_with_percent_sign = true; if (instr_len > 0) { // record last instr end_with_percent_sign = false; instr_info.instr_total_length_ += instr_len; if (OB_FAIL(instr_info.add_instr_info(instr_start, instr_len))) { LOG_WARN("add instr info failed", K(ret)); } } if (OB_UNLIKELY(instr_info.empty())) { instr_info.instr_mode_ = ALL_PERCENT_SIGN; } else { bool start_with_percent_sign = instr_info.instr_starts_[0] != pattern_buf; instr_info.instr_mode_ = start_with_percent_sign ? (end_with_percent_sign ? START_END_WITH_PERCENT_SIGN : START_WITH_PERCENT_SIGN) : (end_with_percent_sign ? END_WITH_PERCENT_SIGN : MIDDLE_PERCENT_SIGN); } }//end deduce instrmode }//end else } LOG_DEBUG("end set instr info", K(cs_type), K(pattern), K(escape), K(escape_coll), K(instr_info)); return ret; } template int ObExprLike::calc_with_instr_mode(T &result, const ObCollationType cs_type, const ObString &text, const ObExprLikeContext &like_ctx) { int ret = OB_SUCCESS; const InstrInfo instr_info = like_ctx.instr_info_; const int32_t text_len = text.length(); if (OB_UNLIKELY(cs_type != CS_TYPE_UTF8MB4_BIN)) { ret = OB_INVALID_ARGUMENT; LOG_ERROR("invalid argument(s)", K(ret), K(cs_type), K(text)); } else if (OB_UNLIKELY(instr_info.empty())) { result.set_int(1); } else if (OB_UNLIKELY(text_len < instr_info.instr_total_length_)) { result.set_int(0); } else { int64_t res = 0; switch(instr_info.instr_mode_) { case START_WITH_PERCENT_SIGN: { res = match_with_instr_mode(text, instr_info); break; } case START_END_WITH_PERCENT_SIGN: { res = match_with_instr_mode(text, instr_info); break; } case END_WITH_PERCENT_SIGN: { res = match_with_instr_mode(text, instr_info); break; } case MIDDLE_PERCENT_SIGN: { res = match_with_instr_mode(text, instr_info); break; } default: { ret = OB_ERR_UNEXPECTED; LOG_ERROR("unexpected instr mode", K(ret), K(instr_info.instr_mode_), K(text)); break; } } if (OB_FAIL(ret)) { LOG_WARN("match with instr mode failed", K(ret), K(instr_info.instr_mode_), K(text)); } else { result.set_int(res); } } return ret; } int ObExprLike::calc_escape_wc(const ObCollationType escape_coll, const ObString &escape, int32_t &escape_wc) { int ret = OB_SUCCESS; size_t length = ObCharset::strlen_char(escape_coll, escape.ptr(), escape.length()); if (1 != length) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument to ESCAPE", K(escape), K(length), K(ret)); } else if (OB_FAIL(ObCharset::mb_wc(escape_coll, escape, escape_wc))) { LOG_WARN("failed to convert escape to wc", K(ret), K(escape), K(escape_coll), K(escape_wc)); ret = OB_INVALID_ARGUMENT; } return ret; } int ObExprLike::is_escape(const ObCollationType cs_type, const char *buf_start, int32_t char_len, int32_t escape_wc, bool &res) { int ret = OB_SUCCESS; res = false; //once is_escape is called //we have to construct and destruct the string. //while, note that is_escape will not be called too frequently //so, never mind it ObString string(char_len, buf_start); int32_t wc = 0; if (OB_FAIL(ObCharset::mb_wc(cs_type, string, wc))) { LOG_WARN("failed to get wc", K(ret), K(string), K(cs_type)); ret = OB_INVALID_ARGUMENT; } else { res = (wc == escape_wc); } return ret; } OB_DEF_SERIALIZE(ObExprLike) { int ret = OB_SUCCESS; BASE_SER((ObExprLike, ObFuncExprOperator)); OB_UNIS_ENCODE(is_pattern_literal_); OB_UNIS_ENCODE(is_text_literal_); OB_UNIS_ENCODE(is_escape_literal_); OB_UNIS_ENCODE(like_id_); return ret; } OB_DEF_DESERIALIZE(ObExprLike) { int ret = OB_SUCCESS; BASE_DESER((ObExprLike, ObFuncExprOperator)); is_pattern_literal_ = false; is_text_literal_ = true; is_escape_literal_ = false; like_id_ = -1; OB_UNIS_DECODE(is_pattern_literal_); OB_UNIS_DECODE(is_text_literal_); OB_UNIS_DECODE(is_escape_literal_); OB_UNIS_DECODE(like_id_); return ret; } OB_DEF_SERIALIZE_SIZE(ObExprLike) { int64_t len = 0; BASE_ADD_LEN((ObExprLike, ObFuncExprOperator)); OB_UNIS_ADD_LEN(is_pattern_literal_); OB_UNIS_ADD_LEN(is_text_literal_); OB_UNIS_ADD_LEN(is_escape_literal_); OB_UNIS_ADD_LEN(like_id_); return len; } int ObExprLike::cg_expr(ObExprCGCtx &op_cg_ctx, const ObRawExpr &raw_expr, ObExpr &rt_expr) const { UNUSED(op_cg_ctx); int ret = OB_SUCCESS; const ObRawExpr *text_expr = NULL; const ObRawExpr *pattern_expr = NULL; const ObRawExpr *escape_expr = NULL; if (OB_UNLIKELY(3 != raw_expr.get_param_count())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("like op should have 3 arguments", K(raw_expr.get_param_count())); } else if (OB_ISNULL(text_expr = raw_expr.get_param_expr(0)) || OB_ISNULL(pattern_expr = raw_expr.get_param_expr(1)) || OB_ISNULL(escape_expr = raw_expr.get_param_expr(2))) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("null pointer", K(text_expr), K(pattern_expr), K(escape_expr)); } else if (rt_expr.arg_cnt_ != 3 || OB_ISNULL(rt_expr.args_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("like expr should have 3 arguments", K(ret), K(rt_expr.arg_cnt_), K(rt_expr.args_)); } else if (OB_ISNULL(rt_expr.args_[0]) || OB_ISNULL(rt_expr.args_[1]) || OB_ISNULL(rt_expr.args_[2])) { ret = OB_ERR_UNEXPECTED; LOG_WARN("child is null", K(ret), K(rt_expr.args_[0]), K(rt_expr.args_[1]), K(rt_expr.args_[2])); } else { OB_ASSERT(ob_is_string_tc(rt_expr.args_[0]->datum_meta_.type_) || ObLongTextType == rt_expr.args_[0]->datum_meta_.type_ || ObNullType == rt_expr.args_[0]->datum_meta_.type_); OB_ASSERT(ob_is_string_tc(rt_expr.args_[1]->datum_meta_.type_) || ObLongTextType == rt_expr.args_[1]->datum_meta_.type_ || ObNullType == rt_expr.args_[1]->datum_meta_.type_); OB_ASSERT(ObVarcharType == rt_expr.args_[2]->datum_meta_.type_ || ObNullType == rt_expr.args_[2]->datum_meta_.type_); //Do optimization even if pattern_expr/escape is pushdown parameter, pattern and escape are //checked whether the same as last time which is recorded in like_ctx for each row in execution. bool pattern_literal = pattern_expr->is_const_expr(); bool escape_literal = escape_expr->is_const_expr(); //do check and match optimization only if extra_ is 1. if (pattern_literal && escape_literal) { rt_expr.extra_ = 1; } else { rt_expr.extra_ = 0; } rt_expr.eval_func_ = ObExprLike::like_varchar; // Since pattern and escape are both literal in TPCH, only support vectorized eval with literal // pattern and escape now. // In the full vectorized implement of like expr, like_ctx will be useless. if (text_expr->is_vectorize_result() && !rt_expr.args_[1]->is_batch_result() && !rt_expr.args_[2]->is_batch_result()) { rt_expr.eval_batch_func_ = ObExprLike::eval_like_expr_batch_only_text_vectorized; } } return ret; } template void ObExprLike::record_last_check(ObExprLikeContext &like_ctx, const ObString pattern_val, const ObString escape_val, ObIAllocator *buf_alloc) { if (is_static_engine) { const uint32_t init_len = 16; like_ctx.same_as_last = false; like_ctx.last_pattern_len_ = pattern_val.length(); if (pattern_val.length() > like_ctx.pattern_buf_len_) { if(0 == like_ctx.pattern_buf_len_) { like_ctx.pattern_buf_len_ = init_len; } while (pattern_val.length() > like_ctx.pattern_buf_len_) { like_ctx.pattern_buf_len_ *= 2; } like_ctx.last_pattern_ = (char*) (buf_alloc->alloc(sizeof(char) * like_ctx.pattern_buf_len_)); } MEMCPY(like_ctx.last_pattern_, pattern_val.ptr(), pattern_val.length()); like_ctx.last_escape_len_ = escape_val.length(); if (escape_val.length() > like_ctx.escape_buf_len_) { if(0 == like_ctx.escape_buf_len_) { like_ctx.escape_buf_len_ = init_len; } while (escape_val.length() > like_ctx.escape_buf_len_) { like_ctx.escape_buf_len_ *= 2; } like_ctx.last_escape_ = (char*) (buf_alloc->alloc(sizeof(char) * like_ctx.escape_buf_len_)); } MEMCPY(like_ctx.last_escape_, escape_val.ptr(), escape_val.length()); } else { like_ctx.set_checked(); } } template bool ObExprLike::checked_already(const ObExprLikeContext &like_ctx, bool null_pattern, const ObString pattern_val, bool null_escape, const ObString escape_val) { bool res = false; if (is_static_engine) { res = !null_pattern && !null_escape && escape_val.length() == like_ctx.last_escape_len_ && pattern_val.length() == like_ctx.last_pattern_len_ && 0 == MEMCMP(escape_val.ptr(), like_ctx.last_escape_, escape_val.length()) && 0 == MEMCMP(pattern_val.ptr(), like_ctx.last_pattern_, pattern_val.length()); } else { res = like_ctx.is_checked(); } LOG_DEBUG("like check already end", K(null_pattern), K(pattern_val), K(escape_val), K(res)); return res; } int ObExprLike::like_varchar(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_datum) { int ret = OB_SUCCESS; const bool do_optimization = expr.extra_; if (OB_FAIL(expr.eval_param_value(ctx))) { LOG_WARN("eval param value failed", K(ret)); } ObDatum &text = expr.locate_param_datum(ctx, 0); ObDatum &pattern = expr.locate_param_datum(ctx, 1); ObDatum &escape = expr.locate_param_datum(ctx, 2); uint64_t like_id = static_cast(expr.expr_ctx_id_); const ObCollationType escape_coll = expr.args_[2]->datum_meta_.cs_type_; const ObCollationType coll_type = expr.args_[1]->datum_meta_.cs_type_; if (OB_FAIL(ret)) { // do nothing } else if (OB_FAIL(check_pattern_valid(pattern, escape, escape_coll, coll_type, &ctx.exec_ctx_, like_id, do_optimization))) { LOG_WARN("fail to check pattern string", K(pattern), K(escape), K(coll_type)); } else if (text.is_null() || pattern.is_null()) { expr_datum.set_null(); } else { ObString text_val = text.get_string(); ObString pattern_val = pattern.get_string(); ObString escape_val; if (escape.is_null()) { escape_val.assign_ptr("\\", 1); } else { escape_val = escape.get_string(); if (escape_val.empty()) { escape_val.assign_ptr("\\", 1); } } if (do_optimization && like_id != OB_INVALID_ID && (!text_val.empty()) && (!pattern_val.empty())) { ObExprLikeContext *like_ctx = NULL; if (NULL == (like_ctx = static_cast (ctx.exec_ctx_.get_expr_op_ctx(like_id)))) { ret = OB_ERR_UNEXPECTED; //like context should be created while checking validation. LOG_WARN("like context is null", K(ret), K(like_id)); } if (OB_SUCC(ret)) { if (OB_UNLIKELY((!is_oracle_mode() && !checked_already(*like_ctx, false, pattern_val, false, escape_val)) || (is_oracle_mode() && !like_ctx->same_as_last))) { if (OB_FAIL(set_instr_info(&ctx.exec_ctx_.get_allocator(), coll_type, pattern_val, escape_val, escape_coll, *like_ctx))) { LOG_WARN("failed to set instr info", K(ret), K(pattern_val), K(text_val)); } else if (like_ctx->is_instr_mode()) {//instr mode ret = calc_with_instr_mode(expr_datum, coll_type, text_val, *like_ctx); } else {//not instr mode ret = calc_with_non_instr_mode(expr_datum, coll_type, escape_coll, text_val, pattern_val, escape_val); } if (OB_SUCC(ret) && !is_oracle_mode()) { record_last_check(*like_ctx, pattern_val, escape_val, &ctx.exec_ctx_.get_allocator()); } } else if (like_ctx->is_instr_mode()) {//instr mode ret = calc_with_instr_mode(expr_datum, coll_type, text_val, *like_ctx); } else { //not instr mode ret = calc_with_non_instr_mode(expr_datum, coll_type, escape_coll, text_val, pattern_val, escape_val); } } } else { //normal path. no optimization here. ret = calc_with_non_instr_mode(expr_datum, coll_type, escape_coll, text_val, pattern_val, escape_val); } } return ret; } template int64_t ObExprLike::match_with_instr_mode(const ObString &text, const InstrInfo instr_info) { int64_t res = 0; const char *text_ptr = text.ptr(); uint32_t text_len = text.length(); const char **instr_pos = instr_info.instr_starts_; const uint32_t *instr_len = instr_info.instr_lengths_; bool match = true; int64_t pos = 0; int64_t end = percent_sign_end ? instr_info.instr_cnt_ : instr_info.instr_cnt_ - 1; // if not start with %, memcmp for first instr. if (!percent_sign_start) { int cmp = MEMCMP(text_ptr, instr_pos[0], instr_len[0]); match = 0 == cmp; text_ptr += instr_len[0]; pos++; } // memmem for str surrounded by % for (; pos < end && match; pos++) { char *new_text = static_cast(MEMMEM(text_ptr, text_len, instr_pos[pos], instr_len[pos])); text_len -= new_text != NULL ? new_text - text_ptr + instr_len[pos] : 0; if (OB_UNLIKELY(text_len < 0)) { match = false; LOG_ERROR("unexpected result of memmem", K(text), K(ObString(instr_len[pos], instr_pos[pos]))); } else { match = new_text != NULL; text_ptr = new_text + instr_len[pos]; } } // if not end with %, memcmp for last instr if (match && !percent_sign_end) { if (text_len < instr_len[pos]) { match = false; } else { match = 0 == MEMCMP(text.ptr() + text.length() - instr_len[pos], instr_pos[pos], instr_len[pos]); } } res = match ? 1 : 0; return res; } struct ObNonInstrModeMatcher { inline int64_t operator() (const ObCollationType coll_type, const ObString &text_val, const ObString &pattern_val, int32_t escape_wc) { int64_t res = 0; if (OB_UNLIKELY(text_val.length() <= 0 && pattern_val.length() <= 0)) { // empty string res = 1; } else { bool b = ObCharset::wildcmp(coll_type, text_val, pattern_val, escape_wc, static_cast('_'), static_cast('%')); res = static_cast(b); } return res; } }; template int ObExprLike::match_text_batch(BATCH_EVAL_FUNC_ARG_DECL, const ObCollationType coll_type, const int32_t escape_wc, const ObString &pattern_val, const InstrInfo instr_info) { int ret = OB_SUCCESS; ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); ObDatum *res_datums = expr.locate_batch_datums(ctx); ObDatum *text_datums = expr.args_[0]->locate_batch_datums(ctx); const int64_t step_size = sizeof(uint16_t) * CHAR_BIT; // calc match result for each text for (int64_t i = 0; i < size && OB_SUCC(ret);) { const int64_t bit_vec_off = i / (CHAR_BIT * sizeof(uint16_t)); const uint16_t skip_v = skip.reinterpret_data()[bit_vec_off]; uint16_t &eval_v = eval_flags.reinterpret_data()[bit_vec_off]; if (i + step_size < size && (0 == (skip_v | eval_v))) { for (int64_t j = 0; OB_SUCC(ret) && j < step_size; i++, j++) { if (NullCheck && text_datums[i].is_null()) { res_datums[i].set_null(); } else if (UseInstrMode) { int64_t res = ALL_PERCENT_SIGN == InstrMode ? 1 : match_with_instr_mode (text_datums[i].get_string(), instr_info); res_datums[i].set_int(res); } else { res_datums[i].set_int(ObNonInstrModeMatcher()(coll_type, text_datums[i].get_string(), pattern_val, escape_wc)); } } if (OB_SUCC(ret)) { eval_v = 0xFFFF; } } else if (i + step_size < size && (0xFFFF == (skip_v | eval_v))) { i += step_size; } else { const int64_t new_size = std::min(size, i + step_size); for (; i < new_size && OB_SUCC(ret); i++) { if (!(skip.at(i) || eval_flags.at(i))) { if (NullCheck && text_datums[i].is_null()) { res_datums[i].set_null(); } else if (UseInstrMode) { int64_t res = ALL_PERCENT_SIGN == InstrMode ? 1 : match_with_instr_mode (text_datums[i].get_string(), instr_info); res_datums[i].set_int(res); } else { res_datums[i].set_int(ObNonInstrModeMatcher()(coll_type, text_datums[i].get_string(), pattern_val, escape_wc)); } eval_flags.set(i); } } } } return ret; } // only text is vectorized, check pattern validation and mode first, then try to match each text. int ObExprLike::eval_like_expr_batch_only_text_vectorized(BATCH_EVAL_FUNC_ARG_DECL) { int ret = OB_SUCCESS; ObExpr &text = *expr.args_[0]; ObExpr &pattern = *expr.args_[1]; ObExpr &escape = *expr.args_[2]; ObDatum *pattern_datum = NULL; ObDatum *escape_datum = NULL; const bool do_optimization = true; uint64_t like_id = static_cast(expr.expr_ctx_id_); const ObCollationType coll_type = expr.args_[0]->datum_meta_.cs_type_; const ObCollationType escape_coll = expr.args_[2]->datum_meta_.cs_type_; if (OB_FAIL(pattern.eval(ctx, pattern_datum))) { LOG_WARN("eval pattern failed", K(ret)); } else if (OB_FAIL(escape.eval(ctx, escape_datum))) { LOG_WARN("eval escape failed", K(ret)); } else if (OB_FAIL(check_pattern_valid(*pattern_datum, *escape_datum, escape_coll, coll_type, &ctx.exec_ctx_, like_id, do_optimization))) { LOG_WARN("check pattern valid failed", K(ret)); } else if (OB_FAIL(text.eval_batch(ctx, skip, size))) { LOG_WARN("eval text batch failed", K(ret)); } else if (OB_UNLIKELY(pattern_datum->is_null())) { ObDatum *res_datums = expr.locate_batch_datums(ctx); ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); for (int64_t i = 0; i < size; i++) { if (!skip.contain(i)) { res_datums[i].set_null(); eval_flags.set(i); } } expr.get_eval_info(ctx).notnull_ = false; } else { ObString pattern_val = pattern_datum->get_string(); ObString escape_val; // check pattern is not null already, so result is null if and only if text is null. bool null_check = !expr.args_[0]->get_eval_info(ctx).notnull_; if (escape_datum->is_null()) { escape_val.assign_ptr("\\", 1); } else { escape_val = escape_datum->get_string(); } ObExprLikeContext *like_ctx = NULL; if (OB_ISNULL(like_ctx = static_cast (ctx.exec_ctx_.get_expr_op_ctx(like_id)))) { ret = OB_ERR_UNEXPECTED; //like context should be created while checking validation. LOG_WARN("like context is null", K(ret), K(like_id)); } else if (OB_UNLIKELY((!is_oracle_mode() && !checked_already(*like_ctx, false, pattern_val, false, escape_val)) || (is_oracle_mode() && !like_ctx->same_as_last))) { if (OB_FAIL(set_instr_info(&ctx.exec_ctx_.get_allocator(), coll_type, pattern_val, escape_val, escape_coll, *like_ctx))) { LOG_WARN("failed to set instr info", K(ret), K(pattern_val)); } else if (!is_oracle_mode()) { record_last_check(*like_ctx, pattern_val, escape_val, &ctx.exec_ctx_.get_allocator()); } } INSTR_MODE instr_mode = like_ctx->get_instr_mode(); const InstrInfo instr_info = like_ctx->instr_info_; int32_t escape_wc = 0; LOG_DEBUG("set instr info inner end", K(coll_type), K(pattern_val), K(instr_mode), K(like_ctx->same_as_last)); if (OB_FAIL(ret)) { } else if (INVALID_INSTR_MODE == instr_mode && OB_FAIL(calc_escape_wc(escape_coll, escape_val, escape_wc))) { LOG_WARN("calc escape wc failed", K(ret)); } else { #define MATCH_TEXT_BATCH_ARG_LIST expr, ctx, skip, size, coll_type, escape_wc, pattern_val, \ instr_info // it seems to take a lot of work to make eval_info.notnull_ correct and it may be removed. // so null_check variable is not used now, match_text_batch is called always with null check. #define CALL_MATCH_TEXT_BATCH(use_instr_mode, instr_mode) \ ret = match_text_batch(MATCH_TEXT_BATCH_ARG_LIST); switch (instr_mode) { case INVALID_INSTR_MODE: { CALL_MATCH_TEXT_BATCH(false, INVALID_INSTR_MODE) break; } case START_WITH_PERCENT_SIGN: { CALL_MATCH_TEXT_BATCH(true, START_WITH_PERCENT_SIGN) break; } case START_END_WITH_PERCENT_SIGN: { CALL_MATCH_TEXT_BATCH(true, START_END_WITH_PERCENT_SIGN) break; } case MIDDLE_PERCENT_SIGN: { CALL_MATCH_TEXT_BATCH(true, MIDDLE_PERCENT_SIGN); break; } case END_WITH_PERCENT_SIGN: { CALL_MATCH_TEXT_BATCH(true, END_WITH_PERCENT_SIGN) break; } case ALL_PERCENT_SIGN: { CALL_MATCH_TEXT_BATCH(true, ALL_PERCENT_SIGN) break; } default : { ret = OB_ERR_UNEXPECTED; LOG_ERROR("unexpected instr mode", K(ret), K(instr_mode), K(pattern_val)); break; } } if (OB_FAIL(ret)) { LOG_WARN("match text batch failed", K(ret), K(instr_mode), K(null_check)); } else { expr.get_eval_info(ctx).notnull_ = !null_check; } #undef MATCH_TEXT_BATCH_ARG_LIST #undef CALL_MATCH_TEXT_BATCH } } return ret; } } }