Vectorization 2.0: short-circuit evaluation of 'IN' expressions when all left-side values are NULL.

This commit is contained in:
qingsuijiu
2024-01-22 07:47:56 +00:00
committed by ob-robot
parent 05b1bc313b
commit 06d00daec0

View File

@ -1087,86 +1087,97 @@ int ObExprInOrNotIn::inner_eval_vector_in_without_row_fallback(const ObExpr &exp
ObDatum *right_store[expr.inner_func_cnt_]; // store all right param ptrs ObDatum *right_store[expr.inner_func_cnt_]; // store all right param ptrs
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
bool right_has_null = false; // right param has null bool right_has_null = false; // right param has null
/* ObBitVector &my_skip = expr.get_pvt_skip(ctx);
* CAN_CMP_MEM used for common short path my_skip.deep_copy(skip, bound.start(), bound.end());
* the params of left and right bool left_all_null = true;
* both are string type for (int64_t idx = bound.start(); idx < bound.end(); ++idx) {
* both are CS_TYPE_UTF8MB4_BIN if (input_left_vec->is_null(idx)) {
* both dont have null value my_skip.set(idx);
* both dont have tailing space res_vec->set_null(idx);
* right params count is 2(> 2 will turn to hash calc) eval_flags.set(idx);
*/
bool can_cmp_mem = expr.args_[0]->obj_meta_.is_string_type()
&& CS_TYPE_UTF8MB4_BIN == expr.args_[0]->obj_meta_.get_collation_type();
// eval all right params
for (int64_t i = 0; OB_SUCC(ret) && i < expr.inner_func_cnt_; ++i) {
// Because we know that in this scenario,
// the values on the right side are constants,
// meaning they are single-line data,
// so we use the eval interface.
if (OB_FAIL(expr.args_[1]->args_[i]->eval(ctx, right_store[i]))) {
LOG_WARN("failed to eval right datum", K(ret), K(i));
} else { } else {
check_right_can_cmp_mem(*right_store[i], expr.args_[1]->args_[i]->obj_meta_, left_all_null = false;
can_cmp_mem, right_has_null);
} }
} }
if (OB_SUCC(ret)) { // If all the values on the left are null,
check_left_can_cmp_mem(expr, skip, eval_flags, bound, can_cmp_mem); // perform a short-circuit calculation and return immediately.
int64_t idx = bound.start(); if (!left_all_null) {
if (can_cmp_mem && !std::is_same<LeftVec, ObFixedLengthBase>::value) { /*
static const char SPACE = ' '; * CAN_CMP_MEM used for common short path
const char *ptr0 = right_store[0]->ptr_; * the params of left and right
const char *ptr1 = right_store[1]->ptr_; * both are string type
uint32_t len0 = right_store[0]->len_; * both are CS_TYPE_UTF8MB4_BIN
uint32_t len1 = right_store[1]->len_; * both dont have null value
const char *left_str_ptr = nullptr; * both dont have tailing space
int32_t left_str_len = 0; * right params count is 2(> 2 will turn to hash calc)
for (; OB_SUCC(ret) && idx < bound.end(); ++idx) { */
// If can_cmp_mem is true, then it is guaranteed that the right side is non-null. bool can_cmp_mem = expr.args_[0]->obj_meta_.is_string_type()
if (input_left_vec->is_null(idx)) { && CS_TYPE_UTF8MB4_BIN == expr.args_[0]->obj_meta_.get_collation_type();
res_vec->set_null(idx); // eval all right params
} else { for (int64_t i = 0; OB_SUCC(ret) && i < expr.inner_func_cnt_; ++i) {
input_left_vec->get_payload(idx, left_str_ptr, left_str_len); // Because we know that in this scenario,
if (left_str_len > 0 && SPACE == left_str_ptr[left_str_len - 1]) { // the values on the right side are constants,
can_cmp_mem = false; // meaning they are single-line data,
break; // so we use the eval interface.
} else { if (OB_FAIL(expr.args_[1]->args_[i]->eval(ctx, right_store[i]))) {
bool is_equal = false; LOG_WARN("failed to eval right datum", K(ret), K(i));
is_equal = (left_str_len >= len0 } else {
&& 0 == MEMCMP(ptr0, left_str_ptr, len0) check_right_can_cmp_mem(*right_store[i], expr.args_[1]->args_[i]->obj_meta_,
&& is_all_space(left_str_ptr + len0, left_str_len - len0)); can_cmp_mem, right_has_null);
is_equal = is_equal || (left_str_len >= len1
&& 0 == MEMCMP(ptr1, left_str_ptr, len1)
&& is_all_space(left_str_ptr + len1, left_str_len - len1));
res_vec->set_int(idx, T_OP_IN == expr.type_ ? is_equal : !is_equal);
}
}
}
if (idx > bound.start()) {
eval_flags.set_all(bound.start(), idx);
} }
} }
if (!can_cmp_mem) { if (OB_SUCC(ret)) {
const char *l_payload = nullptr; check_left_can_cmp_mem(expr, skip, eval_flags, bound, can_cmp_mem);
const char *fixed_base_l_payload = nullptr; int64_t idx = bound.start();
ObLength l_len = 0; if (can_cmp_mem && !std::is_same<LeftVec, ObFixedLengthBase>::value) {
int cmp_ret = 0; static const char SPACE = ' ';
sql::RowCmpFunc row_cmp_func = VectorCmpExprFuncsHelper::get_row_cmp_func( const char *ptr0 = right_store[0]->ptr_;
expr.args_[0]->datum_meta_, const char *ptr1 = right_store[1]->ptr_;
expr.args_[1]->args_[0]->datum_meta_); uint32_t len0 = right_store[0]->len_;
if (std::is_same<LeftVec, ObFixedLengthBase>::value) { uint32_t len1 = right_store[1]->len_;
fixed_base_l_payload = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_data(); const char *left_str_ptr = nullptr;
l_len = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_length(); int32_t left_str_len = 0;
} for (; OB_SUCC(ret) && idx < bound.end(); ++idx) {
for (; OB_SUCC(ret) && idx < bound.end(); ++idx) { // If can_cmp_mem is true, then it is guaranteed that the right side is non-null.
if (skip.at(idx) || eval_flags.at(idx)) { // If input_left_vec->is_null(idx), res_vec has been set before.
continue; if (!input_left_vec->is_null(idx)) {
input_left_vec->get_payload(idx, left_str_ptr, left_str_len);
if (left_str_len > 0 && SPACE == left_str_ptr[left_str_len - 1]) {
can_cmp_mem = false;
break;
} else {
bool is_equal = false;
is_equal = (left_str_len >= len0
&& 0 == MEMCMP(ptr0, left_str_ptr, len0)
&& is_all_space(left_str_ptr + len0, left_str_len - len0));
is_equal = is_equal || (left_str_len >= len1
&& 0 == MEMCMP(ptr1, left_str_ptr, len1)
&& is_all_space(left_str_ptr + len1, left_str_len - len1));
res_vec->set_int(idx, T_OP_IN == expr.type_ ? is_equal : !is_equal);
}
}
} }
if (input_left_vec->is_null(idx)) { if (idx > bound.start()) {
res_vec->set_null(idx); eval_flags.set_all(bound.start(), idx);
eval_flags.set(idx); }
} else { }
if (!can_cmp_mem) {
const char *l_payload = nullptr;
const char *fixed_base_l_payload = nullptr;
ObLength l_len = 0;
int cmp_ret = 0;
sql::RowCmpFunc row_cmp_func = VectorCmpExprFuncsHelper::get_row_cmp_func(
expr.args_[0]->datum_meta_,
expr.args_[1]->args_[0]->datum_meta_);
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
fixed_base_l_payload = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_data();
l_len = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_length();
}
for (; OB_SUCC(ret) && idx < bound.end(); ++idx) {
if (my_skip.at(idx) || eval_flags.at(idx)) {
continue;
}
// The situation "input_left_vec->is_null(idx)" has already been handled previously.
if (std::is_same<LeftVec, ObFixedLengthBase>::value) { if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
l_payload = fixed_base_l_payload + l_len * idx; l_payload = fixed_base_l_payload + l_len * idx;
} else { } else {
@ -1688,81 +1699,93 @@ int ObExprInOrNotIn::inner_eval_vector_in_without_row(const ObExpr &expr,
const char *fixed_base_l_payload = nullptr; const char *fixed_base_l_payload = nullptr;
bool is_exist = false; bool is_exist = false;
bool right_all_null = false; bool right_all_null = false;
if (OB_FAIL(build_right_hash_without_row(in_id, right_param_num, expr, ObBitVector &my_skip = expr.get_pvt_skip(ctx);
ctx, exec_ctx, in_ctx, right_has_null))) { my_skip.deep_copy(skip, bound.start(), bound.end());
LOG_WARN("failed to build hash table for right params", K(ret)); bool left_all_null = true;
} else { for (int64_t idx = bound.start(); idx < bound.end(); ++idx) {
fallback = in_ctx->is_hash_calc_disabled(); if (input_left_vec->is_null(idx)) {
if (!fallback) { my_skip.set(idx);
// refresh inctx hash fun to left hash func res_vec->set_null(idx);
if (OB_NOT_NULL(in_ctx->hash_func_buff_)) { eval_flags.set(idx);
in_ctx->hash_func_buff_[0] = (void *) } else {
(expr.args_[0]->basic_funcs_->murmur_hash_v2_); left_all_null = false;
}
// hash table use self as left, so here right param is left for cmp func
DatumCmpFunc func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func(
expr.args_[1]->args_[0]->datum_meta_.type_,
expr.args_[0]->datum_meta_.type_,
expr.args_[1]->args_[0]->datum_meta_.scale_,
expr.args_[0]->datum_meta_.scale_,
expr.args_[1]->args_[0]->datum_meta_.precision_,
expr.args_[0]->datum_meta_.precision_,
lib::is_oracle_mode(),
expr.args_[0]->datum_meta_.cs_type_,
expr.args_[0]->obj_meta_.has_lob_header() ||
expr.args_[1]->args_[0]->obj_meta_.has_lob_header());
for (int i = 0; i < right_param_num; i++) {
in_ctx->cmp_functions_[i] = (void *)func_ptr;
}
if (0 == in_ctx->get_static_engine_hashset_size()) {
// Scenarios where in_list contains only null.
if (in_ctx->ctx_hash_null_) {
for (int64_t left_idx = bound.start(); left_idx < bound.end(); ++left_idx) {
if (skip.at(left_idx) || eval_flags.at(left_idx)) { continue; }
res_vec->set_null(left_idx);
eval_flags.set(left_idx);
}
right_all_null = true;
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("static_engine_hashset_size unexpected", K(ret), K(right_has_null),
K(in_ctx->get_static_engine_hashset_size()));
}
} else if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
fixed_base_l_payload = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_data();
left_datum.len_ = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_length();
}
} }
} }
if (OB_FAIL(ret)) { if (!left_all_null) {
} else if (right_all_null) { if (OB_FAIL(build_right_hash_without_row(in_id, right_param_num, expr,
} else if (!fallback) { ctx, exec_ctx, in_ctx, right_has_null))) {
for (int64_t left_idx = bound.start(); OB_SUCC(ret) && left_idx < bound.end(); ++left_idx) { LOG_WARN("failed to build hash table for right params", K(ret));
if (skip.at(left_idx) || eval_flags.at(left_idx)) { } else {
continue; fallback = in_ctx->is_hash_calc_disabled();
} if (!fallback) {
if (input_left_vec->is_null(left_idx)) { // refresh inctx hash fun to left hash func
res_vec->set_null(left_idx); if (OB_NOT_NULL(in_ctx->hash_func_buff_)) {
eval_flags.set(left_idx); in_ctx->hash_func_buff_[0] = (void *)
} else if (OB_NOT_NULL(in_ctx)) { //second we search in hashset. (expr.args_[0]->basic_funcs_->murmur_hash_v2_);
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
left_datum.ptr_ = fixed_base_l_payload + left_idx * left_datum.len_;
} else {
left_datum.ptr_ = input_left_vec->get_payload(left_idx);
left_datum.len_ = input_left_vec->get_length(left_idx);
} }
if (OB_FAIL(tmp_row.set_elem(&left_datum))) { // hash table use self as left, so here right param is left for cmp func
LOG_WARN("failed to load left", K(ret)); DatumCmpFunc func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func(
} else if (OB_FAIL(in_ctx->exist_in_static_engine_hashset(tmp_row, is_exist))) { expr.args_[1]->args_[0]->datum_meta_.type_,
LOG_WARN("failed to search in hashset", K(ret)); expr.args_[0]->datum_meta_.type_,
} else { expr.args_[1]->args_[0]->datum_meta_.scale_,
set_vector_result(T_OP_IN == expr.type_, is_exist, in_ctx->ctx_hash_null_, res_vec, left_idx); expr.args_[0]->datum_meta_.scale_,
eval_flags.set(left_idx); expr.args_[1]->args_[0]->datum_meta_.precision_,
expr.args_[0]->datum_meta_.precision_,
lib::is_oracle_mode(),
expr.args_[0]->datum_meta_.cs_type_,
expr.args_[0]->obj_meta_.has_lob_header() ||
expr.args_[1]->args_[0]->obj_meta_.has_lob_header());
for (int i = 0; i < right_param_num; i++) {
in_ctx->cmp_functions_[i] = (void *)func_ptr;
}
if (0 == in_ctx->get_static_engine_hashset_size()) {
// Scenarios where in_list contains only null.
if (in_ctx->ctx_hash_null_) {
for (int64_t left_idx = bound.start(); left_idx < bound.end(); ++left_idx) {
if (skip.at(left_idx) || eval_flags.at(left_idx)) { continue; }
res_vec->set_null(left_idx);
eval_flags.set(left_idx);
}
right_all_null = true;
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("static_engine_hashset_size unexpected", K(ret), K(right_has_null),
K(in_ctx->get_static_engine_hashset_size()));
}
} else if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
fixed_base_l_payload = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_data();
left_datum.len_ = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_length();
} }
} }
} }
} else { if (OB_FAIL(ret)) {
ret = eval_vector_in_without_row_fallback(expr, ctx, skip, bound); } else if (right_all_null) {
} else if (!fallback) {
for (int64_t left_idx = bound.start(); OB_SUCC(ret) && left_idx < bound.end(); ++left_idx) {
if (skip.at(left_idx) || eval_flags.at(left_idx)) {
continue;
}
// The situation "input_left_vec->is_null(idx)" has already been handled previously.
if (OB_NOT_NULL(in_ctx)) { //second we search in hashset.
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
left_datum.ptr_ = fixed_base_l_payload + left_idx * left_datum.len_;
} else {
left_datum.ptr_ = input_left_vec->get_payload(left_idx);
left_datum.len_ = input_left_vec->get_length(left_idx);
}
if (OB_FAIL(tmp_row.set_elem(&left_datum))) {
LOG_WARN("failed to load left", K(ret));
} else if (OB_FAIL(in_ctx->exist_in_static_engine_hashset(tmp_row, is_exist))) {
LOG_WARN("failed to search in hashset", K(ret));
} else {
set_vector_result(T_OP_IN == expr.type_, is_exist, in_ctx->ctx_hash_null_, res_vec, left_idx);
eval_flags.set(left_idx);
}
}
}
} else {
ret = eval_vector_in_without_row_fallback(expr, ctx, skip, bound);
}
} }
return ret; return ret;