Vectorization 2.0: short-circuit evaluation of ‘IN’ expressions when every left-side value is NULL.
This commit is contained in:
@ -1087,86 +1087,97 @@ int ObExprInOrNotIn::inner_eval_vector_in_without_row_fallback(const ObExpr &exp
|
|||||||
ObDatum *right_store[expr.inner_func_cnt_]; // store all right param ptrs
|
ObDatum *right_store[expr.inner_func_cnt_]; // store all right param ptrs
|
||||||
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
|
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
|
||||||
bool right_has_null = false; // right param has null
|
bool right_has_null = false; // right param has null
|
||||||
/*
|
ObBitVector &my_skip = expr.get_pvt_skip(ctx);
|
||||||
* CAN_CMP_MEM used for common short path
|
my_skip.deep_copy(skip, bound.start(), bound.end());
|
||||||
* the params of left and right
|
bool left_all_null = true;
|
||||||
* both are string type
|
for (int64_t idx = bound.start(); idx < bound.end(); ++idx) {
|
||||||
* both are CS_TYPE_UTF8MB4_BIN
|
if (input_left_vec->is_null(idx)) {
|
||||||
* neither contains a null value
|
my_skip.set(idx);
|
||||||
* neither has trailing spaces
|
res_vec->set_null(idx);
|
||||||
* right param count is 2 (more than 2 will turn to hash calc)
|
eval_flags.set(idx);
|
||||||
*/
|
|
||||||
bool can_cmp_mem = expr.args_[0]->obj_meta_.is_string_type()
|
|
||||||
&& CS_TYPE_UTF8MB4_BIN == expr.args_[0]->obj_meta_.get_collation_type();
|
|
||||||
// eval all right params
|
|
||||||
for (int64_t i = 0; OB_SUCC(ret) && i < expr.inner_func_cnt_; ++i) {
|
|
||||||
// Because we know that in this scenario,
|
|
||||||
// the values on the right side are constants,
|
|
||||||
// meaning they are single-line data,
|
|
||||||
// so we use the eval interface.
|
|
||||||
if (OB_FAIL(expr.args_[1]->args_[i]->eval(ctx, right_store[i]))) {
|
|
||||||
LOG_WARN("failed to eval right datum", K(ret), K(i));
|
|
||||||
} else {
|
} else {
|
||||||
check_right_can_cmp_mem(*right_store[i], expr.args_[1]->args_[i]->obj_meta_,
|
left_all_null = false;
|
||||||
can_cmp_mem, right_has_null);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (OB_SUCC(ret)) {
|
// If all the values on the left are null,
|
||||||
check_left_can_cmp_mem(expr, skip, eval_flags, bound, can_cmp_mem);
|
// perform a short-circuit calculation and return immediately.
|
||||||
int64_t idx = bound.start();
|
if (!left_all_null) {
|
||||||
if (can_cmp_mem && !std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
/*
|
||||||
static const char SPACE = ' ';
|
* CAN_CMP_MEM used for common short path
|
||||||
const char *ptr0 = right_store[0]->ptr_;
|
* the params of left and right
|
||||||
const char *ptr1 = right_store[1]->ptr_;
|
* both are string type
|
||||||
uint32_t len0 = right_store[0]->len_;
|
* both are CS_TYPE_UTF8MB4_BIN
|
||||||
uint32_t len1 = right_store[1]->len_;
|
* neither contains a null value
|
||||||
const char *left_str_ptr = nullptr;
|
* neither has trailing spaces
|
||||||
int32_t left_str_len = 0;
|
* right param count is 2 (more than 2 will turn to hash calc)
|
||||||
for (; OB_SUCC(ret) && idx < bound.end(); ++idx) {
|
*/
|
||||||
// If can_cmp_mem is true, then it is guaranteed that the right side is non-null.
|
bool can_cmp_mem = expr.args_[0]->obj_meta_.is_string_type()
|
||||||
if (input_left_vec->is_null(idx)) {
|
&& CS_TYPE_UTF8MB4_BIN == expr.args_[0]->obj_meta_.get_collation_type();
|
||||||
res_vec->set_null(idx);
|
// eval all right params
|
||||||
} else {
|
for (int64_t i = 0; OB_SUCC(ret) && i < expr.inner_func_cnt_; ++i) {
|
||||||
input_left_vec->get_payload(idx, left_str_ptr, left_str_len);
|
// Because we know that in this scenario,
|
||||||
if (left_str_len > 0 && SPACE == left_str_ptr[left_str_len - 1]) {
|
// the values on the right side are constants,
|
||||||
can_cmp_mem = false;
|
// meaning they are single-line data,
|
||||||
break;
|
// so we use the eval interface.
|
||||||
} else {
|
if (OB_FAIL(expr.args_[1]->args_[i]->eval(ctx, right_store[i]))) {
|
||||||
bool is_equal = false;
|
LOG_WARN("failed to eval right datum", K(ret), K(i));
|
||||||
is_equal = (left_str_len >= len0
|
} else {
|
||||||
&& 0 == MEMCMP(ptr0, left_str_ptr, len0)
|
check_right_can_cmp_mem(*right_store[i], expr.args_[1]->args_[i]->obj_meta_,
|
||||||
&& is_all_space(left_str_ptr + len0, left_str_len - len0));
|
can_cmp_mem, right_has_null);
|
||||||
is_equal = is_equal || (left_str_len >= len1
|
|
||||||
&& 0 == MEMCMP(ptr1, left_str_ptr, len1)
|
|
||||||
&& is_all_space(left_str_ptr + len1, left_str_len - len1));
|
|
||||||
res_vec->set_int(idx, T_OP_IN == expr.type_ ? is_equal : !is_equal);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (idx > bound.start()) {
|
|
||||||
eval_flags.set_all(bound.start(), idx);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!can_cmp_mem) {
|
if (OB_SUCC(ret)) {
|
||||||
const char *l_payload = nullptr;
|
check_left_can_cmp_mem(expr, skip, eval_flags, bound, can_cmp_mem);
|
||||||
const char *fixed_base_l_payload = nullptr;
|
int64_t idx = bound.start();
|
||||||
ObLength l_len = 0;
|
if (can_cmp_mem && !std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
||||||
int cmp_ret = 0;
|
static const char SPACE = ' ';
|
||||||
sql::RowCmpFunc row_cmp_func = VectorCmpExprFuncsHelper::get_row_cmp_func(
|
const char *ptr0 = right_store[0]->ptr_;
|
||||||
expr.args_[0]->datum_meta_,
|
const char *ptr1 = right_store[1]->ptr_;
|
||||||
expr.args_[1]->args_[0]->datum_meta_);
|
uint32_t len0 = right_store[0]->len_;
|
||||||
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
uint32_t len1 = right_store[1]->len_;
|
||||||
fixed_base_l_payload = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_data();
|
const char *left_str_ptr = nullptr;
|
||||||
l_len = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_length();
|
int32_t left_str_len = 0;
|
||||||
}
|
for (; OB_SUCC(ret) && idx < bound.end(); ++idx) {
|
||||||
for (; OB_SUCC(ret) && idx < bound.end(); ++idx) {
|
// If can_cmp_mem is true, then it is guaranteed that the right side is non-null.
|
||||||
if (skip.at(idx) || eval_flags.at(idx)) {
|
// If input_left_vec->is_null(idx), res_vec was already set to NULL by the earlier null pre-pass loop.
|
||||||
continue;
|
if (!input_left_vec->is_null(idx)) {
|
||||||
|
input_left_vec->get_payload(idx, left_str_ptr, left_str_len);
|
||||||
|
if (left_str_len > 0 && SPACE == left_str_ptr[left_str_len - 1]) {
|
||||||
|
can_cmp_mem = false;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
bool is_equal = false;
|
||||||
|
is_equal = (left_str_len >= len0
|
||||||
|
&& 0 == MEMCMP(ptr0, left_str_ptr, len0)
|
||||||
|
&& is_all_space(left_str_ptr + len0, left_str_len - len0));
|
||||||
|
is_equal = is_equal || (left_str_len >= len1
|
||||||
|
&& 0 == MEMCMP(ptr1, left_str_ptr, len1)
|
||||||
|
&& is_all_space(left_str_ptr + len1, left_str_len - len1));
|
||||||
|
res_vec->set_int(idx, T_OP_IN == expr.type_ ? is_equal : !is_equal);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (input_left_vec->is_null(idx)) {
|
if (idx > bound.start()) {
|
||||||
res_vec->set_null(idx);
|
eval_flags.set_all(bound.start(), idx);
|
||||||
eval_flags.set(idx);
|
}
|
||||||
} else {
|
}
|
||||||
|
if (!can_cmp_mem) {
|
||||||
|
const char *l_payload = nullptr;
|
||||||
|
const char *fixed_base_l_payload = nullptr;
|
||||||
|
ObLength l_len = 0;
|
||||||
|
int cmp_ret = 0;
|
||||||
|
sql::RowCmpFunc row_cmp_func = VectorCmpExprFuncsHelper::get_row_cmp_func(
|
||||||
|
expr.args_[0]->datum_meta_,
|
||||||
|
expr.args_[1]->args_[0]->datum_meta_);
|
||||||
|
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
||||||
|
fixed_base_l_payload = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_data();
|
||||||
|
l_len = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_length();
|
||||||
|
}
|
||||||
|
for (; OB_SUCC(ret) && idx < bound.end(); ++idx) {
|
||||||
|
if (my_skip.at(idx) || eval_flags.at(idx)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Rows where input_left_vec->is_null(idx) were already handled by the earlier null pre-pass loop (result set to NULL, eval flag set).
|
||||||
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
||||||
l_payload = fixed_base_l_payload + l_len * idx;
|
l_payload = fixed_base_l_payload + l_len * idx;
|
||||||
} else {
|
} else {
|
||||||
@ -1688,81 +1699,93 @@ int ObExprInOrNotIn::inner_eval_vector_in_without_row(const ObExpr &expr,
|
|||||||
const char *fixed_base_l_payload = nullptr;
|
const char *fixed_base_l_payload = nullptr;
|
||||||
bool is_exist = false;
|
bool is_exist = false;
|
||||||
bool right_all_null = false;
|
bool right_all_null = false;
|
||||||
if (OB_FAIL(build_right_hash_without_row(in_id, right_param_num, expr,
|
ObBitVector &my_skip = expr.get_pvt_skip(ctx);
|
||||||
ctx, exec_ctx, in_ctx, right_has_null))) {
|
my_skip.deep_copy(skip, bound.start(), bound.end());
|
||||||
LOG_WARN("failed to build hash table for right params", K(ret));
|
bool left_all_null = true;
|
||||||
} else {
|
for (int64_t idx = bound.start(); idx < bound.end(); ++idx) {
|
||||||
fallback = in_ctx->is_hash_calc_disabled();
|
if (input_left_vec->is_null(idx)) {
|
||||||
if (!fallback) {
|
my_skip.set(idx);
|
||||||
// refresh inctx hash fun to left hash func
|
res_vec->set_null(idx);
|
||||||
if (OB_NOT_NULL(in_ctx->hash_func_buff_)) {
|
eval_flags.set(idx);
|
||||||
in_ctx->hash_func_buff_[0] = (void *)
|
} else {
|
||||||
(expr.args_[0]->basic_funcs_->murmur_hash_v2_);
|
left_all_null = false;
|
||||||
}
|
|
||||||
// hash table use self as left, so here right param is left for cmp func
|
|
||||||
DatumCmpFunc func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func(
|
|
||||||
expr.args_[1]->args_[0]->datum_meta_.type_,
|
|
||||||
expr.args_[0]->datum_meta_.type_,
|
|
||||||
expr.args_[1]->args_[0]->datum_meta_.scale_,
|
|
||||||
expr.args_[0]->datum_meta_.scale_,
|
|
||||||
expr.args_[1]->args_[0]->datum_meta_.precision_,
|
|
||||||
expr.args_[0]->datum_meta_.precision_,
|
|
||||||
lib::is_oracle_mode(),
|
|
||||||
expr.args_[0]->datum_meta_.cs_type_,
|
|
||||||
expr.args_[0]->obj_meta_.has_lob_header() ||
|
|
||||||
expr.args_[1]->args_[0]->obj_meta_.has_lob_header());
|
|
||||||
for (int i = 0; i < right_param_num; i++) {
|
|
||||||
in_ctx->cmp_functions_[i] = (void *)func_ptr;
|
|
||||||
}
|
|
||||||
if (0 == in_ctx->get_static_engine_hashset_size()) {
|
|
||||||
// Scenarios where in_list contains only null.
|
|
||||||
if (in_ctx->ctx_hash_null_) {
|
|
||||||
for (int64_t left_idx = bound.start(); left_idx < bound.end(); ++left_idx) {
|
|
||||||
if (skip.at(left_idx) || eval_flags.at(left_idx)) { continue; }
|
|
||||||
res_vec->set_null(left_idx);
|
|
||||||
eval_flags.set(left_idx);
|
|
||||||
}
|
|
||||||
right_all_null = true;
|
|
||||||
} else {
|
|
||||||
ret = OB_ERR_UNEXPECTED;
|
|
||||||
LOG_WARN("static_engine_hashset_size unexpected", K(ret), K(right_has_null),
|
|
||||||
K(in_ctx->get_static_engine_hashset_size()));
|
|
||||||
}
|
|
||||||
} else if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
|
||||||
fixed_base_l_payload = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_data();
|
|
||||||
left_datum.len_ = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_length();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (OB_FAIL(ret)) {
|
if (!left_all_null) {
|
||||||
} else if (right_all_null) {
|
if (OB_FAIL(build_right_hash_without_row(in_id, right_param_num, expr,
|
||||||
} else if (!fallback) {
|
ctx, exec_ctx, in_ctx, right_has_null))) {
|
||||||
for (int64_t left_idx = bound.start(); OB_SUCC(ret) && left_idx < bound.end(); ++left_idx) {
|
LOG_WARN("failed to build hash table for right params", K(ret));
|
||||||
if (skip.at(left_idx) || eval_flags.at(left_idx)) {
|
} else {
|
||||||
continue;
|
fallback = in_ctx->is_hash_calc_disabled();
|
||||||
}
|
if (!fallback) {
|
||||||
if (input_left_vec->is_null(left_idx)) {
|
// refresh inctx hash fun to left hash func
|
||||||
res_vec->set_null(left_idx);
|
if (OB_NOT_NULL(in_ctx->hash_func_buff_)) {
|
||||||
eval_flags.set(left_idx);
|
in_ctx->hash_func_buff_[0] = (void *)
|
||||||
} else if (OB_NOT_NULL(in_ctx)) { //second we search in hashset.
|
(expr.args_[0]->basic_funcs_->murmur_hash_v2_);
|
||||||
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
|
||||||
left_datum.ptr_ = fixed_base_l_payload + left_idx * left_datum.len_;
|
|
||||||
} else {
|
|
||||||
left_datum.ptr_ = input_left_vec->get_payload(left_idx);
|
|
||||||
left_datum.len_ = input_left_vec->get_length(left_idx);
|
|
||||||
}
|
}
|
||||||
if (OB_FAIL(tmp_row.set_elem(&left_datum))) {
|
// hash table use self as left, so here right param is left for cmp func
|
||||||
LOG_WARN("failed to load left", K(ret));
|
DatumCmpFunc func_ptr = ObExprCmpFuncsHelper::get_datum_expr_cmp_func(
|
||||||
} else if (OB_FAIL(in_ctx->exist_in_static_engine_hashset(tmp_row, is_exist))) {
|
expr.args_[1]->args_[0]->datum_meta_.type_,
|
||||||
LOG_WARN("failed to search in hashset", K(ret));
|
expr.args_[0]->datum_meta_.type_,
|
||||||
} else {
|
expr.args_[1]->args_[0]->datum_meta_.scale_,
|
||||||
set_vector_result(T_OP_IN == expr.type_, is_exist, in_ctx->ctx_hash_null_, res_vec, left_idx);
|
expr.args_[0]->datum_meta_.scale_,
|
||||||
eval_flags.set(left_idx);
|
expr.args_[1]->args_[0]->datum_meta_.precision_,
|
||||||
|
expr.args_[0]->datum_meta_.precision_,
|
||||||
|
lib::is_oracle_mode(),
|
||||||
|
expr.args_[0]->datum_meta_.cs_type_,
|
||||||
|
expr.args_[0]->obj_meta_.has_lob_header() ||
|
||||||
|
expr.args_[1]->args_[0]->obj_meta_.has_lob_header());
|
||||||
|
for (int i = 0; i < right_param_num; i++) {
|
||||||
|
in_ctx->cmp_functions_[i] = (void *)func_ptr;
|
||||||
|
}
|
||||||
|
if (0 == in_ctx->get_static_engine_hashset_size()) {
|
||||||
|
// Scenarios where in_list contains only null.
|
||||||
|
if (in_ctx->ctx_hash_null_) {
|
||||||
|
for (int64_t left_idx = bound.start(); left_idx < bound.end(); ++left_idx) {
|
||||||
|
if (skip.at(left_idx) || eval_flags.at(left_idx)) { continue; }
|
||||||
|
res_vec->set_null(left_idx);
|
||||||
|
eval_flags.set(left_idx);
|
||||||
|
}
|
||||||
|
right_all_null = true;
|
||||||
|
} else {
|
||||||
|
ret = OB_ERR_UNEXPECTED;
|
||||||
|
LOG_WARN("static_engine_hashset_size unexpected", K(ret), K(right_has_null),
|
||||||
|
K(in_ctx->get_static_engine_hashset_size()));
|
||||||
|
}
|
||||||
|
} else if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
||||||
|
fixed_base_l_payload = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_data();
|
||||||
|
left_datum.len_ = (reinterpret_cast<ObFixedLengthBase *>(input_left_vec))->get_length();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
if (OB_FAIL(ret)) {
|
||||||
ret = eval_vector_in_without_row_fallback(expr, ctx, skip, bound);
|
} else if (right_all_null) {
|
||||||
|
} else if (!fallback) {
|
||||||
|
for (int64_t left_idx = bound.start(); OB_SUCC(ret) && left_idx < bound.end(); ++left_idx) {
|
||||||
|
if (skip.at(left_idx) || eval_flags.at(left_idx)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Rows where input_left_vec->is_null(idx) were already handled by the earlier null pre-pass loop (result set to NULL, eval flag set).
|
||||||
|
if (OB_NOT_NULL(in_ctx)) { //second we search in hashset.
|
||||||
|
if (std::is_same<LeftVec, ObFixedLengthBase>::value) {
|
||||||
|
left_datum.ptr_ = fixed_base_l_payload + left_idx * left_datum.len_;
|
||||||
|
} else {
|
||||||
|
left_datum.ptr_ = input_left_vec->get_payload(left_idx);
|
||||||
|
left_datum.len_ = input_left_vec->get_length(left_idx);
|
||||||
|
}
|
||||||
|
if (OB_FAIL(tmp_row.set_elem(&left_datum))) {
|
||||||
|
LOG_WARN("failed to load left", K(ret));
|
||||||
|
} else if (OB_FAIL(in_ctx->exist_in_static_engine_hashset(tmp_row, is_exist))) {
|
||||||
|
LOG_WARN("failed to search in hashset", K(ret));
|
||||||
|
} else {
|
||||||
|
set_vector_result(T_OP_IN == expr.type_, is_exist, in_ctx->ctx_hash_null_, res_vec, left_idx);
|
||||||
|
eval_flags.set(left_idx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret = eval_vector_in_without_row_fallback(expr, ctx, skip, bound);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
Reference in New Issue
Block a user