[FEAT MERGE] impl vectorization 2.0
Co-authored-by: Naynahs <cfzy002@126.com> Co-authored-by: hwx65 <1780011298@qq.com> Co-authored-by: oceanoverflow <oceanoverflow@gmail.com>
This commit is contained in:
@ -14,6 +14,7 @@
|
||||
|
||||
#include "lib/oblog/ob_log.h"
|
||||
#include "share/object/ob_obj_cast.h"
|
||||
#include "share/vector/ob_vector_define.h"
|
||||
#include "sql/engine/expr/ob_expr_substr.h"
|
||||
#include "objit/common/ob_item_type.h"
|
||||
#include "sql/engine/expr/ob_expr_util.h"
|
||||
@ -442,7 +443,9 @@ int ObExprSubstr::substr(common::ObString &varchar,
|
||||
const int64_t start_pos,
|
||||
const int64_t length,
|
||||
common::ObCollationType cs_type,
|
||||
const bool do_ascii_optimize_check)
|
||||
const bool do_ascii_optimize_check,
|
||||
const bool is_arg_batch_ascii,
|
||||
bool &is_result_batch_ascii)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
varchar = text;
|
||||
@ -459,29 +462,35 @@ int ObExprSubstr::substr(common::ObString &varchar,
|
||||
if (OB_UNLIKELY(start < 0 || start >= varchar.length())) {
|
||||
varchar.assign(NULL, 0);
|
||||
} else {
|
||||
if (do_ascii_optimize_check) { // ObCharsetType is CHARSET_UTF8MB4 or CHARSET_GBK
|
||||
if (is_arg_batch_ascii) {
|
||||
res_len = min(length, varchar.length() - start);
|
||||
is_ascii = storage::is_ascii_str(varchar.ptr(), start + res_len);
|
||||
}
|
||||
if (is_ascii) {
|
||||
varchar.assign_ptr(varchar.ptr() + start, static_cast<int32_t>(res_len));
|
||||
} else { // If not all the front chars in param is ascii, rollback to original method.
|
||||
start = start_pos;
|
||||
res_len = 0;
|
||||
int64_t mb_len = ObCharset::strlen_char(cs_type, varchar.ptr(), varchar.length());
|
||||
if (lib::is_oracle_mode() && 0 == start_pos) {
|
||||
start = 1;
|
||||
} else {
|
||||
if (do_ascii_optimize_check) { // ObCharsetType is CHARSET_UTF8MB4 or CHARSET_GBK
|
||||
res_len = min(length, varchar.length() - start);
|
||||
is_ascii = storage::is_ascii_str(varchar.ptr(), start + res_len);
|
||||
}
|
||||
start = (start >= 0) ? start - 1 : start + mb_len;
|
||||
if (OB_UNLIKELY(start < 0 || start >= mb_len)) {
|
||||
varchar.assign(NULL, 0);
|
||||
} else {
|
||||
//It holds that 0<=start<mb_len && length > 0
|
||||
res_len = min(length, mb_len - start);
|
||||
int64_t offset = ObCharset::charpos(cs_type, varchar.ptr(), varchar.length(), start);
|
||||
res_len = ObCharset::charpos(cs_type, varchar.ptr() + offset,
|
||||
(offset == 0) ? varchar.length() : varchar.length() - offset, res_len);
|
||||
varchar.assign_ptr(varchar.ptr() + offset, static_cast<int32_t>(res_len));
|
||||
if (is_ascii) {
|
||||
varchar.assign_ptr(varchar.ptr() + start, static_cast<int32_t>(res_len));
|
||||
} else { // If not all the front chars in param is ascii, rollback to original method.
|
||||
is_result_batch_ascii = false;
|
||||
start = start_pos;
|
||||
res_len = 0;
|
||||
int64_t mb_len = ObCharset::strlen_char(cs_type, varchar.ptr(), varchar.length());
|
||||
if (lib::is_oracle_mode() && 0 == start_pos) {
|
||||
start = 1;
|
||||
}
|
||||
start = (start >= 0) ? start - 1 : start + mb_len;
|
||||
if (OB_UNLIKELY(start < 0 || start >= mb_len)) {
|
||||
varchar.assign(NULL, 0);
|
||||
} else {
|
||||
//It holds that 0<=start<mb_len && length > 0
|
||||
res_len = min(length, mb_len - start);
|
||||
int64_t offset = ObCharset::charpos(cs_type, varchar.ptr(), varchar.length(), start);
|
||||
res_len = ObCharset::charpos(cs_type, varchar.ptr() + offset,
|
||||
(offset == 0) ? varchar.length() : varchar.length() - offset, res_len);
|
||||
varchar.assign_ptr(varchar.ptr() + offset, static_cast<int32_t>(res_len));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -499,7 +508,9 @@ int ObExprSubstr::calc(ObObj &result,
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObString varchar;
|
||||
if (OB_FAIL(substr(varchar, text, start_pos, length, cs_type, storage::can_do_ascii_optimize(cs_type)))) {
|
||||
bool is_result_batch_ascii = false;
|
||||
if (OB_FAIL(substr(varchar, text, start_pos, length, cs_type,
|
||||
storage::can_do_ascii_optimize(cs_type), false, is_result_batch_ascii))) {
|
||||
LOG_WARN("get substr failed", K(ret));
|
||||
} else {
|
||||
if (varchar.length() <= 0 && lib::is_oracle_mode() && !is_clob) {
|
||||
@ -529,11 +540,13 @@ int ObExprSubstr::cg_expr(ObExprCGCtx &op_cg_ctx,
|
||||
&& rt_expr.args_[0]->is_batch_result()
|
||||
&& !rt_expr.args_[1]->is_batch_result()) {
|
||||
rt_expr.eval_batch_func_ = eval_substr_batch;
|
||||
rt_expr.eval_vector_func_ = eval_substr_vector;
|
||||
} else if (3 == rt_expr.arg_cnt_
|
||||
&& rt_expr.args_[0]->is_batch_result()
|
||||
&& !rt_expr.args_[1]->is_batch_result()
|
||||
&& !rt_expr.args_[2]->is_batch_result()) {
|
||||
rt_expr.eval_batch_func_ = eval_substr_batch;
|
||||
rt_expr.eval_vector_func_ = eval_substr_vector;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
@ -545,6 +558,9 @@ static int eval_substr_text(const ObCollationType &cs_type,
|
||||
int64_t &total_byte_len,
|
||||
int64_t &pos,
|
||||
int64_t &len,
|
||||
const bool do_ascii_optimize_check,
|
||||
const bool is_arg_batch_ascii,
|
||||
bool &is_result_batch_ascii,
|
||||
bool is_batch = false,
|
||||
int64_t batch_idx = 0)
|
||||
{
|
||||
@ -556,7 +572,8 @@ static int eval_substr_text(const ObCollationType &cs_type,
|
||||
LOG_WARN("fail to get mbmaxlen", K(cs_type), K(ret));
|
||||
} else if (OB_FAIL(input_iter.get_char_len(total_char_len))) {
|
||||
LOG_WARN("get input char len failed", K(ret));
|
||||
} else if (FALSE_IT(result_byte_len = MIN((pos >= 0 ? total_byte_len - pos + 1 : -pos * mbmaxlen), (MIN((len), (total_char_len)) * mbmaxlen)))) {
|
||||
} else if (FALSE_IT(result_byte_len = MIN((pos >= 0 ? total_byte_len - pos + 1 : -pos * mbmaxlen),
|
||||
(MIN((len), (total_char_len)) * mbmaxlen)))) {
|
||||
} else if (len <= 0 && lib::is_oracle_mode()) {
|
||||
output_result.set_result_null();
|
||||
} else if (pos > total_char_len || len <= 0) {
|
||||
@ -597,7 +614,9 @@ static int eval_substr_text(const ObCollationType &cs_type,
|
||||
ObString inrow_result;
|
||||
if (OB_FAIL(ObExprSubstr::substr(inrow_result, src_block_data, pos, len,
|
||||
cs_type,
|
||||
storage::can_do_ascii_optimize(cs_type)))) {
|
||||
do_ascii_optimize_check,
|
||||
is_arg_batch_ascii,
|
||||
is_result_batch_ascii))) {
|
||||
LOG_WARN("get substr failed", K(ret));
|
||||
} else if (OB_FAIL(output_result.append(inrow_result))) {
|
||||
LOG_WARN("append result failed", K(ret), K(output_result), K(src_block_data));
|
||||
@ -652,9 +671,10 @@ int ObExprSubstr::eval_substr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (!ob_is_text_tc(input_meta.type_)) {
|
||||
ObString output;
|
||||
if (OB_FAIL(substr(output, input, pos, len,
|
||||
expr.datum_meta_.cs_type_,
|
||||
storage::can_do_ascii_optimize(expr.datum_meta_.cs_type_)))) {
|
||||
bool is_result_batch_ascii = false;
|
||||
if (OB_FAIL(substr(output, input, pos, len, expr.datum_meta_.cs_type_,
|
||||
storage::can_do_ascii_optimize(expr.datum_meta_.cs_type_), false,
|
||||
is_result_batch_ascii))) {
|
||||
LOG_WARN("get substr failed", K(ret));
|
||||
} else {
|
||||
if (OB_UNLIKELY(output.length() <= 0)
|
||||
@ -678,13 +698,17 @@ int ObExprSubstr::eval_substr(const ObExpr &expr, ObEvalCtx &ctx, ObDatum &expr_
|
||||
} else {
|
||||
len = NULL == len_datum ? total_byte_len : len;
|
||||
}
|
||||
bool is_result_batch_ascii = false;
|
||||
if (OB_FAIL(ret)) {
|
||||
} else if (OB_FAIL(eval_substr_text(expr.datum_meta_.cs_type_,
|
||||
input_iter,
|
||||
output_result,
|
||||
total_byte_len,
|
||||
pos,
|
||||
len))) {
|
||||
len,
|
||||
false,
|
||||
false,
|
||||
is_result_batch_ascii))) {
|
||||
LOG_WARN("eval substr text failed", K(ret));
|
||||
}
|
||||
}
|
||||
@ -757,6 +781,7 @@ int ObExprSubstr::eval_substr_batch(const ObExpr &expr, ObEvalCtx &ctx,
|
||||
len = has_len_param ? len_datum->get_int() : len;
|
||||
}
|
||||
bool do_ascii_optimize_check = storage::can_do_ascii_optimize(expr.datum_meta_.cs_type_);
|
||||
bool is_result_batch_ascii = true;
|
||||
for (int64_t j = 0; OB_SUCC(ret) && (j < batch_size); ++j) {
|
||||
if (skip.at(j) || eval_flags.at(j)) {
|
||||
continue;
|
||||
@ -764,11 +789,10 @@ int ObExprSubstr::eval_substr_batch(const ObExpr &expr, ObEvalCtx &ctx,
|
||||
results[j].set_null();
|
||||
eval_flags.set(j);
|
||||
} else if(!ob_is_text_tc(expr.args_[0]->datum_meta_.type_)) {
|
||||
if (OB_FAIL(substr(output,
|
||||
datum_array[j].get_string(),
|
||||
pos,
|
||||
if (OB_FAIL(substr(output, datum_array[j].get_string(), pos,
|
||||
min(len, datum_array[j].get_string().length()),
|
||||
expr.datum_meta_.cs_type_, do_ascii_optimize_check))) {
|
||||
expr.datum_meta_.cs_type_, do_ascii_optimize_check, false,
|
||||
is_result_batch_ascii))) {
|
||||
LOG_WARN("get substr failed", K(ret));
|
||||
} else {
|
||||
if (OB_UNLIKELY(output.length() <= 0)
|
||||
@ -795,8 +819,13 @@ int ObExprSubstr::eval_substr_batch(const ObExpr &expr, ObEvalCtx &ctx,
|
||||
input_iter,
|
||||
output_result,
|
||||
total_byte_len,
|
||||
pos, len,
|
||||
true, j))) {
|
||||
pos,
|
||||
len,
|
||||
false,
|
||||
false,
|
||||
is_result_batch_ascii,
|
||||
true,
|
||||
j))) {
|
||||
LOG_WARN("eval substr text failed", K(ret));
|
||||
} else {
|
||||
eval_flags.set(j);
|
||||
@ -810,6 +839,168 @@ int ObExprSubstr::eval_substr_batch(const ObExpr &expr, ObEvalCtx &ctx,
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename ArgVec, typename ResVec>
|
||||
int ObExprSubstr::vector_substr(VECTOR_EVAL_FUNC_ARG_DECL)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
|
||||
const bool has_len_param = (expr.arg_cnt_ > 2);
|
||||
const ArgVec *arg0_vec = static_cast<const ArgVec *>(expr.args_[0]->get_vector(ctx));
|
||||
ResVec *res_vec = static_cast<ResVec *>(expr.get_vector(ctx));
|
||||
ConstUniformFormat *pos_vec = NULL;
|
||||
ConstUniformFormat *len_vec = NULL;
|
||||
bool is_text_params_all_null = true; // used for mark if all the first params are all null
|
||||
bool is_result_all_null = false;
|
||||
// 1.1 check if result all null according to text param
|
||||
for (int64_t j = bound.start(); is_text_params_all_null && j < bound.end(); ++j) {
|
||||
if (skip.at(j) || eval_flags.at(j)) {
|
||||
continue;
|
||||
} else if (!arg0_vec->is_null(j)) {
|
||||
is_text_params_all_null = false;
|
||||
}
|
||||
}
|
||||
if (is_text_params_all_null) {
|
||||
is_result_all_null = true;
|
||||
} else if (OB_FAIL(expr.args_[1]->eval_vector(ctx, skip, bound))) {
|
||||
LOG_WARN("failed to eval vector result args0", K(ret));
|
||||
} else if (has_len_param && OB_FAIL(expr.args_[2]->eval_vector(ctx, skip, bound))) {
|
||||
LOG_WARN("failed to eval vector result args0", K(ret));
|
||||
} else {
|
||||
// 1.2 check if result all null according to pos param and len param
|
||||
pos_vec = static_cast<ConstUniformFormat *>(expr.args_[1]->get_vector(ctx));
|
||||
if (pos_vec->is_null(0)) {
|
||||
is_result_all_null = true;
|
||||
} else if (has_len_param) {
|
||||
len_vec = static_cast<ConstUniformFormat *>(expr.args_[2]->get_vector(ctx));
|
||||
if (len_vec->is_null(0)) {
|
||||
is_result_all_null = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (OB_SUCC(ret)) {
|
||||
if (is_result_all_null) { // any param is null, result is null
|
||||
for (int64_t idx = bound.start(); idx < bound.end(); ++idx) {
|
||||
if (skip.at(idx) || eval_flags.at(idx)) {
|
||||
continue;
|
||||
} else {
|
||||
res_vec->set_null(idx);
|
||||
eval_flags.set(idx);
|
||||
}
|
||||
}
|
||||
eval_flags.set_all(bound.start(), bound.end());
|
||||
} else {
|
||||
// 2. calc substr while result is not all null
|
||||
int64_t pos = 0;
|
||||
int64_t len = INT_MAX64;
|
||||
if (is_oracle_mode()) {
|
||||
if (OB_FAIL(ora_get_integer(pos_vec->get_datum(0), *expr.args_[1], pos)) || (NULL != len_vec
|
||||
&& OB_FAIL(ora_get_integer(len_vec->get_datum(0), *expr.args_[2], len)))) {
|
||||
LOG_WARN("get integer value failed", K(ret));
|
||||
}
|
||||
} else {
|
||||
pos = pos_vec->get_int(0);
|
||||
len = has_len_param ? len_vec->get_int(0) : len;
|
||||
}
|
||||
bool is_arg_batch_ascii = arg0_vec->is_batch_ascii();
|
||||
bool is_result_batch_ascii = true;
|
||||
bool do_ascii_optimize_check = storage::can_do_ascii_optimize(expr.datum_meta_.cs_type_);
|
||||
for (int64_t idx = bound.start(); OB_SUCC(ret) && idx < bound.end(); ++idx) {
|
||||
if (skip.at(idx) || eval_flags.at(idx)) {
|
||||
continue;
|
||||
} else if (arg0_vec->is_null(idx)) {
|
||||
res_vec->set_null(idx);
|
||||
eval_flags.set(idx);
|
||||
} else {
|
||||
// 2.1 deal with string tc
|
||||
if (!ob_is_text_tc(expr.args_[0]->datum_meta_.type_)) {
|
||||
ObString output;
|
||||
if (OB_FAIL(substr(output, arg0_vec->get_string(idx), pos,
|
||||
min(len, arg0_vec->get_string(idx).length()),
|
||||
expr.datum_meta_.cs_type_, do_ascii_optimize_check, is_arg_batch_ascii,
|
||||
is_result_batch_ascii))) {
|
||||
LOG_WARN("get substr failed", K(ret));
|
||||
} else {
|
||||
if (OB_UNLIKELY(output.length() <= 0) && lib::is_oracle_mode()) {
|
||||
res_vec->set_null(idx);
|
||||
} else {
|
||||
res_vec->set_string(idx, output);
|
||||
}
|
||||
eval_flags.set(idx);
|
||||
}
|
||||
// 2.2 deal with text tc
|
||||
} else {
|
||||
const ObDatumMeta &input_meta = expr.args_[0]->datum_meta_;
|
||||
const bool has_lob_header = expr.args_[0]->obj_meta_.has_lob_header();
|
||||
ObEvalCtx::TempAllocGuard alloc_guard(ctx);
|
||||
ObIAllocator &calc_alloc = alloc_guard.get_allocator();
|
||||
ObTextStringIter input_iter(input_meta.type_, input_meta.cs_type_,
|
||||
arg0_vec->get_string(idx), has_lob_header);
|
||||
ObTextStringDatumResult output_result(expr.datum_meta_.type_, &expr, &ctx, res_vec,
|
||||
idx);
|
||||
int64_t total_byte_len = 0;
|
||||
if (OB_FAIL(input_iter.init(0, NULL, &calc_alloc))) {
|
||||
LOG_WARN("init input_iter failed ", K(ret), K(input_iter));
|
||||
} else if (OB_FAIL(input_iter.get_byte_len(total_byte_len))) {
|
||||
LOG_WARN("get input byte len failed", K(ret), K(idx));
|
||||
} else if (OB_FAIL(eval_substr_text(expr.datum_meta_.cs_type_,
|
||||
input_iter,
|
||||
output_result,
|
||||
total_byte_len,
|
||||
pos,
|
||||
len,
|
||||
do_ascii_optimize_check,
|
||||
is_arg_batch_ascii,
|
||||
is_result_batch_ascii,
|
||||
true,
|
||||
idx))) {
|
||||
LOG_WARN("eval substr text failed", K(ret));
|
||||
} else {
|
||||
eval_flags.set(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO Set set_is_batch_ascii = true only if bound is a whole batch and there is no skip.
|
||||
/*
|
||||
if (OB_SUCC(ret)) {
|
||||
if (is_result_batch_ascii) {
|
||||
res_vec->set_is_batch_ascii();
|
||||
}
|
||||
} */
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Entry point for vectorized SUBSTR evaluation.
//
// Evaluates the text argument (args_[0]) first, then picks the vector_substr
// instantiation matching the <argument format, result format> pair. Pairs
// without a dedicated instantiation go through the ObVectorBase one.
// The two DEBUG logs at the end run regardless of the outcome.
int ObExprSubstr::eval_substr_vector(VECTOR_EVAL_FUNC_ARG_DECL)
{
  int ret = OB_SUCCESS;
  if (OB_FAIL(expr.args_[0]->eval_vector(ctx, skip, bound))) {
    LOG_WARN("failed to eval vector result args0", K(ret));
  } else {
    const VectorFormat text_fmt = expr.args_[0]->get_format(ctx);
    const VectorFormat out_fmt = expr.get_format(ctx);
    // Stays true only when one of the specialized pairs below was taken.
    bool specialized = true;
    switch (out_fmt) {
      case VEC_DISCRETE: {
        switch (text_fmt) {
          case VEC_DISCRETE:
            ret = vector_substr<TextDiscVec, TextDiscVec>(VECTOR_EVAL_FUNC_ARG_LIST);
            break;
          case VEC_UNIFORM:
            ret = vector_substr<TextUniVec, TextDiscVec>(VECTOR_EVAL_FUNC_ARG_LIST);
            break;
          case VEC_CONTINUOUS:
            ret = vector_substr<TextContVec, TextDiscVec>(VECTOR_EVAL_FUNC_ARG_LIST);
            break;
          default:
            specialized = false;
            break;
        }
        break;
      }
      case VEC_UNIFORM: {
        switch (text_fmt) {
          case VEC_DISCRETE:
            ret = vector_substr<TextDiscVec, TextUniVec>(VECTOR_EVAL_FUNC_ARG_LIST);
            break;
          case VEC_UNIFORM:
            ret = vector_substr<TextUniVec, TextUniVec>(VECTOR_EVAL_FUNC_ARG_LIST);
            break;
          case VEC_CONTINUOUS:
            ret = vector_substr<TextContVec, TextUniVec>(VECTOR_EVAL_FUNC_ARG_LIST);
            break;
          default:
            specialized = false;
            break;
        }
        break;
      }
      default: {
        specialized = false;
        break;
      }
    }
    if (!specialized) {
      // Generic fallback for every other format combination.
      ret = vector_substr<ObVectorBase, ObVectorBase>(VECTOR_EVAL_FUNC_ARG_LIST);
    }
  }
  SQL_LOG(DEBUG, "expr", K(ToStrVectorHeader(expr.get_vector_header(ctx), expr, &skip, bound)));
  SQL_LOG(DEBUG, "expr.args_[0]", K(ToStrVectorHeader(expr.args_[0]->get_vector_header(ctx), *expr.args_[0], &skip, bound)));
  return ret;
}
|
||||
DEF_SET_LOCAL_SESSION_VARS(ObExprSubstr, raw_expr) {
|
||||
int ret = OB_SUCCESS;
|
||||
SET_LOCAL_SYSVAR_CAPACITY(1);
|
||||
|
||||
Reference in New Issue
Block a user