[FEAT MERGE] impl vectorization 2.0

Co-authored-by: Naynahs <cfzy002@126.com>
Co-authored-by: hwx65 <1780011298@qq.com>
Co-authored-by: oceanoverflow <oceanoverflow@gmail.com>
This commit is contained in:
obdev
2023-12-22 03:43:19 +00:00
committed by ob-robot
parent 1178245448
commit b6773084c6
592 changed files with 358124 additions and 303288 deletions

View File

@ -12,14 +12,18 @@
#define USING_LOG_PREFIX SQL_ENG
#if defined(__x86_64__)
#include <immintrin.h>
#endif
#include <string.h>
#include "sql/engine/expr/ob_expr_lower.h"
#include "share/object/ob_obj_cast.h"
#include "objit/common/ob_item_type.h"
//#include "sql/engine/expr/ob_expr_promotion_util.h"
#include "sql/session/ob_sql_session_info.h"
#include "sql/engine/expr/ob_expr_lob_utils.h"
#include "storage/blocksstable/encoding/ob_encoding_query_util.h"
namespace oceanbase {
using namespace common;
@ -321,6 +325,7 @@ int ObExprLower::cg_expr(ObExprCGCtx &op_cg_ctx,
LOG_WARN("lower expr cg expr failed", K(ret));
} else {
rt_expr.eval_func_ = ObExprLower::calc_lower;
rt_expr.eval_vector_func_ = eval_lower_vector;
}
return ret;
}
@ -334,6 +339,7 @@ int ObExprUpper::cg_expr(ObExprCGCtx &op_cg_ctx,
LOG_WARN("upper expr cg expr failed", K(ret));
} else {
rt_expr.eval_func_ = ObExprUpper::calc_upper;
rt_expr.eval_vector_func_ = eval_upper_vector;
}
return ret;
}
@ -458,6 +464,258 @@ int ObExprLowerUpper::calc_common(const ObExpr &expr, ObEvalCtx &ctx,
return ret;
}
template <char CA, char CZ>
void ObExprLowerUpper::calc_common_inner_optimized(
char *buf, const int32_t &buf_len, const ObString &m_text)
{
MEMCPY(buf, m_text.ptr(), m_text.length());
const char *src_ptr = m_text.ptr();
const size_t size = m_text.length();
char *dst_ptr = buf;
const char *end = m_text.ptr() + size;
#if defined(__x86_64__)
const char *begin = src_ptr;
static constexpr int SSE2_BYTES = sizeof(__m128i);
const char *simd_end = begin + (size & ~(SSE2_BYTES - 1));
const auto a_minus1 = _mm_set1_epi8(CA - 1);
const auto z_plus1 = _mm_set1_epi8(CZ + 1);
const auto flips = _mm_set1_epi8(32);
for (; src_ptr > simd_end; src_ptr += SSE2_BYTES, dst_ptr += SSE2_BYTES) {
auto bytes = _mm_loadu_si128((const __m128i*)src_ptr);
// the i-th byte of masks is set to 0xff if the corresponding byte is
// between a..z when computing upper function (A..Z when computing lower function),
// otherwise set to 0;
auto masks = _mm_and_si128(_mm_cmpgt_epi8(bytes, a_minus1), _mm_cmpgt_epi8(z_plus1, bytes));
// only flip 5th bit of lowcase(uppercase) byte, other bytes keep verbatim.
_mm_storeu_si128((__m128i*)dst_ptr, _mm_xor_si128(bytes, _mm_and_si128(masks, flips)));
}
#endif
// only flip 5th bit of lowcase(uppercase) byte, other bytes keep verbatim.
// i.e. 'a' and 'A' are 0b0110'0001 and 0b'0100'0001 respectively in binary form,
// whether 'a' to 'A' or 'A' to 'a' conversion, just flip 5th bit(xor 32).
for (; src_ptr < end; src_ptr += 1, dst_ptr += 1) {
*dst_ptr = *src_ptr ^ (((CA <= *src_ptr) & (*src_ptr <= CZ)) << 5);
}
}
template <typename ArgVec, typename ResVec, bool IsLower>
int ObExprLowerUpper::vector_lower_upper(VECTOR_EVAL_FUNC_ARG_DECL, common::ObCollationType cs_type)
{
int ret = OB_SUCCESS;
const ArgVec *arg0_vec = static_cast<const ArgVec *>(expr.args_[0]->get_vector(ctx));
ResVec *res_vec = static_cast<ResVec *>(expr.get_vector(ctx));
ObBitVector &eval_flags = expr.get_evaluated_flags(ctx);
// 1. check if result all null according to text param
bool is_params_all_null = true;
for (int64_t idx = bound.start(); idx < bound.end(); ++idx) {
if (skip.at(idx) || eval_flags.at(idx)) {
continue;
} else if (arg0_vec->is_null(idx)) {
res_vec->set_null(idx);
eval_flags.set(idx);
} else {
is_params_all_null = false;
}
}
// 2. calc lower_upper while result is not all null
if (!is_params_all_null) {
if (cs_type == CS_TYPE_INVALID) {
cs_type = expr.datum_meta_.cs_type_;
}
ObDatumMeta text_meta = expr.args_[0]->datum_meta_;
uchar multiply = 0;
ObString str_result;
bool is_arg_batch_ascii = arg0_vec->is_batch_ascii();
bool is_result_batch_ascii = true;
bool do_ascii_optimize_check = storage::can_do_ascii_optimize(expr.datum_meta_.cs_type_);
if (OB_UNLIKELY(!ObCharset::is_valid_collation(cs_type))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("charset is null", K(ret), K(cs_type));
} else if (FALSE_IT(multiply = (IsLower ? ObCharset::get_charset(cs_type)->casedn_multiply
: ObCharset::get_charset(cs_type)->caseup_multiply))) {
} else if (!ob_is_text_tc(text_meta.type_)) { // 2.1 deal with string tc
for (int64_t idx = bound.start(); OB_SUCC(ret) && idx < bound.end(); ++idx) {
if (skip.at(idx) || eval_flags.at(idx)) {
continue;
} else {
ObString m_text = arg0_vec->get_string(idx);
char *buf = expr.get_str_res_mem(ctx, m_text.length() * multiply, idx);
if (m_text.empty()) {
str_result.reset();
} else if (is_arg_batch_ascii || (do_ascii_optimize_check
&& storage::is_ascii_str(m_text.ptr(), m_text.length()))) {
if (IsLower) {
calc_common_inner_optimized<'A', 'Z'>(buf, m_text.length(), m_text);
} else {
calc_common_inner_optimized<'a', 'z'>(buf, m_text.length(), m_text);
}
str_result.assign(buf, static_cast<int32_t>(m_text.length()));
} else {
is_result_batch_ascii = false;
int32_t out_len = calc_common_inner(
buf, m_text.length() * multiply, m_text, cs_type, IsLower);
str_result.assign(buf, static_cast<int32_t>(out_len));
}
if (OB_SUCC(ret)) {
if (OB_UNLIKELY(is_oracle_mode() && str_result.empty())) {
res_vec->set_null(idx);
} else {
res_vec->set_string(idx, str_result);
}
eval_flags.set(idx);
}
}
}
} else { // 2.2 deal with text tc
const ObDatumMeta &input_meta = expr.args_[0]->datum_meta_;
const bool has_lob_header = expr.args_[0]->obj_meta_.has_lob_header();
for (int64_t idx = bound.start(); OB_SUCC(ret) && idx < bound.end(); ++idx) {
if (skip.at(idx) || eval_flags.at(idx)) {
continue;
} else {
ObEvalCtx::TempAllocGuard alloc_guard(ctx);
ObIAllocator &calc_alloc = alloc_guard.get_allocator();
ObTextStringIter src_iter(
input_meta.type_, input_meta.cs_type_, arg0_vec->get_string(idx), has_lob_header);
ObTextStringVectorResult<ResVec> output_result(expr.datum_meta_.type_, &expr, &ctx, res_vec, idx);
ObString dst;
char *buf = NULL; // res buffer
int64_t src_byte_len = 0;
int64_t buf_size = 0;
int32_t buf_len = 0;
bool is_ascii = (is_arg_batch_ascii
|| (do_ascii_optimize_check
&& storage::is_ascii_str(arg0_vec->get_string(idx).ptr(),
arg0_vec->get_string(idx).length())));
if (!is_ascii) {
is_result_batch_ascii = false;
}
if (OB_UNLIKELY(!ObCharset::is_valid_collation(cs_type))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("charset is null", K(ret), K(cs_type));
} else if (OB_FAIL(src_iter.init(0, NULL, &calc_alloc))) {
LOG_WARN("init src_iter failed ", K(ret), K(src_iter));
} else if (OB_FAIL(src_iter.get_byte_len(src_byte_len))) {
LOG_WARN("get input byte len failed", K(ret));
} else if (FALSE_IT(buf_len = multiply * src_byte_len)) {
} else if (OB_FAIL(output_result.init_with_batch_idx(buf_len, idx))) {
LOG_WARN("init stringtext result failed", K(ret));
} else if (buf_len == 0) {
output_result.set_result();
output_result.get_result_buffer(str_result);
} else if (OB_FAIL(output_result.get_reserved_buffer(buf, buf_size))) {
LOG_WARN("stringtext result reserve buffer failed", K(ret));
} else if (OB_ISNULL(buf)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_ERROR("alloc memory failed", "size", buf_len);
} else {
OB_ASSERT(buf_size == buf_len);
ObTextStringIterState state;
ObString src_block_data;
while (OB_SUCC(ret)
&& buf_size > 0
&& (state = src_iter.get_next_block(src_block_data)) == TEXTSTRING_ITER_NEXT) {
int32_t out_len = 0;
if (is_ascii) {
if (IsLower) {
calc_common_inner_optimized<'A', 'Z'>(buf, buf_size, src_block_data);
} else {
calc_common_inner_optimized<'a', 'z'>(buf, buf_size, src_block_data);
}
out_len = src_block_data.length();
} else {
out_len = calc_common_inner(buf,
buf_size,
src_block_data,
cs_type,
IsLower);
}
buf += out_len;
buf_size -= out_len;
if (OB_FAIL(output_result.lseek(out_len, 0))) {
LOG_WARN("result lseek failed", K(ret));
}
}
if (OB_FAIL(ret)) {
} else if (state != TEXTSTRING_ITER_NEXT && state != TEXTSTRING_ITER_END) {
ret = (src_iter.get_inner_ret() != OB_SUCCESS) ?
src_iter.get_inner_ret() : OB_INVALID_DATA;
LOG_WARN("iter state invalid", K(ret), K(state), K(src_iter));
} else {
output_result.get_result_buffer(str_result);
}
}
if (OB_SUCC(ret)) {
if (OB_UNLIKELY(is_oracle_mode() && str_result.length() == 0
&& ob_is_string_tc(expr.datum_meta_.type_))) {
res_vec->set_null(idx);
} else {
res_vec->set_string(idx, str_result);
}
eval_flags.set(idx);
}
}
}
}
// TODO Set set_is_batch_ascii = true only if bound is a whole batch and there is no skip.
/*
if (OB_SUCC(ret)) {
if (is_result_batch_ascii) {
res_vec->set_is_batch_ascii();
}
} */
}
return ret;
}
template <bool IsLower>
int ObExprLowerUpper::calc_common_vector(
VECTOR_EVAL_FUNC_ARG_DECL, common::ObCollationType cs_type)
{
int ret = OB_SUCCESS;
if (OB_FAIL(expr.args_[0]->eval_vector(ctx, skip, bound))) {
LOG_WARN("failed to eval vector result args0", K(ret));
} else {
VectorFormat arg_format = expr.args_[0]->get_format(ctx);
VectorFormat res_format = expr.get_format(ctx);
if (VEC_DISCRETE == arg_format && VEC_DISCRETE == res_format) {
ret = vector_lower_upper<TextDiscVec, TextDiscVec, IsLower>(VECTOR_EVAL_FUNC_ARG_LIST, cs_type);
} else if (VEC_UNIFORM == arg_format && VEC_DISCRETE == res_format) {
ret = vector_lower_upper<TextUniVec, TextDiscVec, IsLower>(VECTOR_EVAL_FUNC_ARG_LIST, cs_type);
} else if (VEC_CONTINUOUS == arg_format && VEC_DISCRETE == res_format) {
ret = vector_lower_upper<TextContVec, TextDiscVec, IsLower>(VECTOR_EVAL_FUNC_ARG_LIST, cs_type);
} else if (VEC_DISCRETE == arg_format && VEC_UNIFORM == res_format) {
ret = vector_lower_upper<TextDiscVec, TextUniVec, IsLower>(VECTOR_EVAL_FUNC_ARG_LIST, cs_type);
} else if (VEC_UNIFORM == arg_format && VEC_UNIFORM == res_format) {
ret = vector_lower_upper<TextUniVec, TextUniVec, IsLower>(VECTOR_EVAL_FUNC_ARG_LIST, cs_type);
} else if (VEC_CONTINUOUS == arg_format && VEC_UNIFORM == res_format) {
ret = vector_lower_upper<TextContVec, TextUniVec, IsLower>(VECTOR_EVAL_FUNC_ARG_LIST, cs_type);
} else {
ret = vector_lower_upper<ObVectorBase, ObVectorBase, IsLower>(VECTOR_EVAL_FUNC_ARG_LIST, cs_type);
}
}
return ret;
}
int ObExprLower::eval_lower_vector(VECTOR_EVAL_FUNC_ARG_DECL)
{
int ret = OB_SUCCESS;
if (OB_FAIL(calc_common_vector<true>(VECTOR_EVAL_FUNC_ARG_LIST, CS_TYPE_INVALID))) {
LOG_WARN("failed to calc_common_vector", K(ret));
}
return ret;
}
int ObExprUpper::eval_upper_vector(VECTOR_EVAL_FUNC_ARG_DECL)
{
int ret = OB_SUCCESS;
if (OB_FAIL(calc_common_vector<false>(VECTOR_EVAL_FUNC_ARG_LIST, CS_TYPE_INVALID))) {
LOG_WARN("failed to calc_common_vector", K(ret));
}
return ret;
}
int ObExprLowerUpper::calc_nls_common(const ObExpr &expr, ObEvalCtx &ctx,
ObDatum &expr_datum, bool lower)
{