Files
oceanbase/src/storage/ob_storage_util.h
Naynahs 3d4ef9741d [FEAT MERGE] performance optimzation for OLTP
Co-authored-by: dimstars <liangjinrongcm@gmail.com>
Co-authored-by: pe-99y <315053752@qq.com>
2024-04-10 07:32:28 +00:00

374 lines
12 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_STORAGE_OB_STORAGE_UTIL_
#define OCEANBASE_STORAGE_OB_STORAGE_UTIL_
#include <cstring>
#include "lib/allocator/ob_allocator.h"
#include "share/datum/ob_datum_funcs.h"
#include "sql/engine/expr/ob_expr.h"
namespace oceanbase
{
namespace share
{
namespace schema
{
class ObColumnParam;
}
}
namespace blocksstable
{
struct ObStorageDatum;
}
namespace storage
{
class ObTableIterParam;
class ObTableAccessContext;
// ---------------------------------------------------------------------------
// Padding helpers. Only the declarations are visible in this header.
// NOTE(review): judging by the names, these pad a value out to the length
// carried in `accuracy`, taking any extra memory from `padding_alloc` --
// confirm against the definitions.
// ---------------------------------------------------------------------------

// pad_column overload that receives the column's object meta and a storage
// datum passed by non-const reference.
int pad_column(const ObObjMeta &obj_meta,
const ObAccuracy accuracy,
common::ObIAllocator &padding_alloc,
blocksstable::ObStorageDatum &datum);
// pad_column overload that works on an ObObj cell passed by non-const reference.
int pad_column(const ObAccuracy accuracy,
common::ObIAllocator &padding_alloc,
common::ObObj &cell);
// pad_column overload that works on an expression within an evaluation context;
// no allocator is passed to this overload.
int pad_column(const common::ObAccuracy accuracy,
sql::ObEvalCtx &ctx,
sql::ObExpr &expr);
// Batch variant over an array of datums; `datums` is a reference to a pointer.
// NOTE(review): presumably processes `row_count` entries of `datums` -- confirm.
int pad_on_datums(const common::ObAccuracy accuracy,
const common::ObCollationType cs_type,
common::ObIAllocator &padding_alloc,
int64_t row_count,
common::ObDatum *&datums);
// Batch variant that works on an expression's vector data.
// NOTE(review): presumably covers `row_cap` rows starting at `vec_offset` -- confirm.
int pad_on_rich_format_columns(const common::ObAccuracy accuracy,
const common::ObCollationType cs_type,
const int64_t row_cap,
const int64_t vec_offset,
common::ObIAllocator &padding_alloc,
sql::ObExpr &expr,
sql::ObEvalCtx &eval_ctx);
// ---------------------------------------------------------------------------
// LOB locator helpers (declarations only).
// NOTE(review): presumably attach LOB locators to the values of the column
// described by `col_param` -- confirm against the definitions.
// ---------------------------------------------------------------------------

// Datum-array form. `reuse_lob_locator` defaults to true.
int fill_datums_lob_locator(const ObTableIterParam &iter_param,
const ObTableAccessContext &context,
const share::schema::ObColumnParam &col_param,
const int64_t row_cap,
ObDatum *datums,
bool reuse_lob_locator = true);
// Expression form; takes a vector offset and a row count instead of a datum array.
int fill_exprs_lob_locator(const ObTableIterParam &iter_param,
const ObTableAccessContext &context,
const share::schema::ObColumnParam &col_param,
sql::ObExpr &expr,
sql::ObEvalCtx &eval_ctx,
const int64_t vec_offset,
const int64_t row_cap);
// Declaration only. `obj` is passed by non-const reference.
// NOTE(review): presumably casts `obj` in place to the type described by
// `src_meta`, allocating from `cast_allocator` -- confirm.
int cast_obj(const common::ObObjMeta &src_meta, common::ObIAllocator &cast_allocator, common::ObObj &obj);
// Declaration only. Initialises the vector header of a single expression.
// `format` defaults to VectorFormat::VEC_UNIFORM when the caller omits it.
int init_expr_vector_header(
sql::ObExpr &expr,
sql::ObEvalCtx &eval_ctx,
const int64_t size,
const VectorFormat format = VectorFormat::VEC_UNIFORM);
/**
 * Run init_expr_vector_header() on every expression in `exprs`, relying on
 * that function's default `format` argument.
 *
 * A null `exprs` pointer is accepted and means there is nothing to do.
 * Processing stops at the first element that fails.
 *
 * @param exprs     array of expression pointers; may be nullptr
 * @param eval_ctx  evaluation context forwarded to init_expr_vector_header()
 * @param size      forwarded unchanged to init_expr_vector_header()
 * @return OB_SUCCESS on success, OB_ERR_UNEXPECTED when an array element is
 *         null, otherwise the error returned by init_expr_vector_header()
 */
OB_INLINE int init_exprs_uniform_header(
    const sql::ObExprPtrIArray *exprs,
    sql::ObEvalCtx &eval_ctx,
    const int64_t size)
{
  int ret = OB_SUCCESS;
  if (nullptr == exprs) {
    // No expression array supplied: succeed without touching anything.
    return ret;
  }
  int64_t i = 0;
  while (OB_SUCC(ret) && i < exprs->count()) {
    sql::ObExpr *expr = exprs->at(i);
    if (OB_ISNULL(expr)) {
      ret = OB_ERR_UNEXPECTED;
      STORAGE_LOG(WARN, "Unexpected null expr", K(ret), KPC(exprs));
    } else if (OB_FAIL(init_expr_vector_header(*expr, eval_ctx, size))) {
      STORAGE_LOG(WARN, "Failed to init vector", K(ret), K(i), KPC(expr));
    }
    ++i;
  }
  return ret;
}
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably initialises the vector headers of `exprs` for the
// non-uniform ("new") vector formats, using `cols_projector` to map each
// expression to its column -- confirm against the definition.
int init_exprs_new_format_header(
const common::ObIArray<int32_t> &cols_projector,
const sql::ObExprPtrIArray &exprs,
sql::ObEvalCtx &eval_ctx);
/**
 * Tell whether `cs_type` is one of the collations for which the ASCII fast
 * path may be taken.
 *
 * @param cs_type  collation to test
 * @return true for utf8mb4 general_ci / bin / unicode_ci and for
 *         gbk chinese_ci / bin; false for every other collation
 */
OB_INLINE bool can_do_ascii_optimize(common::ObCollationType cs_type)
{
  bool bret = false;
  switch (cs_type) {
    case common::CS_TYPE_UTF8MB4_GENERAL_CI:
    case common::CS_TYPE_UTF8MB4_BIN:
    case common::CS_TYPE_UTF8MB4_UNICODE_CI:
    case common::CS_TYPE_GBK_CHINESE_CI:
    case common::CS_TYPE_GBK_BIN:
      bret = true;
      break;
    default:
      break;
  }
  return bret;
}
/**
 * Check whether a short byte string (0 to 7 bytes) is pure 7-bit ASCII, i.e.
 * no byte has its top bit (0x80) set.
 *
 * The bytes are examined a half-word / word at a time. The multi-byte loads go
 * through std::memcpy into a local integer instead of dereferencing a casted
 * pointer: `str` has no alignment guarantee, and reading a uint16_t/uint32_t
 * through a misaligned, type-punned pointer is undefined behaviour. Every mask
 * is the same in each byte lane, so the result does not depend on endianness.
 *
 * @param str  pointer to at least `len` readable bytes (not dereferenced when
 *             `len` is 0 or outside [0, 7])
 * @param len  number of bytes to examine; only 0..7 is supported
 * @return true when all `len` bytes are below 0x80; false when any byte has
 *         the top bit set, and also false for any `len` outside [0, 7]
 */
OB_INLINE bool is_ascii_less_8(const char *str, int64_t len)
{
  // Non-zero means "some byte has its top bit set". Starts non-zero so that an
  // unsupported length conservatively reports "not ASCII".
  uint32_t high_bits = 1;
  uint16_t half = 0;
  uint32_t head4 = 0;
  uint32_t tail4 = 0;
  switch (len) {
    case 0:
      high_bits = 0;
      break;
    case 1:
      high_bits = 0x80U & static_cast<uint8_t>(str[0]);
      break;
    case 2:
      std::memcpy(&half, str, sizeof(half));
      high_bits = 0x8080U & half;
      break;
    case 3:
      std::memcpy(&half, str, sizeof(half));
      high_bits = (0x8080U & half) | (0x80U & static_cast<uint8_t>(str[2]));
      break;
    case 4:
      std::memcpy(&head4, str, sizeof(head4));
      high_bits = 0x80808080U & head4;
      break;
    case 5:
      std::memcpy(&head4, str, sizeof(head4));
      high_bits = (0x80808080U & head4) | (0x80U & static_cast<uint8_t>(str[4]));
      break;
    case 6:
      std::memcpy(&head4, str, sizeof(head4));
      std::memcpy(&half, str + 4, sizeof(half));
      high_bits = (0x80808080U & head4) | (0x8080U & half);
      break;
    case 7:
      // Two overlapping 4-byte loads (bytes 0..3 and 3..6) cover all 7 bytes.
      std::memcpy(&head4, str, sizeof(head4));
      std::memcpy(&tail4, str + 3, sizeof(tail4));
      high_bits = (0x80808080U & head4) | (0x80808080U & tail4);
      break;
    default:
      break;
  }
  return 0 == high_bits;
}
/**
 * Check whether the `len` bytes at `str` are all 7-bit ASCII (no byte has the
 * 0x80 bit set).
 *
 * Strings of 8 bytes or more are scanned one 8-byte word at a time, stopping
 * at the first word that contains a high-bit byte; the remaining `len % 8`
 * bytes are handed to is_ascii_less_8(). Shorter strings go straight to
 * is_ascii_less_8().
 *
 * Each word is fetched with std::memcpy into a local uint64_t rather than by
 * dereferencing `str` cast to `const uint64_t *`: `str` carries no alignment
 * guarantee, so the casted read is a misaligned, type-punned access and
 * undefined behaviour.
 *
 * @param str  pointer to at least `len` readable bytes
 * @param len  number of bytes to examine
 * @return true when every examined byte is below 0x80, false otherwise
 *         (the result for `len` < 8, including negative values, is whatever
 *         is_ascii_less_8() returns)
 */
OB_INLINE bool is_ascii_str(const char *str, const int64_t len)
{
  bool bret = true;
  if (len >= 8) {
    const int64_t length = len / 8;
    for (int64_t i = 0; bret && i < length; i++) {
      uint64_t word = 0;
      std::memcpy(&word, str + i * 8, sizeof(word));
      if (word & 0x8080808080808080ULL) {
        bret = false;
      }
    }
    // Tail of 0..7 bytes after the last full word.
    bret = bret && is_ascii_less_8(str + length * 8, len % 8);
  } else {
    bret = is_ascii_less_8(str, len);
  }
  return bret;
}
class ObObjBufArray final
{
public:
ObObjBufArray()
: capacity_(0),
is_inited_(false),
data_(NULL),
allocator_(NULL)
{
//MEMSET(local_data_buf_, 0, LOCAL_ARRAY_SIZE * sizeof(common::ObObj));
}
~ObObjBufArray()
{
reset();
}
int init(common::ObIAllocator *allocator)
{
int ret = common::OB_SUCCESS;
if (IS_INIT) {
ret = common::OB_INIT_TWICE;
STORAGE_LOG(WARN, "init twice", K(ret), K(is_inited_));
} else if (OB_ISNULL(allocator)) {
ret = common::OB_INVALID_ARGUMENT;
STORAGE_LOG(WARN, "invalid arguments", K(ret), KP(allocator));
} else {
allocator_ = allocator;
data_ = reinterpret_cast<common::ObObj*>(local_data_buf_);
capacity_ = LOCAL_ARRAY_SIZE;
is_inited_ = true;
}
return ret;
}
inline bool is_inited() const { return is_inited_; }
inline int reserve(int64_t count)
{
int ret = common::OB_SUCCESS;
if (IS_NOT_INIT) {
ret = common::OB_NOT_INIT;
STORAGE_LOG(WARN, "ObObjBufArray not inited", K(ret), K(is_inited_));
} else if (count > capacity_) {
int64_t new_size = count * sizeof(common::ObObj);
common::ObObj *new_data = reinterpret_cast<common::ObObj *>(allocator_->alloc(new_size));
if (OB_NOT_NULL(new_data)) {
if ((char *)data_ != local_data_buf_) {
allocator_->free(data_);
}
MEMSET(new_data, 0, new_size);
data_ = new_data;
capacity_ = count;
} else {
ret = common::OB_ALLOCATE_MEMORY_FAILED;
STORAGE_LOG(ERROR, "no memory", K(ret), K(new_size), K(capacity_));
}
}
return ret;
}
inline int64_t get_count() const { return capacity_; }
inline common::ObObj *get_data() { return data_; }
void reset()
{
if (NULL != allocator_ && (char *)data_ != local_data_buf_) {
allocator_->free(data_);
}
allocator_ = NULL;
data_ = NULL;
capacity_ = 0;
is_inited_ = false;
}
inline common::ObObj &at(int64_t idx) const
{
OB_ASSERT(idx >= 0 && idx < capacity_);
return data_[idx];
}
protected:
const static int64_t LOCAL_ARRAY_SIZE = 64;
int64_t capacity_;
bool is_inited_;
common::ObObj *data_;
char local_data_buf_[LOCAL_ARRAY_SIZE * sizeof(common::ObObj)];
common::ObIAllocator *allocator_;
};
/**
 * Pick the datum comparison function for comparing a column value against a
 * filter parameter.
 *
 * NULL ordering follows the current compatibility mode: NULL_LAST / the
 * null_last_cmp_ entry in Oracle mode, NULL_FIRST / null_first_cmp_ otherwise.
 *
 * @param col_obj_type    meta of the column side
 * @param param_obj_type  meta of the parameter side
 * @return the selected comparison function, or nullptr when none could be
 *         resolved (including when no basic-function table is returned for
 *         the column type) -- callers must check for nullptr
 */
inline static common::ObDatumCmpFuncType get_datum_cmp_func(const common::ObObjMeta &col_obj_type, const common::ObObjMeta &param_obj_type)
{
  common::ObDatumCmpFuncType cmp_func = nullptr;
  const bool is_oracle_mode = lib::is_oracle_mode();
  // If comparing lob with non-lob, get_nullsafe_cmp_func must be used to obtain cmp_func;
  // especially for tinytext, because tinytext does not have a lob header but its type class is TextTC.
  // The XOR is true when exactly one of the two sides is lob storage.
  const bool not_both_lob_storage = col_obj_type.is_lob_storage() ^ param_obj_type.is_lob_storage();
  if (col_obj_type.get_type_class() != param_obj_type.get_type_class() || not_both_lob_storage) {
    cmp_func = ObDatumFuncs::get_nullsafe_cmp_func(
        col_obj_type.get_type(),
        param_obj_type.get_type(),
        is_oracle_mode ? NULL_LAST : NULL_FIRST,
        col_obj_type.get_collation_type(),
        col_obj_type.get_scale(),
        is_oracle_mode,
        col_obj_type.has_lob_header() || param_obj_type.has_lob_header());
  } else {
    // Same type class and same lob-storage-ness: use the column type's basic function table.
    sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(col_obj_type.get_type(), col_obj_type.get_collation_type());
    if (nullptr != basic_funcs) {
      cmp_func = is_oracle_mode ? basic_funcs->null_last_cmp_ : basic_funcs->null_first_cmp_;
    }
    // Otherwise cmp_func stays nullptr instead of dereferencing a null table pointer.
  }
  return cmp_func;
}
/**
 * "Less than" functor over ObDatum built on a datum comparison function.
 *
 * The functor cannot return an error code, so it reports through two
 * caller-owned references:
 *  - `ret`   receives the error of the first failing comparison; once it is
 *            not OB_SUCCESS, later calls skip the comparison and return false.
 *  - `equal` is set to true as soon as any compared pair is equal; this
 *            functor never sets it back to false.
 */
struct ObDatumComparator
{
public:
  ObDatumComparator(const ObDatumCmpFuncType cmp_func, int &ret, bool &equal)
    : cmp_func_(cmp_func), ret_(ret), equal_(equal)
  {
  }
  ~ObDatumComparator() {}

  // Returns true when datum1 orders before datum2 according to cmp_func_.
  OB_INLINE bool operator() (const ObDatum &datum1, const ObDatum &datum2)
  {
    int &ret = ret_;  // alias so the OB_* macros operate on the caller's code
    int cmp_ret = 0;
    if (OB_SUCC(ret)) {
      if (OB_FAIL(cmp_func_(datum1, datum2, cmp_ret))) {
        STORAGE_LOG(WARN, "Failed to compare datum", K(ret), K(datum1), K(datum2), K_(cmp_func));
      } else if (0 == cmp_ret) {
        equal_ = true;
      }
    }
    return cmp_ret < 0;
  }
private:
  ObDatumCmpFuncType cmp_func_;
  int &ret_;
  bool &equal_;
};
// Lookup strategies that get_filter_in_cmp_type() can select for an IN filter.
// The enumerator values are written out; they match the implicit numbering.
enum class ObFilterInCmpType {
  MERGE_SEARCH = 0,
  BINARY_SEARCH_DICT = 1,  // only selected for a sorted dictionary
  BINARY_SEARCH = 2,       // only selected for an unsorted dictionary
  HASH_SEARCH = 3,
};
/**
 * Choose the lookup strategy for an IN filter from the number of rows, the
 * number of IN-list parameters and whether the dictionary is sorted.
 *
 * Unsorted dictionary:
 *   param_count <= BINARY_HASH_THRESHOLD      -> BINARY_SEARCH
 *   otherwise                                 -> HASH_SEARCH
 * Sorted dictionary:
 *   row_count > 3 * param_count (rows >> params)
 *       row_count > 4 * HASH_BUCKETS          -> BINARY_SEARCH_DICT
 *       otherwise                             -> MERGE_SEARCH
 *   row_count * 3 >= param_count (rows ~ params)
 *       row_count > HASH_BUCKETS              -> MERGE_SEARCH
 *       otherwise                             -> HASH_SEARCH
 *   otherwise (rows << params)                -> HASH_SEARCH
 *
 * @param row_count       number of rows to filter
 * @param param_count     number of parameters in the IN list
 * @param is_sorted_dict  whether the dictionary is sorted
 */
inline ObFilterInCmpType get_filter_in_cmp_type(
    const int64_t row_count,
    const int64_t param_count,
    const bool is_sorted_dict)
{
  // Threshold between BINARY_SEARCH and HASH_SEARCH. With an unordered
  // dictionary param_count is the only quantity to decide on; testing showed
  // that for small inputs a binary search costs less than computing hashes,
  // so the threshold is provisionally a small value (8).
  static constexpr int64_t BINARY_HASH_THRESHOLD = 8;
  // Number of buckets (slots) of the hash set; tied to hash set performance.
  const int64_t HASH_BUCKETS = hash::cal_next_prime(param_count * 2);

  if (!is_sorted_dict) {
    return param_count <= BINARY_HASH_THRESHOLD
        ? ObFilterInCmpType::BINARY_SEARCH
        : ObFilterInCmpType::HASH_SEARCH;
  }
  if (row_count > 3 * param_count) {
    // row_count >> param_count
    return row_count > HASH_BUCKETS * 4
        ? ObFilterInCmpType::BINARY_SEARCH_DICT
        : ObFilterInCmpType::MERGE_SEARCH;
  }
  if (row_count * 3 >= param_count && row_count > HASH_BUCKETS) {
    // row_count ~~ param_count, with more rows than hash buckets
    return ObFilterInCmpType::MERGE_SEARCH;
  }
  // row_count ~~ param_count with few rows, or row_count << param_count
  return ObFilterInCmpType::HASH_SEARCH;
}
}
}
#endif // OCEANBASE_STORAGE_OB_STORAGE_UTIL_