/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #ifndef OCEANBASE_STORAGE_OB_STORAGE_UTIL_ #define OCEANBASE_STORAGE_OB_STORAGE_UTIL_ #include "lib/allocator/ob_allocator.h" #include "share/datum/ob_datum_funcs.h" #include "sql/engine/expr/ob_expr.h" namespace oceanbase { namespace share { namespace schema { class ObColumnParam; } } namespace blocksstable { struct ObStorageDatum; } namespace storage { class ObTableIterParam; class ObTableAccessContext; int pad_column(const ObObjMeta &obj_meta, const ObAccuracy accuracy, common::ObIAllocator &padding_alloc, blocksstable::ObStorageDatum &datum); int pad_column(const ObAccuracy accuracy, common::ObIAllocator &padding_alloc, common::ObObj &cell); int pad_column(const common::ObAccuracy accuracy, sql::ObEvalCtx &ctx, sql::ObExpr &expr); int pad_on_datums(const common::ObAccuracy accuracy, const common::ObCollationType cs_type, common::ObIAllocator &padding_alloc, int64_t row_count, common::ObDatum *&datums); int pad_on_rich_format_columns(const common::ObAccuracy accuracy, const common::ObCollationType cs_type, const int64_t row_cap, const int64_t vec_offset, common::ObIAllocator &padding_alloc, sql::ObExpr &expr, sql::ObEvalCtx &eval_ctx); int fill_datums_lob_locator(const ObTableIterParam &iter_param, const ObTableAccessContext &context, const share::schema::ObColumnParam &col_param, const int64_t row_cap, ObDatum *datums, bool reuse_lob_locator = true); int fill_exprs_lob_locator(const ObTableIterParam &iter_param, const ObTableAccessContext &context, const share::schema::ObColumnParam &col_param, sql::ObExpr &expr, sql::ObEvalCtx &eval_ctx, const int64_t vec_offset, const int64_t row_cap); int cast_obj(const common::ObObjMeta &src_meta, common::ObIAllocator &cast_allocator, common::ObObj &obj); int init_expr_vector_header( sql::ObExpr &expr, sql::ObEvalCtx &eval_ctx, const int64_t size, const VectorFormat format = VectorFormat::VEC_UNIFORM); OB_INLINE int init_exprs_uniform_header( const sql::ObExprPtrIArray *exprs, sql::ObEvalCtx &eval_ctx, const int64_t size) { int ret = OB_SUCCESS; if (nullptr != exprs) { for (int64_t i = 0; OB_SUCC(ret) && i < exprs->count(); ++i) { sql::ObExpr *expr = exprs->at(i); if (OB_ISNULL(expr)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected null expr", K(ret), KPC(exprs)); } else if (OB_FAIL(init_expr_vector_header(*expr, eval_ctx, size))) { STORAGE_LOG(WARN, "Failed to init vector", K(ret), K(i), KPC(expr)); } } } return ret; } int init_exprs_new_format_header( const common::ObIArray &cols_projector, const sql::ObExprPtrIArray &exprs, sql::ObEvalCtx &eval_ctx); OB_INLINE bool can_do_ascii_optimize(common::ObCollationType cs_type) { return common::CS_TYPE_UTF8MB4_GENERAL_CI == cs_type || common::CS_TYPE_UTF8MB4_BIN == cs_type || common::CS_TYPE_UTF8MB4_UNICODE_CI == cs_type || common::CS_TYPE_GBK_CHINESE_CI == cs_type || common::CS_TYPE_GBK_BIN == cs_type; } OB_INLINE bool is_ascii_less_8(const char *str, int64_t len) { bool is_not_ascii = true; const uint8_t *val = reinterpret_cast(str); switch (len) { case 0: is_not_ascii = false; break; case 1: is_not_ascii = (0x80 & val[0]); break; case 2: is_not_ascii = 0x8080 & *((const uint16_t *)val); break; case 3: is_not_ascii = (0x8080 & *(const uint16_t *)val) | (0x80 & val[2]); break; case 4: is_not_ascii = (0x80808080U & *((const uint32_t *)val)); break; case 5: is_not_ascii = (0x80808080U & *((const uint32_t *)val)) | (0x80 & val[4]); break; case 6: is_not_ascii = (0x80808080U & *(const uint32_t *)val) | (0x8080 & *(const uint16_t *)(val + 4)); break; case 7: is_not_ascii = (0x80808080U & *(const uint32_t *)val) | (0x80808080U & *(const uint32_t *)(val + 3)); break; } return !is_not_ascii; } OB_INLINE bool is_ascii_str(const char *str, const int64_t len) { bool bret = true; if (len >= 8) { const int64_t length = len / 8; const uint64_t *vals = reinterpret_cast(str); for (int64_t i = 0; bret && i < length; i++) { if (vals[i] & 0x8080808080808080UL) { bret = false; } } bret = bret && is_ascii_less_8(str + len / 8 * 8, len % 8); } else { bret = is_ascii_less_8(str, len); } return bret; } class ObObjBufArray final { public: ObObjBufArray() : capacity_(0), is_inited_(false), data_(NULL), allocator_(NULL) { //MEMSET(local_data_buf_, 0, LOCAL_ARRAY_SIZE * sizeof(common::ObObj)); } ~ObObjBufArray() { reset(); } int init(common::ObIAllocator *allocator) { int ret = common::OB_SUCCESS; if (IS_INIT) { ret = common::OB_INIT_TWICE; STORAGE_LOG(WARN, "init twice", K(ret), K(is_inited_)); } else if (OB_ISNULL(allocator)) { ret = common::OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), KP(allocator)); } else { allocator_ = allocator; data_ = reinterpret_cast(local_data_buf_); capacity_ = LOCAL_ARRAY_SIZE; is_inited_ = true; } return ret; } inline bool is_inited() const { return is_inited_; } inline int reserve(int64_t count) { int ret = common::OB_SUCCESS; if (IS_NOT_INIT) { ret = common::OB_NOT_INIT; STORAGE_LOG(WARN, "ObObjBufArray not inited", K(ret), K(is_inited_)); } else if (count > capacity_) { int64_t new_size = count * sizeof(common::ObObj); common::ObObj *new_data = reinterpret_cast(allocator_->alloc(new_size)); if (OB_NOT_NULL(new_data)) { if ((char *)data_ != local_data_buf_) { allocator_->free(data_); } MEMSET(new_data, 0, new_size); data_ = new_data; capacity_ = count; } else { ret = common::OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(ERROR, "no memory", K(ret), K(new_size), K(capacity_)); } } return ret; } inline int64_t get_count() const { return capacity_; } inline common::ObObj *get_data() { return data_; } void reset() { if (NULL != allocator_ && (char *)data_ != local_data_buf_) { allocator_->free(data_); } allocator_ = NULL; data_ = NULL; capacity_ = 0; is_inited_ = false; } inline common::ObObj &at(int64_t idx) const { OB_ASSERT(idx >= 0 && idx < capacity_); return data_[idx]; } protected: const static int64_t LOCAL_ARRAY_SIZE = 64; int64_t capacity_; bool is_inited_; common::ObObj *data_; char local_data_buf_[LOCAL_ARRAY_SIZE * sizeof(common::ObObj)]; common::ObIAllocator *allocator_; }; inline static common::ObDatumCmpFuncType get_datum_cmp_func(const common::ObObjMeta &col_obj_type, const common::ObObjMeta ¶m_obj_type) { common::ObDatumCmpFuncType cmp_func = nullptr; bool is_oracle_mode = lib::is_oracle_mode(); // if compare lob with non-lob, should use get_nullsafe_cmp_func to get cmp_func // especially tinytext, beacause tinytext does not have lob header, but it's type class is TextTC. bool not_both_lob_storage = col_obj_type.is_lob_storage() ^ param_obj_type.is_lob_storage(); if (col_obj_type.get_type_class() != param_obj_type.get_type_class() || not_both_lob_storage) { cmp_func = ObDatumFuncs::get_nullsafe_cmp_func( col_obj_type.get_type(), param_obj_type.get_type(), is_oracle_mode ? NULL_LAST : NULL_FIRST, col_obj_type.get_collation_type(), col_obj_type.get_scale(), is_oracle_mode, col_obj_type.has_lob_header() || param_obj_type.has_lob_header()); } else { sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(col_obj_type.get_type(), col_obj_type.get_collation_type()); cmp_func = is_oracle_mode ? basic_funcs->null_last_cmp_ : basic_funcs->null_first_cmp_; } return cmp_func; } struct ObDatumComparator { public: ObDatumComparator(const ObDatumCmpFuncType cmp_func, int &ret, bool &equal) : cmp_func_(cmp_func), ret_(ret), equal_(equal) {} ~ObDatumComparator() {} OB_INLINE bool operator() (const ObDatum &datum1, const ObDatum &datum2) { int &ret = ret_; int cmp_ret = 0; if (OB_FAIL(ret)) { // do nothing } else if (OB_FAIL(cmp_func_(datum1, datum2, cmp_ret))) { STORAGE_LOG(WARN, "Failed to compare datum", K(ret), K(datum1), K(datum2), K_(cmp_func)); } else if (0 == cmp_ret && !equal_) { equal_ = true; } return cmp_ret < 0; } private: ObDatumCmpFuncType cmp_func_; int &ret_; bool &equal_; }; enum class ObFilterInCmpType { MERGE_SEARCH, BINARY_SEARCH_DICT, BINARY_SEARCH, HASH_SEARCH, }; inline ObFilterInCmpType get_filter_in_cmp_type( const int64_t row_count, const int64_t param_count, const bool is_sorted_dict) { // BINARY_HASH_THRESHOLD: means the threshold to choose BINARY_SEARCH or HASH_SEARCH // When the dictionary is unordered, the only variable available for iteration is param_count. // Testing has shown that when the data size is small, the overhead of binary search is // lower than the overhead of computing hashes. // Therefore, this threshold is temporarily set to a small value(8). static constexpr int64_t BINARY_HASH_THRESHOLD = 8; // HASH_BUCKETS: means the number of buckets(slots) in hashset. // This value is related to the performance of the hashset. const int64_t HASH_BUCKETS = hash::cal_next_prime(param_count * 2); ObFilterInCmpType cmp_type = ObFilterInCmpType::HASH_SEARCH; if (is_sorted_dict) { if (row_count > 3 * param_count) { // row_count >> param_count if (row_count > HASH_BUCKETS * 4) { cmp_type = ObFilterInCmpType::BINARY_SEARCH_DICT; } else { cmp_type = ObFilterInCmpType::MERGE_SEARCH; } } else if (row_count * 3 >= param_count) { // row_count ~~ param_count if (row_count > HASH_BUCKETS) { cmp_type = ObFilterInCmpType::MERGE_SEARCH; } else { cmp_type = ObFilterInCmpType::HASH_SEARCH; } } else { // row_count << param_count cmp_type = ObFilterInCmpType::HASH_SEARCH; } } else { // Unordered dict if (param_count <= BINARY_HASH_THRESHOLD) { cmp_type = ObFilterInCmpType::BINARY_SEARCH; } else { cmp_type = ObFilterInCmpType::HASH_SEARCH; } } return cmp_type; } } } #endif // OCEANBASE_STORAGE_OB_STORAGE_UTIL_