Files
oceanbase/src/sql/engine/basic/ob_temp_column_store.cpp
obdev cc1b65e578 [FEAT MERGE] sql execution improvements
Co-authored-by: DengzhiLiu <dengzhiliu@gmail.com>
Co-authored-by: Zach41 <zach_41@163.com>
Co-authored-by: tushicheng <18829573815@163.com>
2024-06-21 15:16:43 +00:00

443 lines
16 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_ENG
#include "ob_temp_column_store.h"
#include "sql/engine/basic/ob_temp_block_store.h"
#include "share/vector/ob_fixed_length_vector.h"
#include "share/vector/ob_continuous_vector.h"
#include "share/vector/ob_uniform_vector.h"
#include "share/vector/ob_discrete_vector.h"
#include "share/ob_define.h"
namespace oceanbase
{
using namespace common;
namespace sql
{
/**
 * Compute how many bytes a batch occupies once it is serialized into a column block.
 *
 * The result starts at get_header_size(vectors.count()) and adds the calc_size()
 * contribution of every vector, dispatched on the vector's format.
 *
 * @param vectors        one vector per column; every entry is dereferenced, so none may be NULL
 * @param selector       forwarded unchanged to calc_size(); ObTempColumnStore::add_batch passes
 *                       NULL when no row of the batch is filtered out
 * @param lengths        per-column length info, only consulted for the uniform formats
 * @param size           number of rows that will be stored
 * @param batch_mem_size [out] computed size in bytes
 * @return OB_SUCCESS, or OB_ERR_UNEXPECTED for a vector format that has no case below
 */
int ObTempColumnStore::ColumnBlock::calc_rows_size(const IVectorPtrs &vectors,
                                                   const uint16_t *selector,
                                                   const ObArray<ObLength> &lengths,
                                                   const int64_t size,
                                                   int64_t &batch_mem_size)
{
  int ret = OB_SUCCESS;
  batch_mem_size = get_header_size(vectors.count());
  for (int64_t i = 0; OB_SUCC(ret) && i < vectors.count(); ++i) {
    const ObIVector *vec = vectors.at(i);
    const VectorFormat format = vec->get_format();
    switch (format) {
      case VEC_FIXED:
        batch_mem_size += calc_size(static_cast<const ObFixedLengthBase*>(vec), selector, size);
        break;
      case VEC_DISCRETE:
        batch_mem_size += calc_size(static_cast<const ObDiscreteBase*>(vec), selector, size);
        break;
      case VEC_CONTINUOUS:
        batch_mem_size += calc_size(static_cast<const ObContinuousBase*>(vec), selector, size);
        break;
      case VEC_UNIFORM:
        batch_mem_size += calc_size<false>(static_cast<const ObUniformBase*>(vec),
                                           selector, size, lengths[i]);
        break;
      case VEC_UNIFORM_CONST:
        batch_mem_size += calc_size<true>(static_cast<const ObUniformBase*>(vec),
                                          selector, size, lengths[i]);
        break;
      default:
        // Report the offending column instead of failing silently, as the read path does.
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("unexpected format", K(ret), K(format), K(i));
        break;
    }
  }
  return ret;
}
/**
 * Serialize one batch of vectors at the current head of buf.
 *
 * Layout written at buf.head(): an int32 row count, then an int32 offset array with
 * vectors.count() + 1 entries (the last one is the total size of the batch), then the data
 * of each column starting at get_header_size(vectors.count()).
 *
 * The batch is committed (trailing offset written, buffer advanced, cnt_ increased) only when
 * every column was serialized and the written size equals batch_mem_size. On any failure the
 * buffer head is left where it was, so the block never exposes bytes that are not counted in
 * cnt_.
 *
 * @param buf            destination buffer; must have at least batch_mem_size bytes remaining
 * @param vectors        one vector per column; every entry is dereferenced
 * @param selector       forwarded unchanged to to_buf()
 * @param lengths        per-column length info, only consulted for the uniform formats
 * @param size           number of rows in the batch
 * @param batch_mem_size expected serialized size, as computed by calc_rows_size()
 * @return OB_SUCCESS; OB_BUF_NOT_ENOUGH if buf is too small; OB_ERR_UNEXPECTED for an unknown
 *         vector format or a size mismatch; otherwise the error returned by to_buf()
 */
int ObTempColumnStore::ColumnBlock::add_batch(ShrinkBuffer &buf,
                                              const IVectorPtrs &vectors,
                                              const uint16_t *selector,
                                              const ObArray<ObLength> &lengths,
                                              const int64_t size,
                                              const int64_t batch_mem_size)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(batch_mem_size > buf.remain())) {
    ret = OB_BUF_NOT_ENOUGH;
    LOG_WARN("block is not enough", K(ret), K(batch_mem_size), K(buf));
  } else {
    char *head = buf.head();
    *reinterpret_cast<int32_t *>(head) = static_cast<int32_t>(size); // row_count
    int32_t *vec_offsets = reinterpret_cast<int32_t *>(head + sizeof(int32_t));
    int64_t pos = get_header_size(vectors.count());
    for (int64_t i = 0; OB_SUCC(ret) && i < vectors.count(); ++i) {
      const ObIVector *vec = vectors.at(i);
      const VectorFormat format = vec->get_format();
      vec_offsets[i] = static_cast<int32_t>(pos);
      switch (format) {
        case VEC_FIXED:
          ret = to_buf(static_cast<const ObFixedLengthBase*>(vec), selector, size, head, pos);
          break;
        case VEC_DISCRETE:
          ret = to_buf(static_cast<const ObDiscreteBase*>(vec), selector, size, head, pos);
          break;
        case VEC_CONTINUOUS:
          ret = to_buf(static_cast<const ObContinuousBase*>(vec), selector, size, head, pos);
          break;
        case VEC_UNIFORM:
          ret = to_buf<false>(static_cast<const ObUniformBase*>(vec), selector, size, lengths[i],
                              head, pos);
          break;
        case VEC_UNIFORM_CONST:
          ret = to_buf<true>(static_cast<const ObUniformBase*>(vec), selector, size, lengths[i],
                             head, pos);
          break;
        default:
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("unexpected format", K(ret), K(format), K(i));
          break;
      }
    }
    if (OB_FAIL(ret)) {
      // serialization failed: leave the buffer head untouched, nothing is committed
    } else if (OB_UNLIKELY(pos != batch_mem_size)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected memory size", K(ret), K(pos), K(batch_mem_size));
    } else {
      vec_offsets[vectors.count()] = static_cast<int32_t>(pos); // last offset, size of the batch
      buf.fast_advance(pos);
      cnt_ += size;
    }
  }
  return ret;
}
int ObTempColumnStore::ColumnBlock::get_next_batch(const IVectorPtrs &vectors,
const ObArray<ObLength> &lengths,
const int32_t start_read_pos,
int32_t &batch_rows,
int32_t &batch_pos) const
{
int ret = OB_SUCCESS;
char* buf = const_cast<char *>(start_read_pos + payload_);
const int32_t size = *reinterpret_cast<const int32_t*>(buf);
const int32_t *vec_offsets = reinterpret_cast<int32_t *>(buf + sizeof(int32_t));
for (int64_t i = 0; OB_SUCC(ret) && i < vectors.count(); ++i) {
ObIVector *vec = vectors.at(i);
if (NULL == vec || (VEC_UNIFORM_CONST == vec->get_format())) {
// if vector is null or uniform const, skip read vector
} else {
int64_t pos = vec_offsets[i];
const VectorFormat format = vec->get_format();
switch (format) {
case VEC_FIXED:
ret = from_buf(buf, pos, size, static_cast<ObFixedLengthBase*>(vec));
break;
case VEC_CONTINUOUS:
ret = from_buf(buf, pos, size, static_cast<ObContinuousBase*>(vec));
break;
case VEC_UNIFORM:
static_cast<UniformFormat *>(vec)->set_all_null(size);
break;
default:
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected format", K(ret), K(format));
}
}
}
if (OB_SUCC(ret)) {
batch_rows = size;
batch_pos = vec_offsets[vectors.count()];
}
return ret;
}
/**
 * Bind the iterator to a store: clears the iterator state, remembers the store, then
 * initializes the underlying BlockReader with it.
 *
 * @param store the column store to read from
 * @return the result of BlockReader::init()
 */
int ObTempColumnStore::Iterator::init(ObTempColumnStore *store)
{
  int ret = OB_SUCCESS;
  reset();
  column_store_ = store;
  ret = BlockReader::init(store);
  return ret;
}
/**
 * Read the next rows of the store into the vectors of exprs.
 *
 * A stored batch may hold more rows than max_rows. In that case the unread remainder is
 * tracked in rest_row_cnt_ and served by the following calls before read_pos_ moves on to the
 * next stored batch.
 *
 * @param exprs     output expressions; the count must equal the store's column count, NULL
 *                  entries are skipped
 * @param ctx       evaluation context used to fetch and flag the expr vectors
 * @param max_rows  upper bound on the rows returned by this call
 * @param read_rows [out] number of rows returned; reset to 0 on entry
 * @return OB_SUCCESS; OB_INVALID_ARGUMENT on a column count mismatch; OB_ERR_UNEXPECTED for an
 *         unsupported vector format; otherwise the error of next_block() (OB_ITER_END is passed
 *         through without a warning), ensure_read_vectors() or the block/vector helpers
 */
int ObTempColumnStore::Iterator::get_next_batch(const ObExprPtrIArray &exprs,
ObEvalCtx &ctx,
const int64_t max_rows,
int64_t &read_rows)
{
int ret = OB_SUCCESS;
read_rows = 0;
if (OB_UNLIKELY(exprs.count() != column_store_->get_col_cnt())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("column count mismatch", K(ret), K(exprs.count()), K(column_store_->get_col_cnt()));
} else if (OB_UNLIKELY(NULL == cur_blk_ || !cur_blk_->contain(cur_blk_id_))) {
// no block loaded yet, or cur_blk_id_ is outside the loaded block: fetch the next one
if (OB_FAIL(next_block())) {
if (ret != OB_ITER_END) {
LOG_WARN("fail to get next block", K(ret));
}
}
}
// filled by ColumnBlock::get_next_batch: row count of the stored batch at read_pos_ and the
// amount read_pos_ has to advance once that batch is fully consumed
int32_t batch_rows = 0;
int32_t batch_pos = 0;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(ensure_read_vectors(exprs, ctx, max_rows))) {
LOG_WARN("fail to ensure read vectors", K(ret));
} else if (OB_FAIL(cur_blk_->get_next_batch(*vectors_, column_store_->batch_ctx_->lengths_,
read_pos_, batch_rows, batch_pos))) {
LOG_WARN("fail to get next batch from column block", K(ret));
} else if (OB_UNLIKELY(has_rest_row_in_batch())) {
// the stored batch at read_pos_ was only partially returned by an earlier call; it has just
// been decoded again in full, and `begin` is the index of its first unread row
const int64_t begin = batch_rows - rest_row_cnt_;
for (int64_t i = 0; i < exprs.count() && OB_SUCC(ret); i++) {
ObExpr *e = exprs.at(i);
ObIVector *vec = NULL;
if (OB_ISNULL(e) || ((is_uniform_format((vec = e->get_vector(ctx))->get_format())))) {
// skip null input expr and uniform expr
} else {
// NOTE(review): from_vector is defined elsewhere; presumably it moves rows
// [begin, batch_rows) to the front of the vector -- confirm against its definition
const VectorFormat format = vec->get_format();
switch (format) {
case VEC_FIXED:
ret = from_vector(static_cast<ObFixedLengthBase*>(vec), e, ctx, begin, batch_rows);
break;
case VEC_CONTINUOUS:
ret = from_vector(static_cast<ObContinuousBase*>(vec), e, ctx, begin, batch_rows);
break;
default:
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected format", K(ret), K(format));
}
}
}
if (OB_SUCC(ret)) {
// hand out at most max_rows of the remainder and keep the rest for the next call
read_rows = MIN(max_rows, rest_row_cnt_);
rest_row_cnt_ -= read_rows;
}
} else {
// fresh stored batch: return up to max_rows and remember how many rows are left over
read_rows = MIN(max_rows, batch_rows);
if (OB_UNLIKELY(read_rows < batch_rows)) {
rest_row_cnt_ = batch_rows - read_rows;
}
}
if (OB_SUCC(ret)) {
cur_blk_id_ += read_rows;
// move to the next stored batch only when the current one has been returned completely
if (0 == rest_row_cnt_) {
read_pos_ += batch_pos;
}
for (int64_t i = 0; i < exprs.count(); i++) {
if (NULL != exprs.at(i)) {
exprs.at(i)->set_evaluated_projected(ctx);
}
}
}
return ret;
}
int ObTempColumnStore::Iterator::next_block()
{
int ret = OB_SUCCESS;
const Block *read_blk = NULL;
if (cur_blk_id_ >= get_row_cnt()) {
ret = OB_ITER_END;
} else if (OB_FAIL(get_block(cur_blk_id_, read_blk))) {
LOG_WARN("fail to get block from store", K(ret), K(cur_blk_id_));
} else {
cur_blk_ = static_cast<const ColumnBlock*>(read_blk);
rest_row_cnt_ = 0;
read_pos_ = 0;
}
return ret;
}
int ObTempColumnStore::Iterator::ensure_read_vectors(const ObExprPtrIArray &exprs,
ObEvalCtx &ctx,
const int64_t max_rows)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(column_store_->batch_ctx_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("try read vector from empty store", K(ret), K(get_row_cnt()));
} else if (OB_ISNULL(vectors_)) {
if (column_store_->reuse_vector_array_) {
vectors_ = &column_store_->batch_ctx_->vectors_;
} else {
void *mem = column_store_->allocator_->alloc(sizeof(ObArray<ObIVector *>),
column_store_->mem_attr_);
if (OB_ISNULL(mem)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to alloc vector array", K(ret));
} else {
vectors_ = new(mem)ObArray<ObIVector *>();
vectors_->set_attr(column_store_->mem_attr_);
ret = vectors_->prepare_allocate(get_col_cnt());
}
}
}
for (int64_t i = 0; i < exprs.count() && OB_SUCC(ret); i++) {
ObExpr *e = exprs.at(i);
if (OB_ISNULL(e) || e->is_const_expr()) {
vectors_->at(i) = NULL;
} else if (OB_FAIL(e->init_vector(ctx, e->get_temp_column_store_res_format(), max_rows))) {
LOG_WARN("fail to init vector", K(ret));
} else {
vectors_->at(i) = e->get_vector(ctx);
}
}
return ret;
}
// Creates an empty store on top of ObTempBlockStore(alloc). The column count, batch size and
// reuse_vector_array_ flag set here are placeholders that init() overwrites.
ObTempColumnStore::ObTempColumnStore(common::ObIAllocator *alloc /* = NULL */)
  : ObTempBlockStore(alloc),
    cur_blk_(NULL),
    col_cnt_(0),
    batch_ctx_(NULL),
    max_batch_size_(0),
    reuse_vector_array_(true)
{}
// Releases the batch context and the base store's state by delegating to reset().
ObTempColumnStore::~ObTempColumnStore()
{
  this->reset();
}
/**
 * Return the store to its empty state: destroy and free the batch context (if any), forget
 * the current write block, then reset the underlying ObTempBlockStore.
 */
void ObTempColumnStore::reset()
{
  if (NULL != batch_ctx_) {
    // batch_ctx_ was placement-constructed in allocator memory, so destroy it explicitly
    batch_ctx_->~BatchCtx();
    allocator_->free(batch_ctx_);
    batch_ctx_ = NULL;
  }
  // cur_blk_ is not freed anywhere in this file; clear it regardless of batch_ctx_ so that no
  // stale block pointer survives the base-class reset below.
  cur_blk_ = NULL;
  ObTempBlockStore::reset();
}
/**
 * Configure the store and initialize the underlying block store.
 *
 * @param exprs              expressions whose results will be stored; only the count is used
 * @param max_batch_size     later used by init_batch_ctx() to size the selector array
 * @param mem_attr           memory attribute used for all allocations of this store
 * @param mem_limit          forwarded to ObTempBlockStore::init()
 * @param enable_dump        forwarded to ObTempBlockStore::init()
 * @param reuse_vector_array when true, iterators borrow the batch context's vector array
 *                           instead of allocating their own
 * @param compressor_type    forwarded to ObTempBlockStore::init()
 * @return OB_SUCCESS, or the error returned by ObTempBlockStore::init(); inited_ is set only
 *         on success
 */
int ObTempColumnStore::init(const ObExprPtrIArray &exprs,
                            const int64_t max_batch_size,
                            const lib::ObMemAttr &mem_attr,
                            const int64_t mem_limit,
                            const bool enable_dump,
                            const bool reuse_vector_array,
                            const common::ObCompressorType compressor_type)
{
  int ret = OB_SUCCESS;
  mem_attr_ = mem_attr;
  col_cnt_ = exprs.count();
  max_batch_size_ = max_batch_size;
  reuse_vector_array_ = reuse_vector_array;
  ObTempBlockStore::set_inner_allocator_attr(mem_attr);
  if (OB_FAIL(ObTempBlockStore::init(mem_limit, enable_dump, mem_attr.tenant_id_,
                                     mem_attr.ctx_id_, mem_attr.label_, compressor_type))) {
    LOG_WARN("fail to init temp block store", K(ret), K(mem_limit), K(enable_dump));
  } else {
    // only report the store as initialized once the base store is usable
    inited_ = true;
  }
  return ret;
}
int ObTempColumnStore::init_batch_ctx(const ObExprPtrIArray &exprs)
{
int ret = OB_SUCCESS;
const int64_t max_batch_size = max_batch_size_;
if (OB_UNLIKELY(NULL == batch_ctx_)) {
const int64_t size = sizeof(*batch_ctx_) + sizeof(*batch_ctx_->selector_) * max_batch_size;
char *mem = static_cast<char *>(allocator_->alloc(size, mem_attr_));
if (OB_UNLIKELY(max_batch_size <= 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("max batch size is not positive when init batch ctx", K(ret), K(max_batch_size));
} else if (NULL == mem) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate memory failed", K(ret), K(size), K(col_cnt_), K(max_batch_size));
} else {
char *begin = mem;
batch_ctx_ = new(mem)BatchCtx();
batch_ctx_->vectors_.set_attr(mem_attr_);
batch_ctx_->lengths_.set_attr(mem_attr_);
batch_ctx_->max_batch_size_ = max_batch_size;
if (OB_FAIL(batch_ctx_->vectors_.prepare_allocate(col_cnt_))) {
LOG_WARN("fail to prepare allocate vectors", K(ret), K(col_cnt_));
} else if (OB_FAIL(batch_ctx_->lengths_.prepare_allocate(col_cnt_))) {
LOG_WARN("fail to prepare allocate lengths", K(ret), K(col_cnt_));
} else {
for (int64_t i = 0; i < exprs.count(); ++i) {
ObExpr *expr = exprs.at(i);
if (expr->is_fixed_length_data_) {
batch_ctx_->lengths_.at(i) = expr->get_fixed_length();
} else if (expr->datum_meta_.type_ == ObNullType && expr->is_batch_result()) {
batch_ctx_->lengths_.at(i) = NULL_WITH_BATCH_RESULT_LENGTH;
} else {
batch_ctx_->lengths_.at(i) = UNFIXED_LENGTH;
}
}
mem += sizeof(*batch_ctx_);
batch_ctx_->selector_ = reinterpret_cast<typeof(batch_ctx_->selector_)>(mem);
mem += sizeof(*batch_ctx_->selector_) * max_batch_size;
if (mem - begin != size) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("size mismatch", K(ret), K(mem - begin), K(size), K(col_cnt_),
K(max_batch_size));
}
}
}
}
return ret;
}
/**
 * Evaluate exprs for the given batch and append the active rows to the store.
 *
 * When no row is skipped the vectors are stored as they are (selector is NULL); otherwise the
 * indexes of the rows that are not skipped are collected into the batch context's selector
 * array and only those rows are stored.
 *
 * @param exprs             expressions to evaluate, one per column; the count must equal
 *                          get_col_cnt() and every entry is dereferenced
 * @param ctx               evaluation context
 * @param brs               batch descriptor (row count, skip bitmap, all_rows_active_ flag)
 * @param stored_rows_count [out] number of rows selected from the batch; it is assigned even
 *                          when an error is returned, so check the return code first
 * @return OB_SUCCESS; OB_INVALID_ARGUMENT on a column count mismatch; OB_ERR_UNEXPECTED when
 *         the batch does not fit into the selector array; otherwise the error of the failing
 *         step (init_batch_ctx, eval_vector, calc_rows_size, ensure_write_blk, add_batch)
 */
int ObTempColumnStore::add_batch(const common::ObIArray<ObExpr *> &exprs, ObEvalCtx &ctx,
                                 const ObBatchRows &brs, int64_t &stored_rows_count)
{
  int ret = OB_SUCCESS;
  // 64-bit on purpose: a 16-bit signed counter turns negative for batches above 32767 rows,
  // which made the `size > 0` check below drop the whole batch silently.
  int64_t size = 0;
  const uint16_t *selector = NULL;
  if (OB_UNLIKELY(exprs.count() != get_col_cnt())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("column count mismatch", K(ret), K(exprs.count()), K(get_col_cnt()));
  } else if (OB_FAIL(init_batch_ctx(exprs))) {
    LOG_WARN("fail to init batch ctx", K(ret));
  } else if (brs.all_rows_active_ || (0 == brs.skip_->accumulate_bit_cnt(brs.size_))) {
    // no row is skipped: store rows [0, brs.size_) directly and keep selector NULL
    size = brs.size_;
  } else if (OB_UNLIKELY(brs.size_ > batch_ctx_->max_batch_size_)) {
    // selector_ only has room for max_batch_size_ entries
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("batch size exceeds selector capacity", K(ret), K(brs.size_),
             K(batch_ctx_->max_batch_size_));
  } else {
    for (int64_t i = 0; i < brs.size_; i++) {
      if (!brs.skip_->at(i)) {
        batch_ctx_->selector_[size++] = static_cast<uint16_t>(i);
      }
    }
    selector = batch_ctx_->selector_;
  }
  if (OB_SUCC(ret) && size > 0) {
    for (int64_t i = 0; i < exprs.count() && OB_SUCC(ret); i++) {
      ObExpr *e = exprs.at(i);
      if (OB_FAIL(e->eval_vector(ctx, brs))) {
        LOG_WARN("evaluate batch failed", K(ret));
      } else {
        batch_ctx_->vectors_.at(i) = e->get_vector(ctx);
      }
    }
    int64_t batch_mem_size = 0;
    if (OB_FAIL(ret)) {
    } else if (OB_FAIL(ColumnBlock::calc_rows_size(batch_ctx_->vectors_,
                                                   selector,
                                                   batch_ctx_->lengths_,
                                                   size,
                                                   batch_mem_size))) {
      LOG_WARN("fail to calc rows size", K(ret));
    } else if (OB_FAIL(ensure_write_blk(batch_mem_size))) {
      LOG_WARN("ensure write block failed", K(ret));
    } else if (OB_FAIL(cur_blk_->add_batch(blk_buf_, batch_ctx_->vectors_, selector,
                                           batch_ctx_->lengths_, size, batch_mem_size))) {
      LOG_WARN("fail to add batch to column store", K(ret));
    } else {
      block_id_cnt_ += size;
      inc_mem_used(batch_mem_size);
    }
  }
  stored_rows_count = size;
  return ret;
}
} // end namespace sql
} // end namespace oceanbase