
Co-authored-by: Naynahs <cfzy002@126.com> Co-authored-by: hwx65 <1780011298@qq.com> Co-authored-by: oceanoverflow <oceanoverflow@gmail.com>
299 lines
12 KiB
C++
299 lines
12 KiB
C++
/**
|
|
* Copyright (c) 2021 OceanBase
|
|
* OceanBase CE is licensed under Mulan PubL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
|
* You may obtain a copy of Mulan PubL v2 at:
|
|
* http://license.coscl.org.cn/MulanPubL-2.0
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PubL v2 for more details.
|
|
*/
|
|
#define USING_LOG_PREFIX COMMON
|
|
#include "deps/oblib/src/common/object/ob_obj_type.h"
|
|
#include "test_op_engine.h"
|
|
#include "ob_fake_table_scan_vec_op.h"
|
|
#include "data_generator.h"
|
|
|
|
namespace oceanbase
|
|
{
|
|
namespace sql
|
|
{
|
|
int ObFakeTableScanVecOp::inner_open()
|
|
{
|
|
ObDataGenerator::get_instance().register_op(this);
|
|
std::string round;
|
|
ObTestOpConfig::get_instance().get_config("round", round);
|
|
if (!round.empty()) { max_round_ = current_round_ + std::stoi(round); }
|
|
|
|
return OB_SUCCESS;
|
|
}
|
|
|
|
int ObFakeTableScanVecOp::inner_get_next_batch(const int64_t max_row_cnt)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
clear_evaluated_flag();
|
|
uint64_t op_id = get_spec().get_id();
|
|
int64_t generate_random_value = 0;
|
|
bool is_duplicate = false;
|
|
|
|
const ObPushdownExprSpec &pd_expr_spec =
|
|
reinterpret_cast<const ObTableScanSpec *>(&spec_)->tsc_ctdef_.scan_ctdef_.pd_expr_spec_;
|
|
for (int j = 0; j < pd_expr_spec.access_exprs_.count(); j++) {
|
|
ObExpr *expr = pd_expr_spec.access_exprs_.at(j);
|
|
OB_FAIL(fill_random_data_into_expr_datum_frame(j, pd_expr_spec.access_exprs_.count(), expr,
|
|
ObTestOpConfig::get_instance().batch_size_, is_duplicate));
|
|
}
|
|
|
|
// random set skip
|
|
set_random_skip(current_round_, ObTestOpConfig::get_instance().batch_size_);
|
|
|
|
if (is_duplicate) { ObDataGenerator::get_instance().reset_temp_store(op_id, current_round_); }
|
|
|
|
current_round_++;
|
|
brs_.size_ = ObTestOpConfig::get_instance().batch_size_;
|
|
if (current_round_ == max_round_) { brs_.end_ = true; }
|
|
|
|
// print generate data
|
|
LOG_INFO("[DG] data generated by DataGenerator in ", K(current_round_ - 1));
|
|
if (op_id_2_output_streams_.count(op_id) == 0) {
|
|
std::string output_file_name = "generate_data_" + std::to_string(op_id) + ".data";
|
|
op_id_2_output_streams_[op_id].open(output_file_name.data(), std::ios::out | std::ios::trunc);
|
|
}
|
|
test::TestOpEngine::print_to_file(&brs_, this, pd_expr_spec.access_exprs_, false, &op_id_2_output_streams_[op_id]);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int ObFakeTableScanVecOp::fill_random_data_into_expr_datum_frame(int expr_i, int expr_count, const ObExpr *expr,
|
|
const int output_row_count, bool &is_duplicate)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
LOG_DEBUG("generate_random_value for expr: ", K(*expr));
|
|
|
|
ObIVector *i_vec = NULL;
|
|
ObDatum *datums = NULL;
|
|
|
|
if (expr->enable_rich_format()) {
|
|
// vectorization 2.0 new operator
|
|
i_vec = expr->get_vector(eval_ctx_);
|
|
if (expr->is_fixed_length_data_) {
|
|
// for VEC_FIXED and VEC_UNIFORM
|
|
// we must first call init_vector because we need to use i_vec->set_xxx() which need use vector's meta data
|
|
OB_ASSERT(ObTestOpConfig::get_instance().digit_data_format_ == VEC_UNIFORM
|
|
|| ObTestOpConfig::get_instance().digit_data_format_ == VEC_FIXED);
|
|
expr->init_vector(eval_ctx_, ObTestOpConfig::get_instance().digit_data_format_, output_row_count, true);
|
|
} else {
|
|
// while for VEC_DISCRETE and VEC_CONTINUOUS it doesn't matter
|
|
OB_ASSERT(ObTestOpConfig::get_instance().string_data_format_ == VEC_UNIFORM
|
|
|| ObTestOpConfig::get_instance().string_data_format_ == VEC_DISCRETE
|
|
|| ObTestOpConfig::get_instance().string_data_format_ == VEC_CONTINUOUS);
|
|
expr->init_vector(eval_ctx_, ObTestOpConfig::get_instance().string_data_format_, output_row_count, true);
|
|
}
|
|
} else {
|
|
// vectorization 1.0 old operator
|
|
datums = expr->locate_datums_for_update(eval_ctx_, output_row_count);
|
|
}
|
|
|
|
// generate random data
|
|
get_random_data(expr_i, expr_count, expr, current_round_, output_row_count, expr->max_length_, is_duplicate);
|
|
|
|
int vec_continuous_offset = 0; // only used in VEC_CONTINUOUS
|
|
std::string vec_continuous_data; // use to store data of VEC_CONTINUOUS temporarily
|
|
|
|
ObDataGenerator::TempDataStore &data_store =
|
|
ObDataGenerator::get_instance().op_2_round_2_temp_store_[get_spec().get_id()][current_round_][expr_i];
|
|
for (int row = 0; row < output_row_count; row++) {
|
|
bool is_null = data_store.null_[row];
|
|
|
|
switch (expr->datum_meta_.get_type()) {
|
|
// why expr->datum_meta_.get_type() == ObInt32Type while expr->res_buf_len_ == 8 ?????
|
|
case ObInt32Type: {
|
|
int data = data_store.temp_int32_vector_[row];
|
|
if (i_vec != NULL) {
|
|
i_vec->set_int(row, static_cast<int64_t>(data));
|
|
if (is_null) { i_vec->set_null(row); }
|
|
} else {
|
|
datums[row].set_int32(data);
|
|
if (is_null) { datums[row].set_null(); }
|
|
}
|
|
break;
|
|
}
|
|
case ObIntType: {
|
|
int64_t data = data_store.temp_int64_vector_[row];
|
|
if (i_vec != NULL) {
|
|
i_vec->set_int(row, data);
|
|
if (is_null) { i_vec->set_null(row); }
|
|
} else {
|
|
datums[row].set_int(data);
|
|
if (is_null) { datums[row].set_null(); }
|
|
}
|
|
break;
|
|
}
|
|
case ObDoubleType: {
|
|
double data = data_store.temp_double_vector_[row];
|
|
if (i_vec != NULL) {
|
|
i_vec->set_double(row, data);
|
|
if (is_null) { i_vec->set_null(row); }
|
|
} else {
|
|
datums[row].set_double(data);
|
|
if (is_null) { datums[row].set_null(); }
|
|
}
|
|
break;
|
|
}
|
|
case ObNumberType: {
|
|
break;
|
|
}
|
|
case ObDecimalIntType: {
|
|
// have bug
|
|
// if decimal(20, 10)
|
|
// expr->res_buf_len_ = 16 is *not* equal to datums[x].len_ == 8
|
|
// where datums[x].len_ is set?
|
|
// i_vec->set_int(row,
|
|
// ObDataGenerator::get_instance().round_2_temp_store_[round_].first[expr_i].temp_int64_vector_[row]);
|
|
break;
|
|
}
|
|
case ObVarcharType:
|
|
case ObCharType: {
|
|
std::string tmp_str = data_store.temp_string_vector_[row];
|
|
int str_len = static_cast<int>(tmp_str.size());
|
|
|
|
if (i_vec != NULL) {
|
|
// vectorization 1.0 new operator
|
|
if (expr->get_format(eval_ctx_) == VEC_UNIFORM) {
|
|
if (is_null) {
|
|
i_vec->set_null(row);
|
|
} else {
|
|
i_vec->set_payload(row, tmp_str.data(), str_len);
|
|
}
|
|
} else {
|
|
if (is_null) { i_vec->set_null(row); }
|
|
if (expr->get_format(eval_ctx_) == VEC_DISCRETE) {
|
|
char **ptrs = expr->get_discrete_vector_ptrs(eval_ctx_);
|
|
int32_t *lens = expr->get_discrete_vector_lens(eval_ctx_);
|
|
ptrs[row] = expr->get_res_buf(eval_ctx_) + row * (expr->res_buf_len_);
|
|
lens[row] = str_len;
|
|
|
|
i_vec->set_payload(row, tmp_str.data(), str_len);
|
|
} else {
|
|
// VEC_CONTINUOUS
|
|
// offset[0] == 0 which is set in cg phase
|
|
vec_continuous_offset += str_len;
|
|
uint32_t *offset = expr->get_continuous_vector_offsets(eval_ctx_);
|
|
offset[row + 1] = vec_continuous_offset;
|
|
vec_continuous_data += tmp_str; // temporarily store data here
|
|
}
|
|
}
|
|
} else {
|
|
// vectorization 1.0 old operator
|
|
if (is_null) {
|
|
datums[row].set_null();
|
|
} else {
|
|
datums[row].set_string(tmp_str.data(), str_len);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
default: LOG_INFO("Can not generate random value so far for: ", K(expr->datum_meta_.get_type()));
|
|
}
|
|
}
|
|
|
|
if (expr->enable_rich_format() && expr->get_format(eval_ctx_) == VEC_CONTINUOUS && !vec_continuous_data.empty()) {
|
|
ObDynReserveBuf *drb = reinterpret_cast<ObDynReserveBuf *>(expr->get_continuous_vector_data(eval_ctx_));
|
|
// Todo: replace below logic when shengle handle continuous format memory
|
|
// in ObExpr::get_str_res_mem()
|
|
char *mem = NULL;
|
|
const int64_t alloc_size = next_pow2(vec_continuous_data.size());
|
|
if (OB_UNLIKELY(alloc_size > UINT32_MAX)) {
|
|
ret = OB_INVALID_ARGUMENT;
|
|
LOG_WARN("invalid argument", K(vec_continuous_data.size()), K(alloc_size), K(ret));
|
|
} else if (OB_ISNULL(mem = static_cast<char *>(eval_ctx_.alloc_expr_res(alloc_size)))) {
|
|
ret = OB_ALLOCATE_MEMORY_FAILED;
|
|
LOG_WARN("allocate memory failed", K(ret), K(ret));
|
|
} else {
|
|
// When extend memory, the old memory can not free, because the old memory may
|
|
// still be referenced. see: ob_datum_cast.cpp::common_copy_string
|
|
if (0 == drb->len_) { drb->magic_ = ObDynReserveBuf::MAGIC_NUM; }
|
|
drb->len_ = alloc_size;
|
|
drb->mem_ = mem;
|
|
MEMCPY(drb->mem_, vec_continuous_data.data(), vec_continuous_data.size());
|
|
|
|
ObContinuousBase *cont_vec = static_cast<ObContinuousBase *>(expr->get_vector(eval_ctx_));
|
|
cont_vec->set_data(drb->mem_);
|
|
}
|
|
LOG_DEBUG("extend expr result memory", K(ret), K(vec_continuous_data.size()), K(alloc_size), KP(this), KP(mem));
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int ObFakeTableScanVecOp::get_random_data(int expr_i, int expr_count, const ObExpr *expr, const int round,
|
|
const int batch_size, const int len, bool &is_duplicate)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
uint64_t op_id = get_spec().get_id();
|
|
switch (expr->datum_meta_.get_type()) {
|
|
// why expr->datum_meta_.get_type() == ObInt32Type while expr->res_buf_len_ == 8 ?????
|
|
case ObInt32Type: {
|
|
ObDataGenerator::get_instance().generate_data<int>(op_id, expr_i, expr_count, round, batch_size, len, is_duplicate);
|
|
break;
|
|
}
|
|
case ObIntType: {
|
|
ObDataGenerator::get_instance().generate_data<int64_t>(op_id, expr_i, expr_count, round, batch_size, len,
|
|
is_duplicate);
|
|
break;
|
|
}
|
|
case ObDoubleType: {
|
|
ObDataGenerator::get_instance().generate_data<double>(op_id, expr_i, expr_count, round, batch_size, len,
|
|
is_duplicate);
|
|
break;
|
|
}
|
|
case ObNumberType: {
|
|
break;
|
|
}
|
|
case ObDecimalIntType: {
|
|
// have bug
|
|
// if decimal(20, 10)
|
|
// expr->res_buf_len_ = 16 is *not* equal to datums[x].len_ == 8
|
|
// where datums[x].len_ is set?
|
|
ObDataGenerator::get_instance().generate_data<int64_t>(op_id, expr_i, expr_count, round, batch_size, len,
|
|
is_duplicate);
|
|
break;
|
|
}
|
|
case ObVarcharType:
|
|
case ObCharType: {
|
|
ObDataGenerator::get_instance().generate_data<std::string>(op_id, expr_i, expr_count, round, batch_size, len,
|
|
is_duplicate);
|
|
break;
|
|
}
|
|
default: LOG_INFO("Can not generate random value so far for: ", K(expr->datum_meta_.get_type()));
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void ObFakeTableScanVecOp::set_random_skip(const int round, const int batch_size)
|
|
{
|
|
std::string generate_data_skips;
|
|
uint64_t op_id = get_spec().get_id();
|
|
if (ObDataGenerator::get_instance().op_2_round_2_skips_[op_id].count(round) == 0) {
|
|
std::uniform_int_distribution<int> u_i(0, 1);
|
|
bool if_skip;
|
|
for (int i = 0; i < batch_size; i++) {
|
|
if_skip =
|
|
ObDataGenerator::get_instance().zero_one_rand_by_probability(ObTestOpConfig::get_instance().skips_probability_);
|
|
generate_data_skips += std::to_string(!if_skip) + " ";
|
|
ObDataGenerator::get_instance().op_2_round_2_skips_[op_id][round].push_back(if_skip);
|
|
}
|
|
}
|
|
|
|
for (int i = 0; i < batch_size; i++) {
|
|
if (ObDataGenerator::get_instance().op_2_round_2_skips_[op_id][round][i] == true) { brs_.skip_->set(i); }
|
|
}
|
|
LOG_INFO("skips : ", K(generate_data_skips.data()));
|
|
return;
|
|
}
|
|
|
|
} // namespace sql
|
|
} // namespace oceanbase
|