/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX SQL_EXE #include "sql/engine/px/ob_px_util.h" #include "sql/executor/ob_slice_calc.h" #include "sql/engine/ob_exec_context.h" #include "sql/engine/expr/ob_expr_calc_partition_id.h" #include "sql/engine/px/exchange/ob_transmit_op.h" #include "share/schema/ob_table_schema.h" #include "common/row/ob_row.h" #include "lib/ob_define.h" #include "share/schema/ob_part_mgr_util.h" using namespace oceanbase::sql; using namespace oceanbase::common; using namespace oceanbase::share; using namespace oceanbase::share::schema; int ObSliceIdxCalc::get_slice_indexes( const ObIArray &exprs, ObEvalCtx &eval_ctx, SliceIdxArray &slice_idx_array) { int ret = OB_SUCCESS; if (OB_UNLIKELY(slice_idx_array.count() != 1)) { slice_idx_array.reuse(); if (OB_FAIL(slice_idx_array.push_back(0))) { LOG_WARN("array push back failed", K(ret)); } } if (OB_SUCC(ret)) { ret = get_slice_idx(exprs, eval_ctx, slice_idx_array.at(0)); } return ret; } int ObSliceIdxCalc::get_previous_row_tablet_id(ObObj &tablet_id) { int ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "get previous tablet id"); UNUSED(tablet_id); return ret; } int ObSliceIdxCalc::setup_slice_indexes(ObEvalCtx &ctx) { int ret = OB_SUCCESS; if (NULL == slice_indexes_) { slice_indexes_ = static_cast(alloc_.alloc( sizeof(*slice_indexes_) * ctx.max_batch_size_)); if (NULL == slice_indexes_) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate memory failed", K(ret)); } } return ret; } int ObSliceIdxCalc::setup_tablet_ids(ObEvalCtx &ctx) { int ret = OB_SUCCESS; if (OB_ISNULL(tablet_ids_)) { tablet_ids_ = static_cast (alloc_.alloc(sizeof(*tablet_ids_) * ctx.max_batch_size_)); if (OB_ISNULL(tablet_ids_)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate memory failed", K(ret)); } } return ret; } int ObSliceIdxCalc::calc_for_null_aware(const ObExpr &expr, const int64_t task_cnt, ObEvalCtx &eval_ctx, SliceIdxArray &slice_idx_array, bool &processed) { int ret = OB_SUCCESS; processed = false; ObDatum *dis_key = nullptr; if (is_first_row_) { for (int64_t i = 0; OB_SUCC(ret) && i < task_cnt; ++i) { if (OB_FAIL(slice_idx_array.push_back(i))) { LOG_WARN("failed to push back i", K(ret)); } } is_first_row_ = false; processed = true; } else if (OB_FAIL(expr.eval(eval_ctx, dis_key))) { LOG_WARN("failed to eval repartition expr", K(ret)); } else if (dis_key->is_null()) { for (int64_t i = 0; OB_SUCC(ret) && i < task_cnt; ++i) { if (OB_FAIL(slice_idx_array.push_back(i))) { LOG_WARN("failed to push back i", K(ret)); } } processed = true; } return ret; } int ObRepartSliceIdxCalc::get_slice_idx(const ObIArray &exprs, ObEvalCtx &eval_ctx, int64_t &slice_idx) { UNUSED(exprs); UNUSED(eval_ctx); UNUSED(slice_idx); return OB_NOT_SUPPORTED; } int ObRepartSliceIdxCalc::get_slice_idx_vec(const ObIArray &exprs, ObEvalCtx &eval_ctx, ObBitVector &skip, const int64_t batch_size, int64_t *&indexes) { UNUSED(exprs); UNUSED(eval_ctx); UNUSED(skip); UNUSED(batch_size); UNUSED(indexes); return OB_NOT_SUPPORTED; } //TODO yishen int ObRepartSliceIdxCalc::get_part_id_by_one_level_sub_ch_map(int64_t &part_id) { int ret = OB_SUCCESS; if (px_repart_ch_map_.size() > 0) { ObTabletID tablet_id(px_repart_ch_map_.begin()->first); int64_t subpart_id = OB_INVALID_ID; if (OB_FAIL(table_schema_.get_part_id_by_tablet(tablet_id, part_id, subpart_id))) { LOG_WARN("fail to get part id by tablet", K(ret)); } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected ch map", K(ret)); } return ret; } int ObRepartSliceIdxCalc::get_sub_part_id_by_one_level_first_ch_map( const int64_t part_id, int64_t &tablet_id) { int ret = OB_SUCCESS; if (part2tablet_id_map_.size() > 0) { if (OB_FAIL(part2tablet_id_map_.get_refactored(part_id, tablet_id))) { LOG_WARN("fail to get tablet id", K(ret)); } } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected ch map", K(ret)); } return ret; } int ObRepartSliceIdxCalc::get_tablet_id(ObEvalCtx &eval_ctx, int64_t &tablet_id) { int ret = OB_SUCCESS; ObDatum *tablet_id_datum = NULL; if (OB_ISNULL(calc_part_id_expr_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(calc_part_id_expr_), K(ret)); } else if (OB_FAIL(calc_part_id_expr_->eval(eval_ctx, tablet_id_datum))) { LOG_WARN("fail to calc part id", K(ret), K(*calc_part_id_expr_)); } else if (ObExprCalcPartitionId::NONE_PARTITION_ID == (tablet_id = tablet_id_datum->get_int())) { // Usually, calc_part_id_expr_ returns tablet_id and set tablet_id_datum = 0 // if no partition match(refer to ObExprCalcPartitionBase::calc_partition_id). // In this condition, it will not go to this branch because NONE_PARTITION_ID equals to -1. // But when repart_type_ equals to OB_REPARTITION_ONE_SIDE_ONE_LEVEL_FIRST(means value of sub part key is fixed), // calc_part_id_expr_ returns partition_id of first part and set tablet_id_datum = -1. } else if (OB_REPARTITION_ONE_SIDE_ONE_LEVEL_FIRST == repart_type_) { int64_t part_id = tablet_id; if (OB_FAIL(get_sub_part_id_by_one_level_first_ch_map(part_id, tablet_id))) { LOG_WARN("fail to get part id by ch map", K(ret)); } } if (OB_SUCC(ret)) { tablet_id_ = tablet_id; } LOG_DEBUG("repart partition id", K(tablet_id)); return ret; } int ObRepartSliceIdxCalc::get_tablet_ids(ObEvalCtx &eval_ctx, ObBitVector &skip, const int64_t batch_size, int64_t *&tablet_ids) { int ret = OB_SUCCESS; if (OB_FAIL(ret)) { } else if (OB_ISNULL(calc_part_id_expr_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid calc part id expr", K(ret)); } else if (OB_FAIL(calc_part_id_expr_->eval_batch(eval_ctx, skip, batch_size))) { LOG_WARN("failed to eval batch", K(ret)); } else if (OB_FAIL(setup_tablet_ids(eval_ctx))) { LOG_WARN("failed to setup partition id", K(ret)); } else { ObDatumVector tablet_id_datums = calc_part_id_expr_->locate_expr_datumvector(eval_ctx); for (int64_t i = 0; OB_SUCC(ret) && i < batch_size; ++i) { if (skip.at(i)) { continue; } if (ObExprCalcPartitionId::NONE_PARTITION_ID == (tablet_ids_[i] = tablet_id_datums.at(i)->get_int())) { // do nothing } else if (OB_REPARTITION_ONE_SIDE_ONE_LEVEL_FIRST == repart_type_) { int64_t part_id = tablet_ids_[i]; if (OB_FAIL(get_sub_part_id_by_one_level_first_ch_map(part_id, tablet_ids_[i]))) { LOG_WARN("fail to get part id by ch map", K(ret)); } } if (OB_SUCC(ret)) { tablet_id_ = tablet_ids[i]; } } if (OB_SUCC(ret)) { tablet_ids = tablet_ids_; } } return ret; } int ObRepartSliceIdxCalc::get_previous_row_tablet_id(ObObj &tablet_id) { int ret = OB_SUCCESS; tablet_id.set_int(tablet_id_); return ret; } int ObSlaveMapRepartIdxCalcBase::init() { int ret = OB_SUCCESS; if (OB_FAIL(ObRepartSliceIdxCalc::init())) { LOG_WARN("fail init base", K(ret)); } // 在pkey random情况下,一个partition是可以被其所在的SQC上的所有worker处理的, // 所以一个tablet_id可能会对应多个task idx, // 形成 tablet_id -> task_idx_list的映射关系 // 例如:SQC1有3 worker(task1,task2,task3),2个partition(p0,p1); // SQC2有2 worker(task4,task5),1个partition(p2); // 就会形成: // p0 : [task1,task2,task3] // p1 : [task1,task2,task3] // p2 : [task4,task5] const ObPxPartChMapArray &part_ch_array = part_ch_info_.part_ch_array_; if (OB_SUCC(ret)) { if (OB_FAIL(part_to_task_array_map_.create(max(1, part_ch_array.count()), ObModIds::OB_SQL_PX))) { LOG_WARN("fail create part to task array map", "count", part_ch_array.count(), K(ret)); } else { // In ObRepartSliceIdxCalc::init(), the support_vectorized_calc_ has been set to true. support_vectorized_calc_ = false; } } // TODO: jiangting.lk // 这里可以再进行优化,节约空间,将相同SQC的partition使用同一个task idx array ARRAY_FOREACH_X(part_ch_array, idx, cnt, OB_SUCC(ret)) { int64_t tablet_id = part_ch_array.at(idx).first_; int64_t task_idx = part_ch_array.at(idx).second_; LOG_DEBUG("get one channel relationship", K(idx), K(cnt), "key", tablet_id, "val", task_idx); if (OB_ISNULL(part_to_task_array_map_.get(tablet_id))) { TaskIdxArray task_idx_array; if (OB_FAIL(part_to_task_array_map_.set_refactored(tablet_id, task_idx_array))) { LOG_WARN("push partition id and task idx array to map failed", K(ret), K(tablet_id)); } else { LOG_TRACE("add task idx array successfully", K(ret), K(tablet_id)); } } const TaskIdxArray *task_idx_array = part_to_task_array_map_.get(tablet_id); if (OB_ISNULL(task_idx_array)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("task idx list is null", K(ret), K(tablet_id)); } else if (OB_FAIL(const_cast(task_idx_array)->push_back(task_idx))) { LOG_WARN("failed push back task idx to task idx array", K(ret), K(task_idx), K(tablet_id)); } else { LOG_TRACE("push task idx to task idx array", K(tablet_id), K(task_idx), K(*task_idx_array), K(task_idx_array->count())); } } return ret; } int ObSlaveMapRepartIdxCalcBase::destroy() { int ret = OB_SUCCESS; if (part_to_task_array_map_.created()) { ret = part_to_task_array_map_.destroy(); } int tmp_ret = ObRepartSliceIdxCalc::destroy(); ret = (OB_SUCCESS == ret) ? tmp_ret : ret; return ret; } int ObRepartRandomSliceIdxCalc::init() { return ObSlaveMapRepartIdxCalcBase::init(); } int ObRepartRandomSliceIdxCalc::destroy() { return ObSlaveMapRepartIdxCalcBase::destroy(); } int ObRepartRandomSliceIdxCalc::get_slice_idx(const ObIArray &exprs, ObEvalCtx &eval_ctx, int64_t &slice_idx) { int ret = OB_SUCCESS; UNUSED(exprs); // 计算过程: // 1. 通过row计算出对应的partition id // 2. 通过partition id,找到对应的task array // 3. random的方式从task array中选择task idx作为slice idx int64_t tablet_id = OB_INVALID_INDEX; if (part_ch_info_.part_ch_array_.size() <= 0) { // 表示没有 partition到task idx的映射 ret = OB_NOT_INIT; LOG_WARN("the size of part task channel map is zero", K(ret)); } else if (OB_FAIL(ObRepartSliceIdxCalc::get_tablet_id(eval_ctx, tablet_id))) { LOG_WARN("failed to get partition id", K(ret)); } else if (OB_FAIL(get_task_idx_by_tablet_id(tablet_id, slice_idx))) { if (OB_HASH_NOT_EXIST == ret) { // 没有找到对应的分区,返回OB_NO_PARTITION_FOR_GIVEN_VALUE ret = OB_NO_PARTITION_FOR_GIVEN_VALUE; LOG_WARN("can't get the right partition", K(ret), K(tablet_id), K(slice_idx)); } } return ret; } int ObRepartRandomSliceIdxCalc::get_task_idx_by_tablet_id(int64_t tablet_id, int64_t &task_idx) { int ret = OB_SUCCESS; if (part_to_task_array_map_.size() <= 0) { ret = OB_NOT_INIT; LOG_WARN("part to task array is not inited", K(ret)); } else { const TaskIdxArray *task_idx_array = part_to_task_array_map_.get(tablet_id); if (OB_ISNULL(task_idx_array)) { ret = OB_HASH_NOT_EXIST; // convert to hash error LOG_WARN("the task idx array is null", K(ret), K(tablet_id)); } else if (task_idx_array->count() <= 0){ ret = OB_ERR_UNEXPECTED; LOG_WARN("the size of task idx array is zero", K(ret)); } else { // random的方式从 task idx array中找到结果 static const int64_t min = 0; static const int64_t max = INT64_MAX; int64_t rand_idx = common::ObRandom::rand(min, max) % task_idx_array->count(); task_idx = task_idx_array->at(rand_idx); LOG_TRACE("get task_idx/slice_idx by random way", K(tablet_id), K(task_idx)); } } return ret; } int ObAffinitizedRepartSliceIdxCalc::get_slice_idx(const ObIArray &exprs, ObEvalCtx &eval_ctx, int64_t &slice_idx) { int ret = OB_SUCCESS; UNUSED(exprs); int64_t tablet_id = OB_INVALID_INDEX; if (task_count_ <= 0) { ret = OB_NOT_INIT; LOG_WARN("task_count not inited", K_(task_count), K(ret)); } else if (px_repart_ch_map_.size() <= 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid map size, affinity map should not be empty!", K_(task_count), K(ret)); } else if (OB_FAIL(ObRepartSliceIdxCalc::get_tablet_id(eval_ctx, tablet_id))) { LOG_WARN("fail to get partition id", K(ret)); } else if (OB_FAIL(px_repart_ch_map_.get_refactored(tablet_id, slice_idx))) { if (OB_HASH_NOT_EXIST == ret && unmatch_row_dist_method_ == ObPQDistributeMethod::DROP) { slice_idx = ObSliceIdxCalc::DEFAULT_CHANNEL_IDX_TO_DROP_ROW; ret = OB_SUCCESS; } else if (OB_HASH_NOT_EXIST == ret && unmatch_row_dist_method_ == ObPQDistributeMethod::RANDOM) { slice_idx = round_robin_idx_; round_robin_idx_++; round_robin_idx_ = round_robin_idx_ % task_count_; ret = OB_SUCCESS; } else if (OB_HASH_NOT_EXIST == ret && unmatch_row_dist_method_ == ObPQDistributeMethod::HASH) { ObHashSliceIdCalc slice_id_calc(exec_ctx_.get_allocator(), task_count_, ObNullDistributeMethod::NONE, hash_dist_exprs_, hash_funcs_); if (OB_ISNULL(hash_dist_exprs_) || OB_UNLIKELY(0 == hash_dist_exprs_->count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("hash dist exprs is null", K(ret)); } else if (OB_FAIL(slice_id_calc.get_slice_idx(*hash_dist_exprs_, eval_ctx, slice_idx))) { LOG_WARN("hash slice calc get slice idx failed", K(ret)); } } else { LOG_WARN("fail get affinitized taskid", K(ret), K(tablet_id), K_(task_count), K_(unmatch_row_dist_method)); } } return ret; } int ObAffinitizedRepartSliceIdxCalc::get_slice_idx_vec(const ObIArray &exprs, ObEvalCtx &eval_ctx, ObBitVector &skip, const int64_t batch_size, int64_t *&indexes) { int ret = OB_SUCCESS; int64_t tablet_id = OB_INVALID_INDEX; if (task_count_ <= 0) { ret = OB_NOT_INIT; LOG_WARN("task_count not inited", K_(task_count), K(ret)); } else if (px_repart_ch_map_.size() <= 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid map size, affinity map should not be empty!", K_(task_count), K(ret)); } else if (OB_FAIL(setup_slice_indexes(eval_ctx))) { LOG_WARN("failed to set up slice indexes", K(ret)); } else if (OB_FAIL(setup_tablet_ids(eval_ctx))) { LOG_WARN("failed to setup partition ids", K(ret)); } else if (OB_FAIL(ObRepartSliceIdxCalc::get_tablet_ids(eval_ctx, skip, batch_size, tablet_ids_))) { LOG_WARN("fail to get partition id", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < batch_size; ++i) { if (skip.at(i)) { continue; } if (OB_FAIL(px_repart_ch_map_.get_refactored(tablet_ids_[i], slice_indexes_[i]))) { if (OB_HASH_NOT_EXIST == ret && unmatch_row_dist_method_ == ObPQDistributeMethod::DROP) { slice_indexes_[i] = ObSliceIdxCalc::DEFAULT_CHANNEL_IDX_TO_DROP_ROW; ret = OB_SUCCESS; } else if (OB_HASH_NOT_EXIST == ret && unmatch_row_dist_method_ == ObPQDistributeMethod::RANDOM) { slice_indexes_[i] = round_robin_idx_; round_robin_idx_++; round_robin_idx_ = round_robin_idx_ % task_count_; ret = OB_SUCCESS; } else if (OB_HASH_NOT_EXIST == ret && unmatch_row_dist_method_ == ObPQDistributeMethod::HASH) { ObHashSliceIdCalc slice_id_calc(exec_ctx_.get_allocator(), task_count_, null_row_dist_method_, hash_dist_exprs_, hash_funcs_); if (OB_ISNULL(hash_dist_exprs_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("hash dist exprs is null", K(ret)); } else if (OB_FAIL(slice_id_calc.get_slice_idx(*hash_dist_exprs_, eval_ctx, slice_indexes_[i]))) { LOG_WARN("hash slice calc get slice idx failed", K(ret)); } } else { LOG_WARN("fail get affinitized taskid", K(ret), K(tablet_id), K_(task_count), K_(unmatch_row_dist_method)); } } } if (OB_SUCC(ret)) { indexes = slice_indexes_; } } return ret; } int ObRepartSliceIdxCalc::init() { int ret = OB_SUCCESS; if (px_repart_ch_map_.created()) { ret = OB_INIT_TWICE; LOG_WARN("this map has been init twice", K(ret)); } else if (OB_FAIL(build_repart_ch_map(px_repart_ch_map_))) { LOG_WARN("failed to build affi hash map", K(ret)); } else if (OB_FAIL(setup_one_side_one_level_info())) { LOG_WARN("fail to build one side on level map", K(ret)); } else { support_vectorized_calc_ = true; } return ret; } int ObRepartSliceIdxCalc::setup_one_side_one_level_info() { int ret = OB_SUCCESS; CalcPartitionBaseInfo *calc_part_info = NULL; calc_part_info = reinterpret_cast(calc_part_id_expr_->extra_info_); CK(OB_NOT_NULL(calc_part_info)); CK(px_repart_ch_map_.size() > 0); if (OB_FAIL(ret)) { } else if (OB_ISNULL(calc_part_id_expr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected calc part id expr", K(ret)); } else if (OB_REPARTITION_ONE_SIDE_ONE_LEVEL_FIRST == repart_type_) { calc_part_info->partition_id_calc_type_ = CALC_IGNORE_SUB_PART; if (OB_FAIL(build_part2tablet_id_map())) { LOG_WARN("fail to build part2tablet id map", K(ret)); } } else if (OB_REPARTITION_ONE_SIDE_ONE_LEVEL_SUB == repart_type_) { calc_part_info->partition_id_calc_type_ = CALC_IGNORE_FIRST_PART; int64_t first_part_id = OB_INVALID_ID; if (OB_FAIL(get_part_id_by_one_level_sub_ch_map(first_part_id))) { LOG_WARN("fail to get part id by ch map", K(ret)); } else { calc_part_info->first_part_id_ = first_part_id; } } return ret; } int ObRepartSliceIdxCalc::build_repart_ch_map(ObPxPartChMap &affinity_map) { int ret = OB_SUCCESS; const ObPxPartChMapArray &part_ch_array = part_ch_info_.part_ch_array_; if (OB_FAIL(affinity_map.create(max(1, part_ch_array.count()), ObModIds::OB_SQL_PX))) { LOG_WARN("fail create hashmap", "count", part_ch_array.count(), K(ret)); } int64_t tablet_id = common::OB_INVALID_INDEX_INT64; // 将 partition idx => channel idx 的映射 array 转化成 hash map,提高查询效率 ARRAY_FOREACH_X(part_ch_array, idx, cnt, OB_SUCC(ret)) { LOG_DEBUG("map build", K(idx), K(cnt), "key", part_ch_array.at(idx).first_, "val", part_ch_array.at(idx).second_); if (tablet_id != part_ch_array.at(idx).first_) { tablet_id = part_ch_array.at(idx).first_; if (OB_FAIL(affinity_map.set_refactored(part_ch_array.at(idx).first_, part_ch_array.at(idx).second_))) { LOG_WARN("fail add item to hash map", K(idx), K(cnt), "key", part_ch_array.at(idx).first_, "val", part_ch_array.at(idx).second_, K(ret)); } } else { // skip, same partition id may take more than one entry in part_ch_array. // (e.g slave mapping pkey hash) } } return ret; } int ObRepartSliceIdxCalc::build_part2tablet_id_map() { int ret = OB_SUCCESS; if (OB_FAIL(part2tablet_id_map_.create(max(1, px_repart_ch_map_.size()), "PxTabletMap"))) { LOG_WARN("fail create hashmap", K(ret)); } else { int64_t part_id = OB_INVALID_ID; int64_t subpart_id = OB_INVALID_ID; ObTabletID tablet_id; for (auto iter = px_repart_ch_map_.begin(); OB_SUCC(ret) && iter != px_repart_ch_map_.end(); ++iter) { tablet_id = ObTabletID(iter->first); if (OB_FAIL(table_schema_.get_part_id_by_tablet(tablet_id, part_id, subpart_id))) { LOG_WARN("get part id by tablet id", K(tablet_id), K(part_id)); } else if (OB_FAIL(part2tablet_id_map_.set_refactored(part_id, tablet_id.id()))) { LOG_WARN("fail to set part id", K(part_id), K(tablet_id)); } } } return ret; } int ObSlaveMapBcastIdxCalc::get_slice_indexes(const ObIArray &exprs, ObEvalCtx &eval_ctx, SliceIdxArray &slice_idx_array) { UNUSED(exprs); int ret = OB_SUCCESS; int64_t tablet_id = common::OB_INVALID_INDEX; slice_idx_array.reuse(); if (OB_FAIL(get_tablet_id(eval_ctx, tablet_id))) { LOG_WARN("failed to get_tablet_id", K(ret)); } else if (OB_INVALID_INDEX == tablet_id) { int64_t drop_idx = ObSliceIdxCalc::DEFAULT_CHANNEL_IDX_TO_DROP_ROW; if (OB_FAIL(slice_idx_array.push_back(drop_idx))) { LOG_WARN("failed to push back slice idx", K(ret)); } } else { const ObPxPartChMapArray &part_ch_array = part_ch_info_.part_ch_array_; ARRAY_FOREACH (part_ch_array, idx) { if (tablet_id == part_ch_array.at(idx).first_) { if (OB_FAIL(slice_idx_array.push_back(part_ch_array.at(idx).second_))) { LOG_WARN("failed to push back slice idx", K(ret)); } LOG_DEBUG("find slice id to trans", K(tablet_id), K(part_ch_array.at(idx).second_)); } } } return ret; } int ObAllToOneSliceIdxCalc::get_slice_idx( const ObIArray &exprs, ObEvalCtx &eval_ctx, int64_t &slice_idx) { UNUSED(exprs); UNUSED(eval_ctx); slice_idx = 0; return OB_SUCCESS; } int ObAllToOneSliceIdxCalc::get_slice_idx_vec(const ObIArray &, ObEvalCtx &eval_ctx, ObBitVector &, const int64_t, int64_t *&indexes) { int ret = OB_SUCCESS; if (NULL == slice_indexes_) { if (OB_FAIL(setup_slice_indexes(eval_ctx))) { LOG_WARN("setup slice indexes failed", K(ret)); } else { const int64_t size = sizeof(*slice_indexes_) * eval_ctx.max_batch_size_; MEMSET(slice_indexes_, 0, size); } } indexes = slice_indexes_; return ret; } int ObBc2HostSliceIdCalc::get_slice_indexes( const ObIArray &exprs, ObEvalCtx &eval_ctx, SliceIdxArray &slice_idx_array) { UNUSED(exprs); UNUSED(eval_ctx); int ret = OB_SUCCESS; if (OB_UNLIKELY(slice_idx_array.count() != host_idx_.count())) { slice_idx_array.reuse(); FOREACH_CNT_X(it, host_idx_, OB_SUCC(ret)) { UNUSED(it); if (OB_FAIL(slice_idx_array.push_back(0))) { LOG_WARN("array push back failed", K(ret)); } } } if (OB_SUCC(ret)) { for (int64_t i = 0; i < host_idx_.count(); i++) { auto &hi = host_idx_.at(i); int64_t idx = hi.begin_ + hi.idx_ % (hi.end_ - hi.begin_); slice_idx_array.at(i) = channel_idx_.at(idx); hi.idx_++; } } return ret; } int ObRandomSliceIdCalc::get_slice_idx( const ObIArray &exprs, ObEvalCtx &eval_ctx, int64_t &slice_idx) { UNUSED(exprs); UNUSED(eval_ctx); int ret = OB_SUCCESS; if (slice_cnt_ <= 0) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid slice count", K(ret), K(slice_cnt_)); } else { slice_idx = idx_ % slice_cnt_; idx_++; } return ret; } int ObBroadcastSliceIdCalc::get_slice_indexes( const ObIArray &exprs, ObEvalCtx &eval_ctx, SliceIdxArray &slice_idx_array) { UNUSED(exprs); UNUSED(eval_ctx); int ret = OB_SUCCESS; slice_idx_array.reuse(); for (int64_t i = 0; OB_SUCC(ret) && i < slice_cnt_; ++i) { if (OB_FAIL(slice_idx_array.push_back(i))) { LOG_WARN("failed to push back i", K(ret)); } } return ret; } int ObHashSliceIdCalc::calc_slice_idx(ObEvalCtx &eval_ctx, int64_t slice_size, int64_t &slice_idx) { int ret = OB_SUCCESS; uint64_t hash_val = SLICE_CALC_HASH_SEED; ObDatum *datum = nullptr; bool found_null = false; if (OB_ISNULL(hash_dist_exprs_) || OB_ISNULL(hash_funcs_)) { ret = OB_NOT_INIT; LOG_WARN("hash func and expr not init", K(ret)); } else if (n_keys_ > hash_dist_exprs_->count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: n_keys is invalid", K(ret), K(n_keys_), K(hash_dist_exprs_->count())); } for (int64_t i = 0; OB_SUCC(ret) && i < n_keys_; ++i) { const ObExpr* dist_expr = hash_dist_exprs_->at(i); if (OB_FAIL(dist_expr->eval(eval_ctx, datum))) { LOG_WARN("failed to eval datum", K(ret)); } else if (datum->is_null() && ObNullDistributeMethod::DROP == null_row_dist_method_) { slice_idx = ObSliceIdxCalc::DEFAULT_CHANNEL_IDX_TO_DROP_ROW; found_null = true; break; } else if (datum->is_null() && ObNullDistributeMethod::RANDOM == null_row_dist_method_) { slice_idx = round_robin_idx_ % slice_size; round_robin_idx_++; found_null = true; break; } else { hash_val = hash_funcs_->at(i).hash_func_(*datum, hash_val); } } if (OB_SUCC(ret) && !found_null) { slice_idx = hash_val % slice_size; } return ret; } int ObHashSliceIdCalc::calc_hash_value(ObEvalCtx &eval_ctx, uint64_t &hash_val) { int ret = OB_SUCCESS; ObDatum *datum = nullptr; if (OB_ISNULL(hash_dist_exprs_) || OB_ISNULL(hash_funcs_)) { ret = OB_NOT_INIT; LOG_WARN("hash func and expr not init", K(ret)); } else if (n_keys_ > hash_dist_exprs_->count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: n_keys is invalid", K(ret), K(n_keys_), K(hash_dist_exprs_->count())); } for (int64_t i = 0; OB_SUCC(ret) && i < n_keys_; ++i) { const ObExpr* dist_expr = hash_dist_exprs_->at(i); if (OB_FAIL(dist_expr->eval(eval_ctx, datum))) { LOG_WARN("failed to eval datum", K(ret)); } else { hash_val = hash_funcs_->at(i).hash_func_(*datum, hash_val); } } return ret; } int ObHashSliceIdCalc::get_slice_idx(const ObIArray &, ObEvalCtx &eval_ctx, int64_t &slice_idx) { return calc_slice_idx(eval_ctx, task_cnt_, slice_idx); } int ObHashSliceIdCalc::get_slice_idx_vec(const ObIArray &, ObEvalCtx &eval_ctx, ObBitVector &skip, const int64_t batch_size, int64_t *&indexes) { int ret = OB_SUCCESS; if (n_keys_ > hash_dist_exprs_->count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected status: n_keys is invalid", K(ret), K(n_keys_), K(hash_dist_exprs_->count())); } else if (OB_FAIL(setup_slice_indexes(eval_ctx))) { LOG_WARN("setup slice indexes failed", K(ret)); } else { uint64_t *hash_val = reinterpret_cast(slice_indexes_); uint64_t default_seed = SLICE_CALC_HASH_SEED; for (int64_t i = 0; OB_SUCC(ret) && i < n_keys_; i++) { ObExpr *e = hash_dist_exprs_->at(i); if (OB_FAIL(e->eval_batch(eval_ctx, skip, batch_size))) { LOG_WARN("eval batch failed", K(ret)); } else { const bool is_batch_seed = i > 0; hash_funcs_->at(i).batch_hash_func_(hash_val, e->locate_batch_datums(eval_ctx), e->is_batch_result(), skip, batch_size, is_batch_seed ? hash_val : &default_seed, is_batch_seed); } } if (OB_SUCC(ret)) { for (int64_t i = 0; i < batch_size; i++) { if (!skip.at(i)) { slice_indexes_[i] = hash_val[i] % task_cnt_; } } indexes = slice_indexes_; } } return ret; } /******************* ObSlaveMapPkeyRangeIdxCalc ********************/ ObSlaveMapPkeyRangeIdxCalc::~ObSlaveMapPkeyRangeIdxCalc() { destroy(); } int ObSlaveMapPkeyRangeIdxCalc::init() { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret), K(is_inited_)); } else if (OB_FAIL(ObSlaveMapRepartIdxCalcBase::init())) { LOG_WARN("fail init base repart class", K(ret)); } else if (OB_UNLIKELY(nullptr == calc_part_id_expr_ || sort_exprs_.count() <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(calc_part_id_expr_), K(sort_exprs_.count())); } else if (OB_FAIL(sort_key_.reserve(sort_exprs_.count()))) { LOG_WARN("reserve sort key failed", K(ret), K(sort_exprs_.count())); } else if (OB_FAIL(build_partition_range_channel_map(part_range_map_))) { LOG_WARN("build partition range map failed", K(ret)); } else { is_inited_ = true; } return ret; } int ObSlaveMapPkeyRangeIdxCalc::destroy() { int ret = OB_SUCCESS; is_inited_ = false; if (part_range_map_.created()) { struct DeleteFn { DeleteFn(ObIAllocator &allocator) : allocator_(allocator) {} int operator ()(hash::HashMapPair &it) { if (nullptr != it.second) { it.second->~PartitionRangeChannelInfo(); allocator_.free(it.second); it.second = nullptr; } // The Story Behind Return Code: // We change the interface for this because of supporting that iterations encounter an error // to return immediately, yet for all the existing logics there, they don't care the return // code and wants to continue iteration anyway. So to keep the old behavior and makes everyone // else happy, we have to return OB_SUCCESS here. And we only make this return code thing // affects the behavior in tenant meta manager washing tablet. If you want to change the // behavior in such places, please consult the individual file owners to fully understand the // needs there. return OB_SUCCESS; } ObIAllocator &allocator_; }; DeleteFn delete_fn(exec_ctx_.get_allocator()); if (OB_FAIL(part_range_map_.foreach_refactored(delete_fn))) { LOG_WARN("free item in partition range map failed", K(ret)); } else if (OB_FAIL(part_range_map_.destroy())) { LOG_WARN("destroy partition range map failed", K(ret)); } } int tmp_ret = ObSlaveMapRepartIdxCalcBase::destroy(); ret = (OB_SUCCESS == ret) ? tmp_ret : ret; return ret; } int ObSlaveMapPkeyRangeIdxCalc::build_partition_range_channel_map( common::hash::ObHashMap &part_range_channel_map) { int ret = OB_SUCCESS; part_range_channel_map.destroy(); const ObIArray &part_ranges = exec_ctx_.get_partition_ranges(); if (OB_FAIL(part_range_channel_map.create(DEFAULT_PARTITION_COUNT, common::ObModIds::OB_SQL_PX))) { LOG_WARN("create part range map failed", K(ret)); } else { int64_t tmp_tablet_id = OB_INVALID_INDEX_INT64; ObArray tmp_channels; const int64_t part_ch_count = part_ch_info_.part_ch_array_.count(); for (int64_t i = 0; OB_SUCC(ret) && i < part_ch_count + 1; ++i) { const int64_t cur_tablet_id = i < part_ch_count ? part_ch_info_.part_ch_array_.at(i).first_ : OB_INVALID_INDEX_INT64; const int64_t cur_ch_idx = i< part_ch_count ? part_ch_info_.part_ch_array_.at(i).second_ : OB_INVALID_INDEX_INT64; if (cur_tablet_id != tmp_tablet_id) { if (tmp_channels.count() > 0) { // save void *buf = nullptr; PartitionRangeChannelInfo *item = nullptr; if (OB_ISNULL(buf = exec_ctx_.get_allocator().alloc(sizeof(PartitionRangeChannelInfo)))) { ret = OB_ALLOCATE_MEMORY_FAILED; } else if (FALSE_IT(item = new (buf) PartitionRangeChannelInfo)) { } else if (FALSE_IT(item->tablet_id_ = tmp_tablet_id)) { } else if (OB_FAIL(item->channels_.assign(tmp_channels))) { LOG_WARN("assign partition channels failed", K(ret)); } else if (OB_FAIL(part_range_channel_map.set_refactored(item->tablet_id_, item))) { LOG_WARN("insert partition range map failed", K(ret), K(item)); } else { buf = nullptr; } if (nullptr != buf) { item->~PartitionRangeChannelInfo(); item = nullptr; exec_ctx_.get_allocator().free(buf); } } // reset tmp_tablet_id = cur_tablet_id; tmp_channels.reset(); } if (OB_SUCC(ret) && OB_INVALID_INDEX_INT64 != cur_ch_idx) { if (OB_FAIL(tmp_channels.push_back(cur_ch_idx))) { LOG_WARN("push back channel index failed", K(ret), K(i), K(cur_ch_idx)); } } } for (int64_t i = 0; OB_SUCC(ret) && i < part_ranges.count(); ++i) { const ObPxTabletRange &cur_part_range = part_ranges.at(i); PartitionRangeChannelInfo *item = nullptr; if (OB_FAIL(part_range_channel_map.get_refactored(cur_part_range.tablet_id_, item))) { LOG_WARN("get partition channel info failed", K(ret), K(i), K(cur_part_range)); } else if (OB_FAIL(item->range_cut_.assign(cur_part_range.range_cut_))) { LOG_WARN("assign end key failed", K(ret), K(i), K(cur_part_range), K(*item)); } } } return ret; } static int calc_ch_idx(const int64_t range_count, const int64_t ch_count, const int64_t range_idx, int64_t &ch_idx) { int ret = OB_SUCCESS; ch_idx = -1; if (OB_UNLIKELY(ch_count <= 0 || range_idx < 0 || range_idx >= range_count)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(range_count), K(ch_count), K(range_idx)); } else if (range_count <= ch_count) { ch_idx = range_idx; } else { const int64_t avg = range_count / ch_count; const int64_t rem = range_count % ch_count; int64_t tmp = 0; if ((tmp = range_idx - rem * (avg + 1)) > 0) { ch_idx = rem + tmp / avg; } else { ch_idx = range_idx / (avg + 1); } } return ret; } bool ObSlaveMapPkeyRangeIdxCalc::Compare::operator()( const ObPxTabletRange::DatumKey &l, const ObPxTabletRange::DatumKey &r) { bool less = false; int &ret = ret_; if (OB_FAIL(ret)) { // already fail } else if (OB_ISNULL(sort_cmp_funs_) || OB_ISNULL(sort_collations_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(l), K(r)); } else { int cmp = 0; const int64_t cnt = sort_cmp_funs_->count(); for (int64_t i = 0; 0 == cmp && i < cnt; i++) { cmp = sort_cmp_funs_->at(i).cmp_func_(l[i], r[i]); if (cmp < 0) { less = sort_collations_->at(i).is_ascending_; } else if (cmp > 0) { less = !sort_collations_->at(i).is_ascending_; } } } return less; } int ObSlaveMapPkeyRangeIdxCalc::get_task_idx( const int64_t tablet_id, const ObPxTabletRange::DatumKey &sort_key, int64_t &task_idx) { int ret = OB_SUCCESS; PartitionRangeChannelInfo *item = nullptr; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret), K(is_inited_)); } else if (OB_UNLIKELY(tablet_id <= 0)) { ret = OB_NO_PARTITION_FOR_GIVEN_VALUE; LOG_WARN("can't get the right partition", K(ret), K(tablet_id)); } else if (OB_UNLIKELY(sort_key.count() <= 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid argument", K(ret), K(tablet_id), K(sort_key)); } else if (OB_FAIL(part_range_map_.get_refactored(tablet_id, item))) { LOG_WARN("get partition ranges failed", K(ret), K(tablet_id)); } else if (OB_UNLIKELY(nullptr == item || item->tablet_id_ != tablet_id)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid range channel map", K(ret), K(tablet_id), KP(item)); } else { ObPxTabletRange::RangeCut &range_cut = item->range_cut_; ObPxTabletRange::RangeCut::iterator found_it = std::lower_bound( range_cut.begin(), range_cut.end(), sort_key, sort_cmp_); if (OB_FAIL(sort_cmp_.ret_)) { LOG_WARN("sort compare failed", K(ret)); } else { const int64_t range_idx = found_it - range_cut.begin(); int64_t ch_idx = -1; if (OB_FAIL(calc_ch_idx(range_cut.count() + 1, item->channels_.count(), range_idx, ch_idx))) { LOG_WARN("get channel idx failed", K(ret), K(*item), K(range_idx)); } else if (ch_idx < 0 || ch_idx >= item->channels_.count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid channel index", K(ret), K(ch_idx), K(*item)); } else { task_idx = item->channels_.at(ch_idx); } } } return ret; } int ObSlaveMapPkeyRangeIdxCalc::get_slice_idx( const common::ObIArray &exprs, ObEvalCtx &eval_ctx, int64_t &slice_idx) { int ret = OB_SUCCESS; int64_t tablet_id = OB_INVALID_INDEX; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", K(ret)); } else if (OB_UNLIKELY(exprs.count() <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(exprs.count())); } else if (OB_FAIL(ObRepartSliceIdxCalc::get_tablet_id(eval_ctx, tablet_id))) { LOG_WARN("failed to get partition id", K(ret)); } else { sort_key_.reuse(); for (int64_t i = 0; OB_SUCC(ret) && i < sort_exprs_.count(); ++i) { const ObExpr *cur_expr = sort_exprs_.at(i); ObDatum *cur_datum = nullptr; if (OB_ISNULL(cur_expr)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("current expr is null", K(ret), KP(cur_expr)); } else if (OB_FAIL(cur_expr->eval(eval_ctx, cur_datum))) { LOG_WARN("eval expr to datum failed", K(ret), K(*cur_expr)); } else if (OB_ISNULL(cur_datum)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("current datum is null", K(ret), KP(cur_datum)); } else if (OB_FAIL(sort_key_.push_back(*cur_datum))) { LOG_WARN("push back datum failed", K(ret)); } } if (OB_SUCC(ret)) { if (OB_FAIL(get_task_idx(tablet_id, sort_key_, slice_idx))) { LOG_WARN("get task idx failed", K(ret), K(tablet_id), K(sort_key_)); } } } return ret; } /******************* ObSlaveMapPkeyHashIdxCalc ********************/ int ObSlaveMapPkeyHashIdxCalc::init() { int ret = OB_SUCCESS; if (OB_FAIL(ObSlaveMapRepartIdxCalcBase::init())) { LOG_WARN("fail init base repart class", K(ret)); } else if (affi_hash_map_.created()) { ret = OB_INIT_TWICE; LOG_WARN("this map has been init twice", K(ret)); } else if (OB_FAIL(build_affi_hash_map(affi_hash_map_))) { LOG_WARN("failed to build affi hash map", K(ret)); } return ret; } int ObSlaveMapPkeyHashIdxCalc::destroy() { int ret = OB_SUCCESS; if (affi_hash_map_.created()) { ret = affi_hash_map_.destroy(); } int tmp_ret = ObSlaveMapRepartIdxCalcBase::destroy(); ret = (OB_SUCCESS == ret) ? tmp_ret : ret; return ret; } int ObSlaveMapPkeyHashIdxCalc::get_slice_idx( const ObIArray &, ObEvalCtx &eval_ctx, int64_t &slice_idx) { int ret = OB_SUCCESS; // 计算过程: // 1. 通过row计算出对应的partition id // 2. 通过partition id,找到对应的task array // 3. hash 的方式从task array中选择task idx作为slice idx int64_t tablet_id = OB_INVALID_INDEX; if (part_ch_info_.part_ch_array_.size() <= 0) { // 表示没有 partition到task idx的映射 ret = OB_NOT_INIT; LOG_WARN("the size of part task channel map is zero", K(ret)); } else if (OB_FAIL(ObRepartSliceIdxCalc::get_tablet_id(eval_ctx, tablet_id))) { LOG_WARN("failed to get partition id", K(ret)); } else if (OB_FAIL(get_task_idx_by_tablet_id(eval_ctx, tablet_id, slice_idx))) { if (OB_HASH_NOT_EXIST == ret) { // 没有找到对应的分区,返回OB_NO_PARTITION_FOR_GIVEN_VALUE ret = OB_NO_PARTITION_FOR_GIVEN_VALUE; LOG_WARN("can't get the right partition", K(ret), K(tablet_id), K(slice_idx)); } } return ret; } int ObSlaveMapPkeyHashIdxCalc::get_task_idx_by_tablet_id(ObEvalCtx &eval_ctx, int64_t tablet_id, int64_t &task_idx) { int ret = OB_SUCCESS; int64_t hash_idx = 0; if (part_to_task_array_map_.size() <= 0) { ret = OB_NOT_INIT; LOG_WARN("part to task array is not inited", K(ret)); } else { const TaskIdxArray *task_idx_array = part_to_task_array_map_.get(tablet_id); if (OB_ISNULL(task_idx_array)) { ret = OB_HASH_NOT_EXIST; // convert to hash error LOG_WARN("the task idx array is null", K(ret), K(tablet_id)); } else if (task_idx_array->count() <= 0){ ret = OB_ERR_UNEXPECTED; LOG_WARN("the size of task idx array is zero", K(ret)); } else if (OB_FAIL(calc_slice_idx(eval_ctx, task_idx_array->count(), hash_idx))) { LOG_WARN("fail calc hash value", K(ret)); } else if (ObSliceIdxCalc::DEFAULT_CHANNEL_IDX_TO_DROP_ROW == hash_idx) { task_idx = ObSliceIdxCalc::DEFAULT_CHANNEL_IDX_TO_DROP_ROW; } else { task_idx = task_idx_array->at(hash_idx); LOG_TRACE("get task_idx/slice_idx by hash way", K(tablet_id), K(hash_idx), K(task_idx)); } } return ret; } int ObSlaveMapPkeyHashIdxCalc::build_affi_hash_map(hash::ObHashMap &affi_hash_map) { int ret = OB_SUCCESS; int64_t tablet_id = common::OB_INVALID_INDEX_INT64; ObPxPartChMapItem item; const ObPxPartChMapArray &part_ch_array = part_ch_info_.part_ch_array_; if (OB_FAIL(affi_hash_map.create(part_ch_array.count(), common::ObModIds::OB_SQL_PX))) { LOG_WARN("failed to create part ch map", K(ret)); } ARRAY_FOREACH (part_ch_array, idx) { if (common::OB_INVALID_INDEX_INT64 == tablet_id) { tablet_id = part_ch_array.at(idx).first_; item.first_ = idx; } else if (tablet_id != part_ch_array.at(idx).first_) { item.second_ = idx; if (OB_FAIL(affi_hash_map.set_refactored(tablet_id, item))) { LOG_WARN("failed to set refactored", K(ret)); } LOG_DEBUG("build affi hash map", K(tablet_id), K(item)); tablet_id = part_ch_array.at(idx).first_; item.first_ = idx; } } if (OB_SUCC(ret)) { item.second_ = part_ch_array.count(); if (OB_FAIL(affi_hash_map.set_refactored(tablet_id, item))) { LOG_WARN("failed to set refactored", K(ret)); } LOG_DEBUG("build affi hash map", K(tablet_id), K(item)); } return ret; } int ObRangeSliceIdCalc::get_slice_idx(const ObIArray &, ObEvalCtx &eval_ctx, int64_t &slice_idx) { int ret = OB_SUCCESS; if (OB_ISNULL(dist_exprs_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected dist exprs", K(ret)); } else if (OB_ISNULL(range_) || range_->range_cut_.empty()) { slice_idx = 0; } else { ObPxTabletRange::DatumKey sort_key; ObDatum *datum = nullptr; for (int i = 0; i < dist_exprs_->count() && OB_SUCC(ret); ++i) { if (OB_ISNULL(dist_exprs_->at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null expr", K(ret)); } else if (OB_FAIL(dist_exprs_->at(i)->eval(eval_ctx, datum))) { LOG_WARN("fail to eval expr", K(ret)); } else if (OB_FAIL(sort_key.push_back(*datum))) { LOG_WARN("fail to push back sort key", K(ret)); } } if (OB_SUCC(ret)) { Compare sort_cmp(&sort_cmp_funs_, &sort_collations_); ObPxTabletRange::RangeCut &range_cut = const_cast(range_->range_cut_); ObPxTabletRange::RangeCut::iterator found_it = std::lower_bound( range_cut.begin(), range_cut.end(), sort_key, sort_cmp); const int64_t range_idx = found_it - range_cut.begin(); slice_idx = range_idx % task_cnt_; } } return ret; } int ObRangeSliceIdCalc::get_slice_idx_vec(const ObIArray &, ObEvalCtx &eval_ctx, ObBitVector &skip, const int64_t batch_size, int64_t *&indexes) { int ret = OB_SUCCESS; if (OB_ISNULL(dist_exprs_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected dist exprs", K(ret)); } else if (OB_FAIL(setup_slice_indexes(eval_ctx))) { LOG_WARN("setup slice indexes failed", K(ret)); } else if (OB_ISNULL(range_) || range_->range_cut_.empty()) { for (int64_t idx = 0; idx < batch_size; ++idx) { slice_indexes_[idx] = 0; } indexes = slice_indexes_; } else { Compare sort_cmp(&sort_cmp_funs_, &sort_collations_); ObPxTabletRange::DatumKey sort_key; ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx); batch_info_guard.set_batch_size(batch_size); for (int64_t idx = 0; OB_SUCC(ret) && idx < batch_size; ++idx) { if (skip.at(idx)) { continue; } else { batch_info_guard.set_batch_idx(idx); ObDatum *datum = nullptr; for (int64_t i = 0; i < dist_exprs_->count() && OB_SUCC(ret); ++i) { if (OB_ISNULL(dist_exprs_->at(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null expr", K(ret)); } else if (OB_FAIL(dist_exprs_->at(i)->eval(eval_ctx, datum))) { LOG_WARN("fail to eval expr", K(ret)); } else if (OB_FAIL(sort_key.push_back(*datum))) { LOG_WARN("fail to push back sort key", K(ret)); } } if (OB_SUCC(ret)) { ObPxTabletRange::RangeCut &range_cut = const_cast(range_->range_cut_); ObPxTabletRange::RangeCut::iterator found_it = std::lower_bound( range_cut.begin(), range_cut.end(), sort_key, sort_cmp); int64_t range_idx = found_it - range_cut.begin(); slice_indexes_[idx] = range_idx % task_cnt_; } sort_key.reuse(); } } indexes = slice_indexes_; } return ret; } bool ObRangeSliceIdCalc::Compare::operator()( const ObPxTabletRange::DatumKey &l, const ObPxTabletRange::DatumKey &r) { bool less = false; int &ret = ret_; if (OB_FAIL(ret)) { // already fail } else if (OB_ISNULL(sort_cmp_funs_) || OB_ISNULL(sort_collations_)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(l), K(r)); } else { int cmp = 0; const int64_t cnt = sort_cmp_funs_->count(); for (int64_t i = 0; 0 == cmp && i < cnt; i++) { cmp = sort_cmp_funs_->at(i).cmp_func_(l[i], r[i]); if (cmp < 0) { less = sort_collations_->at(i).is_ascending_; } else if (cmp > 0) { less = !sort_collations_->at(i).is_ascending_; } } } return less; } int ObNullAwareHashSliceIdCalc::get_slice_indexes(const ObIArray &exprs, ObEvalCtx &eval_ctx, SliceIdxArray &slice_idx_array) { int ret = OB_SUCCESS; uint64_t hash_val = SLICE_CALC_HASH_SEED; UNUSED(exprs); bool processed = false; slice_idx_array.reuse(); if (OB_ISNULL(hash_dist_exprs_) || hash_dist_exprs_->count() != 1) { ret = OB_ERR_UNEXPECTED; LOG_WARN("null aware hash join can only process 1 join key now", K(ret), K(hash_dist_exprs_)); } else if (OB_FAIL(calc_for_null_aware(*hash_dist_exprs_->at(0), task_cnt_, eval_ctx, slice_idx_array, processed))) { LOG_WARN("failed to calc for null aware", K(ret)); } else if (processed) { // do nothing } else if (OB_FAIL(ObHashSliceIdCalc::calc_hash_value(eval_ctx, hash_val))) { LOG_WARN("fail calc hash value null aware", K(ret)); } else { OZ (slice_idx_array.push_back(hash_val % task_cnt_)); } return ret; } int ObNullAwareAffinitizedRepartSliceIdxCalc::init() { int ret = OB_SUCCESS; OZ (ObRepartSliceIdxCalc::init()); OX (support_vectorized_calc_ = false); return ret; } int ObNullAwareAffinitizedRepartSliceIdxCalc::get_slice_indexes( const ObIArray &exprs, ObEvalCtx &eval_ctx, SliceIdxArray &slice_idx_array) { int ret = OB_SUCCESS; UNUSED(exprs); int64_t tablet_id = OB_INVALID_INDEX; int64_t slice_idx = OB_INVALID_INDEX; bool processed = false; slice_idx_array.reuse(); if (task_count_ <= 0) { ret = OB_NOT_INIT; LOG_WARN("task_count not inited", K_(task_count), K(ret)); } else if (OB_ISNULL(repartition_exprs_) || 1 != repartition_exprs_->count()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected repartition exprs", KP(repartition_exprs_)); } else if (px_repart_ch_map_.size() <= 0) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid map size, affinity map should not be empty!", K_(task_count), K(ret)); } else if (OB_FAIL(calc_for_null_aware(*repartition_exprs_->at(0), task_count_, eval_ctx, slice_idx_array, processed))) { LOG_WARN("failed to calc for null aware", K(ret)); } else if (processed) { // do nothing } else if (OB_FAIL(ObAffinitizedRepartSliceIdxCalc::get_slice_idx(exprs, eval_ctx, slice_idx))) { LOG_WARN("failed to get slice idx", K(ret)); } else { OZ(slice_idx_array.push_back(slice_idx)); } return ret; }