/** * Copyright (c) 2021 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #include "storage/ob_multiple_multi_scan_merge.h" #include #include "storage/ob_row_fuse.h" #if !USE_NEW_MULTIPLE_MULTI_SCAN_MERGE namespace oceanbase { using namespace common; namespace storage { ObMultipleMultiScanMerge::ObMultipleMultiScanMerge() : ObMultipleScanMergeImpl(), get_num_(0), ranges_(NULL), cow_ranges_() { memset(get_items_, 0, sizeof(ObStoreRow*) * MAX_TABLE_CNT_IN_STORAGE); } ObMultipleMultiScanMerge::~ObMultipleMultiScanMerge() {} int ObMultipleMultiScanMerge::is_get_data_ready(bool& is_ready) { int ret = OB_SUCCESS; int64_t cur_get_index_ = -1; STORAGE_LOG(DEBUG, "calc_get_index", K(get_num_), K(iters_.count())); if (get_num_ == iters_.count()) { for (int64_t i = 0; OB_SUCC(ret) && i < iters_.count(); ++i) { if (NULL == get_items_[i]) { cur_get_index_ = -1; break; } else if (cur_get_index_ != -1 && cur_get_index_ != get_items_[i]->scan_index_) { cur_get_index_ = -1; break; } else { cur_get_index_ = get_items_[i]->scan_index_; } } } if (OB_SUCC(ret)) { is_ready = cur_get_index_ != -1; } return ret; } int ObMultipleMultiScanMerge::is_scan_data_ready(bool& is_ready) { int ret = OB_SUCCESS; is_ready = !is_scan_end(); return ret; } void ObMultipleMultiScanMerge::reset() { ObMultipleScanMergeImpl::reset(); get_num_ = 0; ranges_ = NULL; cow_ranges_.reset(); memset(get_items_, 0, sizeof(ObStoreRow*) * MAX_TABLE_CNT_IN_STORAGE); } int ObMultipleMultiScanMerge::open(const ObIArray& ranges) { int ret = OB_SUCCESS; if (OB_UNLIKELY(ranges.count() <= 0)) { STORAGE_LOG(WARN, "Invalid range count ", K(ret), K(ranges.count())); } else if (OB_FAIL(ObMultipleMerge::open())) { STORAGE_LOG(WARN, "Fail to open ObMultipleMerge, ", K(ret)); } else if (OB_FAIL(to_collation_free_range_on_demand(ranges, *access_ctx_->allocator_))) { STORAGE_LOG(WARN, "fail to get collation free rowkey", K(ret)); } else { row_filter_ = NULL; if (NULL != access_ctx_->row_filter_ && tables_handle_.has_split_source_table(access_ctx_->pkey_)) { row_filter_ = access_ctx_->row_filter_; } ranges_ = &ranges; if (OB_FAIL(ObMultipleMultiScanMerge::prepare())) { STORAGE_LOG(WARN, "fail to prepare", K(ret)); } else if (OB_FAIL(construct_iters())) { STORAGE_LOG(WARN, "fail to construct iters", K(ret)); } else if (OB_UNLIKELY(access_ctx_->need_prewarm())) { access_ctx_->store_ctx_->warm_up_ctx_->record_multi_scan(*access_param_, *access_ctx_, ranges); } } return ret; } int ObMultipleMultiScanMerge::calc_scan_range() { int ret = OB_SUCCESS; ObStoreRange rowkey_range; if (!curr_rowkey_.is_valid()) { // no row has been iterated } else if (NULL == access_param_ || NULL == access_ctx_) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "multiple multi scan merge not inited", K(ret), KP(access_param_), KP(access_ctx_)); } else if (OB_ISNULL(ranges_)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "ranges is NULL", K(ret)); } else if (OB_FAIL(rowkey_range.build_range(access_param_->iter_param_.table_id_, curr_rowkey_))) { STORAGE_LOG(WARN, "fail to build range for rowkey", K(ret)); } else { ObArray tmp_ranges; if (OB_FAIL(tmp_ranges.reserve(ranges_->count()))) { STORAGE_LOG(WARN, "fail to reserve memory for range array", K(ret)); } for (int64_t i = 0; i < ranges_->count() && OB_SUCC(ret); ++i) { if (OB_FAIL(tmp_ranges.push_back(ranges_->at(i)))) { STORAGE_LOG(WARN, "fail to push back range", K(ret)); } } if (OB_SUCC(ret)) { if (ranges_ != &cow_ranges_) { cow_ranges_.reset(); ranges_ = &cow_ranges_; } } if (OB_SUCC(ret)) { const bool is_reverse_scan = access_ctx_->query_flag_.is_reverse_scan(); int64_t l = curr_scan_index_; int64_t r = tmp_ranges.count(); cow_ranges_.reuse(); for (int64_t i = l; i < r && OB_SUCC(ret); ++i) { ObExtStoreRange& range = tmp_ranges.at(i); if (curr_scan_index_ == i && range.get_range().include(rowkey_range)) { range.change_boundary(curr_rowkey_, is_reverse_scan, true); if (range.get_range().is_valid()) { if (OB_FAIL(const_cast(range).to_collation_free_range_on_demand_and_cutoff_range( *access_ctx_->allocator_))) { STORAGE_LOG(WARN, "fail to get collation free rowkey", K(ret)); } else if (OB_FAIL(cow_ranges_.push_back(range))) { STORAGE_LOG(WARN, "push back range failed", K(ret)); } else { range_idx_delta_ += i; } } else { range_idx_delta_ += i + 1; } } else if (OB_FAIL(cow_ranges_.push_back(range))) { STORAGE_LOG(WARN, "push back range failed", K(ret)); } } } } return ret; } int ObMultipleMultiScanMerge::is_range_valid() const { int ret = OB_SUCCESS; if (OB_ISNULL(ranges_)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "ranges is null", K(ret)); } else if (0 == ranges_->count()) { ret = OB_ITER_END; } return ret; } OB_INLINE int ObMultipleMultiScanMerge::prepare() { get_num_ = 0; return ObMultipleScanMergeImpl::prepare_loser_tree(); } void ObMultipleMultiScanMerge::collect_merge_stat(ObTableStoreStat& stat) const { stat.multi_scan_stat_.call_cnt_++; stat.multi_scan_stat_.output_row_cnt_ += table_stat_.output_row_cnt_; } int ObMultipleMultiScanMerge::construct_iters() { int ret = OB_SUCCESS; const ObIArray& tables = tables_handle_.get_tables(); consumer_.reset(); if (OB_UNLIKELY(iters_.count() > 0 && iters_.count() != tables.count())) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "iter cnt is not equal to table cnt", K(ret), "iter cnt", iters_.count(), "table cnt", tables.count(), KP(this)); } else if (tables.count() > 0) { ObITable* table = NULL; ObStoreRowIterator* iter = NULL; const ObTableIterParam* iter_param = NULL; bool use_cache_iter = iters_.count() > 0; if (OB_FAIL(loser_tree_.init(tables.count(), *access_ctx_->stmt_allocator_))) { STORAGE_LOG(WARN, "init loser tree fail", K(ret)); } for (int64_t i = tables.count() - 1; OB_SUCC(ret) && i >= 0; --i) { if (OB_FAIL(tables.at(i, table))) { STORAGE_LOG(WARN, "Fail to get i store, ", K(i), K(ret)); } else if (OB_ISNULL(iter_param = get_actual_iter_param(table))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Fail to get access param", K(i), K(ret), K(*table)); } else if (!use_cache_iter) { if (OB_FAIL(table->multi_scan(*iter_param, *access_ctx_, *ranges_, iter))) { STORAGE_LOG(WARN, "Fail to get iterator, ", K(ret), K(*iter_param), K(*access_ctx_), K(i)); } else if (OB_FAIL(iters_.push_back(iter))) { iter->~ObStoreRowIterator(); STORAGE_LOG(WARN, "Fail to push iter to iterator array, ", K(ret), K(i)); } } else if (OB_ISNULL(iters_.at(tables.count() - 1 - i))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected null iter", K(ret), "idx", tables.count() - 1 - i, K_(iters)); } else if (OB_FAIL(iter->init(*iter_param, *access_ctx_, table, ranges_))) { STORAGE_LOG(WARN, "failed to init scan iter", K(ret), "idx", tables.count() - i); } if (OB_SUCC(ret)) { consumer_.add_consumer(i); STORAGE_LOG(DEBUG, "add iter for consumer", KPC(table), KPC(access_param_)); } } if (OB_SUCC(ret)) { if (OB_FAIL(prepare_range_skip())) { STORAGE_LOG(WARN, "Fail to prepare range skip", K(ret)); } } } return ret; } void ObMultipleMultiScanMerge::reuse() { ObMultipleScanMergeImpl::reuse(); } int ObMultipleMultiScanMerge::supply_consume() { int ret = OB_SUCCESS; ObScanMergeLoserTreeItem item; const int64_t consumer_cnt = consumer_.get_consumer_num(); for (int64_t i = 0; OB_SUCC(ret) && i < consumer_cnt; ++i) { const int64_t iter_idx = consumer_.get_consumer_iters()[i]; ObStoreRowIterator* iter = iters_.at(iter_idx); if (NULL == iter) { ret = common::OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected error", K(ret), K(iter)); } else if (OB_FAIL(iter->get_next_row_ext(item.row_, item.iter_flag_))) { if (common::OB_ITER_END != ret) { STORAGE_LOG(WARN, "failed to get next row from iterator", "index", iter_idx, "iterator", *iter); } else if (try_push_top_item_ && 1 != consumer_cnt) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "try_push_top_item_ mismatch", K(ret), K(consumer_cnt)); } else { ret = common::OB_SUCCESS; } } else if (OB_ISNULL(item.row_)) { ret = common::OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "get next row return NULL row", "iter_index", iter_idx, K(ret)); } else { const int64_t iter_idx = consumer_.get_consumer_iters()[i]; if (item.row_->is_get_) { get_items_[iter_idx] = item.row_; ++get_num_; } else { item.iter_idx_ = consumer_.get_consumer_iters()[i]; if (try_push_top_item_) { if (1 != consumer_cnt) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "try_push_top_item_ mismatch", K(ret), K(consumer_cnt)); } else if (OB_FAIL(loser_tree_.push_top(item))) { STORAGE_LOG(WARN, "loser tree push top error", K(ret)); } try_push_top_item_ = false; } else if (OB_FAIL(loser_tree_.push(item))) { STORAGE_LOG(WARN, "loser tree push error", K(ret)); } else { STORAGE_LOG(DEBUG, "push", K(item.iter_idx_), K(*item.row_), K(access_param_->iter_param_.table_id_)); } } if (OB_SUCC(ret)) { if (0 == iter_idx) { ++row_stat_.inc_row_count_; } else { ++row_stat_.base_row_count_; } } } } if (OB_SUCC(ret)) { // no worry, if no new items pushed, the rebuild will quickly exit if (OB_FAIL(loser_tree_.rebuild())) { STORAGE_LOG(WARN, "loser tree rebuild fail", K(ret)); } else { consumer_.set_consumer_num(0); try_push_top_item_ = false; } } return ret; } int ObMultipleMultiScanMerge::inner_get_next_row_for_get(ObStoreRow& row, bool& need_retry) { int ret = OB_SUCCESS; const ObStoreRow* tmp_row = NULL; bool final_result = false; nop_pos_.reset(); row.row_val_.count_ = 0; row.flag_ = ObActionFlag::OP_ROW_DOES_NOT_EXIST; row.from_base_ = false; need_retry = false; if (get_num_ < iters_.count()) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "get_num must not less than iters count", K(ret), K_(get_num), K(iters_.count())); } for (int64_t i = 0; OB_SUCC(ret) && i < iters_.count(); ++i) { tmp_row = get_items_[i]; if (OB_ISNULL(tmp_row)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "unexpected error, row is NULL", K(ret)); } else { --get_num_; if (!final_result) { row.scan_index_ = tmp_row->scan_index_; if (OB_FAIL(ObRowFuse::fuse_row(*tmp_row, row, nop_pos_, final_result))) { STORAGE_LOG(WARN, "fail to merge rows", K(ret), "tmp_row", *tmp_row, K(row)); } } } } if (OB_SUCC(ret) && ObActionFlag::OP_ROW_EXIST != row.flag_) { need_retry = true; } if (OB_SUCC(ret)) { consumer_.set_consumer_num(0); get_num_ = 0; for (int64_t i = 0; i < iters_.count(); ++i) { consumer_.add_consumer(i); } } return ret; } int ObMultipleMultiScanMerge::inner_get_next_row_for_scan(ObStoreRow& row, bool& need_retry) { int ret = OB_SUCCESS; if (OB_FAIL(ObMultipleScanMergeImpl::inner_get_next_row(row, need_retry))) { STORAGE_LOG(WARN, "fail to inner get next row for scan", K(ret)); } STORAGE_LOG(DEBUG, "get_next_row", K(ret), K(row), K(need_retry)); return ret; } int ObMultipleMultiScanMerge::inner_get_next_row(ObStoreRow& row) { int ret = OB_SUCCESS; bool need_retry = true; bool is_data_ready = false; STORAGE_LOG(DEBUG, "inner_get_next_row", K(get_num_)); if (OB_UNLIKELY(0 == iters_.count())) { ret = OB_ITER_END; } else { while (OB_SUCC(ret) && need_retry) { if (OB_FAIL(supply_consume())) { if (OB_ITER_END != ret) { STORAGE_LOG(WARN, "supply consume failed", K(ret)); } } else { if (0 == get_num_ && is_scan_end()) { ret = OB_ITER_END; STORAGE_LOG(DEBUG, "inner_get_next_row reach end"); break; } if (OB_FAIL(is_scan_data_ready(is_data_ready))) { STORAGE_LOG(WARN, "fail to check if scan data is ready", K(ret)); } else if (is_data_ready) { if (OB_FAIL(inner_get_next_row_for_scan(row, need_retry))) { STORAGE_LOG(WARN, "fail to get next row for scan", K(ret)); } } else if (OB_FAIL(is_get_data_ready(is_data_ready))) { STORAGE_LOG(WARN, "fail t o check if get data is ready", K(ret)); } else if (is_data_ready) { if (OB_FAIL(inner_get_next_row_for_get(row, need_retry))) { STORAGE_LOG(WARN, "fail to get next row for get", K(ret)); } } else { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "unexpected error, both get and scan have no data", K(ret)); } } } } if (OB_SUCC(ret)) { row.range_array_idx_ = ranges_->at(row.scan_index_).get_range_array_idx(); STORAGE_LOG(DEBUG, "multi_scan_merge: get_next_row", K(row)); } return ret; } int ObMultipleMultiScanMerge::to_collation_free_range_on_demand( const ObIArray& ranges, common::ObIAllocator& allocator) { int ret = OB_SUCCESS; if (0 == ranges.count()) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid rowkeys count", K(ret), K(ranges.count())); } for (int64_t i = 0; OB_SUCC(ret) && i < ranges.count(); ++i) { if (OB_FAIL( const_cast(ranges.at(i)).to_collation_free_range_on_demand_and_cutoff_range(allocator))) { STORAGE_LOG(WARN, "fail to get collation free rowkey", K(ret), K(ranges.at(i).get_range())); } } return ret; } int ObMultipleMultiScanMerge::estimate_row_count(const ObQueryFlag query_flag, const uint64_t table_id, const common::ObIArray& ranges, const common::ObIArray& tables, ObPartitionEst& part_estimate, common::ObIArray& est_records) { int ret = OB_SUCCESS; part_estimate.reset(); est_records.reuse(); if (OB_INVALID_ID == table_id || ranges.count() <= 0 || tables.count() <= 0) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(table_id), K(ranges), K(tables.count())); } else { // factors that including column cnt, column compare and primary key which is not included in output columns // are not calculated in the cost ObPartitionEst table_est; ObEstRowCountRecord record; for (int64_t i = 0; OB_SUCC(ret) && i < tables.count(); ++i) { int64_t start_time = common::ObTimeUtility::current_time(); table_est.reset(); ObITable* table = tables.at(i); if (NULL == table) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected error, store shouldn't be null", K(ret), KP(table)); } else if (OB_SUCCESS != (ret = table->estimate_multi_scan_row_count(query_flag, table_id, ranges, table_est))) { STORAGE_LOG(WARN, "failed to estimate cost", K(ranges), K(table->get_key()), K(i), K(ret)); } else if (OB_FAIL(part_estimate.add(table_est))) { STORAGE_LOG(WARN, "failed to add table estimation", K(ret), K(i), K(table_est), K(table->get_key())); } else { record.table_id_ = table->get_key().table_id_; record.table_type_ = table->get_key().table_type_; record.version_range_ = table->get_key().trans_version_range_; record.logical_row_count_ = table_est.logical_row_count_; record.physical_row_count_ = table_est.physical_row_count_; if (OB_FAIL(est_records.push_back(record))) { STORAGE_LOG(WARN, "failed to push back est row count record", K(ret)); } else { STORAGE_LOG(DEBUG, "table estimate row count", K(table->get_key()), K(table_est), "cost time", common::ObTimeUtility::current_time() - start_time); } } } part_estimate.logical_row_count_ = part_estimate.logical_row_count_ < 0 ? 1 : part_estimate.logical_row_count_; part_estimate.physical_row_count_ = part_estimate.physical_row_count_ < 0 ? 1 : part_estimate.physical_row_count_; if (part_estimate.logical_row_count_ > part_estimate.physical_row_count_) { part_estimate.logical_row_count_ = part_estimate.physical_row_count_; } STORAGE_LOG(DEBUG, "final estimate", K(part_estimate)); } return ret; } int ObMultipleMultiScanMerge::skip_to_range(const int64_t range_idx) { int ret = OB_SUCCESS; if (OB_UNLIKELY(range_idx < 0)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid arguments", K(ret), K(range_idx)); } else if (range_idx >= ranges_->count()) { ret = OB_ITER_END; } else { // skip data in get items for (int64_t i = 0; i < iters_.count(); ++i) { if (nullptr != get_items_[i]) { if (get_items_[i]->scan_index_ < range_idx) { get_items_[i] = nullptr; } } } // skip data in heap and iterators const bool include_gap_key = ranges_->at(range_idx).get_range().get_border_flag().inclusive_start(); const ObStoreRowkey& rowkey = ranges_->at(range_idx).get_range().get_start_key(); STORAGE_LOG(DEBUG, "skip to range", K(include_gap_key), K(rowkey), K(range_idx)); if (OB_FAIL(reset_range(0L /*skip all tables*/, range_idx, &rowkey, include_gap_key))) { STORAGE_LOG(WARN, "fail to reset range", K(ret)); } else { try_push_top_item_ = false; } } return ret; } } // namespace storage } // namespace oceanbase #endif