From ffe1ed06af76d4d112ebfee964faddfca8d02346 Mon Sep 17 00:00:00 2001 From: chaser-ch Date: Wed, 30 Oct 2024 17:47:29 +0000 Subject: [PATCH] opt split range with estimate method --- deps/oblib/src/common/ob_store_range.h | 2 +- .../storage/test_index_sstable_estimator.cpp | 93 ++---- .../test_index_sstable_multi_estimator.cpp | 117 ++------ .../access/ob_index_sstable_estimator.cpp | 281 ++++++++++++------ .../access/ob_index_sstable_estimator.h | 69 +++-- .../access/ob_index_tree_prefetcher.cpp | 4 +- src/storage/access/ob_table_estimator.cpp | 5 +- .../ob_index_block_row_scanner.cpp | 6 +- .../index_block/ob_index_block_row_scanner.h | 2 +- .../index_block/ob_index_block_row_struct.h | 12 + src/storage/ddl/ob_tablet_split_task.cpp | 2 +- src/storage/ob_partition_range_spliter.cpp | 104 ++++--- src/storage/ob_partition_range_spliter.h | 8 +- 13 files changed, 384 insertions(+), 321 deletions(-) diff --git a/deps/oblib/src/common/ob_store_range.h b/deps/oblib/src/common/ob_store_range.h index 54a6fe247..b0f68dcd7 100644 --- a/deps/oblib/src/common/ob_store_range.h +++ b/deps/oblib/src/common/ob_store_range.h @@ -238,7 +238,7 @@ bool ObStoreRange::is_single_rowkey() const { int ret = true; - if (!start_key_.simple_equal(end_key_)) { + if (0 != start_key_.compare(end_key_)) { ret = false; } else if (!border_flag_.inclusive_start() || !border_flag_.inclusive_end()) { ret = false; diff --git a/mittest/mtlenv/storage/test_index_sstable_estimator.cpp b/mittest/mtlenv/storage/test_index_sstable_estimator.cpp index c71c04ba7..6c7ae723f 100644 --- a/mittest/mtlenv/storage/test_index_sstable_estimator.cpp +++ b/mittest/mtlenv/storage/test_index_sstable_estimator.cpp @@ -41,6 +41,7 @@ public: protected: void prepare_context(); void generate_range(const int64_t start, const int64_t end, ObDatumRange &range); + void get_part_est(ObSSTable &sstable, const ObDatumRange &range, ObPartitionEst &part_est); private: ObDatumRow start_row_; ObDatumRow end_row_; @@ -117,25 +118,27 @@ 
void TestIndexSSTableEstimator::generate_range( } } +void TestIndexSSTableEstimator::get_part_est( + ObSSTable &sstable, const ObDatumRange &range, ObPartitionEst &part_est) +{ + ObIndexSSTableEstimateContext esti_ctx(tablet_handle_, context_.query_flag_); + ObIndexBlockScanEstimator estimator(esti_ctx); + ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(sstable, range, part_est)); +} + TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_whole_range) { ObDatumRange range; range.set_whole_range(); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + get_part_est(sstable_, range, part_est); + get_part_est(ddl_kv_, range, ddl_kv_part_est); + get_part_est(partial_sstable_, range, ddl_merge_part_est); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); ASSERT_EQ(part_est, ddl_merge_part_est); } @@ -144,20 +147,14 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_range) { ObDatumRange range; generate_range(100, -1, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext 
ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + get_part_est(sstable_, range, part_est); + get_part_est(ddl_kv_, range, ddl_kv_part_est); + get_part_est(partial_sstable_, range, ddl_merge_part_est); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); ASSERT_EQ(part_est, ddl_merge_part_est); } @@ -166,20 +163,14 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_left_range) { ObDatumRange range; generate_range(-1, 100, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + get_part_est(sstable_, range, part_est); + get_part_est(ddl_kv_, range, ddl_kv_part_est); + get_part_est(partial_sstable_, range, ddl_merge_part_est); + 
STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); ASSERT_EQ(part_est, ddl_merge_part_est); } @@ -188,20 +179,14 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_right_range) { ObDatumRange range; generate_range(row_cnt_ - 100, -1, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + get_part_est(sstable_, range, part_est); + get_part_est(ddl_kv_, range, ddl_kv_part_est); + get_part_est(partial_sstable_, range, ddl_merge_part_est); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); ASSERT_EQ(part_est, ddl_merge_part_est); } @@ -210,20 +195,14 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_middle_range) { ObDatumRange range; generate_range(100, row_cnt_ - 100, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); ObPartitionEst ddl_kv_part_est; - 
ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + get_part_est(sstable_, range, part_est); + get_part_est(ddl_kv_, range, ddl_kv_part_est); + get_part_est(partial_sstable_, range, ddl_merge_part_est); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); ASSERT_EQ(part_est, ddl_merge_part_est); } @@ -232,20 +211,14 @@ TEST_F(TestIndexSSTableEstimator, estimate_major_sstable_noexist_range) { ObDatumRange range; generate_range(row_cnt_, row_cnt_, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); + get_part_est(sstable_, range, part_est); + get_part_est(ddl_kv_, range, ddl_kv_part_est); + get_part_est(partial_sstable_, range, ddl_merge_part_est); + STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); ASSERT_EQ(part_est, ddl_merge_part_est); } diff --git 
a/mittest/mtlenv/storage/test_index_sstable_multi_estimator.cpp b/mittest/mtlenv/storage/test_index_sstable_multi_estimator.cpp index 674fc009b..4f04e8671 100644 --- a/mittest/mtlenv/storage/test_index_sstable_multi_estimator.cpp +++ b/mittest/mtlenv/storage/test_index_sstable_multi_estimator.cpp @@ -41,6 +41,7 @@ public: protected: void prepare_context(); void generate_range(const int64_t start, const int64_t end, ObDatumRange &range); + void get_part_est(ObSSTable &sstable, const ObDatumRange &range); // ObTableParam table_param_; private: @@ -120,130 +121,68 @@ void TestMultiVersionIndexSSTableEstimator::generate_range( } } +void TestMultiVersionIndexSSTableEstimator::get_part_est(ObSSTable &sstable, const ObDatumRange &range) +{ + ObPartitionEst part_est; + ObIndexSSTableEstimateContext esti_ctx(tablet_handle_, context_.query_flag_); + ObIndexBlockScanEstimator estimator(esti_ctx); + ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(sstable, range, part_est)); + STORAGE_LOG(INFO, "part_est", K(part_est)); +} + TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_minor_sstable_whole_range) { ObDatumRange range; range.set_whole_range(); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); + get_part_est(sstable_, range); + get_part_est(ddl_kv_, range); + get_part_est(partial_sstable_, range); - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); - ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); - ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, 
ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_minor_sstable_range) { ObDatumRange range; generate_range(100, -1, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); - ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); - ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + get_part_est(sstable_, range); + get_part_est(ddl_kv_, range); + get_part_est(partial_sstable_, range); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_left_range) { ObDatumRange range; generate_range(-1, 100, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); - ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator 
ddl_kv_estimator(ddl_kv_esti_ctx); - ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + get_part_est(sstable_, range); + get_part_est(ddl_kv_, range); + get_part_est(partial_sstable_, range); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_right_range) { ObDatumRange range; generate_range(row_cnt_ - 100, -1, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); - ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); - ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + get_part_est(sstable_, range); + get_part_est(ddl_kv_, range); + get_part_est(partial_sstable_, range); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_middle_range) { ObDatumRange range; generate_range(100, row_cnt_ - 100, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - 
ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); - ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); - ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + get_part_est(sstable_, range); + get_part_est(ddl_kv_, range); + get_part_est(partial_sstable_, range); } TEST_F(TestMultiVersionIndexSSTableEstimator, estimate_major_sstable_noexist_range) { ObDatumRange range; generate_range(row_cnt_, row_cnt_, range); - ObIndexSSTableEstimateContext esti_ctx(sstable_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_kv_esti_ctx(ddl_kv_, tablet_handle_, context_.query_flag_, range); - ObIndexSSTableEstimateContext ddl_merge_esti_ctx(partial_sstable_, tablet_handle_, context_.query_flag_, range); - - ObPartitionEst part_est; - ObIndexBlockScanEstimator estimator(esti_ctx); - ObPartitionEst ddl_kv_part_est; - ObIndexBlockScanEstimator ddl_kv_estimator(ddl_kv_esti_ctx); - ObPartitionEst ddl_merge_part_est; - ObIndexBlockScanEstimator ddl_merge_estimator(ddl_merge_esti_ctx); - - ASSERT_EQ(OB_SUCCESS, estimator.estimate_row_count(part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_kv_estimator.estimate_row_count(ddl_kv_part_est)); - ASSERT_EQ(OB_SUCCESS, ddl_merge_estimator.estimate_row_count(ddl_merge_part_est)); - STORAGE_LOG(INFO, "part_est", K(part_est), K(ddl_kv_part_est), K(ddl_merge_part_est)); + 
get_part_est(sstable_, range); + get_part_est(ddl_kv_, range); + get_part_est(partial_sstable_, range); } diff --git a/src/storage/access/ob_index_sstable_estimator.cpp b/src/storage/access/ob_index_sstable_estimator.cpp index bba2c1bc1..97c2672c4 100644 --- a/src/storage/access/ob_index_sstable_estimator.cpp +++ b/src/storage/access/ob_index_sstable_estimator.cpp @@ -53,7 +53,9 @@ int ObPartitionEst::deep_copy(const ObPartitionEst &src) } ObIndexBlockScanEstimator::ObIndexBlockScanEstimator(const ObIndexSSTableEstimateContext &context) - : level_(0), context_(context), allocator_() + : level_(0), + context_(context), + allocator_("OB_STORAGE_EST", OB_MALLOC_MIDDLE_BLOCK_SIZE, MTL_ID()) { tenant_id_ = MTL_ID(); } @@ -69,115 +71,209 @@ ObIndexBlockScanEstimator::~ObIndexBlockScanEstimator() index_block_row_scanner_.reset(); } -int ObIndexBlockScanEstimator::estimate_row_count(ObPartitionEst &part_est) +void ObIndexBlockScanEstimator::reuse() +{ + root_index_block_.reset(); + index_block_row_scanner_.reuse(); + level_ = 0; + for (int64_t i = 0; i < DEFAULT_GET_MICRO_DATA_HANDLE_CNT; ++i) { + micro_handles_[i].reset(); + } + index_block_data_.reset(); + +} + +int ObIndexBlockScanEstimator::init_index_scanner(ObSSTable &sstable) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!context_.is_valid())) { + ret = common::OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "Invalid argument to init index scanner", K(ret), K(context_)); + } else if (index_block_row_scanner_.is_valid()) { + // need reuse index block row scanner + index_block_row_scanner_.switch_context( + sstable, nullptr, context_.index_read_info_.get_datum_utils(), context_.query_flag_); + } else if (OB_FAIL(index_block_row_scanner_.init( + context_.index_read_info_.get_datum_utils(), + allocator_, + context_.query_flag_, + sstable.get_macro_offset()))) { + STORAGE_LOG(WARN, "Failed to init index block row scanner", K(ret)); + } + if (FAILEDx(sstable.get_index_tree_root(root_index_block_))) { + STORAGE_LOG(WARN, "Failed 
to get index tree root", K(ret)); + } else if (sstable.is_ddl_merge_sstable()) { + if (OB_ISNULL(context_.tablet_handle_)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "Unexpected null tablet handle", K(ret), K(context_)); + } else { + index_block_row_scanner_.set_iter_param(&sstable, context_.tablet_handle_->get_obj()); + } + } + return ret; +} + +int ObIndexBlockScanEstimator::estimate_row_count(ObSSTable &sstable, + const blocksstable::ObDatumRange &datum_range, + ObPartitionEst &part_est) { int ret = OB_SUCCESS; ObEstimatedResult result; - - lib::ObMemAttr mem_attr(MTL_ID(), "OB_STORAGE_EST"); - if (OB_UNLIKELY(!context_.is_valid())) { - ret = common::OB_INVALID_ARGUMENT; - STORAGE_LOG(WARN, "estimate context is not valid", K(ret), K(context_)); - } else if (OB_FAIL(allocator_.init(nullptr, OB_MALLOC_MIDDLE_BLOCK_SIZE, mem_attr))) { - STORAGE_LOG(WARN, "Fail to init allocator", K(ret)); - } else if (OB_FAIL(index_block_row_scanner_.init( - context_.tablet_handle_.get_obj()->get_rowkey_read_info().get_datum_utils(), - allocator_, - context_.query_flag_, - context_.sstable_.get_macro_offset()))) { - STORAGE_LOG(WARN, "Failed to init index block row scanner", K(ret)); - } else if (OB_FAIL(context_.sstable_.get_index_tree_root(root_index_block_))) { - STORAGE_LOG(WARN, "Failed to get index tree root", K(ret)); - } else if (context_.sstable_.is_ddl_merge_sstable()) { - index_block_row_scanner_.set_iter_param(&context_.sstable_, - context_.tablet_handle_.get_obj()); - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(cal_total_row_count(result))) { - STORAGE_LOG(WARN, "Failed to get total_row_count_delta", K(ret), K(root_index_block_)); - } else if (result.total_row_count_ > 0) { - if (context_.range_.is_whole_range()) { - } else { - if (!context_.range_.get_start_key().is_min_rowkey()) { - if (OB_FAIL(estimate_excluded_border_row_count(true, result))) { - STORAGE_LOG(WARN, "Failed to estimate left excluded row count", K(ret)); - } - } - if (OB_SUCC(ret) && 
!context_.range_.get_end_key().is_max_rowkey()) { - level_ = 0; - if (OB_FAIL(estimate_excluded_border_row_count(false, result))) { - STORAGE_LOG(WARN, "Failed to estimate right excluded row count", K(ret)); - } - } - } - } - if (OB_SUCC(ret)) { + if (OB_FAIL(cal_total_estimate_result(sstable, datum_range, result))) { + STORAGE_LOG(WARN, "Failed to get total estimate result", K(ret), K(root_index_block_)); + } else { part_est.physical_row_count_ = result.total_row_count_ - result.excluded_row_count_; - if (context_.sstable_.is_multi_version_minor_sstable()) { + if (sstable.is_multi_version_minor_sstable()) { part_est.logical_row_count_ = result.total_row_count_delta_ - result.excluded_row_count_delta_; } else { part_est.logical_row_count_ = part_est.physical_row_count_; } } - STORAGE_LOG(DEBUG, "estimate result", K(ret), K(result), K(part_est)); + STORAGE_LOG(DEBUG, "estimate row count result", K(ret), K(result), K(part_est)); return ret; } -int ObIndexBlockScanEstimator::cal_total_row_count(ObEstimatedResult &result) +int ObIndexBlockScanEstimator::estimate_block_count(ObSSTable &sstable, + const blocksstable::ObDatumRange &datum_range, + int64_t &macro_block_cnt, + int64_t &micro_block_cnt) +{ + int ret = OB_SUCCESS; + ObEstimatedResult result(true /* for block */); + if (OB_FAIL(cal_total_estimate_result(sstable, datum_range, result))) { + STORAGE_LOG(WARN, "Failed to get total estimate result", K(ret), K(root_index_block_)); + } else { + macro_block_cnt = MAX(result.macro_block_cnt_, 1); + micro_block_cnt = MAX(result.micro_block_cnt_, 1); + STORAGE_LOG(TRACE, "estimate block count result", K(ret), K(result), K(macro_block_cnt), K(micro_block_cnt)); + } + return ret; +} + +int ObIndexBlockScanEstimator::cal_total_estimate_result( + ObSSTable &sstable, + const blocksstable::ObDatumRange &datum_range, + ObEstimatedResult &result) { int ret = OB_SUCCESS; // TODO remove this if we can get row_count_delta from sstable meta directly // result.total_row_count_ =
context_.sstable_->get_meta().get_row_count(); - blocksstable::ObDatumRange whole_range; - whole_range.set_whole_range(); - if (OB_FAIL(index_block_row_scanner_.open( - ObIndexBlockRowHeader::DEFAULT_IDX_ROW_MACRO_ID, - root_index_block_, - whole_range, - 0, - true, - true))) { - if (OB_BEYOND_THE_RANGE != ret) { - STORAGE_LOG(WARN, "Failed to open whole range", K(ret), K(root_index_block_)); - } else { - ret = OB_SUCCESS; + if (sstable.is_ddl_merge_sstable() && context_.tablet_handle_ == nullptr) { + if (OB_FAIL(cal_total_estimate_result_for_ddl(sstable, datum_range, result))) { + STORAGE_LOG(WARN, "Failed to cal estimate result for ddl merge sstable", K(ret)); } + } else if (OB_FAIL(init_index_scanner(sstable))) { + STORAGE_LOG(WARN, "Failed to init index scanner", K(ret)); } else { - blocksstable::ObMicroIndexInfo tmp_micro_index_info; - while (OB_SUCC(ret)) { - if (OB_FAIL(index_block_row_scanner_.get_next(tmp_micro_index_info))) { - if (OB_ITER_END != ret) { - STORAGE_LOG(WARN, "Failed to get next index row", K(ret), K(index_block_row_scanner_)); - } + ObDatumRange whole_range; + whole_range.set_whole_range(); + if (OB_FAIL(index_block_row_scanner_.open( + ObIndexBlockRowHeader::DEFAULT_IDX_ROW_MACRO_ID, + root_index_block_, + whole_range, + 0, + true, + true))) { + if (OB_BEYOND_THE_RANGE != ret) { + STORAGE_LOG(WARN, "Failed to open whole range", K(ret), K(root_index_block_)); } else { - result.total_row_count_ += tmp_micro_index_info.get_row_count(); - result.total_row_count_delta_ += tmp_micro_index_info.get_row_count_delta(); + ret = OB_SUCCESS; + } + } else { + ObMicroIndexInfo tmp_micro_index_info; + while (OB_SUCC(ret)) { + if (OB_FAIL(index_block_row_scanner_.get_next(tmp_micro_index_info))) { + if (OB_ITER_END != ret) { + STORAGE_LOG(WARN, "Failed to get next index row", K(ret), K(index_block_row_scanner_)); + } + } else { + result.total_row_count_ += tmp_micro_index_info.get_row_count(); + result.total_row_count_delta_ += 
tmp_micro_index_info.get_row_count_delta(); + result.macro_block_cnt_ += tmp_micro_index_info.get_macro_block_count(); + result.micro_block_cnt_ += tmp_micro_index_info.get_micro_block_count(); + } + } + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; } } - if (OB_ITER_END == ret) { - ret = OB_SUCCESS; + if (OB_SUCC(ret) && result.total_row_count_ > 0) { + if (datum_range.is_whole_range()) { + } else { + const bool is_multi_version_minor = sstable.is_multi_version_minor_sstable(); + if (!datum_range.get_start_key().is_min_rowkey()) { + if (OB_FAIL(estimate_excluded_border_result( + is_multi_version_minor, datum_range, true, result))) { + STORAGE_LOG(WARN, "Failed to estimate left excluded row count", K(ret)); + } + } + if (OB_SUCC(ret) && !datum_range.get_end_key().is_max_rowkey()) { + level_ = 0; + if (OB_FAIL(estimate_excluded_border_result( + is_multi_version_minor, datum_range, false, result))) { + STORAGE_LOG(WARN, "Failed to estimate right excluded row count", K(ret)); + } + } + } } } return ret; } -int ObIndexBlockScanEstimator::estimate_excluded_border_row_count(bool is_left, ObEstimatedResult &result) +int ObIndexBlockScanEstimator::cal_total_estimate_result_for_ddl(ObSSTable &sstable, + const blocksstable::ObDatumRange &datum_range, + ObEstimatedResult &result) +{ + int ret = OB_SUCCESS; + + ObSSTableMetaHandle meta_handle; + if (OB_FAIL(sstable.get_meta(meta_handle))) { + STORAGE_LOG(WARN, "get sstable meta failed", K(ret)); + } else { + int64_t factor = 1; + const ObSSTableBasicMeta &basic_meta = meta_handle.get_sstable_meta().get_basic_meta(); + result.total_row_count_ = basic_meta.row_count_; + result.total_row_count_delta_ = 0; + result.macro_block_cnt_ = basic_meta.get_data_macro_block_count(); + result.micro_block_cnt_ = basic_meta.get_data_micro_block_count(); + if (OB_SUCC(ret) && result.total_row_count_ > 0) { + if (datum_range.is_whole_range()) { + } else { + if (!datum_range.get_start_key().is_min_rowkey()) { + factor = 10; + } + if 
(OB_SUCC(ret) && !datum_range.get_end_key().is_max_rowkey()) { + factor *= 2; + } + result.total_row_count_ = MAX(1, result.total_row_count_ / factor); + result.macro_block_cnt_ = MAX(1, result.macro_block_cnt_ / factor); + result.micro_block_cnt_ = MAX(1, result.micro_block_cnt_ / factor); + } + } + } + + return ret; +} + +int ObIndexBlockScanEstimator::estimate_excluded_border_result(const bool is_multi_version_minor, + const blocksstable::ObDatumRange &datum_range, + bool is_left, + ObEstimatedResult &result) { int ret = OB_SUCCESS; blocksstable::ObDatumRange excluded_range; - const blocksstable::ObDatumRange &orig_range = context_.range_; if (is_left) { excluded_range.start_key_.set_min_rowkey(); - excluded_range.set_end_key(orig_range.get_start_key()); - if (orig_range.get_border_flag().inclusive_start()) { + excluded_range.set_end_key(datum_range.get_start_key()); + if (datum_range.get_border_flag().inclusive_start()) { excluded_range.set_right_open(); } else { excluded_range.set_right_closed(); } } else { - excluded_range.set_start_key(orig_range.get_end_key()); + excluded_range.set_start_key(datum_range.get_end_key()); excluded_range.end_key_.set_max_rowkey(); - if (orig_range.get_border_flag().inclusive_end()) { + if (datum_range.get_border_flag().inclusive_end()) { excluded_range.set_left_open(); } else { excluded_range.set_left_closed(); @@ -216,7 +312,9 @@ int ObIndexBlockScanEstimator::estimate_excluded_border_row_count(bool is_left, border_micro_index_info = tmp_micro_index_info; } else { result.excluded_row_count_ += tmp_micro_index_info.get_row_count(); - if (context_.sstable_.is_multi_version_minor_sstable()) { + result.macro_block_cnt_ -= tmp_micro_index_info.get_macro_block_count(); + result.micro_block_cnt_ -= tmp_micro_index_info.get_micro_block_count(); + if (is_multi_version_minor) { result.excluded_row_count_delta_ += tmp_micro_index_info.get_row_count_delta(); } } @@ -224,7 +322,7 @@ int 
ObIndexBlockScanEstimator::estimate_excluded_border_row_count(bool is_left, } if (OB_ITER_END == ret && idx > 0) { - if (OB_FAIL(goto_next_level(excluded_range, border_micro_index_info, result))) { + if (OB_FAIL(goto_next_level(excluded_range, border_micro_index_info, is_multi_version_minor, result))) { if (OB_ITER_END != ret) { STORAGE_LOG(WARN, "Failed to go to next level", K(ret), K(border_micro_index_info), K(index_block_row_scanner_)); @@ -246,6 +344,7 @@ int ObIndexBlockScanEstimator::estimate_excluded_border_row_count(bool is_left, int ObIndexBlockScanEstimator::goto_next_level( const blocksstable::ObDatumRange &range, const blocksstable::ObMicroIndexInfo &micro_index_info, + const bool is_multi_version_minor, ObEstimatedResult &result) { int ret = OB_SUCCESS; @@ -254,20 +353,24 @@ int ObIndexBlockScanEstimator::goto_next_level( if (OB_FAIL(prefetch_index_block_data(micro_index_info, micro_handle))) { STORAGE_LOG(WARN, "Failed to prefetch index block", K(ret), K(micro_index_info)); } else if (micro_index_info.is_data_block()) { - ObPartitionEst tmp_part_est; - int64_t logical_row_count = 0, physical_row_count = 0; - if (OB_FAIL(estimate_data_block_row_count( - range, - micro_handle, - context_.sstable_.is_multi_version_minor_sstable(), - tmp_part_est))) { - STORAGE_LOG(WARN, "Failed to estimate data block row count", K(ret), K(micro_handle)); - } else { - result.excluded_row_count_ += tmp_part_est.physical_row_count_; - if (context_.sstable_.is_multi_version_minor_sstable()) { - result.excluded_row_count_delta_ += tmp_part_est.logical_row_count_; - } + if (result.only_block_) { ret = OB_ITER_END; + } else { + ObPartitionEst tmp_part_est; + int64_t logical_row_count = 0, physical_row_count = 0; + if (OB_FAIL(estimate_data_block_row_count( + range, + micro_handle, + is_multi_version_minor, + tmp_part_est))) { + STORAGE_LOG(WARN, "Failed to estimate data block row count", K(ret), K(micro_handle)); + } else { + result.excluded_row_count_ +=
tmp_part_est.physical_row_count_; + if (is_multi_version_minor) { + result.excluded_row_count_delta_ += tmp_part_est.logical_row_count_; + } + ret = OB_ITER_END; + } } } else { index_block_data_.reset(); @@ -347,7 +450,7 @@ int ObIndexBlockScanEstimator::estimate_data_block_row_count( if (OB_FAIL(micro_handle.get_micro_block_data(&macro_reader_, block_data))) { STORAGE_LOG(WARN, "Failed to get block data", K(ret), K(micro_handle)); } else if (OB_FAIL(block_scanner.estimate_row_count( - context_.tablet_handle_.get_obj()->get_rowkey_read_info(), + context_.index_read_info_, block_data, range, consider_multi_version, diff --git a/src/storage/access/ob_index_sstable_estimator.h b/src/storage/access/ob_index_sstable_estimator.h index 6ed3c70af..1279856c3 100644 --- a/src/storage/access/ob_index_sstable_estimator.h +++ b/src/storage/access/ob_index_sstable_estimator.h @@ -14,6 +14,7 @@ #define OCEANBASE_STORAGE_OB_INDEX_SSTABLE_ESTIMATOR_H #include "storage/access/ob_micro_block_handle_mgr.h" +#include "storage/tablet/ob_tablet.h" namespace oceanbase { @@ -42,39 +43,49 @@ struct ObIndexSSTableEstimateContext { public: ObIndexSSTableEstimateContext( - blocksstable::ObSSTable &sstable, const ObTabletHandle &tablet_handle, - const common::ObQueryFlag &query_flag, - const blocksstable::ObDatumRange &datum_range) - : sstable_(sstable), - tablet_handle_(tablet_handle), - query_flag_(query_flag), - range_(datum_range) {} + const common::ObQueryFlag &query_flag) + : index_read_info_(tablet_handle.get_obj()->get_rowkey_read_info()), + tablet_handle_(&tablet_handle), + query_flag_(query_flag) + { + } + ObIndexSSTableEstimateContext( + const ObITableReadInfo &index_read_info, + const common::ObQueryFlag &query_flag) + : index_read_info_(index_read_info), + tablet_handle_(nullptr), + query_flag_(query_flag) + {} ~ObIndexSSTableEstimateContext() {} OB_INLINE bool is_valid() const { - return tablet_handle_.is_valid() && range_.is_valid(); + return tablet_handle_ == nullptr ||
tablet_handle_->is_valid(); } - TO_STRING_KV(K_(tablet_handle), K_(query_flag), K_(range)); - blocksstable::ObSSTable &sstable_; - const ObTabletHandle &tablet_handle_; + TO_STRING_KV(K_(tablet_handle), K_(query_flag)); + const ObITableReadInfo &index_read_info_; + const ObTabletHandle *tablet_handle_; const common::ObQueryFlag &query_flag_; - const blocksstable::ObDatumRange &range_; }; struct ObEstimatedResult { - ObEstimatedResult() : total_row_count_(0), total_row_count_delta_(0), - excluded_row_count_(0), excluded_row_count_delta_(0) + ObEstimatedResult(const bool only_block = false) + : total_row_count_(0), total_row_count_delta_(0), + excluded_row_count_(0), excluded_row_count_delta_(0), macro_block_cnt_(0), micro_block_cnt_(0), + only_block_(only_block) { } int64_t total_row_count_; int64_t total_row_count_delta_; int64_t excluded_row_count_; int64_t excluded_row_count_delta_; - TO_STRING_KV(K_(total_row_count), K_(total_row_count_delta), - K_(excluded_row_count), K_(excluded_row_count_delta)); + uint64_t macro_block_cnt_; + uint64_t micro_block_cnt_; + bool only_block_; + TO_STRING_KV(K_(total_row_count), K_(total_row_count_delta), K_(excluded_row_count), + K_(excluded_row_count_delta), K_(macro_block_cnt), K_(micro_block_cnt), K_(only_block)); }; class ObIndexBlockScanEstimator @@ -82,13 +93,27 @@ class ObIndexBlockScanEstimator public: ObIndexBlockScanEstimator(const ObIndexSSTableEstimateContext &context); ~ObIndexBlockScanEstimator(); - int estimate_row_count(ObPartitionEst &part_est); + void reuse(); + int estimate_row_count(blocksstable::ObSSTable &sstable, + const blocksstable::ObDatumRange &datum_range, + ObPartitionEst &part_est); + int estimate_block_count(blocksstable::ObSSTable &sstable, + const blocksstable::ObDatumRange &datum_range, + int64_t ¯o_block_cnt, + int64_t µ_block_cnt); private: - int cal_total_row_count(ObEstimatedResult &result); - int estimate_excluded_border_row_count(bool is_left, ObEstimatedResult &result); + int 
init_index_scanner(blocksstable::ObSSTable &sstable); + int cal_total_estimate_result(blocksstable::ObSSTable &sstable, + const blocksstable::ObDatumRange &datum_range, + ObEstimatedResult &result); + int estimate_excluded_border_result(const bool is_multi_version_minor, + const blocksstable::ObDatumRange &datum_range, + const bool is_left, + ObEstimatedResult &result); int goto_next_level( const blocksstable::ObDatumRange &range, const blocksstable::ObMicroIndexInfo µ_index_info, + const bool is_multi_version_minor, ObEstimatedResult &result); int prefetch_index_block_data( const blocksstable::ObMicroIndexInfo µ_index_info, @@ -98,6 +123,10 @@ private: ObMicroBlockDataHandle µ_handle, bool consider_multi_version, ObPartitionEst &est); + int cal_total_estimate_result_for_ddl( + blocksstable::ObSSTable &sstable, + const blocksstable::ObDatumRange &datum_range, + ObEstimatedResult &result); ObMicroBlockDataHandle &get_read_handle() { return micro_handles_[level_++ % DEFAULT_GET_MICRO_DATA_HANDLE_CNT]; @@ -111,7 +140,7 @@ private: ObMicroBlockDataHandle micro_handles_[DEFAULT_GET_MICRO_DATA_HANDLE_CNT]; blocksstable::ObMicroBlockData index_block_data_; const ObIndexSSTableEstimateContext &context_; - ObFIFOAllocator allocator_; + ObArenaAllocator allocator_; }; } diff --git a/src/storage/access/ob_index_tree_prefetcher.cpp b/src/storage/access/ob_index_tree_prefetcher.cpp index 922da4e97..87f65a389 100644 --- a/src/storage/access/ob_index_tree_prefetcher.cpp +++ b/src/storage/access/ob_index_tree_prefetcher.cpp @@ -138,7 +138,7 @@ int ObIndexTreePrefetcher::init_basic_info( LOG_WARN("Unexpected state, index_scanner_ is valid at first scan", K(ret), KPC(this), K(index_scanner_), K(iter_param), K(lbt())); } else { const ObTablet *cur_tablet = OB_ISNULL(iter_param_->tablet_handle_) ? 
nullptr : iter_param_->tablet_handle_->get_obj(); - index_scanner_.switch_context(sstable, cur_tablet, *datum_utils_, *access_ctx_, + index_scanner_.switch_context(sstable, cur_tablet, *datum_utils_, access_ctx_->query_flag_, ObRowkeyVectorHelper::can_use_non_datum_rowkey_vector(sstable.is_normal_cg_sstable(), iter_param_->tablet_id_) ? iter_param_->get_rowkey_col_descs() : nullptr); } @@ -981,7 +981,7 @@ int ObIndexTreeMultiPassPrefetcher::s LOG_WARN("invalid iter param", K(ret), KPC(iter_param_), K(lbt())); } else { const ObTablet *cur_tablet = OB_ISNULL(iter_param_->tablet_handle_) ? nullptr : iter_param_->tablet_handle_->get_obj(); - tree_handles_[level].index_scanner_.switch_context(sstable, cur_tablet, *datum_utils_, *access_ctx_, + tree_handles_[level].index_scanner_.switch_context(sstable, cur_tablet, *datum_utils_, access_ctx_->query_flag_, ObRowkeyVectorHelper::can_use_non_datum_rowkey_vector(sstable.is_normal_cg_sstable(), iter_param_->tablet_id_) ? iter_param_->get_rowkey_col_descs() : nullptr); } diff --git a/src/storage/access/ob_table_estimator.cpp b/src/storage/access/ob_table_estimator.cpp index df13d24a9..118ba2a1b 100644 --- a/src/storage/access/ob_table_estimator.cpp +++ b/src/storage/access/ob_table_estimator.cpp @@ -227,10 +227,9 @@ int ObTableEstimator::estimate_sstable_scan_row_count( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid get arguments", K(ret), K(base_input.table_id_), K(key_range)); } else { - const ObIndexSSTableEstimateContext context( - *sstable, base_input.tablet_handle_, base_input.query_flag_, key_range); + const ObIndexSSTableEstimateContext context(base_input.tablet_handle_, base_input.query_flag_); ObIndexBlockScanEstimator scan_estimator(context); - if (OB_FAIL(scan_estimator.estimate_row_count(part_est))) { + if (OB_FAIL(scan_estimator.estimate_row_count(*sstable, key_range, part_est))) { LOG_WARN("Fail to estimate cost of scan.", K(ret), K(base_input.table_id_)); } else { LOG_DEBUG("estimate_scan_cost", K(ret), 
K(base_input.table_id_), diff --git a/src/storage/blocksstable/index_block/ob_index_block_row_scanner.cpp b/src/storage/blocksstable/index_block/ob_index_block_row_scanner.cpp index 82cbafda9..d2140a682 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_row_scanner.cpp +++ b/src/storage/blocksstable/index_block/ob_index_block_row_scanner.cpp @@ -1946,14 +1946,14 @@ int ObIndexBlockRowScanner::get_end_key(ObCommonDatumRowkey &endkey) const void ObIndexBlockRowScanner::switch_context(const ObSSTable &sstable, const ObTablet *tablet, const ObStorageDatumUtils &datum_utils, - ObTableAccessContext &access_ctx, + const ObQueryFlag &query_flag, const ObIArray *rowkey_col_descs) { nested_offset_ = sstable.get_macro_offset(); datum_utils_ = &datum_utils; is_normal_cg_ = sstable.is_normal_cg_sstable(); - is_reverse_scan_ = access_ctx.query_flag_.is_reverse_scan(); - is_normal_query_ = !access_ctx.query_flag_.is_daily_merge() && !access_ctx.query_flag_.is_multi_version_minor_merge(); + is_reverse_scan_ = query_flag.is_reverse_scan(); + is_normal_query_ = !query_flag.is_daily_merge() && !query_flag.is_multi_version_minor_merge(); rowkey_col_descs_ = rowkey_col_descs; iter_param_.sstable_ = &sstable; iter_param_.tablet_ = tablet; diff --git a/src/storage/blocksstable/index_block/ob_index_block_row_scanner.h b/src/storage/blocksstable/index_block/ob_index_block_row_scanner.h index 6a9b1392d..076116178 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_row_scanner.h +++ b/src/storage/blocksstable/index_block/ob_index_block_row_scanner.h @@ -415,7 +415,7 @@ public: void switch_context(const ObSSTable &sstable, const ObTablet *tablet, const ObStorageDatumUtils &datum_utils, - ObTableAccessContext &access_ctx, + const ObQueryFlag &query_flag, const ObIArray *rowkey_col_descs = nullptr); TO_STRING_KV(K_(index_format), KP_(raw_iter), KP_(transformed_iter), KP_(ddl_iter), KP_(ddl_merge_iter), KPC_(iter), K_(range_idx), K_(is_get), K_(is_reverse_scan), 
K_(is_left_border), K_(is_right_border), diff --git a/src/storage/blocksstable/index_block/ob_index_block_row_struct.h b/src/storage/blocksstable/index_block/ob_index_block_row_struct.h index 87aa2aac1..84cbd0254 100644 --- a/src/storage/blocksstable/index_block/ob_index_block_row_struct.h +++ b/src/storage/blocksstable/index_block/ob_index_block_row_struct.h @@ -179,6 +179,8 @@ struct ObIndexBlockRowHeader OB_INLINE int64_t get_master_key_id() const { return master_key_id_; } OB_INLINE const char *get_encrypt_key() const { return encrypt_key_; } OB_INLINE uint64_t get_row_count() const { return row_count_; } + OB_INLINE uint64_t get_macro_block_count() const { return macro_block_count_; } + OB_INLINE uint64_t get_micro_block_count() const { return micro_block_count_; } OB_INLINE uint64_t get_schema_version() const { return schema_version_; } OB_INLINE MacroBlockId get_macro_id() const { @@ -513,6 +515,16 @@ public: OB_ASSERT(nullptr != row_header_); return row_header_->get_row_count(); } + OB_INLINE uint64_t get_macro_block_count() const + { + OB_ASSERT(nullptr != row_header_); + return row_header_->get_macro_block_count(); + } + OB_INLINE uint64_t get_micro_block_count() const + { + OB_ASSERT(nullptr != row_header_); + return row_header_->get_micro_block_count(); + } OB_INLINE bool is_pre_aggregated() const { OB_ASSERT(nullptr != row_header_); diff --git a/src/storage/ddl/ob_tablet_split_task.cpp b/src/storage/ddl/ob_tablet_split_task.cpp index c7efcaa1f..36340ce9d 100644 --- a/src/storage/ddl/ob_tablet_split_task.cpp +++ b/src/storage/ddl/ob_tablet_split_task.cpp @@ -2398,7 +2398,7 @@ int ObTabletSplitUtil::split_task_ranges( ObStoreRange whole_range; whole_range.set_whole_range(); if (OB_FAIL(range_spliter.get_range_split_info(tables, - rowkey_read_info, whole_range, range_info))) { + rowkey_read_info, whole_range, range_info))) { LOG_WARN("init range split info failed", K(ret)); } else if (OB_FALSE_IT(range_info.parallel_target_count_ = MAX(1, 
MIN(user_parallelism, (range_info.total_size_ + tablet_size - 1) / tablet_size)))) { diff --git a/src/storage/ob_partition_range_spliter.cpp b/src/storage/ob_partition_range_spliter.cpp index c02020c68..e0244f3e7 100644 --- a/src/storage/ob_partition_range_spliter.cpp +++ b/src/storage/ob_partition_range_spliter.cpp @@ -64,7 +64,8 @@ int ObEndkeyIterator::open( iter_idx < 0 || skip_cnt < 0 || !sstable.is_valid() - || !range_info.is_valid())) { + || !range_info.is_valid() + || !range_info.index_read_info_->is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to open ObEndkeyIterator", K(ret), K(skip_cnt), K(iter_idx), K(sstable), K(range_info)); @@ -864,16 +865,25 @@ int ObPartitionRangeSpliter::get_range_split_info(ObIArray &tables, if (OB_UNLIKELY(tables.empty() || !store_range.is_valid())) { ret = OB_INVALID_ARGUMENT; - STORAGE_LOG(WARN, "Invalid argument to init ObPartitionRangeSpliter", K(ret), K(tables), - K(store_range)); + STORAGE_LOG(WARN, "Invalid argument to init ObPartitionRangeSpliter", K(ret), K(tables), K(store_range)); } else { // build range paras range_info.store_range_ = &store_range; range_info.tables_ = &tables; + ObArenaAllocator temp_range_allocator; bool is_sstable = false; int64_t size = 0; int64_t macro_block_cnt = 0; int64_t estimate_micro_block_cnt = 0; + ObQueryFlag query_flag(ObQueryFlag::Forward, + false, /*is daily merge scan*/ + false, /*is read multiple macro block*/ + false, /*sys task scan, read one macro block in single io*/ + false /*is full row scan?*/, + false, + false); + const ObIndexSSTableEstimateContext context(index_read_info, query_flag); + ObIndexBlockScanEstimator scan_estimator(context); for (int64_t i = 0; OB_SUCC(ret) && i < tables.count(); i++) { ObITable *table = tables.at(i); macro_block_cnt = 0; @@ -895,8 +905,8 @@ int ObPartitionRangeSpliter::get_range_split_info(ObIArray &tables, ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected memtable and sstable mixed up", K(ret), 
K(range_info), KPC(table), K(is_sstable), K(i)); - } else if (OB_FAIL(get_single_range_info( - *range_info.store_range_, index_read_info, table, size, macro_block_cnt, estimate_micro_block_cnt))) { + } else if (OB_FAIL(get_single_range_info(scan_estimator, temp_range_allocator, *range_info.store_range_, + index_read_info, table, size, macro_block_cnt, estimate_micro_block_cnt))) { STORAGE_LOG(WARN, "Failed to get single range info", K(ret), K(i), KPC(table)); } else { if (table->is_co_sstable()) { @@ -922,7 +932,9 @@ int ObPartitionRangeSpliter::get_range_split_info(ObIArray &tables, return ret; } -int ObPartitionRangeSpliter::get_single_range_info(const ObStoreRange &store_range, +int ObPartitionRangeSpliter::get_single_range_info(ObIndexBlockScanEstimator &scan_estimator, + ObIAllocator &allocator, + const ObStoreRange &store_range, const ObITableReadInfo &index_read_info, ObITable *table, int64_t &total_size, @@ -941,6 +953,8 @@ int ObPartitionRangeSpliter::get_single_range_info(const ObStoreRange &store_ran total_size, row_count))) { STORAGE_LOG(WARN, "Failed to get single range info from memtable", K(ret), K(store_range)); + } else { + macro_block_cnt = estimate_micro_block_cnt = 0; } } else if (table->is_sstable()) { ObSSTable *sstable = static_cast(table); @@ -961,19 +975,14 @@ int ObPartitionRangeSpliter::get_single_range_info(const ObStoreRange &store_ran macro_block_cnt = 0; estimate_micro_block_cnt = 0; } else { - ObArenaAllocator temp_allocator; ObDatumRange datum_range; ObDatumRowkey sstable_endkey; - total_size = 0; - macro_block_cnt = 0; - estimate_micro_block_cnt = 0; - ObSSTableSecMetaIterator *macro_meta_iter = nullptr; - ObDataMacroBlockMeta macro_meta; + allocator.reuse(); const ObStorageDatumUtils &datum_utils = index_read_info.get_datum_utils(); int cmp_ret = 0; - if (OB_FAIL(datum_range.from_range(store_range, temp_allocator))) { + if (OB_FAIL(datum_range.from_range(store_range, allocator))) { STORAGE_LOG(WARN, "Failed to transfer store 
range", K(ret), K(store_range)); - } else if (OB_FAIL(sstable->get_last_rowkey(temp_allocator, sstable_endkey))) { + } else if (OB_FAIL(sstable->get_last_rowkey(allocator, sstable_endkey))) { STORAGE_LOG(WARN, "Failed to get last rowkey from sstable"); } else if (OB_FAIL(sstable_endkey.compare(datum_range.get_start_key(), datum_utils, cmp_ret))) { STORAGE_LOG(WARN, "Failed to compare sstable endkey with range start key", @@ -981,30 +990,14 @@ int ObPartitionRangeSpliter::get_single_range_info(const ObStoreRange &store_ran } else if (cmp_ret < 0) { // sstable not in range STORAGE_LOG(DEBUG, "Skip empty range", K(ret), K(datum_range), KPC(sstable)); - } else if (OB_FAIL(sstable->scan_secondary_meta( - allocator_, - datum_range, - index_read_info, - ObMacroBlockMetaType::DATA_BLOCK_META, - macro_meta_iter))) { - STORAGE_LOG(DEBUG, "Skip empty range", K(ret), K(datum_range), KPC(sstable)); } else { - while (OB_SUCC(ret)) { - if (OB_FAIL(macro_meta_iter->get_next(macro_meta))) { - if (OB_UNLIKELY(OB_ITER_END != ret)) { - STORAGE_LOG(WARN, "Fail to get next macro block meta", K(ret)); - } else { - ret = OB_SUCCESS; - break; - } - } else { - total_size += macro_meta.val_.occupy_size_; - macro_block_cnt++; - estimate_micro_block_cnt += macro_meta.val_.micro_block_count_; - } - } - if (OB_NOT_NULL(macro_meta_iter)) { - macro_meta_iter->~ObSSTableSecMetaIterator(); + scan_estimator.reuse(); + if (OB_FAIL(scan_estimator.estimate_block_count(*sstable, datum_range, macro_block_cnt, estimate_micro_block_cnt))) { + STORAGE_LOG(WARN, "Fail to estimate cost of scan.", K(ret)); + } else { + total_size = macro_block_cnt * OB_DEFAULT_MACRO_BLOCK_SIZE; + STORAGE_LOG(DEBUG, "estimate_scan_cost", K(ret), K(datum_range), K(total_size), + K(macro_block_cnt), K(estimate_micro_block_cnt)); } } } @@ -1025,15 +1018,19 @@ int ObPartitionRangeSpliter::build_single_range(const bool for_compaction, int ret = OB_SUCCESS; ObStoreRange dst_range; - if 
(OB_FAIL(parallel_ranger_.construct_single_range(allocator, + if (!for_compaction) { + dst_range = *range_info.store_range_; + } else if (OB_FAIL(parallel_ranger_.construct_single_range(allocator, range_info.store_range_->get_start_key(), range_info.store_range_->get_end_key(), range_info.store_range_->get_border_flag(), for_compaction, dst_range))) { STORAGE_LOG(WARN, "failed to construct single range", K(ret), K(range_info)); - } else if (FALSE_IT(dst_range.set_table_id(range_info.store_range_->get_table_id()))) { - } else if (OB_FAIL(range_array.push_back(dst_range))) { + } else { + dst_range.set_table_id(range_info.store_range_->get_table_id()); + } + if (FAILEDx(range_array.push_back(dst_range))) { STORAGE_LOG(WARN, "failed to push back merge range", K(ret), K(dst_range)); } @@ -1158,6 +1155,8 @@ int ObPartitionMultiRangeSpliter::get_split_tables(ObTableStoreIterator &table_i } else if (OB_ISNULL(table)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected null table", K(ret), K(table_iter)); + } else if (table->is_sstable() && (static_cast(table))->is_small_sstable()) { + // skip small sstable } else if (table->is_major_sstable()) { if (table->is_co_sstable()) { ObCOSSTableV2 *co_sstable = static_cast(table); @@ -1258,7 +1257,7 @@ int ObPartitionMultiRangeSpliter::get_multi_range_size( total_size = estimate_size; } else { RangeSplitInfoArray range_info_array; - bool all_single_rowkey = false; + bool all_single_rowkey = false; if (OB_FAIL(get_range_split_infos(tables, index_read_info, range_array, range_info_array, range_size, all_single_rowkey))) { STORAGE_LOG(WARN, "Failed to get range split info array", K(ret)); } else { @@ -1601,21 +1600,29 @@ int ObPartitionMultiRangeSpliter::get_range_split_infos(ObIArray &ta STORAGE_LOG(WARN, "Invalid argument to get range split info", K(ret), K(tables), K(range_array)); } else { ObRangeSplitInfo range_info; + const int64_t sample_cnt = MAX(1, range_array.count() / RANGE_COUNT_THRESOLD); for (int64_t i = 0; 
OB_SUCC(ret) && i < range_array.count(); i++) { + const bool need_sample = i < RANGE_COUNT_THRESOLD + || ((i - RANGE_COUNT_THRESOLD) % sample_cnt == 0); if (range_array.at(i).is_single_rowkey()) { + range_info.reset(); range_info.store_range_ = &range_array.at(i); range_info.tables_ = &tables; range_info.index_read_info_ = &index_read_info; range_info.total_size_ = DEFAULT_MICRO_BLOCK_SIZE; range_info.max_macro_block_count_ = 1; range_info.max_estimate_micro_block_cnt_ = 1; - } else { - range_spliter_.reset(); - all_single_rowkey = false; - if (OB_FAIL(range_spliter_.get_range_split_info( - tables, index_read_info, range_array.at(i), range_info))) { - STORAGE_LOG(WARN, "Failed to get range split info", K(ret), K(i), K(range_array.at(i))); - } + } else if (need_sample) { + range_info.reset(); + range_spliter_.reset(); + all_single_rowkey = false; + if (OB_FAIL(range_spliter_.get_range_split_info( + tables, index_read_info, range_array.at(i), range_info))) { + STORAGE_LOG(WARN, "Failed to get range split info", K(ret), K(i), K(range_array.at(i))); + } + } else { + all_single_rowkey = false; + range_info.store_range_ = &range_array.at(i); } if (OB_FAIL(ret)) { } else if (OB_FAIL(range_info_array.push_back(range_info))) { @@ -1623,7 +1630,6 @@ int ObPartitionMultiRangeSpliter::get_range_split_infos(ObIArray &ta } else { STORAGE_LOG(DEBUG, "get single range split info", K(range_info)); total_size += range_info.total_size_; - range_info.reset(); } } STORAGE_LOG(DEBUG, "get total range split info", K(total_size), K(tables), K(range_info_array)); diff --git a/src/storage/ob_partition_range_spliter.h b/src/storage/ob_partition_range_spliter.h index 9d719359f..40d0b7568 100644 --- a/src/storage/ob_partition_range_spliter.h +++ b/src/storage/ob_partition_range_spliter.h @@ -23,6 +23,7 @@ #include "storage/access/ob_store_row_iterator.h" #include "storage/tablet/ob_table_store_util.h" #include "storage/meta_mem/ob_tablet_handle.h" +#include 
"access/ob_index_sstable_estimator.h" namespace oceanbase { @@ -92,7 +93,6 @@ struct ObRangeSplitInfo && OB_NOT_NULL(index_read_info_) && OB_NOT_NULL(tables_) && store_range_->is_valid() - && index_read_info_->is_valid() && parallel_target_count_ > 0 && (tables_->count() > 0 || parallel_target_count_ == 1) ; } @@ -252,7 +252,9 @@ private: ObRangeSplitInfo &range_info, common::ObIAllocator &allocator, common::ObIArray &range_array); - int get_single_range_info(const ObStoreRange &store_range, + int get_single_range_info(ObIndexBlockScanEstimator &scan_estimator, + ObIAllocator &allocator, + const ObStoreRange &store_range, const ObITableReadInfo &index_read_info, ObITable *table, int64_t &total_size, @@ -316,7 +318,7 @@ private: private: ObPartitionRangeSpliter range_spliter_; - static const int64_t RANGE_COUNT_THRESOLD = 500; + static const int64_t RANGE_COUNT_THRESOLD = 20; static const int64_t FAST_ESTIMATE_THRESOLD = 80; };