diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index b0e06a5f26..96f6b2faf1 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -302,6 +302,9 @@ DEFINE_Bool(disable_storage_page_cache, "false"); // whether to disable row cache feature in storage DEFINE_Bool(disable_storage_row_cache, "true"); +// Cache for mow primary key storage page size +DEFINE_String(pk_storage_page_cache_limit, "10%"); + DEFINE_Bool(enable_low_cardinality_optimize, "true"); DEFINE_Bool(enable_low_cardinality_cache_code, "true"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 8452e73113..88ddf3f95e 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -342,6 +342,10 @@ DECLARE_Bool(disable_storage_page_cache); // whether to disable row cache feature in storage DECLARE_Bool(disable_storage_row_cache); +// Cache for mow primary key storage page size, it's seperated from +// storage_page_cache_limit +DECLARE_String(pk_storage_page_cache_limit); + DECLARE_Bool(enable_low_cardinality_optimize); DECLARE_Bool(enable_low_cardinality_cache_code); diff --git a/be/src/olap/page_cache.cpp b/be/src/olap/page_cache.cpp index 644162bd2e..de9aeaa28f 100644 --- a/be/src/olap/page_cache.cpp +++ b/be/src/olap/page_cache.cpp @@ -26,14 +26,15 @@ namespace doris { StoragePageCache* StoragePageCache::_s_instance = nullptr; void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_percentage, - uint32_t num_shards) { + int64_t pk_index_cache_capacity, uint32_t num_shards) { DCHECK(_s_instance == nullptr); - static StoragePageCache instance(capacity, index_cache_percentage, num_shards); + static StoragePageCache instance(capacity, index_cache_percentage, pk_index_cache_capacity, + num_shards); _s_instance = &instance; } StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage, - uint32_t num_shards) + int64_t pk_index_cache_capacity, uint32_t num_shards) : _index_cache_percentage(index_cache_percentage) { if (index_cache_percentage == 0) { _data_page_cache = std::unique_ptr( @@ -51,6 +52,10 @@ StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percenta } else { CHECK(false) << "invalid index page cache percentage"; } + if (pk_index_cache_capacity > 0) { + _pk_index_page_cache = std::unique_ptr(new_lru_cache( + "PkIndexPageCache", pk_index_cache_capacity, LRUCacheType::SIZE, num_shards)); + } } bool StoragePageCache::lookup(const CacheKey& key, PageCacheHandle* handle, diff --git a/be/src/olap/page_cache.h b/be/src/olap/page_cache.h index 8cc41c4385..cc52f4a252 100644 --- a/be/src/olap/page_cache.h +++ b/be/src/olap/page_cache.h @@ -105,13 +105,15 @@ public: // Create global instance of this class static void create_global_cache(size_t capacity, int32_t index_cache_percentage, + int64_t pk_index_cache_capacity, uint32_t num_shards = kDefaultNumShards); // Return global instance. // Client should call create_global_cache before. static StoragePageCache* instance() { return _s_instance; } - StoragePageCache(size_t capacity, int32_t index_cache_percentage, uint32_t num_shards); + StoragePageCache(size_t capacity, int32_t index_cache_percentage, + int64_t pk_index_cache_capacity, uint32_t num_shards); // Lookup the given page in the cache. // @@ -151,6 +153,10 @@ private: int32_t _index_cache_percentage = 0; std::unique_ptr _data_page_cache = nullptr; std::unique_ptr _index_page_cache = nullptr; + // Cache data for primary key index data page, seperated from data + // page cache to make it for flexible. we need this cache When construct + // delete bitmap in unique key with mow + std::unique_ptr _pk_index_page_cache = nullptr; Cache* _get_page_cache(segment_v2::PageTypePB page_type) { switch (page_type) { @@ -159,6 +165,8 @@ private: } case segment_v2::INDEX_PAGE: return _index_page_cache.get(); + case segment_v2::PRIMARY_KEY_INDEX_PAGE: + return _pk_index_page_cache.get(); default: return nullptr; } diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h index 65cc64f0cd..bc26ef8805 100644 --- a/be/src/olap/primary_key_index.h +++ b/be/src/olap/primary_key_index.h @@ -90,6 +90,7 @@ public: Status new_iterator(std::unique_ptr* index_iterator) const { DCHECK(_index_parsed); index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get())); + (*index_iterator)->set_is_pk_index(true); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp index 8c5997f99f..a94d0ea51f 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp @@ -116,8 +116,12 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) { PageHandle handle; Slice body; PageFooterPB footer; + PageTypePB page_type = DATA_PAGE; + if (_is_pk_index) { + page_type = PRIMARY_KEY_INDEX_PAGE; + } RETURN_IF_ERROR( - _reader->read_page(pp, &handle, &body, &footer, DATA_PAGE, _compress_codec, true)); + _reader->read_page(pp, &handle, &body, &footer, page_type, _compress_codec, true)); // parse data page // note that page_index is not used in IndexedColumnIterator, so we pass 0 PageDecoderOptions opts; diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.h b/be/src/olap/rowset/segment_v2/indexed_column_reader.h index 3ee39e04c0..d0ae058ddf 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h @@ -129,6 +129,8 @@ public: // After one seek, we can only call this function once to read data Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst); + void set_is_pk_index(bool is_pk) { _is_pk_index = is_pk; } + private: Status _read_data_page(const PagePointer& pp); @@ -147,6 +149,8 @@ private: ordinal_t _current_ordinal = 0; // iterator owned compress codec, should NOT be shared by threads, initialized before used BlockCompressionCodec* _compress_codec = nullptr; + + bool _is_pk_index = false; }; } // namespace segment_v2 diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 81e050ba02..56b2cbcd75 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -238,7 +238,14 @@ Status ExecEnv::_init_mem_env() { << ". Please modify the 'storage_page_cache_shard_size' parameter in your " "conf file to be a power of two for better performance."; } - StoragePageCache::create_global_cache(storage_cache_limit, index_percentage, num_shards); + int64_t pk_storage_page_cache_limit = + ParseUtil::parse_mem_spec(config::pk_storage_page_cache_limit, MemInfo::mem_limit(), + MemInfo::physical_mem(), &is_percent); + while (!is_percent && pk_storage_page_cache_limit > MemInfo::mem_limit() / 2) { + pk_storage_page_cache_limit = storage_cache_limit / 2; + } + StoragePageCache::create_global_cache(storage_cache_limit, index_percentage, + pk_storage_page_cache_limit, num_shards); LOG(INFO) << "Storage page cache memory limit: " << PrettyPrinter::print(storage_cache_limit, TUnit::BYTES) << ", origin config value: " << config::storage_page_cache_limit; diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp index 1e47159e09..780d8d6bf0 100644 --- a/be/src/util/mem_info.cpp +++ b/be/src/util/mem_info.cpp @@ -113,6 +113,12 @@ void MemInfo::process_cache_gc(int64_t& freed_mem) { StoragePageCache::instance()->get_page_cache_mem_consumption(segment_v2::DATA_PAGE); StoragePageCache::instance()->prune(segment_v2::DATA_PAGE); } + if (StoragePageCache::instance()->get_page_cache_mem_consumption( + segment_v2::PRIMARY_KEY_INDEX_PAGE) > min_free_size) { + freed_mem += StoragePageCache::instance()->get_page_cache_mem_consumption( + segment_v2::PRIMARY_KEY_INDEX_PAGE); + StoragePageCache::instance()->prune(segment_v2::PRIMARY_KEY_INDEX_PAGE); + } } // step1: free all cache diff --git a/be/test/olap/page_cache_test.cpp b/be/test/olap/page_cache_test.cpp index 6f9724fe0c..654d8a4341 100644 --- a/be/test/olap/page_cache_test.cpp +++ b/be/test/olap/page_cache_test.cpp @@ -34,7 +34,7 @@ public: // All cache space is allocated to data pages TEST(StoragePageCacheTest, data_page_only) { - StoragePageCache cache(kNumShards * 2048, 0, kNumShards); + StoragePageCache cache(kNumShards * 2048, 0, 0, kNumShards); StoragePageCache::CacheKey key("abc", 0, 0); StoragePageCache::CacheKey memory_key("mem", 0, 0); @@ -100,7 +100,7 @@ TEST(StoragePageCacheTest, data_page_only) { // All cache space is allocated to index pages TEST(StoragePageCacheTest, index_page_only) { - StoragePageCache cache(kNumShards * 2048, 100, kNumShards); + StoragePageCache cache(kNumShards * 2048, 100, 0, kNumShards); StoragePageCache::CacheKey key("abc", 0, 0); StoragePageCache::CacheKey memory_key("mem", 0, 0); @@ -166,7 +166,7 @@ TEST(StoragePageCacheTest, index_page_only) { // Cache space is allocated by index_page_cache_ratio TEST(StoragePageCacheTest, mixed_pages) { - StoragePageCache cache(kNumShards * 2048, 10, kNumShards); + StoragePageCache cache(kNumShards * 2048, 10, 0, kNumShards); StoragePageCache::CacheKey data_key("data", 0, 0); StoragePageCache::CacheKey index_key("index", 0, 0); diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp index abed28d985..ae8f0a7fe1 100644 --- a/be/test/testutil/run_all_tests.cpp +++ b/be/test/testutil/run_all_tests.cpp @@ -39,7 +39,7 @@ int main(int argc, char** argv) { doris::ExecEnv::GetInstance()->init_mem_tracker(); doris::thread_context()->thread_mem_tracker_mgr->init(); doris::TabletSchemaCache::create_global_schema_cache(); - doris::StoragePageCache::create_global_cache(1 << 30, 10); + doris::StoragePageCache::create_global_cache(1 << 30, 10, 0); doris::SegmentLoader::create_global_instance(1000); std::string conf = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; if (!doris::config::init(conf.c_str(), false)) { diff --git a/be/test/tools/benchmark_tool.cpp b/be/test/tools/benchmark_tool.cpp index 0973520571..1d9dcc3616 100644 --- a/be/test/tools/benchmark_tool.cpp +++ b/be/test/tools/benchmark_tool.cpp @@ -586,7 +586,7 @@ int main(int argc, char** argv) { gflags::SetUsageMessage(usage); google::ParseCommandLineFlags(&argc, &argv, true); - doris::StoragePageCache::create_global_cache(1 << 30, 10); + doris::StoragePageCache::create_global_cache(1 << 30, 10, 0); doris::MultiBenchmark multi_bm; multi_bm.add_bm(); diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index 629f1a4fc8..d124433b65 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -59,6 +59,7 @@ enum PageTypePB { INDEX_PAGE = 2; DICTIONARY_PAGE = 3; SHORT_KEY_PAGE = 4; + PRIMARY_KEY_INDEX_PAGE = 5; } message DataPageFooterPB {