[improvement](MOW) use separate cache for mow pk cache (#19686)
In mow, the primary key cache has a big impact on load performance, so we add a new cache type to separate it from the page cache and make it more flexible in some cases.
This commit is contained in:
@ -302,6 +302,9 @@ DEFINE_Bool(disable_storage_page_cache, "false");
|
||||
// whether to disable row cache feature in storage
|
||||
DEFINE_Bool(disable_storage_row_cache, "true");
|
||||
|
||||
// Size limit of the cache for mow primary key storage pages
|
||||
DEFINE_String(pk_storage_page_cache_limit, "10%");
|
||||
|
||||
DEFINE_Bool(enable_low_cardinality_optimize, "true");
|
||||
DEFINE_Bool(enable_low_cardinality_cache_code, "true");
|
||||
|
||||
|
||||
@ -342,6 +342,10 @@ DECLARE_Bool(disable_storage_page_cache);
|
||||
// whether to disable row cache feature in storage
|
||||
DECLARE_Bool(disable_storage_row_cache);
|
||||
|
||||
// Cache for mow primary key storage page size, it's separated from
|
||||
// storage_page_cache_limit
|
||||
DECLARE_String(pk_storage_page_cache_limit);
|
||||
|
||||
DECLARE_Bool(enable_low_cardinality_optimize);
|
||||
DECLARE_Bool(enable_low_cardinality_cache_code);
|
||||
|
||||
|
||||
@ -26,14 +26,15 @@ namespace doris {
|
||||
StoragePageCache* StoragePageCache::_s_instance = nullptr;
|
||||
|
||||
void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_percentage,
|
||||
uint32_t num_shards) {
|
||||
int64_t pk_index_cache_capacity, uint32_t num_shards) {
|
||||
DCHECK(_s_instance == nullptr);
|
||||
static StoragePageCache instance(capacity, index_cache_percentage, num_shards);
|
||||
static StoragePageCache instance(capacity, index_cache_percentage, pk_index_cache_capacity,
|
||||
num_shards);
|
||||
_s_instance = &instance;
|
||||
}
|
||||
|
||||
StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage,
|
||||
uint32_t num_shards)
|
||||
int64_t pk_index_cache_capacity, uint32_t num_shards)
|
||||
: _index_cache_percentage(index_cache_percentage) {
|
||||
if (index_cache_percentage == 0) {
|
||||
_data_page_cache = std::unique_ptr<Cache>(
|
||||
@ -51,6 +52,10 @@ StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percenta
|
||||
} else {
|
||||
CHECK(false) << "invalid index page cache percentage";
|
||||
}
|
||||
if (pk_index_cache_capacity > 0) {
|
||||
_pk_index_page_cache = std::unique_ptr<Cache>(new_lru_cache(
|
||||
"PkIndexPageCache", pk_index_cache_capacity, LRUCacheType::SIZE, num_shards));
|
||||
}
|
||||
}
|
||||
|
||||
bool StoragePageCache::lookup(const CacheKey& key, PageCacheHandle* handle,
|
||||
|
||||
@ -105,13 +105,15 @@ public:
|
||||
|
||||
// Create global instance of this class
|
||||
static void create_global_cache(size_t capacity, int32_t index_cache_percentage,
|
||||
int64_t pk_index_cache_capacity,
|
||||
uint32_t num_shards = kDefaultNumShards);
|
||||
|
||||
// Return global instance.
|
||||
// Client should call create_global_cache before.
|
||||
static StoragePageCache* instance() { return _s_instance; }
|
||||
|
||||
StoragePageCache(size_t capacity, int32_t index_cache_percentage, uint32_t num_shards);
|
||||
StoragePageCache(size_t capacity, int32_t index_cache_percentage,
|
||||
int64_t pk_index_cache_capacity, uint32_t num_shards);
|
||||
|
||||
// Lookup the given page in the cache.
|
||||
//
|
||||
@ -151,6 +153,10 @@ private:
|
||||
int32_t _index_cache_percentage = 0;
|
||||
std::unique_ptr<Cache> _data_page_cache = nullptr;
|
||||
std::unique_ptr<Cache> _index_page_cache = nullptr;
|
||||
// Cache data for primary key index data page, separated from data
|
||||
// page cache to make it more flexible. We need this cache when constructing the
|
||||
// delete bitmap in unique key with mow
|
||||
std::unique_ptr<Cache> _pk_index_page_cache = nullptr;
|
||||
|
||||
Cache* _get_page_cache(segment_v2::PageTypePB page_type) {
|
||||
switch (page_type) {
|
||||
@ -159,6 +165,8 @@ private:
|
||||
}
|
||||
case segment_v2::INDEX_PAGE:
|
||||
return _index_page_cache.get();
|
||||
case segment_v2::PRIMARY_KEY_INDEX_PAGE:
|
||||
return _pk_index_page_cache.get();
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -90,6 +90,7 @@ public:
|
||||
// Creates an IndexedColumnIterator over _index_reader and stores it in
// *index_iterator, replacing whatever the caller's unique_ptr held before.
// The new iterator is flagged as a primary key index iterator.
// The index must already be parsed (checked with DCHECK on _index_parsed).
// Always returns Status::OK().
Status new_iterator(std::unique_ptr<segment_v2::IndexedColumnIterator>* index_iterator) const {
    DCHECK(_index_parsed);

    auto& out_iter = *index_iterator;
    out_iter.reset(new segment_v2::IndexedColumnIterator(_index_reader.get()));
    // Mark the iterator so its page reads are treated as primary key index pages.
    out_iter->set_is_pk_index(true);

    return Status::OK();
}
|
||||
|
||||
|
||||
@ -116,8 +116,12 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
|
||||
PageHandle handle;
|
||||
Slice body;
|
||||
PageFooterPB footer;
|
||||
PageTypePB page_type = DATA_PAGE;
|
||||
if (_is_pk_index) {
|
||||
page_type = PRIMARY_KEY_INDEX_PAGE;
|
||||
}
|
||||
RETURN_IF_ERROR(
|
||||
_reader->read_page(pp, &handle, &body, &footer, DATA_PAGE, _compress_codec, true));
|
||||
_reader->read_page(pp, &handle, &body, &footer, page_type, _compress_codec, true));
|
||||
// parse data page
|
||||
// note that page_index is not used in IndexedColumnIterator, so we pass 0
|
||||
PageDecoderOptions opts;
|
||||
|
||||
@ -129,6 +129,8 @@ public:
|
||||
// After one seek, we can only call this function once to read data
|
||||
Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst);
|
||||
|
||||
// Records whether this iterator reads a primary key index.
// The value is stored in _is_pk_index.
void set_is_pk_index(bool is_pk) {
    _is_pk_index = is_pk;
}
|
||||
|
||||
private:
|
||||
Status _read_data_page(const PagePointer& pp);
|
||||
|
||||
@ -147,6 +149,8 @@ private:
|
||||
ordinal_t _current_ordinal = 0;
|
||||
// iterator owned compress codec, should NOT be shared by threads, initialized before used
|
||||
BlockCompressionCodec* _compress_codec = nullptr;
|
||||
|
||||
bool _is_pk_index = false;
|
||||
};
|
||||
|
||||
} // namespace segment_v2
|
||||
|
||||
@ -238,7 +238,14 @@ Status ExecEnv::_init_mem_env() {
|
||||
<< ". Please modify the 'storage_page_cache_shard_size' parameter in your "
|
||||
"conf file to be a power of two for better performance.";
|
||||
}
|
||||
StoragePageCache::create_global_cache(storage_cache_limit, index_percentage, num_shards);
|
||||
int64_t pk_storage_page_cache_limit =
|
||||
ParseUtil::parse_mem_spec(config::pk_storage_page_cache_limit, MemInfo::mem_limit(),
|
||||
MemInfo::physical_mem(), &is_percent);
|
||||
while (!is_percent && pk_storage_page_cache_limit > MemInfo::mem_limit() / 2) {
|
||||
pk_storage_page_cache_limit = storage_cache_limit / 2;
|
||||
}
|
||||
StoragePageCache::create_global_cache(storage_cache_limit, index_percentage,
|
||||
pk_storage_page_cache_limit, num_shards);
|
||||
LOG(INFO) << "Storage page cache memory limit: "
|
||||
<< PrettyPrinter::print(storage_cache_limit, TUnit::BYTES)
|
||||
<< ", origin config value: " << config::storage_page_cache_limit;
|
||||
|
||||
@ -113,6 +113,12 @@ void MemInfo::process_cache_gc(int64_t& freed_mem) {
|
||||
StoragePageCache::instance()->get_page_cache_mem_consumption(segment_v2::DATA_PAGE);
|
||||
StoragePageCache::instance()->prune(segment_v2::DATA_PAGE);
|
||||
}
|
||||
if (StoragePageCache::instance()->get_page_cache_mem_consumption(
|
||||
segment_v2::PRIMARY_KEY_INDEX_PAGE) > min_free_size) {
|
||||
freed_mem += StoragePageCache::instance()->get_page_cache_mem_consumption(
|
||||
segment_v2::PRIMARY_KEY_INDEX_PAGE);
|
||||
StoragePageCache::instance()->prune(segment_v2::PRIMARY_KEY_INDEX_PAGE);
|
||||
}
|
||||
}
|
||||
|
||||
// step1: free all cache
|
||||
|
||||
@ -34,7 +34,7 @@ public:
|
||||
|
||||
// All cache space is allocated to data pages
|
||||
TEST(StoragePageCacheTest, data_page_only) {
|
||||
StoragePageCache cache(kNumShards * 2048, 0, kNumShards);
|
||||
StoragePageCache cache(kNumShards * 2048, 0, 0, kNumShards);
|
||||
|
||||
StoragePageCache::CacheKey key("abc", 0, 0);
|
||||
StoragePageCache::CacheKey memory_key("mem", 0, 0);
|
||||
@ -100,7 +100,7 @@ TEST(StoragePageCacheTest, data_page_only) {
|
||||
|
||||
// All cache space is allocated to index pages
|
||||
TEST(StoragePageCacheTest, index_page_only) {
|
||||
StoragePageCache cache(kNumShards * 2048, 100, kNumShards);
|
||||
StoragePageCache cache(kNumShards * 2048, 100, 0, kNumShards);
|
||||
|
||||
StoragePageCache::CacheKey key("abc", 0, 0);
|
||||
StoragePageCache::CacheKey memory_key("mem", 0, 0);
|
||||
@ -166,7 +166,7 @@ TEST(StoragePageCacheTest, index_page_only) {
|
||||
|
||||
// Cache space is allocated by index_page_cache_ratio
|
||||
TEST(StoragePageCacheTest, mixed_pages) {
|
||||
StoragePageCache cache(kNumShards * 2048, 10, kNumShards);
|
||||
StoragePageCache cache(kNumShards * 2048, 10, 0, kNumShards);
|
||||
|
||||
StoragePageCache::CacheKey data_key("data", 0, 0);
|
||||
StoragePageCache::CacheKey index_key("index", 0, 0);
|
||||
|
||||
@ -39,7 +39,7 @@ int main(int argc, char** argv) {
|
||||
doris::ExecEnv::GetInstance()->init_mem_tracker();
|
||||
doris::thread_context()->thread_mem_tracker_mgr->init();
|
||||
doris::TabletSchemaCache::create_global_schema_cache();
|
||||
doris::StoragePageCache::create_global_cache(1 << 30, 10);
|
||||
doris::StoragePageCache::create_global_cache(1 << 30, 10, 0);
|
||||
doris::SegmentLoader::create_global_instance(1000);
|
||||
std::string conf = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
|
||||
if (!doris::config::init(conf.c_str(), false)) {
|
||||
|
||||
@ -586,7 +586,7 @@ int main(int argc, char** argv) {
|
||||
gflags::SetUsageMessage(usage);
|
||||
google::ParseCommandLineFlags(&argc, &argv, true);
|
||||
|
||||
doris::StoragePageCache::create_global_cache(1 << 30, 10);
|
||||
doris::StoragePageCache::create_global_cache(1 << 30, 10, 0);
|
||||
|
||||
doris::MultiBenchmark multi_bm;
|
||||
multi_bm.add_bm();
|
||||
|
||||
@ -59,6 +59,7 @@ enum PageTypePB {
|
||||
INDEX_PAGE = 2;
|
||||
DICTIONARY_PAGE = 3;
|
||||
SHORT_KEY_PAGE = 4;
|
||||
PRIMARY_KEY_INDEX_PAGE = 5;
|
||||
}
|
||||
|
||||
message DataPageFooterPB {
|
||||
|
||||
Reference in New Issue
Block a user