[improvement](MOW) use seperated cache for mow pk cache (#19686)

In mow, primary key cache have a big impact on load performance, so we add a new cache type to seperate
it from page cache to make it more flexible in some cases
This commit is contained in:
yixiutt
2023-05-18 13:27:09 +08:00
committed by GitHub
parent 50370dead9
commit 943e5fb7e5
13 changed files with 54 additions and 11 deletions

View File

@ -302,6 +302,9 @@ DEFINE_Bool(disable_storage_page_cache, "false");
// whether to disable row cache feature in storage
DEFINE_Bool(disable_storage_row_cache, "true");
// Cache for mow primary key storage page size
DEFINE_String(pk_storage_page_cache_limit, "10%");
DEFINE_Bool(enable_low_cardinality_optimize, "true");
DEFINE_Bool(enable_low_cardinality_cache_code, "true");

View File

@ -342,6 +342,10 @@ DECLARE_Bool(disable_storage_page_cache);
// whether to disable row cache feature in storage
DECLARE_Bool(disable_storage_row_cache);
// Cache for mow primary key storage page size, it's seperated from
// storage_page_cache_limit
DECLARE_String(pk_storage_page_cache_limit);
DECLARE_Bool(enable_low_cardinality_optimize);
DECLARE_Bool(enable_low_cardinality_cache_code);

View File

@ -26,14 +26,15 @@ namespace doris {
StoragePageCache* StoragePageCache::_s_instance = nullptr;
void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_percentage,
uint32_t num_shards) {
int64_t pk_index_cache_capacity, uint32_t num_shards) {
DCHECK(_s_instance == nullptr);
static StoragePageCache instance(capacity, index_cache_percentage, num_shards);
static StoragePageCache instance(capacity, index_cache_percentage, pk_index_cache_capacity,
num_shards);
_s_instance = &instance;
}
StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage,
uint32_t num_shards)
int64_t pk_index_cache_capacity, uint32_t num_shards)
: _index_cache_percentage(index_cache_percentage) {
if (index_cache_percentage == 0) {
_data_page_cache = std::unique_ptr<Cache>(
@ -51,6 +52,10 @@ StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percenta
} else {
CHECK(false) << "invalid index page cache percentage";
}
if (pk_index_cache_capacity > 0) {
_pk_index_page_cache = std::unique_ptr<Cache>(new_lru_cache(
"PkIndexPageCache", pk_index_cache_capacity, LRUCacheType::SIZE, num_shards));
}
}
bool StoragePageCache::lookup(const CacheKey& key, PageCacheHandle* handle,

View File

@ -105,13 +105,15 @@ public:
// Create global instance of this class
static void create_global_cache(size_t capacity, int32_t index_cache_percentage,
int64_t pk_index_cache_capacity,
uint32_t num_shards = kDefaultNumShards);
// Return global instance.
// Client should call create_global_cache before.
static StoragePageCache* instance() { return _s_instance; }
StoragePageCache(size_t capacity, int32_t index_cache_percentage, uint32_t num_shards);
StoragePageCache(size_t capacity, int32_t index_cache_percentage,
int64_t pk_index_cache_capacity, uint32_t num_shards);
// Lookup the given page in the cache.
//
@ -151,6 +153,10 @@ private:
int32_t _index_cache_percentage = 0;
std::unique_ptr<Cache> _data_page_cache = nullptr;
std::unique_ptr<Cache> _index_page_cache = nullptr;
// Cache data for primary key index data page, seperated from data
// page cache to make it for flexible. we need this cache When construct
// delete bitmap in unique key with mow
std::unique_ptr<Cache> _pk_index_page_cache = nullptr;
Cache* _get_page_cache(segment_v2::PageTypePB page_type) {
switch (page_type) {
@ -159,6 +165,8 @@ private:
}
case segment_v2::INDEX_PAGE:
return _index_page_cache.get();
case segment_v2::PRIMARY_KEY_INDEX_PAGE:
return _pk_index_page_cache.get();
default:
return nullptr;
}

View File

@ -90,6 +90,7 @@ public:
Status new_iterator(std::unique_ptr<segment_v2::IndexedColumnIterator>* index_iterator) const {
DCHECK(_index_parsed);
index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get()));
(*index_iterator)->set_is_pk_index(true);
return Status::OK();
}

View File

@ -116,8 +116,12 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
PageHandle handle;
Slice body;
PageFooterPB footer;
PageTypePB page_type = DATA_PAGE;
if (_is_pk_index) {
page_type = PRIMARY_KEY_INDEX_PAGE;
}
RETURN_IF_ERROR(
_reader->read_page(pp, &handle, &body, &footer, DATA_PAGE, _compress_codec, true));
_reader->read_page(pp, &handle, &body, &footer, page_type, _compress_codec, true));
// parse data page
// note that page_index is not used in IndexedColumnIterator, so we pass 0
PageDecoderOptions opts;

View File

@ -129,6 +129,8 @@ public:
// After one seek, we can only call this function once to read data
Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst);
void set_is_pk_index(bool is_pk) { _is_pk_index = is_pk; }
private:
Status _read_data_page(const PagePointer& pp);
@ -147,6 +149,8 @@ private:
ordinal_t _current_ordinal = 0;
// iterator owned compress codec, should NOT be shared by threads, initialized before used
BlockCompressionCodec* _compress_codec = nullptr;
bool _is_pk_index = false;
};
} // namespace segment_v2

View File

@ -238,7 +238,14 @@ Status ExecEnv::_init_mem_env() {
<< ". Please modify the 'storage_page_cache_shard_size' parameter in your "
"conf file to be a power of two for better performance.";
}
StoragePageCache::create_global_cache(storage_cache_limit, index_percentage, num_shards);
int64_t pk_storage_page_cache_limit =
ParseUtil::parse_mem_spec(config::pk_storage_page_cache_limit, MemInfo::mem_limit(),
MemInfo::physical_mem(), &is_percent);
while (!is_percent && pk_storage_page_cache_limit > MemInfo::mem_limit() / 2) {
pk_storage_page_cache_limit = storage_cache_limit / 2;
}
StoragePageCache::create_global_cache(storage_cache_limit, index_percentage,
pk_storage_page_cache_limit, num_shards);
LOG(INFO) << "Storage page cache memory limit: "
<< PrettyPrinter::print(storage_cache_limit, TUnit::BYTES)
<< ", origin config value: " << config::storage_page_cache_limit;

View File

@ -113,6 +113,12 @@ void MemInfo::process_cache_gc(int64_t& freed_mem) {
StoragePageCache::instance()->get_page_cache_mem_consumption(segment_v2::DATA_PAGE);
StoragePageCache::instance()->prune(segment_v2::DATA_PAGE);
}
if (StoragePageCache::instance()->get_page_cache_mem_consumption(
segment_v2::PRIMARY_KEY_INDEX_PAGE) > min_free_size) {
freed_mem += StoragePageCache::instance()->get_page_cache_mem_consumption(
segment_v2::PRIMARY_KEY_INDEX_PAGE);
StoragePageCache::instance()->prune(segment_v2::PRIMARY_KEY_INDEX_PAGE);
}
}
// step1: free all cache

View File

@ -34,7 +34,7 @@ public:
// All cache space is allocated to data pages
TEST(StoragePageCacheTest, data_page_only) {
StoragePageCache cache(kNumShards * 2048, 0, kNumShards);
StoragePageCache cache(kNumShards * 2048, 0, 0, kNumShards);
StoragePageCache::CacheKey key("abc", 0, 0);
StoragePageCache::CacheKey memory_key("mem", 0, 0);
@ -100,7 +100,7 @@ TEST(StoragePageCacheTest, data_page_only) {
// All cache space is allocated to index pages
TEST(StoragePageCacheTest, index_page_only) {
StoragePageCache cache(kNumShards * 2048, 100, kNumShards);
StoragePageCache cache(kNumShards * 2048, 100, 0, kNumShards);
StoragePageCache::CacheKey key("abc", 0, 0);
StoragePageCache::CacheKey memory_key("mem", 0, 0);
@ -166,7 +166,7 @@ TEST(StoragePageCacheTest, index_page_only) {
// Cache space is allocated by index_page_cache_ratio
TEST(StoragePageCacheTest, mixed_pages) {
StoragePageCache cache(kNumShards * 2048, 10, kNumShards);
StoragePageCache cache(kNumShards * 2048, 10, 0, kNumShards);
StoragePageCache::CacheKey data_key("data", 0, 0);
StoragePageCache::CacheKey index_key("index", 0, 0);

View File

@ -39,7 +39,7 @@ int main(int argc, char** argv) {
doris::ExecEnv::GetInstance()->init_mem_tracker();
doris::thread_context()->thread_mem_tracker_mgr->init();
doris::TabletSchemaCache::create_global_schema_cache();
doris::StoragePageCache::create_global_cache(1 << 30, 10);
doris::StoragePageCache::create_global_cache(1 << 30, 10, 0);
doris::SegmentLoader::create_global_instance(1000);
std::string conf = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
if (!doris::config::init(conf.c_str(), false)) {

View File

@ -586,7 +586,7 @@ int main(int argc, char** argv) {
gflags::SetUsageMessage(usage);
google::ParseCommandLineFlags(&argc, &argv, true);
doris::StoragePageCache::create_global_cache(1 << 30, 10);
doris::StoragePageCache::create_global_cache(1 << 30, 10, 0);
doris::MultiBenchmark multi_bm;
multi_bm.add_bm();

View File

@ -59,6 +59,7 @@ enum PageTypePB {
INDEX_PAGE = 2;
DICTIONARY_PAGE = 3;
SHORT_KEY_PAGE = 4;
PRIMARY_KEY_INDEX_PAGE = 5;
}
message DataPageFooterPB {