[enhance](memory) GC inverted index cache when there is not enough memory (#19622)

Support GC of the inverted index caches when there is not enough memory.
Previous problem: the inverted index caches (InvertedIndexSearcherCache and InvertedIndexQueryCache) could use up to 20% of memory that could not be released.
Authored by ZhangYu0123 on 2023-05-18 16:41:51 +08:00, committed by GitHub
parent fd4fa5c64e
commit 07bbf741fb
10 changed files with 119 additions and 16 deletions
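At the heart of the change is a time-based eviction rule: a cache entry becomes GC-eligible once it has not been visited for index_cache_entry_no_visit_gc_time_s seconds. As a standalone illustration of that condition (the real predicates live in the two prune() methods added below; this helper is hypothetical, not part of the commit):

// Hypothetical helper: an entry may be pruned once its last visit is older than
// the configured no-visit window (the config value is in seconds, the timestamps
// are in milliseconds, matching the prune() code below).
bool is_gc_eligible(int64_t last_visit_time_ms, int64_t now_ms) {
    return last_visit_time_ms + config::index_cache_entry_no_visit_gc_time_s * 1000 < now_ms;
}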

@@ -953,6 +953,8 @@ DEFINE_Bool(enable_file_cache_query_limit, "false");
// inverted index searcher cache
// cache entry stay time after lookup, default 1h
DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "3600");
// cache entries that have not been visited for a certain period of time can be cleaned up by the GC thread
DEFINE_mInt32(index_cache_entry_no_visit_gc_time_s, "3600");
// inverted index searcher cache size
DEFINE_String(inverted_index_searcher_cache_limit, "10%");
// set `true` to enable insert searcher into cache when write inverted index data

@@ -969,6 +969,8 @@ DECLARE_Bool(enable_file_cache_query_limit);
// inverted index searcher cache
// cache entry stay time after lookup, default 1h
DECLARE_mInt32(index_cache_entry_stay_time_after_lookup_s);
// cache entries that have not been visited for a certain period of time can be cleaned up by the GC thread
DECLARE_mInt32(index_cache_entry_no_visit_gc_time_s);
// inverted index searcher cache size
DECLARE_String(inverted_index_searcher_cache_limit);
// set `true` to enable insert searcher into cache when write inverted index data
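Both settings are plain BE config options, so the GC window can be tuned per deployment, for example in be.conf (illustrative values only, not recommendations):

# evict inverted index cache entries that have not been visited for 30 minutes
index_cache_entry_no_visit_gc_time_s = 1800
# searcher cache capacity; the existing default is shown for reference
inverted_index_searcher_cache_limit = 10%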

@@ -114,7 +114,7 @@ public:
// keep it after query, since query will try to read null_bitmap and put it to cache
InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap_cache_handle));
roaring::Roaring* null_bitmap = null_bitmap_cache_handle.get_bitmap();
std::shared_ptr<roaring::Roaring> null_bitmap = null_bitmap_cache_handle.get_bitmap();
if (null_bitmap) {
*bitmap -= *null_bitmap;
}

@@ -245,7 +245,7 @@ public:
// keep it after query, since query will try to read null_bitmap and put it to cache
InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap_cache_handle));
roaring::Roaring* null_bitmap = null_bitmap_cache_handle.get_bitmap();
std::shared_ptr<roaring::Roaring> null_bitmap = null_bitmap_cache_handle.get_bitmap();
if (null_bitmap) {
*result -= *null_bitmap;
}

@@ -69,7 +69,7 @@ Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* ite
// keep it after query, since query will try to read null_bitmap and put it to cache
InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap_cache_handle));
roaring::Roaring* null_bitmap = null_bitmap_cache_handle.get_bitmap();
std::shared_ptr<roaring::Roaring> null_bitmap = null_bitmap_cache_handle.get_bitmap();
if (null_bitmap) {
*bitmap -= *null_bitmap;
}

@@ -59,7 +59,7 @@ Status NullPredicate::evaluate(const Schema& schema, InvertedIndexIterator* iter
// and be treated as false in WHERE
InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
RETURN_IF_ERROR(iterator->read_null_bitmap(&null_bitmap_cache_handle));
roaring::Roaring* null_bitmap = null_bitmap_cache_handle.get_bitmap();
std::shared_ptr<roaring::Roaring> null_bitmap = null_bitmap_cache_handle.get_bitmap();
if (null_bitmap) {
if (_is_null) {
*bitmap &= *null_bitmap;

@@ -179,6 +179,39 @@ Status InvertedIndexSearcherCache::erase(const std::string& index_file_path) {
return Status::OK();
}
int64_t InvertedIndexSearcherCache::prune() {
if (_cache) {
const int64_t curtime = UnixMillis();
int64_t byte_size = 0L;
auto pred = [curtime, &byte_size](const void* value) -> bool {
InvertedIndexSearcherCache::CacheValue* cache_value =
(InvertedIndexSearcherCache::CacheValue*)value;
if ((cache_value->last_visit_time +
config::index_cache_entry_no_visit_gc_time_s * 1000) < curtime) {
byte_size += cache_value->size;
return true;
}
return false;
};
MonotonicStopWatch watch;
watch.start();
// Prune cache in lazy mode to save cpu and minimize the time holding write lock
int64_t prune_num = _cache->prune_if(pred, true);
LOG(INFO) << "prune " << prune_num << " entries in inverted index cache. cost(ms): "
<< watch.elapsed_time() / 1000 / 1000;
return byte_size;
}
return 0L;
}
int64_t InvertedIndexSearcherCache::mem_consumption() {
if (_cache) {
return _cache->mem_consumption();
}
return 0L;
}
bool InvertedIndexSearcherCache::_lookup(const InvertedIndexSearcherCache::CacheKey& key,
InvertedIndexCacheHandle* handle) {
auto lru_handle = _cache->lookup(key.index_file_path);
@@ -213,14 +246,55 @@ bool InvertedIndexQueryCache::lookup(const CacheKey& key, InvertedIndexQueryCach
return true;
}
void InvertedIndexQueryCache::insert(const CacheKey& key, roaring::Roaring* bitmap,
void InvertedIndexQueryCache::insert(const CacheKey& key, std::shared_ptr<roaring::Roaring> bitmap,
InvertedIndexQueryCacheHandle* handle) {
auto deleter = [](const doris::CacheKey& key, void* value) { delete (roaring::Roaring*)value; };
auto deleter = [](const doris::CacheKey& key, void* value) {
delete (InvertedIndexQueryCache::CacheValue*)value;
};
auto lru_handle = _cache->insert(key.encode(), (void*)bitmap, bitmap->getSizeInBytes(), deleter,
CachePriority::NORMAL);
std::unique_ptr<InvertedIndexQueryCache::CacheValue> cache_value_ptr =
std::make_unique<InvertedIndexQueryCache::CacheValue>();
cache_value_ptr->last_visit_time = UnixMillis();
cache_value_ptr->bitmap = bitmap;
cache_value_ptr->size = bitmap->getSizeInBytes();
auto lru_handle = _cache->insert(key.encode(), (void*)cache_value_ptr.release(),
bitmap->getSizeInBytes(), deleter, CachePriority::NORMAL);
*handle = InvertedIndexQueryCacheHandle(_cache.get(), lru_handle);
}
int64_t InvertedIndexQueryCache::prune() {
if (_cache) {
const int64_t curtime = UnixMillis();
int64_t byte_size = 0L;
auto pred = [curtime, &byte_size](const void* value) -> bool {
InvertedIndexQueryCache::CacheValue* cache_value =
(InvertedIndexQueryCache::CacheValue*)value;
if ((cache_value->last_visit_time +
config::index_cache_entry_no_visit_gc_time_s * 1000) < curtime) {
byte_size += cache_value->size;
return true;
}
return false;
};
MonotonicStopWatch watch;
watch.start();
// Prune cache in lazy mode to save cpu and minimize the time holding write lock
int64_t prune_num = _cache->prune_if(pred, true);
LOG(INFO) << "prune " << prune_num << " entries in inverted index cache. cost(ms): "
<< watch.elapsed_time() / 1000 / 1000;
return byte_size;
}
return 0L;
}
int64_t InvertedIndexQueryCache::mem_consumption() {
if (_cache) {
return _cache->mem_consumption();
}
return 0L;
}
} // namespace segment_v2
} // namespace doris
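Taken together, mem_consumption() reports the cache's current footprint and prune() frees idle entries and returns the number of bytes released; the memory GC path added in mem_info.cpp below composes them as a check-then-prune step. A minimal sketch of that composition (log wording illustrative):

// Sketch only: mirrors the pattern wired into MemInfo::process_cache_gc() below.
auto* query_cache = segment_v2::InvertedIndexQueryCache::instance();
int64_t used = query_cache->mem_consumption();
if (used > min_free_size) {                 // min_free_size is supplied by the GC caller
    int64_t freed = query_cache->prune();   // drops entries idle longer than
                                            // index_cache_entry_no_visit_gc_time_s
    LOG(INFO) << "inverted index query cache GC freed " << freed << " of " << used << " bytes";
}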

@@ -96,6 +96,10 @@ public:
// function `erase` called after compaction remove segment
Status erase(const std::string& index_file_path);
int64_t prune();
int64_t mem_consumption();
private:
InvertedIndexSearcherCache();
@@ -204,7 +208,13 @@ public:
}
};
using CacheValue = roaring::Roaring;
struct CacheValue {
// Save the last visit time of this cache entry.
// Use atomic because it may be modified by multi threads.
std::atomic<int64_t> last_visit_time = 0;
std::shared_ptr<roaring::Roaring> bitmap;
size_t size = 0;
};
// Create global instance of this class
static void create_global_cache(size_t capacity, int32_t index_cache_percentage,
@@ -227,9 +237,13 @@ public:
bool lookup(const CacheKey& key, InvertedIndexQueryCacheHandle* handle);
void insert(const CacheKey& key, roaring::Roaring* bitmap,
void insert(const CacheKey& key, std::shared_ptr<roaring::Roaring> bitmap,
InvertedIndexQueryCacheHandle* handle);
int64_t prune();
int64_t mem_consumption();
private:
static InvertedIndexQueryCache* _s_instance;
std::unique_ptr<Cache> _cache {nullptr};
@@ -263,11 +277,11 @@ public:
Cache* cache() const { return _cache; }
Slice data() const { return _cache->value_slice(_handle); }
InvertedIndexQueryCache::CacheValue* get_bitmap() const {
std::shared_ptr<roaring::Roaring> get_bitmap() const {
if (!_cache) {
return nullptr;
}
return ((InvertedIndexQueryCache::CacheValue*)_cache->value(_handle));
return ((InvertedIndexQueryCache::CacheValue*)_cache->value(_handle))->bitmap;
}
private:
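The reworked header ties the pieces together: CacheValue now wraps a std::shared_ptr<roaring::Roaring> together with its last visit time and size, and get_bitmap() hands the shared_ptr back to callers, so a bitmap fetched from the cache stays usable even if the GC thread later evicts the entry. A condensed caller-side sketch under these declarations (cache_key construction elided; the control flow is an assumption based on the reader changes below):

InvertedIndexQueryCacheHandle handle;
auto* cache = InvertedIndexQueryCache::instance();
std::shared_ptr<roaring::Roaring> bitmap;
if (cache->lookup(cache_key, &handle)) {
    bitmap = handle.get_bitmap();                  // shared ownership with the cache entry
} else {
    bitmap = std::make_shared<roaring::Roaring>();
    // ... fill the bitmap from the on-disk index ...
    cache->insert(cache_key, bitmap, &handle);     // stamps last_visit_time and records size
}
// `bitmap` stays valid here regardless of what prune() does to the underlying entry.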

@@ -110,7 +110,7 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach
}
// ownership of null_bitmap and its deletion will be transferred to cache
roaring::Roaring* null_bitmap = new roaring::Roaring();
std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
auto null_bitmap_file_name = InvertedIndexDescriptor::get_temporary_null_bitmap_file_name();
if (dir->fileExists(null_bitmap_file_name.c_str())) {
null_bitmap_in = dir->openInput(null_bitmap_file_name.c_str());
@@ -226,7 +226,7 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, const std::string
bool first = true;
bool null_bitmap_already_read = false;
for (auto token_ws : analyse_result) {
roaring::Roaring* term_match_bitmap = nullptr;
std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr;
// try to get term bitmap match result from cache to avoid query index on cache hit
auto cache = InvertedIndexQueryCache::instance();
@@ -247,7 +247,7 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, const std::string
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
}
term_match_bitmap = new roaring::Roaring();
term_match_bitmap = std::make_shared<roaring::Roaring>();
// unique_ptr with custom deleter
std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> term {
_CLNEW lucene::index::Term(field_ws.c_str(), token_ws.c_str()),
@@ -437,7 +437,8 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
}
// add to cache
roaring::Roaring* term_match_bitmap = new roaring::Roaring(result);
std::shared_ptr<roaring::Roaring> term_match_bitmap =
std::make_shared<roaring::Roaring>(result);
term_match_bitmap->runOptimize();
cache->insert(cache_key, term_match_bitmap, &cache_handle);
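One detail worth noting in the caching path above: the bitmap is run-optimized before it is handed to insert(), so the size charged against the cache capacity (bitmap->getSizeInBytes() inside insert()) reflects the compacted representation rather than the raw one. The same lines, annotated (names as in the diff):

std::shared_ptr<roaring::Roaring> term_match_bitmap =
        std::make_shared<roaring::Roaring>(result);          // copy the query result into a shared bitmap
term_match_bitmap->runOptimize();                            // convert to run containers where that is smaller
cache->insert(cache_key, term_match_bitmap, &cache_handle);  // charge is getSizeInBytes() of the optimized bitmap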

@@ -39,6 +39,7 @@
#include "common/status.h"
#include "gutil/strings/split.h"
#include "olap/page_cache.h"
#include "olap/rowset/segment_v2/inverted_index_cache.h"
#include "olap/segment_loader.h"
#include "runtime/memory/chunk_allocator.h"
#include "runtime/memory/mem_tracker_limiter.h"
@@ -113,6 +114,15 @@ void MemInfo::process_cache_gc(int64_t& freed_mem) {
StoragePageCache::instance()->get_page_cache_mem_consumption(segment_v2::DATA_PAGE);
StoragePageCache::instance()->prune(segment_v2::DATA_PAGE);
}
if (segment_v2::InvertedIndexSearcherCache::instance()->mem_consumption() > min_free_size) {
freed_mem += segment_v2::InvertedIndexSearcherCache::instance()->prune();
}
if (segment_v2::InvertedIndexQueryCache::instance()->mem_consumption() > min_free_size) {
freed_mem += segment_v2::InvertedIndexQueryCache::instance()->prune();
}
if (StoragePageCache::instance()->get_page_cache_mem_consumption(
segment_v2::PRIMARY_KEY_INDEX_PAGE) > min_free_size) {
freed_mem += StoragePageCache::instance()->get_page_cache_mem_consumption(