[Improvement](inverted index) inverted index query match bitmap cache (#16578)

Add a cache for inverted index query match bitmaps to accelerate queries on common keywords, especially keywords that match many rows.

Test results:
- large result: matching 99% of 247 million rows shows an 8x speedup.
- small result: matching 0.1% of 247 million rows shows a 2x speedup.
This commit is contained in:
Kang
2023-02-11 13:38:58 +08:00
committed by GitHub
parent 37d1519316
commit aba843bb2b
16 changed files with 419 additions and 147 deletions

View File

@ -894,6 +894,9 @@ CONF_String(inverted_index_searcher_cache_limit, "10%");
CONF_Bool(enable_write_index_searcher_cache, "true");
CONF_Bool(enable_inverted_index_cache_check_timestamp, "true");
// inverted index match bitmap cache size
CONF_String(inverted_index_query_cache_limit, "10%");
// inverted index
CONF_mDouble(inverted_index_ram_buffer_size, "512");
CONF_Int32(query_bkd_inverted_index_limit_percent, "5"); // 5%

View File

@ -344,6 +344,14 @@ struct OlapReaderStatistics {
int64_t rows_inverted_index_filtered = 0;
int64_t inverted_index_filter_timer = 0;
int64_t inverted_index_query_timer = 0;
int64_t inverted_index_query_cache_hit = 0;
int64_t inverted_index_query_cache_miss = 0;
int64_t inverted_index_query_bitmap_copy_timer = 0;
int64_t inverted_index_query_bitmap_op_timer = 0;
int64_t inverted_index_searcher_open_timer = 0;
int64_t inverted_index_searcher_search_timer = 0;
int64_t inverted_index_searcher_bitmap_timer = 0;
int64_t output_index_result_column_timer = 0;
// number of segment filtered by column stat when creating seg iterator

View File

@ -187,10 +187,11 @@ Status ColumnReader::new_bitmap_index_iterator(BitmapIndexIterator** iterator) {
}
// Creates an inverted index iterator for this column.
// First makes sure the inverted index described by `index_meta` is loaded and
// propagates any error from that step. If an inverted index reader is
// available, iterator creation is delegated to it; otherwise `*iterator` is
// left untouched and OK is returned.
Status ColumnReader::new_inverted_index_iterator(const TabletIndex* index_meta,
OlapReaderStatistics* stats,
InvertedIndexIterator** iterator) {
RETURN_IF_ERROR(_ensure_inverted_index_loaded(index_meta));
if (_inverted_index) {
RETURN_IF_ERROR(_inverted_index->new_iterator(index_meta, iterator));
// `stats` is forwarded so the created iterator can record query statistics.
RETURN_IF_ERROR(_inverted_index->new_iterator(index_meta, stats, iterator));
}
return Status::OK();
}

View File

@ -105,7 +105,7 @@ public:
// Client should delete returned iterator
Status new_bitmap_index_iterator(BitmapIndexIterator** iterator);
Status new_inverted_index_iterator(const TabletIndex* index_meta,
Status new_inverted_index_iterator(const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexIterator** iterator);
// Seek to the first entry in the column.

View File

@ -70,7 +70,7 @@ Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr&
const std::string& index_dir,
const std::string& file_name,
InvertedIndexCacheHandle* cache_handle,
bool use_cache) {
OlapReaderStatistics* stats, bool use_cache) {
auto file_path = index_dir + "/" + file_name;
using namespace std::chrono;
@ -85,12 +85,14 @@ Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr&
cache_handle->owned = false;
return Status::OK();
}
cache_handle->owned = !use_cache;
IndexSearcherPtr index_searcher = nullptr;
auto mem_tracker =
std::unique_ptr<MemTracker>(new MemTracker("InvertedIndexSearcherCacheWithRead"));
#ifndef BE_TEST
{
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get());
index_searcher = build_index_searcher(fs, index_dir, file_name);
}
@ -169,5 +171,25 @@ Cache::Handle* InvertedIndexSearcherCache::_insert(const InvertedIndexSearcherCa
return lru_handle;
}
InvertedIndexQueryCache* InvertedIndexQueryCache::_s_instance = nullptr;
// Looks up the entry for `key` in the underlying LRU cache.
// Returns true and stores a handle to the entry in `*handle` on a hit.
// Returns false and leaves `*handle` unchanged on a miss.
bool InvertedIndexQueryCache::lookup(const CacheKey& key, InvertedIndexQueryCacheHandle* handle) {
    auto* found = _cache->lookup(key.encode());
    if (found != nullptr) {
        *handle = InvertedIndexQueryCacheHandle(_cache.get(), found);
        return true;
    }
    return false;
}
// Inserts `bitmap` into the LRU cache under `key` and stores a handle to the
// new entry in `*handle`.
// The bitmap is handed to the cache together with a deleter that `delete`s it,
// so the caller must pass a heap-allocated bitmap and must not free it itself.
// The entry is charged with the bitmap's getSizeInBytes() value.
void InvertedIndexQueryCache::insert(const CacheKey& key, roaring::Roaring* bitmap,
                                     InvertedIndexQueryCacheHandle* handle) {
    auto free_bitmap = [](const doris::CacheKey& key, void* value) {
        delete static_cast<roaring::Roaring*>(value);
    };
    const auto charge = bitmap->getSizeInBytes();
    auto* inserted = _cache->insert(key.encode(), static_cast<void*>(bitmap), charge,
                                    free_bitmap, CachePriority::NORMAL);
    *handle = InvertedIndexQueryCacheHandle(_cache.get(), inserted);
}
} // namespace segment_v2
} // namespace doris

View File

@ -23,6 +23,7 @@
#include <map>
#include <memory>
#include <mutex>
#include <roaring/roaring.hh>
#include <vector>
#include "io/fs/file_system.h"
@ -71,7 +72,7 @@ public:
Status get_index_searcher(const io::FileSystemSPtr& fs, const std::string& index_dir,
const std::string& file_name, InvertedIndexCacheHandle* cache_handle,
bool use_cache = true);
OlapReaderStatistics* stats, bool use_cache = true);
// function `insert` called after inverted index writer close
Status insert(const io::FileSystemSPtr& fs, const std::string& index_dir,
@ -163,5 +164,102 @@ private:
DISALLOW_COPY_AND_ASSIGN(InvertedIndexCacheHandle);
};
enum class InvertedIndexQueryType;
class InvertedIndexQueryCacheHandle;
class InvertedIndexQueryCache {
public:
// cache key
struct CacheKey {
io::Path index_path; // index file path
std::string column_name; // column name
InvertedIndexQueryType query_type; // query type
std::wstring value; // query value
// Encode to a flat binary which can be used as LRUCache's key
std::string encode() const {
std::string key_buf(index_path.string());
key_buf.append("/");
key_buf.append(column_name);
key_buf.append("/");
key_buf.append(1, static_cast<char>(query_type));
key_buf.append("/");
key_buf.append(lucene::util::Misc::toString(value.c_str()));
return key_buf;
}
};
using CacheValue = roaring::Roaring;
// Create global instance of this class
static void create_global_cache(size_t capacity, int32_t index_cache_percentage,
uint32_t num_shards = 16) {
DCHECK(_s_instance == nullptr);
static InvertedIndexQueryCache instance(capacity, index_cache_percentage, num_shards);
_s_instance = &instance;
}
// Return global instance.
// Client should call create_global_cache before.
static InvertedIndexQueryCache* instance() { return _s_instance; }
InvertedIndexQueryCache() = delete;
InvertedIndexQueryCache(size_t capacity, int32_t index_cache_percentage, uint32_t num_shards) {
_cache = std::unique_ptr<Cache>(
new_lru_cache("InvertedIndexQueryCache", capacity, LRUCacheType::SIZE, num_shards));
}
bool lookup(const CacheKey& key, InvertedIndexQueryCacheHandle* handle);
void insert(const CacheKey& key, roaring::Roaring* bitmap,
InvertedIndexQueryCacheHandle* handle);
private:
static InvertedIndexQueryCache* _s_instance;
std::unique_ptr<Cache> _cache {nullptr};
};
// Move-only handle to one entry of the inverted index query cache.
// If the handle holds an entry, the entry is released back to the cache when
// the handle is destroyed.
class InvertedIndexQueryCacheHandle {
public:
    // An empty handle: refers to no cache and no entry.
    InvertedIndexQueryCacheHandle() = default;

    InvertedIndexQueryCacheHandle(Cache* cache, Cache::Handle* handle)
            : _cache(cache), _handle(handle) {}

    ~InvertedIndexQueryCacheHandle() {
        if (_handle == nullptr) {
            return;
        }
        _cache->release(_handle);
    }

    // Takes over the entry held by `other`; `other` ends up empty because this
    // object's members start out as nullptr before the swap.
    InvertedIndexQueryCacheHandle(InvertedIndexQueryCacheHandle&& other) noexcept {
        std::swap(_handle, other._handle);
        std::swap(_cache, other._cache);
    }

    // Exchanges contents with `other`; whatever this handle held before is
    // released when `other` is destroyed.
    InvertedIndexQueryCacheHandle& operator=(InvertedIndexQueryCacheHandle&& other) noexcept {
        std::swap(_handle, other._handle);
        std::swap(_cache, other._cache);
        return *this;
    }

    Cache* cache() const { return _cache; }

    // Raw value slice of the held entry. Dereferences the cache pointer, so
    // the handle must not be empty.
    Slice data() const { return _cache->value_slice(_handle); }

    // The cached match bitmap of the held entry. Dereferences the cache
    // pointer, so the handle must not be empty.
    InvertedIndexQueryCache::CacheValue* match_bitmap() const {
        return static_cast<InvertedIndexQueryCache::CacheValue*>(_cache->value(_handle));
    }

private:
    Cache* _cache = nullptr;
    Cache::Handle* _handle = nullptr;

    // Copying is disabled; only moves are allowed.
    DISALLOW_COPY_AND_ASSIGN(InvertedIndexQueryCacheHandle);
};
} // namespace segment_v2
} // namespace doris

View File

@ -100,19 +100,27 @@ std::vector<std::wstring> FullTextIndexReader::get_analyse_result(
return analyse_result;
}
// Allocates a new InvertedIndexIterator bound to this reader and stores it in
// `*iterator`. The iterator is created with `new`; the caller is responsible
// for deleting it. Always returns OK.
Status FullTextIndexReader::new_iterator(const TabletIndex* index_meta,
Status FullTextIndexReader::new_iterator(const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexIterator** iterator) {
*iterator = new InvertedIndexIterator(index_meta, this);
*iterator = new InvertedIndexIterator(index_meta, stats, this);
return Status::OK();
}
Status FullTextIndexReader::query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
Status FullTextIndexReader::query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type,
roaring::Roaring* bit_map) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);
std::string search_str = reinterpret_cast<const StringRef*>(query_value)->to_string();
VLOG_DEBUG << column_name
<< " begin to load the fulltext index from clucene, query_str=" << search_str;
LOG(INFO) << column_name << " begin to search the fulltext index from clucene, query_str ["
<< search_str << "]";
io::Path path(_path);
auto index_dir = path.parent_path();
auto index_file_name = InvertedIndexDescriptor::get_index_file_name(path.filename(), _index_id);
auto index_file_path = index_dir / index_file_name;
std::unique_ptr<lucene::search::Query> query;
std::wstring field_ws = std::wstring(column_name.begin(), column_name.end());
try {
@ -125,75 +133,93 @@ Status FullTextIndexReader::query(const std::string& column_name, const void* qu
return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>();
}
switch (query_type) {
case InvertedIndexQueryType::MATCH_ANY_QUERY: {
query.reset(_CLNEW lucene::search::BooleanQuery());
for (auto token_ws : analyse_result) {
lucene::index::Term* term =
_CLNEW lucene::index::Term(field_ws.c_str(), token_ws.c_str());
static_cast<lucene::search::BooleanQuery*>(query.get())
->add(_CLNEW lucene::search::TermQuery(term), true,
lucene::search::BooleanClause::SHOULD);
_CLDECDELETE(term);
roaring::Roaring query_match_bitmap;
bool first = true;
for (auto token : analyse_result) {
roaring::Roaring* term_match_bitmap = nullptr;
// try to get term bitmap match result from cache to avoid query index on cache hit
auto cache = InvertedIndexQueryCache::instance();
// use EQUAL_QUERY type here since cache is for each term/token
InvertedIndexQueryCache::CacheKey cache_key {
index_file_path, column_name, InvertedIndexQueryType::EQUAL_QUERY, token};
InvertedIndexQueryCacheHandle cache_handle;
if (cache->lookup(cache_key, &cache_handle)) {
stats->inverted_index_query_cache_hit++;
term_match_bitmap = cache_handle.match_bitmap();
} else {
stats->inverted_index_query_cache_miss++;
term_match_bitmap = new roaring::Roaring();
std::wstring token_ws = std::wstring(token.begin(), token.end());
// unique_ptr with custom deleter
std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> term {
_CLNEW lucene::index::Term(field_ws.c_str(), token_ws.c_str()),
[](lucene::index::Term* term) { _CLDECDELETE(term); }};
query.reset(new lucene::search::TermQuery(term.get()));
InvertedIndexCacheHandle inverted_index_cache_handle;
InvertedIndexSearcherCache::instance()->get_index_searcher(
_fs, index_dir.c_str(), index_file_name, &inverted_index_cache_handle,
stats);
auto index_searcher = inverted_index_cache_handle.get_index_searcher();
try {
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer);
index_searcher->_search(
query.get(), [&term_match_bitmap, stats](const int32_t docid,
const float_t /*score*/) {
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_bitmap_timer);
// docid equal to rowid in segment
term_match_bitmap->add(docid);
});
} catch (const CLuceneError& e) {
LOG(WARNING) << "CLuceneError occured: " << e.what();
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
}
{
// add to cache
term_match_bitmap->runOptimize();
cache->insert(cache_key, term_match_bitmap, &cache_handle);
}
}
// add to query_match_bitmap
if (first) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
query_match_bitmap = *term_match_bitmap;
first = false;
continue;
}
switch (query_type) {
case InvertedIndexQueryType::MATCH_ANY_QUERY: {
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_op_timer);
query_match_bitmap |= *term_match_bitmap;
break;
}
case InvertedIndexQueryType::MATCH_ALL_QUERY: {
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_op_timer);
query_match_bitmap &= *term_match_bitmap;
break;
}
case InvertedIndexQueryType::MATCH_PHRASE_QUERY: {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
break;
}
default: {
LOG(ERROR) << "fulltext query do not support query type other than match.";
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
}
break;
}
case InvertedIndexQueryType::MATCH_ALL_QUERY: {
query.reset(_CLNEW lucene::search::BooleanQuery());
for (auto token_ws : analyse_result) {
lucene::index::Term* term =
_CLNEW lucene::index::Term(field_ws.c_str(), token_ws.c_str());
static_cast<lucene::search::BooleanQuery*>(query.get())
->add(_CLNEW lucene::search::TermQuery(term), true,
lucene::search::BooleanClause::MUST);
_CLDECDELETE(term);
}
break;
}
case InvertedIndexQueryType::MATCH_PHRASE_QUERY: {
LOG(WARNING) << "match phrase of fulltext is not supported";
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
}
default:
LOG(ERROR) << "fulltext query do not support query type other than match, column: "
<< column_name;
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>();
}
bit_map->swap(query_match_bitmap);
return Status::OK();
} catch (const CLuceneError& e) {
LOG(WARNING) << "CLuceneError occured, error msg: " << e.what();
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
}
io::Path path(_path);
auto index_dir = path.parent_path();
auto index_file_name = InvertedIndexDescriptor::get_index_file_name(path.filename(), _index_id);
// check index file existence
auto index_file_path = index_dir / index_file_name;
if (!indexExists(index_file_path)) {
LOG(WARNING) << "inverted index path: " << index_file_path.string() << " not exist.";
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
}
roaring::Roaring result;
InvertedIndexCacheHandle inverted_index_cache_handle;
InvertedIndexSearcherCache::instance()->get_index_searcher(
_fs, index_dir.c_str(), index_file_name, &inverted_index_cache_handle);
auto index_searcher = inverted_index_cache_handle.get_index_searcher();
try {
index_searcher->_search(query.get(),
[&result](const int32_t docid, const float_t /*score*/) {
// docid equal to rowid in segment
result.add(docid);
});
} catch (const CLuceneError& e) {
LOG(WARNING) << "CLuceneError occured, error msg: " << e.what();
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
}
bit_map->swap(result);
return Status::OK();
}
InvertedIndexReaderType FullTextIndexReader::type() {
@ -201,32 +227,53 @@ InvertedIndexReaderType FullTextIndexReader::type() {
}
// Allocates a new InvertedIndexIterator bound to this reader and stores it in
// `*iterator`. The iterator is created with `new`; the caller is responsible
// for deleting it. Always returns OK.
Status StringTypeInvertedIndexReader::new_iterator(const TabletIndex* index_meta,
OlapReaderStatistics* stats,
InvertedIndexIterator** iterator) {
*iterator = new InvertedIndexIterator(index_meta, this);
*iterator = new InvertedIndexIterator(index_meta, stats, this);
return Status::OK();
}
Status StringTypeInvertedIndexReader::query(const std::string& column_name, const void* query_value,
Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type,
roaring::Roaring* bit_map) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);
const StringRef* search_query = reinterpret_cast<const StringRef*>(query_value);
auto act_len = strnlen(search_query->data, search_query->size);
std::string search_str(search_query->data, act_len);
// std::string search_str = reinterpret_cast<const StringRef*>(query_value)->to_string();
VLOG_DEBUG << "begin to query the inverted index from clucene"
<< ", column_name: " << column_name << ", search_str: " << search_str;
std::wstring column_name_ws = std::wstring(column_name.begin(), column_name.end());
std::wstring search_str_ws = std::wstring(search_str.begin(), search_str.end());
lucene::index::Term* term =
_CLNEW lucene::index::Term(column_name_ws.c_str(), search_str_ws.c_str());
// unique_ptr with custom deleter
std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> term {
_CLNEW lucene::index::Term(column_name_ws.c_str(), search_str_ws.c_str()),
[](lucene::index::Term* term) { _CLDECDELETE(term); }};
std::unique_ptr<lucene::search::Query> query;
io::Path path(_path);
auto index_dir = path.parent_path();
auto index_file_name = InvertedIndexDescriptor::get_index_file_name(path.filename(), _index_id);
auto index_file_path = index_dir / index_file_name;
// try to get query bitmap result from cache and return immediately on cache hit
InvertedIndexQueryCache::CacheKey cache_key {index_file_path, column_name, query_type,
search_str_ws};
auto cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handle;
if (cache->lookup(cache_key, &cache_handle)) {
stats->inverted_index_query_cache_hit++;
SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
*bit_map = *cache_handle.match_bitmap();
return Status::OK();
} else {
stats->inverted_index_query_cache_miss++;
}
// check index file existence
auto index_file_path = index_dir / index_file_name;
if (!indexExists(index_file_path)) {
LOG(WARNING) << "inverted index path: " << index_file_path.string() << " not exist.";
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>();
@ -234,28 +281,23 @@ Status StringTypeInvertedIndexReader::query(const std::string& column_name, cons
switch (query_type) {
case InvertedIndexQueryType::EQUAL_QUERY: {
query.reset(new lucene::search::TermQuery(term));
_CLDECDELETE(term);
query.reset(new lucene::search::TermQuery(term.get()));
break;
}
case InvertedIndexQueryType::LESS_THAN_QUERY: {
query.reset(new lucene::search::RangeQuery(nullptr, term, false));
_CLDECDELETE(term);
query.reset(new lucene::search::RangeQuery(nullptr, term.get(), false));
break;
}
case InvertedIndexQueryType::LESS_EQUAL_QUERY: {
query.reset(new lucene::search::RangeQuery(nullptr, term, true));
_CLDECDELETE(term);
query.reset(new lucene::search::RangeQuery(nullptr, term.get(), true));
break;
}
case InvertedIndexQueryType::GREATER_THAN_QUERY: {
query.reset(new lucene::search::RangeQuery(term, nullptr, false));
_CLDECDELETE(term);
query.reset(new lucene::search::RangeQuery(term.get(), nullptr, false));
break;
}
case InvertedIndexQueryType::GREATER_EQUAL_QUERY: {
query.reset(new lucene::search::RangeQuery(term, nullptr, true));
_CLDECDELETE(term);
query.reset(new lucene::search::RangeQuery(term.get(), nullptr, true));
break;
}
default:
@ -272,13 +314,15 @@ Status StringTypeInvertedIndexReader::query(const std::string& column_name, cons
roaring::Roaring result;
InvertedIndexCacheHandle inverted_index_cache_handle;
InvertedIndexSearcherCache::instance()->get_index_searcher(
_fs, index_dir.c_str(), index_file_name, &inverted_index_cache_handle);
_fs, index_dir.c_str(), index_file_name, &inverted_index_cache_handle, stats);
auto index_searcher = inverted_index_cache_handle.get_index_searcher();
try {
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer);
index_searcher->_search(query.get(),
[&result](const int32_t docid, const float_t /*score*/) {
[&result, stats](const int32_t docid, const float_t /*score*/) {
// docid equal to rowid in segment
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_bitmap_timer);
result.add(docid);
});
} catch (const CLuceneError& e) {
@ -286,6 +330,11 @@ Status StringTypeInvertedIndexReader::query(const std::string& column_name, cons
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
}
// add to cache
roaring::Roaring* term_match_bitmap = new roaring::Roaring(result);
term_match_bitmap->runOptimize();
cache->insert(cache_key, term_match_bitmap, &cache_handle);
bit_map->swap(result);
return Status::OK();
}
@ -312,14 +361,14 @@ BkdIndexReader::BkdIndexReader(io::FileSystemSPtr fs, const std::string& path,
DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()), index_file_name.c_str());
}
// Allocates a new InvertedIndexIterator bound to this reader and stores it in
// `*iterator`. The iterator is created with `new`; the caller is responsible
// for deleting it. Always returns OK.
Status BkdIndexReader::new_iterator(const TabletIndex* index_meta,
Status BkdIndexReader::new_iterator(const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexIterator** iterator) {
*iterator = new InvertedIndexIterator(index_meta, this);
*iterator = new InvertedIndexIterator(index_meta, stats, this);
return Status::OK();
}
Status BkdIndexReader::bkd_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
Status BkdIndexReader::bkd_query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader>&& r,
InvertedIndexVisitor* visitor) {
lucene::util::bkd::bkd_reader* tmp_reader;
@ -359,15 +408,15 @@ Status BkdIndexReader::bkd_query(const std::string& column_name, const void* que
return Status::OK();
}
Status BkdIndexReader::try_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, uint32_t* count) {
uint64_t start = UnixMillis();
auto visitor = std::make_unique<InvertedIndexVisitor>(nullptr, query_type, true);
std::shared_ptr<lucene::util::bkd::bkd_reader> r;
try {
RETURN_IF_ERROR(
bkd_query(column_name, query_value, query_type, std::move(r), visitor.get()));
RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, query_type, std::move(r),
visitor.get()));
*count = r->estimate_point_count(visitor.get());
} catch (const CLuceneError& e) {
LOG(WARNING) << "BKD Query CLuceneError Occurred, error msg: " << e.what();
@ -379,21 +428,49 @@ Status BkdIndexReader::try_query(const std::string& column_name, const void* que
return Status::OK();
}
Status BkdIndexReader::query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
Status BkdIndexReader::query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);
io::Path path(_path);
auto index_dir = path.parent_path();
auto index_file_name = InvertedIndexDescriptor::get_index_file_name(path.filename(), _index_id);
auto index_file_path = index_dir / index_file_name;
// std::string query_str {(const char *)query_value};
// // try to get query bitmap result from cache and return immediately on cache hit
// InvertedIndexQueryCache::CacheKey cache_key
// {index_file_path, column_name, query_type, std::wstring(query_str.begin(), query_str.end())};
// auto cache = InvertedIndexQueryCache::instance();
// InvertedIndexQueryCacheHandle cache_handle;
// if (cache->lookup(cache_key, &cache_handle)) {
// stats->inverted_index_query_cache_hit++;
// SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer);
// *bit_map = *cache_handle.match_bitmap();
// return Status::OK();
// } else {
// stats->inverted_index_query_cache_miss++;
// }
uint64_t start = UnixMillis();
auto visitor = std::make_unique<InvertedIndexVisitor>(bit_map, query_type);
std::shared_ptr<lucene::util::bkd::bkd_reader> r;
try {
RETURN_IF_ERROR(
bkd_query(column_name, query_value, query_type, std::move(r), visitor.get()));
SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer);
RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, query_type, std::move(r),
visitor.get()));
r->intersect(visitor.get());
} catch (const CLuceneError& e) {
LOG(WARNING) << "BKD Query CLuceneError Occurred, error msg: " << e.what();
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
}
// // add to cache
// roaring::Roaring* term_match_bitmap = new roaring::Roaring(*bit_map);
// term_match_bitmap->runOptimize();
// cache->insert(cache_key, term_match_bitmap, &cache_handle);
LOG(INFO) << "BKD index search time taken: " << UnixMillis() - start << "ms "
<< " column: " << column_name << " result: " << bit_map->cardinality()
<< " reader stats: " << r->stats.to_string();
@ -631,7 +708,8 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column
}
}
RETURN_IF_ERROR(_reader->query(column_name, query_value, query_type, _analyser_type, bit_map));
RETURN_IF_ERROR(
_reader->query(_stats, column_name, query_value, query_type, _analyser_type, bit_map));
return Status::OK();
}
@ -645,8 +723,8 @@ Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& co
query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY ||
query_type == InvertedIndexQueryType::LESS_THAN_QUERY ||
query_type == InvertedIndexQueryType::EQUAL_QUERY) {
RETURN_IF_ERROR(
_reader->try_query(column_name, query_value, query_type, _analyser_type, count));
RETURN_IF_ERROR(_reader->try_query(_stats, column_name, query_value, query_type,
_analyser_type, count));
}
return Status::OK();
}

View File

@ -69,13 +69,13 @@ public:
virtual ~InvertedIndexReader() = default;
// create a new column iterator. Client should delete returned iterator
virtual Status new_iterator(const TabletIndex* index_meta,
virtual Status new_iterator(const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexIterator** iterator) = 0;
virtual Status query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
roaring::Roaring* bit_map) = 0;
virtual Status try_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
virtual Status query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) = 0;
virtual Status try_query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, uint32_t* count) = 0;
virtual InvertedIndexReaderType type() = 0;
@ -98,13 +98,14 @@ public:
: InvertedIndexReader(std::move(fs), path, uniq_id) {}
~FullTextIndexReader() override = default;
Status new_iterator(const TabletIndex* index_meta, InvertedIndexIterator** iterator) override;
Status query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
roaring::Roaring* bit_map) override;
Status try_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
uint32_t* count) override {
Status new_iterator(const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexIterator** iterator) override;
Status query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) override;
Status try_query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, uint32_t* count) override {
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
}
@ -122,13 +123,14 @@ public:
: InvertedIndexReader(std::move(fs), path, uniq_id) {}
~StringTypeInvertedIndexReader() override = default;
Status new_iterator(const TabletIndex* index_meta, InvertedIndexIterator** iterator) override;
Status query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
roaring::Roaring* bit_map) override;
Status try_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
uint32_t* count) override {
Status new_iterator(const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexIterator** iterator) override;
Status query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) override;
Status try_query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, uint32_t* count) override {
return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
}
InvertedIndexReaderType type() override;
@ -179,16 +181,17 @@ public:
}
}
Status new_iterator(const TabletIndex* index_meta, InvertedIndexIterator** iterator) override;
Status new_iterator(const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexIterator** iterator) override;
Status query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
roaring::Roaring* bit_map) override;
Status try_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type,
uint32_t* count) override;
Status bkd_query(const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
Status query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, roaring::Roaring* bit_map) override;
Status try_query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
InvertedIndexParserType analyser_type, uint32_t* count) override;
Status bkd_query(OlapReaderStatistics* stats, const std::string& column_name,
const void* query_value, InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader>&& r,
InvertedIndexVisitor* visitor);
@ -203,8 +206,10 @@ private:
class InvertedIndexIterator {
public:
InvertedIndexIterator(const TabletIndex* index_meta, InvertedIndexReader* reader)
: _index_meta(index_meta), _reader(reader) {
InvertedIndexIterator(const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexReader* reader)
: _index_meta(index_meta), _stats(stats), _reader(reader) {
// TODO xk maybe change interface to use index
_analyser_type = get_inverted_index_parser_type_from_string(
get_parser_string_from_properties(_index_meta->properties()));
}
@ -221,6 +226,7 @@ public:
private:
const TabletIndex* _index_meta;
OlapReaderStatistics* _stats;
InvertedIndexReader* _reader;
InvertedIndexParserType _analyser_type;
};

View File

@ -302,10 +302,12 @@ Status Segment::new_bitmap_index_iterator(const TabletColumn& tablet_column,
// Creates an inverted index iterator for `tablet_column`.
// The request is delegated to the column reader registered under the column's
// unique id, but only when such a reader exists and `index_meta` is non-null.
// Otherwise `*iter` is left untouched and OK is returned.
Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column,
const TabletIndex* index_meta,
OlapReaderStatistics* stats,
InvertedIndexIterator** iter) {
auto col_unique_id = tablet_column.unique_id();
if (_column_readers.count(col_unique_id) > 0 && index_meta) {
return _column_readers.at(col_unique_id)->new_inverted_index_iterator(index_meta, iter);
// `stats` is passed through to the column reader.
return _column_readers.at(col_unique_id)
->new_inverted_index_iterator(index_meta, stats, iter);
}
return Status::OK();
}

View File

@ -81,7 +81,8 @@ public:
Status new_bitmap_index_iterator(const TabletColumn& tablet_column, BitmapIndexIterator** iter);
Status new_inverted_index_iterator(const TabletColumn& tablet_column,
const TabletIndex* index_meta, InvertedIndexIterator** iter);
const TabletIndex* index_meta, OlapReaderStatistics* stats,
InvertedIndexIterator** iter);
const ShortKeyIndexDecoder* get_short_key_index() const {
DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok());

View File

@ -745,7 +745,7 @@ Status SegmentIterator::_init_inverted_index_iterators() {
if (_inverted_index_iterators.count(unique_id) < 1) {
RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
_opts.tablet_schema->column(cid), _opts.tablet_schema->get_inverted_index(cid),
&_inverted_index_iterators[unique_id]));
_opts.stats, &_inverted_index_iterators[unique_id]));
}
}
return Status::OK();

View File

@ -233,6 +233,19 @@ Status ExecEnv::_init_mem_env() {
<< PrettyPrinter::print(inverted_index_cache_limit, TUnit::BYTES)
<< ", origin config value: " << config::inverted_index_searcher_cache_limit;
// Size the inverted index query (match bitmap) cache from
// config::inverted_index_query_cache_limit, resolved against the process memory limit.
int64_t inverted_index_query_cache_limit =
        ParseUtil::parse_mem_spec(config::inverted_index_query_cache_limit,
                                  MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent);
while (!is_percent && inverted_index_query_cache_limit > MemInfo::mem_limit() / 2) {
    // Reason same as buffer_pool_limit: a non-percent value is halved until it no longer
    // exceeds half of the memory limit.
    inverted_index_query_cache_limit = inverted_index_query_cache_limit / 2;
}
// NOTE(review): the second argument (10) is presumably the shard count -- confirm against
// InvertedIndexQueryCache::create_global_cache.
InvertedIndexQueryCache::create_global_cache(inverted_index_query_cache_limit, 10);
// Report the limit that was just applied to the query cache.
LOG(INFO) << "Inverted index query match cache memory limit: "
          << PrettyPrinter::print(inverted_index_query_cache_limit, TUnit::BYTES)
          << ", origin config value: " << config::inverted_index_query_cache_limit;
// 4. init other managers
RETURN_IF_ERROR(_tmp_file_mgr->init());
RETURN_IF_ERROR(_block_spill_mgr->init());

View File

@ -110,7 +110,22 @@ Status NewOlapScanNode::_init_profile() {
_inverted_index_filter_counter =
ADD_COUNTER(_segment_profile, "RowsInvertedIndexFiltered", TUnit::UNIT);
_inverted_index_filter_timer = ADD_TIMER(_segment_profile, "InvertedIndexFilterTimer");
_inverted_index_filter_timer = ADD_TIMER(_segment_profile, "InvertedIndexFilterTime");
_inverted_index_query_cache_hit_counter =
ADD_COUNTER(_segment_profile, "InvertedIndexQueryCacheHit", TUnit::UNIT);
_inverted_index_query_cache_miss_counter =
ADD_COUNTER(_segment_profile, "InvertedIndexQueryCacheMiss", TUnit::UNIT);
_inverted_index_query_timer = ADD_TIMER(_segment_profile, "InvertedIndexQueryTime");
_inverted_index_query_bitmap_copy_timer =
ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapCopyTime");
_inverted_index_query_bitmap_op_timer =
ADD_TIMER(_segment_profile, "InvertedIndexQueryBitmapOpTime");
_inverted_index_searcher_open_timer =
ADD_TIMER(_segment_profile, "InvertedIndexSearcherOpenTime");
_inverted_index_searcher_search_timer =
ADD_TIMER(_segment_profile, "InvertedIndexSearcherSearchTime");
_inverted_index_searcher_bitmap_timer =
ADD_TIMER(_segment_profile, "InvertedIndexSearcherGenBitmapTime");
_output_index_result_column_timer = ADD_TIMER(_segment_profile, "OutputIndexResultColumnTimer");

View File

@ -129,6 +129,14 @@ private:
RuntimeProfile::Counter* _inverted_index_filter_counter = nullptr;
RuntimeProfile::Counter* _inverted_index_filter_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_query_cache_hit_counter = nullptr;
RuntimeProfile::Counter* _inverted_index_query_cache_miss_counter = nullptr;
RuntimeProfile::Counter* _inverted_index_query_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_query_bitmap_copy_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_query_bitmap_op_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_searcher_open_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_searcher_search_timer = nullptr;
RuntimeProfile::Counter* _inverted_index_searcher_bitmap_timer = nullptr;
RuntimeProfile::Counter* _output_index_result_column_timer = nullptr;

View File

@ -496,6 +496,21 @@ void NewOlapScanner::_update_counters_before_close() {
COUNTER_UPDATE(olap_parent->_inverted_index_filter_counter, stats.rows_inverted_index_filtered);
COUNTER_UPDATE(olap_parent->_inverted_index_filter_timer, stats.inverted_index_filter_timer);
COUNTER_UPDATE(olap_parent->_inverted_index_query_cache_hit_counter,
stats.inverted_index_query_cache_hit);
COUNTER_UPDATE(olap_parent->_inverted_index_query_cache_miss_counter,
stats.inverted_index_query_cache_miss);
COUNTER_UPDATE(olap_parent->_inverted_index_query_timer, stats.inverted_index_query_timer);
COUNTER_UPDATE(olap_parent->_inverted_index_query_bitmap_copy_timer,
stats.inverted_index_query_bitmap_copy_timer);
COUNTER_UPDATE(olap_parent->_inverted_index_query_bitmap_op_timer,
stats.inverted_index_query_bitmap_op_timer);
COUNTER_UPDATE(olap_parent->_inverted_index_searcher_open_timer,
stats.inverted_index_searcher_open_timer);
COUNTER_UPDATE(olap_parent->_inverted_index_searcher_search_timer,
stats.inverted_index_searcher_search_timer);
COUNTER_UPDATE(olap_parent->_inverted_index_searcher_bitmap_timer,
stats.inverted_index_searcher_bitmap_timer);
COUNTER_UPDATE(olap_parent->_output_index_result_column_timer,
stats.output_index_result_column_timer);

View File

@ -62,12 +62,14 @@ TEST_F(InvertedIndexSearcherCacheTest, insert_lookup) {
status = index_searcher_cache->insert(fs, kTestDir, file_name_2);
EXPECT_EQ(Status::OK(), status);
OlapReaderStatistics stats;
// lookup after insert
{
// case 1: lookup exist entry
InvertedIndexCacheHandle inverted_index_cache_handle;
status = index_searcher_cache->get_index_searcher(fs, kTestDir, file_name_1,
&inverted_index_cache_handle);
&inverted_index_cache_handle, &stats);
EXPECT_EQ(Status::OK(), status);
auto cache_value_1 =
@ -76,7 +78,7 @@ TEST_F(InvertedIndexSearcherCacheTest, insert_lookup) {
EXPECT_GE(UnixMillis(), cache_value_1->last_visit_time);
status = index_searcher_cache->get_index_searcher(fs, kTestDir, file_name_2,
&inverted_index_cache_handle);
&inverted_index_cache_handle, &stats);
EXPECT_EQ(Status::OK(), status);
auto cache_value_2 =
@ -92,8 +94,8 @@ TEST_F(InvertedIndexSearcherCacheTest, insert_lookup) {
// use cache
{
InvertedIndexCacheHandle inverted_index_cache_handle_1;
status = index_searcher_cache->get_index_searcher(fs, kTestDir, file_name_not_exist_1,
&inverted_index_cache_handle_1);
status = index_searcher_cache->get_index_searcher(
fs, kTestDir, file_name_not_exist_1, &inverted_index_cache_handle_1, &stats);
EXPECT_EQ(Status::OK(), status);
EXPECT_FALSE(inverted_index_cache_handle_1.owned);
}
@ -101,8 +103,8 @@ TEST_F(InvertedIndexSearcherCacheTest, insert_lookup) {
// lookup again
{
InvertedIndexCacheHandle inverted_index_cache_handle_1;
status = index_searcher_cache->get_index_searcher(fs, kTestDir, file_name_not_exist_1,
&inverted_index_cache_handle_1);
status = index_searcher_cache->get_index_searcher(
fs, kTestDir, file_name_not_exist_1, &inverted_index_cache_handle_1, &stats);
EXPECT_EQ(Status::OK(), status);
EXPECT_FALSE(inverted_index_cache_handle_1.owned);
@ -114,15 +116,15 @@ TEST_F(InvertedIndexSearcherCacheTest, insert_lookup) {
// not use cache
InvertedIndexCacheHandle inverted_index_cache_handle_2;
status = index_searcher_cache->get_index_searcher(fs, kTestDir, file_name_not_exist_2,
&inverted_index_cache_handle_2, false);
status = index_searcher_cache->get_index_searcher(
fs, kTestDir, file_name_not_exist_2, &inverted_index_cache_handle_2, &stats, false);
EXPECT_EQ(Status::OK(), status);
EXPECT_TRUE(inverted_index_cache_handle_2.owned);
EXPECT_EQ(nullptr, inverted_index_cache_handle_2._cache);
EXPECT_EQ(nullptr, inverted_index_cache_handle_2._handle);
status = index_searcher_cache->get_index_searcher(fs, kTestDir, file_name_not_exist_2,
&inverted_index_cache_handle_2);
&inverted_index_cache_handle_2, &stats);
EXPECT_EQ(Status::OK(), status);
EXPECT_FALSE(inverted_index_cache_handle_2.owned);
auto cache_value_use_cache_2 =