1. Add hdfs file handle cache for hdfs file reader
Copied from Impala: `https://github.com/apache/impala/blob/master/be/src/util/lru-multi-cache.h`. (Thanks to the Impala team.)
This is an LRU cache that can store multiple entries under the same key.
The key is built from {file name + modification time}.
The value is the hdfsFile pointer that points to a particular HDFS file.
This cache avoids reopening the same HDFS file multiple times, which saves
query time.
Add a BE config `max_hdfs_file_handle_cache_num` to limit the maximum number
of cached file handles; the default is 20000.
2. Add file meta cache
The file meta cache is an LRU cache. The key is {file name + modification time};
the value is the parsed file meta info of that file, which saves
the time of re-parsing the file meta every time.
Currently, it is only used for caching the Parquet file footer.
Tests show that when the cache is hit, `FileOpenTime` and `ParseFooterTime` are reduced to almost 0
in the query profile, which saves time when there are lots of files to read.
50 lines
1.5 KiB
C++
50 lines
1.5 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "util/obj_lru_cache.h"
|
|
|
|
namespace doris {
|
|
|
|
// Constructs the object cache.
//
// The cache is considered enabled only when `capacity` is strictly positive.
// In that case an underlying LRU cache named "ObjLRUCache" is created with the
// given capacity and shard count; otherwise no underlying cache is allocated.
// NOTE(review): LRUCacheType::NUMBER presumably means capacity is counted in
// entries rather than bytes -- confirm against the LRU cache implementation.
ObjLRUCache::ObjLRUCache(int64_t capacity, uint32_t num_shards) {
    const bool has_capacity = capacity > 0;
    _enabled = has_capacity;
    if (!has_capacity) {
        // Disabled cache: leave _cache unset.
        return;
    }
    _cache = std::unique_ptr<Cache>(
            new_lru_cache("ObjLRUCache", capacity, LRUCacheType::NUMBER, num_shards));
}
|
|
|
|
bool ObjLRUCache::lookup(const ObjKey& key, CacheHandle* handle) {
|
|
if (!_enabled) {
|
|
return false;
|
|
}
|
|
auto lru_handle = _cache->lookup(key.key);
|
|
if (!lru_handle) {
|
|
// cache miss
|
|
return false;
|
|
}
|
|
*handle = CacheHandle(_cache.get(), lru_handle);
|
|
return true;
|
|
}
|
|
|
|
void ObjLRUCache::erase(const ObjKey& key) {
|
|
if (_enabled) {
|
|
_cache->erase(key.key);
|
|
}
|
|
}
|
|
|
|
} // namespace doris
|