1. Add hdfs file handle cache for hdfs file reader
Copied from Impala, `https://github.com/apache/impala/blob/master/be/src/util/lru-multi-cache.h`. (Thanks to the Impala team.)
This is an LRU cache that can store multiple entries with the same key.
The key is built from {file name + modification time}.
The value is the hdfsFile pointer that points to a particular HDFS file.
This cache avoids reopening the same HDFS file multiple times, which saves
query time.
Add a BE config `max_hdfs_file_handle_cache_num` to limit the maximum number
of cached file handles; the default is 20000.
2. Add file meta cache
The file meta cache is an LRU cache. The key is {file name + modification time};
the value is the parsed file meta info of that file, which saves
the time of re-parsing the file meta on every read.
Currently, it is only used for caching Parquet file footers.
Tests show that when the cache is hit, `FileOpenTime` and `ParseFooterTime` are reduced to almost 0
in the query profile, which saves time when there are lots of files to read.
79 lines
2.5 KiB
C++
79 lines
2.5 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#include "olap/file_header.h"
|
|
|
|
#include <gmock/gmock-actions.h>
|
|
#include <gmock/gmock-matchers.h>
|
|
#include <gtest/gtest-message.h>
|
|
#include <gtest/gtest-test-part.h>
|
|
|
|
#include <filesystem>
|
|
|
|
#include "common/status.h"
|
|
#include "gtest/gtest_pred_impl.h"
|
|
#include "testutil/test_util.h"
|
|
|
|
using ::testing::_;
|
|
using ::testing::Return;
|
|
using ::testing::SetArgPointee;
|
|
using std::string;
|
|
|
|
namespace doris {
|
|
|
|
class FileHeaderTest : public testing::Test {
|
|
public:
|
|
virtual void SetUp() {
|
|
std::filesystem::remove_all(_s_test_data_path);
|
|
EXPECT_FALSE(std::filesystem::exists(_s_test_data_path));
|
|
EXPECT_TRUE(std::filesystem::create_directory(_s_test_data_path));
|
|
}
|
|
|
|
virtual void TearDown() { EXPECT_TRUE(std::filesystem::remove_all(_s_test_data_path)); }
|
|
|
|
static std::string _s_test_data_path;
|
|
};
|
|
|
|
std::string FileHeaderTest::_s_test_data_path = "./file_handler_testxxxx123";
|
|
|
|
// Creates a new file in the scratch directory through the local file system,
// appends one 12-byte block followed by a series of 1 KiB zero-filled blocks,
// closes the writer, and checks that every step reports an OK status.
TEST_F(FileHeaderTest, TestWrite) {
    std::shared_ptr<io::LocalFileSystem> fs = io::global_local_filesystem();
    std::string file_name = _s_test_data_path + "/abcd123.txt";

    // Start from `true` so the expectation below shows exists() really cleared the flag.
    bool exists = true;
    EXPECT_TRUE(fs->exists(file_name, &exists).ok());
    EXPECT_FALSE(exists);

    io::FileWriterPtr file_writer;
    // Fatal assertions: the writer is dereferenced below, so continuing after a
    // failed create_file() would dereference a null writer.
    ASSERT_TRUE(fs->create_file(file_name, &file_writer).ok());
    ASSERT_TRUE(file_writer != nullptr);

    // write 12 bytes to disk
    char small_bytes[12] = {};
    EXPECT_TRUE(file_writer->append({small_bytes, sizeof(small_bytes)}).ok());

    // Then append zero-filled 1 KiB blocks. The counter starts at 1, so one block
    // fewer than the bound is written (same count as before this rewrite).
    char large_bytes2[(1 << 10)] = {};
    // NOTE(review): assumes LOOP_LESS_OR_MORE yields the same value for the whole
    // test run, so it is evaluated once here rather than on every iteration.
    const auto block_bound = LOOP_LESS_OR_MORE(1 << 10, 1 << 17);
    for (int i = 1; i < block_bound; ++i) {
        EXPECT_TRUE(file_writer->append({large_bytes2, sizeof(large_bytes2)}).ok());
    }
    EXPECT_TRUE(file_writer->close().ok());
}
|
|
|
|
} // namespace doris
|