Files
doris/be/test/util/block_compression_test.cpp
yixiutt b136d80e1a [enhancement](compress) reuse compression ctx and buffer (#12573)
Reuse compression ctx and buffer.
Use a global instance for every compression algorithm, and use a
thread-safe buffer pool to reuse compression buffers. The pool size is equal
to the maximum number of parallel compression threads, so it will not grow too large.

Tests show this feature improves data import and compaction performance by 5%.

Co-authored-by: yixiutt <yixiu@selectdb.com>
2022-09-15 10:59:46 +08:00

146 lines
5.0 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "util/block_compression.h"
#include <gtest/gtest.h>
#include <iostream>
#include "util/faststring.h"
namespace doris {
class BlockCompressionTest : public testing::Test {
public:
BlockCompressionTest() {}
virtual ~BlockCompressionTest() {}
};
// Builds a pseudo-random string of exactly `len` characters drawn from
// [0-9a-zA-Z] using rand().
//
// The result never contains an embedded '\0', so it can be compared safely
// through its c_str() as well as through std::string equality.
static std::string generate_str(size_t len) {
    static const char charset[] =
            "0123456789"
            "abcdefghijklmnopqrstuvwxyz"
            "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // sizeof(charset) includes the literal's terminating '\0'; exclude it so
    // the NUL byte can never be picked as a payload character.
    constexpr size_t charset_len = sizeof(charset) - 1;

    std::string result;
    result.resize(len);
    for (size_t i = 0; i < len; ++i) {
        result[i] = charset[rand() % charset_len];
    }
    return result;
}
// Exercises the single-slice compress/decompress API of the codec for `type`.
//
// For each payload size in {0, 1, 10, 1000, 1000000} it:
//   1. compresses a random string and decompresses it into a buffer of the
//      original size, expecting both calls to succeed and the bytes to match;
//   2. (ZLIB only, payloads larger than 1 byte) decompresses into a buffer that
//      is one byte too small and expects failure;
//   3. (every type except SNAPPY) decompresses input truncated by one byte and
//      expects failure.
void test_single_slice(segment_v2::CompressionTypePB type) {
    BlockCompressionCodec* codec = nullptr;
    auto st = get_block_compression_codec(type, &codec);
    // Fatal assertions: every statement below dereferences `codec`, so carrying
    // on after a failed lookup would be undefined behaviour rather than a test
    // failure.
    ASSERT_TRUE(st.ok());
    ASSERT_TRUE(codec != nullptr);

    size_t test_sizes[] = {0, 1, 10, 1000, 1000000};
    for (auto size : test_sizes) {
        auto orig = generate_str(size);
        faststring compressed_str;

        st = codec->compress(orig, &compressed_str);
        EXPECT_TRUE(st.ok());
        Slice compressed_slice(compressed_str);

        std::string uncompressed;
        uncompressed.resize(size);
        {
            Slice uncompressed_slice(uncompressed);
            st = codec->decompress(compressed_slice, &uncompressed_slice);
            EXPECT_TRUE(st.ok());
            // Compare the full std::string contents; a C-string comparison
            // would stop at the first '\0' and could hide a mismatch.
            EXPECT_TRUE(orig == uncompressed) << "round-trip mismatch for size " << size;
        }

        // buffer not enough for decompress
        // snappy has no return value if given buffer is not enough
        // NOTE: For ZLIB, we even get OK with an insufficient output
        // when uncompressed size is 1
        // NOTE(review): the previous condition
        //   (type == ZLIB && size > 1) && type != SNAPPY && size > 0
        // is logically equivalent to the one below, i.e. only ZLIB is checked
        // here. Whether the other codecs were meant to be covered is unclear;
        // confirm before widening.
        if (type == segment_v2::CompressionTypePB::ZLIB && uncompressed.size() > 1) {
            Slice uncompressed_slice(uncompressed);
            uncompressed_slice.size -= 1;
            st = codec->decompress(compressed_slice, &uncompressed_slice);
            EXPECT_FALSE(st.ok());
        }

        // corrupt compressed data
        if (type != segment_v2::CompressionTypePB::SNAPPY) {
            Slice uncompressed_slice(uncompressed);
            // Drop the last compressed byte, then restore the size afterwards.
            compressed_slice.size -= 1;
            st = codec->decompress(compressed_slice, &uncompressed_slice);
            EXPECT_FALSE(st.ok());
            compressed_slice.size += 1;
        }
    }
}
// Runs test_single_slice once for each compression type listed below, in order.
TEST_F(BlockCompressionTest, single) {
    const segment_v2::CompressionTypePB codec_types[] = {
            segment_v2::CompressionTypePB::SNAPPY,
            segment_v2::CompressionTypePB::ZLIB,
            segment_v2::CompressionTypePB::LZ4,
            segment_v2::CompressionTypePB::LZ4F,
            segment_v2::CompressionTypePB::ZSTD,
    };
    for (const auto codec_type : codec_types) {
        test_single_slice(codec_type);
    }
}
// Exercises the multi-slice compress API of the codec for `type`.
//
// Random strings of sizes {0, 1, 10, 1000, 1000000} are passed to compress()
// as a vector of slices together with their total size. The output is then
// decompressed into a buffer of that total size and must equal the
// concatenation of the inputs.
void test_multi_slices(segment_v2::CompressionTypePB type) {
    BlockCompressionCodec* codec = nullptr;
    auto st = get_block_compression_codec(type, &codec);
    // Fatal assertions: `codec` is dereferenced below, so continuing after a
    // failed lookup would be undefined behaviour rather than a test failure.
    ASSERT_TRUE(st.ok());
    ASSERT_TRUE(codec != nullptr);

    size_t test_sizes[] = {0, 1, 10, 1000, 1000000};
    std::vector<std::string> orig_strs;
    for (auto size : test_sizes) {
        orig_strs.emplace_back(generate_str(size));
    }

    // The slices reference the strings held in orig_strs; `orig` keeps the
    // concatenated payload that decompression is expected to reproduce.
    std::vector<Slice> orig_slices;
    std::string orig;
    for (auto& str : orig_strs) {
        orig_slices.emplace_back(str);
        orig.append(str);
    }
    size_t total_size = orig.size();

    faststring compressed;
    st = codec->compress(orig_slices, total_size, &compressed);
    EXPECT_TRUE(st.ok());
    Slice compressed_slice(compressed);

    std::string uncompressed;
    uncompressed.resize(total_size);
    // normal case
    {
        Slice uncompressed_slice(uncompressed);
        st = codec->decompress(compressed_slice, &uncompressed_slice);
        EXPECT_TRUE(st.ok());
        // Compare the full std::string contents; a C-string comparison would
        // stop at the first '\0' and could hide a mismatch.
        EXPECT_TRUE(orig == uncompressed) << "multi-slice round-trip mismatch";
    }
}
// Runs test_multi_slices once for each compression type listed below, in order.
TEST_F(BlockCompressionTest, multi) {
    const segment_v2::CompressionTypePB codec_types[] = {
            segment_v2::CompressionTypePB::SNAPPY,
            segment_v2::CompressionTypePB::ZLIB,
            segment_v2::CompressionTypePB::LZ4,
            segment_v2::CompressionTypePB::LZ4F,
            segment_v2::CompressionTypePB::ZSTD,
    };
    for (const auto codec_type : codec_types) {
        test_multi_slices(codec_type);
    }
}
} // namespace doris