// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include #include #include #include #include #include #include // Must come before gtest.h. #include #include #include #include "util/bit_stream_utils.h" #include "util/bit_stream_utils.inline.h" #include "util/bit_util.h" #include "util/faststring.h" #include "util/rle_encoding.h" #include "util/debug_util.h" using std::string; using std::vector; namespace doris { const int kMaxWidth = 64; class TestRle : public testing::Test {}; // Validates encoding of values by encoding and decoding them. If // expected_encoding != NULL, also validates that the encoded buffer is // exactly 'expected_encoding'. // if expected_len is not -1, it will validate the encoded size is correct. template void ValidateRle(const vector& values, int bit_width, uint8_t* expected_encoding, int expected_len) { faststring buffer; RleEncoder encoder(&buffer, bit_width); for (const auto& value : values) { encoder.Put(value); } int encoded_len = encoder.Flush(); if (expected_len != -1) { EXPECT_EQ(encoded_len, expected_len); } if (expected_encoding != nullptr) { EXPECT_EQ(memcmp(buffer.data(), expected_encoding, expected_len), 0) << "\n" << "Expected: " << hexdump((const char*)expected_encoding, expected_len) << "\n" << "Got: " << hexdump((const char*)buffer.data(), buffer.size()); } // Verify read RleDecoder decoder(buffer.data(), encoded_len, bit_width); for (const auto& value : values) { T val = 0; bool result = decoder.Get(&val); EXPECT_TRUE(result); EXPECT_EQ(value, val); } } TEST(Rle, SpecificSequences) { const int kTestLen = 1024; uint8_t expected_buffer[kTestLen]; vector values; // Test 50 0' followed by 50 1's values.resize(100); for (int i = 0; i < 50; ++i) { values[i] = 0; } for (int i = 50; i < 100; ++i) { values[i] = 1; } // expected_buffer valid for bit width <= 1 byte expected_buffer[0] = (50 << 1); expected_buffer[1] = 0; expected_buffer[2] = (50 << 1); expected_buffer[3] = 1; for (int width = 1; width <= 8; ++width) { ValidateRle(values, width, expected_buffer, 4); } for (int width = 9; width <= kMaxWidth; ++width) { ValidateRle(values, width, nullptr, 2 * (1 + BitUtil::Ceil(width, 8))); } // Test 100 0's and 1's alternating for (int i = 0; i < 100; ++i) { values[i] = i % 2; } int num_groups = BitUtil::Ceil(100, 8); expected_buffer[0] = (num_groups << 1) | 1; for (int i = 0; i < 100/8; ++i) { expected_buffer[i + 1] = BOOST_BINARY(1 0 1 0 1 0 1 0); // 0xaa } // Values for the last 4 0 and 1's expected_buffer[1 + 100/8] = BOOST_BINARY(0 0 0 0 1 0 1 0); // 0x0a // num_groups and expected_buffer only valid for bit width = 1 ValidateRle(values, 1, expected_buffer, 1 + num_groups); for (int width = 2; width <= kMaxWidth; ++width) { ValidateRle(values, width, nullptr, 1 + BitUtil::Ceil(width * 100, 8)); } } // ValidateRle on 'num_vals' values with width 'bit_width'. If 'value' != -1, that value // is used, otherwise alternating values are used. void TestRleValues(int bit_width, int num_vals, int value = -1) { const uint64_t mod = bit_width == 64 ? 1ULL : 1ULL << bit_width; vector values; for (uint64_t v = 0; v < num_vals; ++v) { values.push_back((value != -1) ? value : (bit_width == 64 ? v : (v % mod))); } ValidateRle(values, bit_width, nullptr, -1); } TEST(Rle, TestValues) { for (int width = 1; width <= kMaxWidth; ++width) { TestRleValues(width, 1); TestRleValues(width, 1024); TestRleValues(width, 1024, 0); TestRleValues(width, 1024, 1); } } class BitRle : public testing::Test { public: BitRle() { } virtual ~BitRle() { } }; // Tests all true/false values TEST_F(BitRle, AllSame) { const int kTestLen = 1024; vector values; for (int v = 0; v < 2; ++v) { values.clear(); for (int i = 0; i < kTestLen; ++i) { values.push_back(v ? true : false); } ValidateRle(values, 1, nullptr, 3); } } // Test that writes out a repeated group and then a literal // group but flush before finishing. TEST_F(BitRle, Flush) { vector values; for (int i = 0; i < 16; ++i) values.push_back(1); values.push_back(false); ValidateRle(values, 1, nullptr, -1); values.push_back(true); ValidateRle(values, 1, nullptr, -1); values.push_back(true); ValidateRle(values, 1, nullptr, -1); values.push_back(true); ValidateRle(values, 1, nullptr, -1); } // Test some random bool sequences. TEST_F(BitRle, RandomBools) { int iters = 0; const int n_iters = 20; while (iters < n_iters) { srand(iters++); if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters; vector values; bool parity = 0; for (int i = 0; i < 1000; ++i) { int group_size = rand() % 20 + 1; // NOLINT(*) if (group_size > 16) { group_size = 1; } for (int i = 0; i < group_size; ++i) { values.push_back(parity); } parity = !parity; } ValidateRle(values, (iters % kMaxWidth) + 1, nullptr, -1); } } // Test some random 64-bit sequences. TEST_F(BitRle, Random64Bit) { int iters = 0; const int n_iters = 20; while (iters < n_iters) { srand(iters++); if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters; vector values; for (int i = 0; i < 1000; ++i) { int group_size = rand() % 20 + 1; // NOLINT(*) uint64_t cur_value = (static_cast(rand()) << 32) + static_cast(rand()); if (group_size > 16) { group_size = 1; } for (int i = 0; i < group_size; ++i) { values.push_back(cur_value); } } ValidateRle(values, 64, nullptr, -1); } } // Test a sequence of 1 0's, 2 1's, 3 0's. etc // e.g. 011000111100000 TEST_F(BitRle, RepeatedPattern) { vector values; const int min_run = 1; const int max_run = 32; for (int i = min_run; i <= max_run; ++i) { int v = i % 2; for (int j = 0; j < i; ++j) { values.push_back(v); } } // And go back down again for (int i = max_run; i >= min_run; --i) { int v = i % 2; for (int j = 0; j < i; ++j) { values.push_back(v); } } ValidateRle(values, 1, nullptr, -1); } TEST_F(TestRle, TestBulkPut) { size_t run_length; bool val = false; faststring buffer(1); RleEncoder encoder(&buffer, 1); encoder.Put(true, 10); encoder.Put(false, 7); encoder.Put(true, 5); encoder.Put(true, 15); encoder.Flush(); RleDecoder decoder(buffer.data(), encoder.len(), 1); run_length = decoder.GetNextRun(&val, std::numeric_limits::max()); ASSERT_TRUE(val); ASSERT_EQ(10, run_length); run_length = decoder.GetNextRun(&val, std::numeric_limits::max()); ASSERT_FALSE(val); ASSERT_EQ(7, run_length); run_length = decoder.GetNextRun(&val, std::numeric_limits::max()); ASSERT_TRUE(val); ASSERT_EQ(20, run_length); ASSERT_EQ(0, decoder.GetNextRun(&val, std::numeric_limits::max())); } TEST_F(TestRle, TestGetNextRun) { // Repeat the test with different number of items for (int num_items = 7; num_items < 200; num_items += 13) { // Test different block patterns // 1: 01010101 01010101 // 2: 00110011 00110011 // 3: 00011100 01110001 // ... for (int block = 1; block <= 20; ++block) { faststring buffer(1); RleEncoder encoder(&buffer, 1); for (int j = 0; j < num_items; ++j) { encoder.Put(!!(j & 1), block); } encoder.Flush(); RleDecoder decoder(buffer.data(), encoder.len(), 1); size_t count = num_items * block; for (int j = 0; j < num_items; ++j) { size_t run_length; bool val = false; DCHECK_GT(count, 0); run_length = decoder.GetNextRun(&val, std::numeric_limits::max()); run_length = std::min(run_length, count); ASSERT_EQ(!!(j & 1), val); ASSERT_EQ(block, run_length); count -= run_length; } DCHECK_EQ(count, 0); } } } // Generate a random bit string which consists of 'num_runs' runs, // each with a random length between 1 and 100. Returns the number // of values encoded (i.e the sum run length). static size_t GenerateRandomBitString(int num_runs, faststring* enc_buf, string* string_rep) { RleEncoder enc(enc_buf, 1); int num_bits = 0; for (int i = 0; i < num_runs; i++) { int run_length = random() % 100; bool value = static_cast(i & 1); enc.Put(value, run_length); string_rep->append(run_length, value ? '1' : '0'); num_bits += run_length; } enc.Flush(); return num_bits; } TEST_F(TestRle, TestRoundTripRandomSequencesWithRuns) { srand(time(nullptr)); // Test the limiting function of GetNextRun. const int kMaxToReadAtOnce = (random() % 20) + 1; // Generate a bunch of random bit sequences, and "round-trip" them // through the encode/decode sequence. for (int rep = 0; rep < 100; rep++) { faststring buf; string string_rep; int num_bits = GenerateRandomBitString(10, &buf, &string_rep); RleDecoder decoder(buf.data(), buf.size(), 1); string roundtrip_str; int rem_to_read = num_bits; size_t run_len; bool val; while (rem_to_read > 0 && (run_len = decoder.GetNextRun(&val, std::min(kMaxToReadAtOnce, rem_to_read))) != 0) { ASSERT_LE(run_len, kMaxToReadAtOnce); roundtrip_str.append(run_len, val ? '1' : '0'); rem_to_read -= run_len; } ASSERT_EQ(string_rep, roundtrip_str); } } TEST_F(TestRle, TestSkip) { faststring buffer(1); RleEncoder encoder(&buffer, 1); // 0101010[1] 01010101 01 // "A" for (int j = 0; j < 18; ++j) { encoder.Put(!!(j & 1)); } // 0011[00] 11001100 11001100 11001100 11001100 // "B" for (int j = 0; j < 19; ++j) { encoder.Put(!!(j & 1), 2); } // 000000000000 11[1111111111] 000000000000 111111111111 // "C" // 000000000000 111111111111 0[00000000000] 111111111111 // "D" // 000000000000 111111111111 000000000000 111111111111 for (int j = 0; j < 12; ++j) { encoder.Put(!!(j & 1), 12); } encoder.Flush(); bool val = false; size_t run_length; RleDecoder decoder(buffer.data(), encoder.len(), 1); // position before "A" ASSERT_EQ(3, decoder.Skip(7)); run_length = decoder.GetNextRun(&val, std::numeric_limits::max()); ASSERT_TRUE(val); ASSERT_EQ(1, run_length); // position before "B" ASSERT_EQ(7, decoder.Skip(14)); run_length = decoder.GetNextRun(&val, std::numeric_limits::max()); ASSERT_FALSE(val); ASSERT_EQ(2, run_length); // position before "C" ASSERT_EQ(18, decoder.Skip(46)); run_length = decoder.GetNextRun(&val, std::numeric_limits::max()); ASSERT_TRUE(val); ASSERT_EQ(10, run_length); // position before "D" ASSERT_EQ(24, decoder.Skip(49)); run_length = decoder.GetNextRun(&val, std::numeric_limits::max()); ASSERT_FALSE(val); ASSERT_EQ(11, run_length); encoder.Flush(); } } // namespace doris int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); }