Delta compression efficiency improvement for non-existent base

Before this CL, when we encoded a sequence with a non-existent
base, we pretended that the base was 0, and the first delta was
based on that. However, in a sequence where the deltas are small,
but where the first element is big, that would produce
unnecessarily wide deltas. Therefore, we change the behavior in
cases where the base is non-existent, to encode the first existent
value (if any) as a varint; the delta width may then be smaller.

This CL includes two piggy-backed changes:
1. Varint encoding/decoding moved to its own file (and an
   additional flavor added).
2. The unit tests for delta encoding are further parameterized
   with a random seed.

Bug: webrtc:8111
Change-Id: I76fff577c86d019c8334bf74b76bd35db06ff68d
Reviewed-on: https://webrtc-review.googlesource.com/c/107860
Reviewed-by: Björn Terelius <terelius@webrtc.org>
Commit-Queue: Elad Alon <eladalon@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25395}
This commit is contained in:
Elad Alon
2018-10-26 19:09:41 +02:00
committed by Commit Bot
parent 436ebcaec1
commit ff43541927
8 changed files with 288 additions and 112 deletions

View File

@ -10,6 +10,7 @@
#include "logging/rtc_event_log/encoder/delta_encoding.h"
#include <algorithm>
#include <limits>
#include <numeric>
#include <string>
@ -157,21 +158,32 @@ std::vector<absl::optional<uint64_t>> CreateSequenceByDeltas(
// Tests of the delta encoding, parameterized by (in tuple order) the delta
// signedness, the number of values in the sequence created by the test,
// whether optional values are used, and a partial random seed.
class DeltaEncodingTest : public ::testing::TestWithParam<
std::tuple<DeltaSignedness, size_t, bool>> {
class DeltaEncodingTest
: public ::testing::TestWithParam<
std::tuple<DeltaSignedness, size_t, bool, uint64_t>> {
public:
DeltaEncodingTest()
: signedness_(std::get<0>(GetParam())),
num_of_values_(std::get<1>(GetParam())),
optional_values_(std::get<2>(GetParam())) {
optional_values_(std::get<2>(GetParam())),
partial_random_seed_(std::get<3>(GetParam())) {
MaybeSetSignedness(signedness_);
}
~DeltaEncodingTest() override = default;
// Running with the same seed for all variants would make all tests start
// with the same sequence; avoid this by making the seed different.
// The returned seed is a function of all four test parameters.
uint64_t Seed() const {
// Weight each parameter by a different prime so that different parameter
// combinations tend to produce different seeds. (A weighted sum like this
// does not strictly guarantee uniqueness.)
return 2 * static_cast<uint64_t>(signedness_) + 3 * num_of_values_ +
5 * optional_values_ + 7 * partial_random_seed_;
}
const DeltaSignedness signedness_;
const uint64_t num_of_values_;
const bool optional_values_;
const uint64_t partial_random_seed_; // Mixed into the result of Seed().
};
TEST_P(DeltaEncodingTest, AllValuesEqualToExistentBaseValue) {
@ -202,6 +214,36 @@ TEST_P(DeltaEncodingTest, AllValuesEqualToNonExistentBaseValue) {
EXPECT_TRUE(encoded.empty());
}
// Builds a sequence in which the base has no value but some of the values do,
// then hands it to TestEncodingAndDecoding() (defined elsewhere; presumably a
// round-trip check - confirm). Skipped when optional values are disabled for
// this parameterization.
TEST_P(DeltaEncodingTest, BaseNonExistentButSomeOtherValuesExist) {
if (!optional_values_) {
return; // Test irrelevant for this case.
}
// Default-constructed optional: the base holds no value.
const absl::optional<uint64_t> base;
// Every element starts out without a value; some are filled in below.
std::vector<absl::optional<uint64_t>> values(num_of_values_);
// Seeded via Seed(), which is derived from the test parameters.
Random prng(Seed());
// One bit-width cap is drawn up front and applied to every generated value.
const uint64_t max_bit_width = 1 + prng.Rand(63); // [1, 64]
// Alternate runs of non-existent and existent values until the end of the
// sequence is reached. |i| is advanced inside the body, not in the loop
// header.
// NOTE(review): the bounds below presumably rely on Rand(t) returning a
// value in [0, t], which keeps |i + j| below values.size() - confirm against
// Random's contract.
for (size_t i = 0; i < values.size();) {
// Leave a random number of values as non-existent.
const size_t non_existent_count = prng.Rand(values.size() - i - 1);
i += non_existent_count;
// Assign random values to a random number of values. (At least one, to
// prevent this iteration of the outer loop from being a no-op.)
const size_t existent_count =
std::max<size_t>(prng.Rand(values.size() - i - 1), 1);
for (size_t j = 0; j < existent_count; ++j) {
values[i + j] = RandomWithMaxBitWidth(&prng, max_bit_width);
}
i += existent_count;
}
TestEncodingAndDecoding(base, values);
}
TEST_P(DeltaEncodingTest, MinDeltaNoWrapAround) {
const absl::optional<uint64_t> base(3432);
@ -408,18 +450,20 @@ INSTANTIATE_TEST_CASE_P(
DeltaSignedness::kForceUnsigned,
DeltaSignedness::kForceSigned),
::testing::Values(1, 2, 100, 10000),
::testing::Bool()));
::testing::Bool(),
::testing::Values(10, 20, 30)));
// Tests over the quality of the compression (as opposed to its correctness).
// Not to be confused with tests of runtime efficiency.
class DeltaEncodingCompressionQualityTest
: public ::testing::TestWithParam<
std::tuple<DeltaSignedness, uint64_t, uint64_t>> {
std::tuple<DeltaSignedness, uint64_t, uint64_t, uint64_t>> {
public:
// Unpacks the test parameters in tuple order (signedness, maximum delta bit
// width, number of values, partial random seed) and passes the signedness on
// to MaybeSetSignedness().
DeltaEncodingCompressionQualityTest()
: signedness_(std::get<0>(GetParam())),
delta_max_bit_width_(std::get<1>(GetParam())),
num_of_values_(std::get<2>(GetParam())) {
num_of_values_(std::get<2>(GetParam())),
partial_random_seed_(std::get<3>(GetParam())) {
MaybeSetSignedness(signedness_);
}
@ -428,17 +472,16 @@ class DeltaEncodingCompressionQualityTest
// Running with the same seed for all variants would make all tests start
// with the same sequence; avoid this by making the seed different.
uint64_t Seed() const {
constexpr uint64_t non_zero_base_seed = 3012;
// Multiply everything but |non_zero_base_seed| by different prime numbers
// to produce unique results.
return non_zero_base_seed + 2 * static_cast<uint64_t>(signedness_) +
3 * delta_max_bit_width_ + 5 * delta_max_bit_width_ +
7 * num_of_values_;
// Weight each parameter by a different prime so that different parameter
// combinations tend to produce different seeds. (A weighted sum like this
// does not strictly guarantee uniqueness.)
// NOTE(review): delta_max_bit_width_ is weighted twice (by 3 and by 5, i.e.
// by 8 in total) - confirm this is intended.
return 2 * static_cast<uint64_t>(signedness_) + 3 * delta_max_bit_width_ +
5 * delta_max_bit_width_ + 7 * num_of_values_ +
11 * partial_random_seed_;
}
const DeltaSignedness signedness_;
const uint64_t delta_max_bit_width_;
const uint64_t num_of_values_;
const uint64_t partial_random_seed_; // Explained where it's used.
};
// If no wrap-around occurs in the stream, the width of the values does not
@ -520,19 +563,21 @@ INSTANTIATE_TEST_CASE_P(
DeltaSignedness::kForceUnsigned,
DeltaSignedness::kForceSigned),
::testing::Values(1, 4, 8, 15, 16, 17, 31, 32, 33, 63, 64),
::testing::Values(1, 2, 100, 10000)));
::testing::Values(1, 2, 100, 10000),
::testing::Values(11, 12, 13)));
// Similar to DeltaEncodingTest, but instead of semi-surgically producing
// specific cases, produces a large number of semi-realistic inputs.
class DeltaEncodingFuzzerLikeTest
: public ::testing::TestWithParam<
std::tuple<DeltaSignedness, uint64_t, uint64_t, bool>> {
std::tuple<DeltaSignedness, uint64_t, uint64_t, bool, uint64_t>> {
public:
// Unpacks the test parameters in tuple order (signedness, maximum delta bit
// width, number of values, optional-values flag, partial random seed) and
// passes the signedness on to MaybeSetSignedness().
DeltaEncodingFuzzerLikeTest()
: signedness_(std::get<0>(GetParam())),
delta_max_bit_width_(std::get<1>(GetParam())),
num_of_values_(std::get<2>(GetParam())),
optional_values_(std::get<3>(GetParam())) {
optional_values_(std::get<3>(GetParam())),
partial_random_seed_(std::get<4>(GetParam())) {
MaybeSetSignedness(signedness_);
}
@ -541,18 +586,18 @@ class DeltaEncodingFuzzerLikeTest
// Running with the same seed for all variants would make all tests start
// with the same sequence; avoid this by making the seed different.
uint64_t Seed() const {
constexpr uint64_t non_zero_base_seed = 1983;
// Multiply everything but |non_zero_base_seed| by different prime numbers
// to produce unique results.
return non_zero_base_seed + 2 * static_cast<uint64_t>(signedness_) +
3 * delta_max_bit_width_ + 5 * delta_max_bit_width_ +
7 * num_of_values_ + 11 * static_cast<uint64_t>(optional_values_);
// Weight each parameter by a different prime so that different parameter
// combinations tend to produce different seeds. (A weighted sum like this
// does not strictly guarantee uniqueness.)
// NOTE(review): delta_max_bit_width_ is weighted twice (by 3 and by 5, i.e.
// by 8 in total) - confirm this is intended.
return 2 * static_cast<uint64_t>(signedness_) + 3 * delta_max_bit_width_ +
5 * delta_max_bit_width_ + 7 * num_of_values_ +
11 * static_cast<uint64_t>(optional_values_) +
13 * partial_random_seed_;
}
const DeltaSignedness signedness_;
const uint64_t delta_max_bit_width_;
const uint64_t num_of_values_;
const bool optional_values_;
const uint64_t partial_random_seed_; // Explained where it's used.
};
TEST_P(DeltaEncodingFuzzerLikeTest, Test) {
@ -580,7 +625,8 @@ INSTANTIATE_TEST_CASE_P(
DeltaSignedness::kForceSigned),
::testing::Values(1, 4, 8, 15, 16, 17, 31, 32, 33, 63, 64),
::testing::Values(1, 2, 100, 10000),
::testing::Bool()));
::testing::Bool(),
::testing::Values(21, 22, 23)));
class DeltaEncodingSpecificEdgeCasesTest
: public ::testing::TestWithParam<