diff --git a/logging/BUILD.gn b/logging/BUILD.gn index d3f821a3c4..f22f61253a 100644 --- a/logging/BUILD.gn +++ b/logging/BUILD.gn @@ -160,6 +160,8 @@ rtc_static_library("rtc_event_log_impl_encoder") { "rtc_event_log/encoder/blob_encoding.h", "rtc_event_log/encoder/delta_encoding.cc", "rtc_event_log/encoder/delta_encoding.h", + "rtc_event_log/encoder/varint.cc", + "rtc_event_log/encoder/varint.h", ] defines = [] diff --git a/logging/rtc_event_log/encoder/blob_encoding.cc b/logging/rtc_event_log/encoder/blob_encoding.cc index 62d268b765..ee137f13fc 100644 --- a/logging/rtc_event_log/encoder/blob_encoding.cc +++ b/logging/rtc_event_log/encoder/blob_encoding.cc @@ -12,63 +12,11 @@ #include +#include "logging/rtc_event_log/encoder/varint.h" #include "rtc_base/logging.h" namespace webrtc { -const size_t kMaxVarIntLengthBytes = 10; // ceil(64 / 7.0) is 10. - -namespace { - -// Encode a given uint64_t as a varint. From least to most significant, -// each batch of seven bits are put into the lower bits of a byte, and the last -// remaining bit in that byte (the highest one) marks whether additional bytes -// follow (which happens if and only if there are other bits in |input| which -// are non-zero). -// Notes: If input == 0, one byte is used. If input is uint64_t::max, exactly -// kMaxVarIntLengthBytes are used. -std::string EncodeVarInt(uint64_t input) { - std::string output; - output.reserve(kMaxVarIntLengthBytes); - - do { - uint8_t byte = static_cast(input & 0x7f); - input >>= 7; - if (input > 0) { - byte |= 0x80; - } - output += byte; - } while (input > 0); - - RTC_DCHECK_GE(output.size(), 1u); - RTC_DCHECK_LE(output.size(), kMaxVarIntLengthBytes); - - return output; -} - -// Inverse of EncodeVarInt(). -// If decoding is successful, a non-zero number is returned, indicating the -// number of bytes read from |input|, and the decoded varint is written -// into |output|. -// If not successful, 0 is returned, and |output| is not modified. -size_t DecodeVarInt(absl::string_view input, uint64_t* output) { - RTC_DCHECK(output); - - uint64_t decoded = 0; - for (size_t i = 0; i < input.length() && i < kMaxVarIntLengthBytes; ++i) { - decoded += (static_cast(input[i] & 0x7f) - << static_cast(7 * i)); - if (!(input[i] & 0x80)) { - *output = decoded; - return i + 1; - } - } - - return 0; -} - -} // namespace - std::string EncodeBlobs(const std::vector& blobs) { RTC_DCHECK(!blobs.empty()); diff --git a/logging/rtc_event_log/encoder/blob_encoding.h b/logging/rtc_event_log/encoder/blob_encoding.h index 308753441b..32a10d277f 100644 --- a/logging/rtc_event_log/encoder/blob_encoding.h +++ b/logging/rtc_event_log/encoder/blob_encoding.h @@ -18,8 +18,6 @@ namespace webrtc { -extern const size_t kMaxVarIntLengthBytes; - // Encode/decode a sequence of strings, whose length is not known to be // discernable from the blob itself (i.e. without being transmitted OOB), // in a way that would allow us to separate them again on the decoding side. diff --git a/logging/rtc_event_log/encoder/blob_encoding_unittest.cc b/logging/rtc_event_log/encoder/blob_encoding_unittest.cc index 3992445f8b..1d08e0feb8 100644 --- a/logging/rtc_event_log/encoder/blob_encoding_unittest.cc +++ b/logging/rtc_event_log/encoder/blob_encoding_unittest.cc @@ -13,6 +13,7 @@ #include #include +#include "logging/rtc_event_log/encoder/varint.h" #include "rtc_base/checks.h" #include "test/gtest.h" diff --git a/logging/rtc_event_log/encoder/delta_encoding.cc b/logging/rtc_event_log/encoder/delta_encoding.cc index 3d242ec3ab..5e334b98d0 100644 --- a/logging/rtc_event_log/encoder/delta_encoding.cc +++ b/logging/rtc_event_log/encoder/delta_encoding.cc @@ -16,6 +16,7 @@ #include #include "absl/memory/memory.h" +#include "logging/rtc_event_log/encoder/varint.h" #include "rtc_base/bitbuffer.h" #include "rtc_base/checks.h" #include "rtc_base/constructormagic.h" @@ -124,6 +125,13 @@ class BitWriter final { written_bits_ += bit_count; } + void WriteBits(const std::string& input) { + RTC_DCHECK(valid_); + for (std::string::value_type c : input) { + WriteBits(c, 8 * sizeof(std::string::value_type)); + } + } + // Returns everything that was written so far. // Nothing more may be written after this is called. std::string GetString() { @@ -233,7 +241,7 @@ class FixedLengthDeltaEncoder final { // Calculate min/max values of unsigned/signed deltas, given the bit width // of all the values in the series. static void CalculateMinAndMaxDeltas( - uint64_t base, + absl::optional base, const std::vector>& values, uint64_t bit_width, uint64_t* max_unsigned_delta, @@ -310,21 +318,21 @@ std::string FixedLengthDeltaEncoder::EncodeDeltas( return std::string(); } - const uint64_t base_val = base.value_or(0u); - bool non_decreasing = true; - uint64_t max_value_including_base = base_val; - uint64_t previous = base_val; + uint64_t max_value_including_base = base.value_or(0u); size_t existent_values_count = 0; - for (size_t i = 0; i < values.size(); ++i) { - if (!values[i].has_value()) { - continue; + { + uint64_t previous = base.value_or(0u); + for (size_t i = 0; i < values.size(); ++i) { + if (!values[i].has_value()) { + continue; + } + ++existent_values_count; + non_decreasing &= (previous <= values[i].value()); + max_value_including_base = + std::max(max_value_including_base, values[i].value()); + previous = values[i].value(); } - ++existent_values_count; - non_decreasing &= (previous <= values[i].value()); - max_value_including_base = - std::max(max_value_including_base, values[i].value()); - previous = values[i].value(); } // If the sequence is non-decreasing, it may be assumed to have width = 64; @@ -335,9 +343,8 @@ std::string FixedLengthDeltaEncoder::EncodeDeltas( uint64_t max_unsigned_delta; uint64_t max_pos_signed_delta; uint64_t min_neg_signed_delta; - CalculateMinAndMaxDeltas(base_val, values, value_width_bits, - &max_unsigned_delta, &max_pos_signed_delta, - &min_neg_signed_delta); + CalculateMinAndMaxDeltas(base, values, value_width_bits, &max_unsigned_delta, + &max_pos_signed_delta, &min_neg_signed_delta); const uint64_t delta_width_bits_unsigned = UnsignedBitWidth(max_unsigned_delta); @@ -350,7 +357,8 @@ std::string FixedLengthDeltaEncoder::EncodeDeltas( const uint64_t delta_width_bits = signed_deltas ? delta_width_bits_signed : delta_width_bits_unsigned; - const bool values_optional = (existent_values_count < values.size()); + const bool values_optional = + !base.has_value() || (existent_values_count < values.size()); FixedLengthEncodingParameters params(delta_width_bits, signed_deltas, values_optional, value_width_bits); @@ -364,7 +372,7 @@ std::string FixedLengthDeltaEncoder::EncodeDeltas( } void FixedLengthDeltaEncoder::CalculateMinAndMaxDeltas( - uint64_t base, + absl::optional base, const std::vector>& values, uint64_t bit_width, uint64_t* max_unsigned_delta_out, @@ -381,16 +389,24 @@ void FixedLengthDeltaEncoder::CalculateMinAndMaxDeltas( uint64_t max_pos_signed_delta = 0; uint64_t min_neg_signed_delta = 0; - uint64_t prev = base; + absl::optional prev = base; for (size_t i = 0; i < values.size(); ++i) { if (!values[i].has_value()) { continue; } + if (!prev.has_value()) { + // If the base is non-existent, the first existent value is encoded as + // a varint, rather than as a delta. + RTC_DCHECK(!base.has_value()); + prev = values[i]; + continue; + } + const uint64_t current = values[i].value(); - const uint64_t forward_delta = UnsignedDelta(prev, current, bit_mask); - const uint64_t backward_delta = UnsignedDelta(current, prev, bit_mask); + const uint64_t forward_delta = UnsignedDelta(*prev, current, bit_mask); + const uint64_t backward_delta = UnsignedDelta(current, *prev, bit_mask); max_unsigned_delta = std::max(max_unsigned_delta, forward_delta); @@ -444,14 +460,23 @@ std::string FixedLengthDeltaEncoder::Encode() { } } - uint64_t previous = base_.has_value() ? base_.value() : 0u; + absl::optional previous = base_; for (absl::optional value : values_) { if (!value.has_value()) { RTC_DCHECK(params_.values_optional()); continue; } - EncodeDelta(previous, value.value()); - previous = value.value(); + + if (!previous.has_value()) { + // If the base is non-existent, the first existent value is encoded as + // a varint, rather than as a delta. + RTC_DCHECK(!base_.has_value()); + writer_->WriteBits(EncodeVarInt(value.value())); + } else { + EncodeDelta(previous.value(), value.value()); + } + + previous = value; } return writer_->GetString(); @@ -477,7 +502,9 @@ size_t FixedLengthDeltaEncoder::HeaderLengthBits() const { size_t FixedLengthDeltaEncoder::EncodedDeltasLengthBits( size_t existent_values_count) const { - if (params_.values_optional()) { + if (!params_.values_optional()) { + return values_.size() * params_.delta_width_bits(); + } else { RTC_DCHECK_EQ(std::count_if(values_.begin(), values_.end(), [](absl::optional val) { return val.has_value(); @@ -485,10 +512,16 @@ size_t FixedLengthDeltaEncoder::EncodedDeltasLengthBits( existent_values_count); // One bit for each delta, to indicate if the value exists, and delta_width // for each existent value, to indicate the delta itself. - return (1 * values_.size()) + - (params_.delta_width_bits() * existent_values_count); - } else { - return values_.size() * params_.delta_width_bits(); + // If base_ is non-existent, the first value (if any) is encoded as a varint + // rather than as a delta. + const size_t existence_bitmap_size_bits = 1 * values_.size(); + const bool first_value_is_varint = + !base_.has_value() && existent_values_count >= 1; + const size_t first_value_varint_size_bits = 8 * kMaxVarIntLengthBytes; + const size_t deltas_count = existent_values_count - first_value_is_varint; + const size_t deltas_size_bits = deltas_count * params_.delta_width_bits(); + return existence_bitmap_size_bits + first_value_varint_size_bits + + deltas_size_bits; } } @@ -611,6 +644,11 @@ class FixedLengthDeltaDecoder final { // Perform the decoding using the parameters given to the ctor. std::vector> Decode(); + // Decode a varint and write it to |output|. Return value indicates success + // or failure. In case of failure, no guarantees are made about the contents + // of |output| or the results of additional reads. + bool ParseVarInt(uint64_t* output); + // Attempt to parse a delta from the input reader. // Returns true/false for success/failure. // Writes the delta into |delta| if successful. @@ -786,26 +824,44 @@ std::vector> FixedLengthDeltaDecoder::Decode() { std::fill(existing_values.begin(), existing_values.end(), true); } + absl::optional previous = base_; std::vector> values(num_of_deltas_); - uint64_t previous = base_.has_value() ? base_.value() : 0u; for (size_t i = 0; i < num_of_deltas_; ++i) { if (!existing_values[i]) { RTC_DCHECK(params_.values_optional()); continue; } - uint64_t delta; - if (!ParseDelta(&delta)) { - return std::vector>(); + if (!previous) { + // If the base is non-existent, the first existent value is encoded as + // a varint, rather than as a delta. + RTC_DCHECK(!base_.has_value()); + uint64_t first_value; + if (!ParseVarInt(&first_value)) { + RTC_LOG(LS_WARNING) << "Failed to read first value."; + return std::vector>(); + } + values[i] = first_value; + } else { + uint64_t delta; + if (!ParseDelta(&delta)) { + return std::vector>(); + } + values[i] = ApplyDelta(previous.value(), delta); } - values[i] = ApplyDelta(previous, delta); - previous = values[i].value(); + + previous = values[i]; } return values; } +bool FixedLengthDeltaDecoder::ParseVarInt(uint64_t* output) { + RTC_DCHECK(reader_); + return DecodeVarInt(reader_.get(), output) != 0; +} + bool FixedLengthDeltaDecoder::ParseDelta(uint64_t* delta) { RTC_DCHECK(reader_); diff --git a/logging/rtc_event_log/encoder/delta_encoding_unittest.cc b/logging/rtc_event_log/encoder/delta_encoding_unittest.cc index 1cf3b648b1..ca44010eda 100644 --- a/logging/rtc_event_log/encoder/delta_encoding_unittest.cc +++ b/logging/rtc_event_log/encoder/delta_encoding_unittest.cc @@ -10,6 +10,7 @@ #include "logging/rtc_event_log/encoder/delta_encoding.h" +#include #include #include #include @@ -157,21 +158,32 @@ std::vector> CreateSequenceByDeltas( // Tests of the delta encoding, parameterized by the number of values // in the sequence created by the test. -class DeltaEncodingTest : public ::testing::TestWithParam< - std::tuple> { +class DeltaEncodingTest + : public ::testing::TestWithParam< + std::tuple> { public: DeltaEncodingTest() : signedness_(std::get<0>(GetParam())), num_of_values_(std::get<1>(GetParam())), - optional_values_(std::get<2>(GetParam())) { + optional_values_(std::get<2>(GetParam())), + partial_random_seed_(std::get<3>(GetParam())) { MaybeSetSignedness(signedness_); } ~DeltaEncodingTest() override = default; + // Running with the same seed for all variants would make all tests start + // with the same sequence; avoid this by making the seed different. + uint64_t Seed() const { + // Multiply everything but by different primes to produce unique results. + return 2 * static_cast(signedness_) + 3 * num_of_values_ + + 5 * optional_values_ + 7 * partial_random_seed_; + } + const DeltaSignedness signedness_; const uint64_t num_of_values_; const bool optional_values_; + const uint64_t partial_random_seed_; // Explained where it's used. }; TEST_P(DeltaEncodingTest, AllValuesEqualToExistentBaseValue) { @@ -202,6 +214,36 @@ TEST_P(DeltaEncodingTest, AllValuesEqualToNonExistentBaseValue) { EXPECT_TRUE(encoded.empty()); } +TEST_P(DeltaEncodingTest, BaseNonExistentButSomeOtherValuesExist) { + if (!optional_values_) { + return; // Test irrelevant for this case. + } + + const absl::optional base; + std::vector> values(num_of_values_); + + Random prng(Seed()); + + const uint64_t max_bit_width = 1 + prng.Rand(63); // [1, 64] + + for (size_t i = 0; i < values.size();) { + // Leave a random number of values as non-existent. + const size_t non_existent_count = prng.Rand(values.size() - i - 1); + i += non_existent_count; + + // Assign random values to a random number of values. (At least one, to + // prevent this iteration of the outer loop from being a no-op.) + const size_t existent_count = + std::max(prng.Rand(values.size() - i - 1), 1); + for (size_t j = 0; j < existent_count; ++j) { + values[i + j] = RandomWithMaxBitWidth(&prng, max_bit_width); + } + i += existent_count; + } + + TestEncodingAndDecoding(base, values); +} + TEST_P(DeltaEncodingTest, MinDeltaNoWrapAround) { const absl::optional base(3432); @@ -408,18 +450,20 @@ INSTANTIATE_TEST_CASE_P( DeltaSignedness::kForceUnsigned, DeltaSignedness::kForceSigned), ::testing::Values(1, 2, 100, 10000), - ::testing::Bool())); + ::testing::Bool(), + ::testing::Values(10, 20, 30))); // Tests over the quality of the compression (as opposed to its correctness). // Not to be confused with tests of runtime efficiency. class DeltaEncodingCompressionQualityTest : public ::testing::TestWithParam< - std::tuple> { + std::tuple> { public: DeltaEncodingCompressionQualityTest() : signedness_(std::get<0>(GetParam())), delta_max_bit_width_(std::get<1>(GetParam())), - num_of_values_(std::get<2>(GetParam())) { + num_of_values_(std::get<2>(GetParam())), + partial_random_seed_(std::get<3>(GetParam())) { MaybeSetSignedness(signedness_); } @@ -428,17 +472,16 @@ class DeltaEncodingCompressionQualityTest // Running with the same seed for all variants would make all tests start // with the same sequence; avoid this by making the seed different. uint64_t Seed() const { - constexpr uint64_t non_zero_base_seed = 3012; - // Multiply everything but |non_zero_base_seed| by different prime numbers - // to produce unique results. - return non_zero_base_seed + 2 * static_cast(signedness_) + - 3 * delta_max_bit_width_ + 5 * delta_max_bit_width_ + - 7 * num_of_values_; + // Multiply everything but by different primes to produce unique results. + return 2 * static_cast(signedness_) + 3 * delta_max_bit_width_ + + 5 * delta_max_bit_width_ + 7 * num_of_values_ + + 11 * partial_random_seed_; } const DeltaSignedness signedness_; const uint64_t delta_max_bit_width_; const uint64_t num_of_values_; + const uint64_t partial_random_seed_; // Explained where it's used. }; // If no wrap-around occurs in the stream, the width of the values does not @@ -520,19 +563,21 @@ INSTANTIATE_TEST_CASE_P( DeltaSignedness::kForceUnsigned, DeltaSignedness::kForceSigned), ::testing::Values(1, 4, 8, 15, 16, 17, 31, 32, 33, 63, 64), - ::testing::Values(1, 2, 100, 10000))); + ::testing::Values(1, 2, 100, 10000), + ::testing::Values(11, 12, 13))); // Similar to DeltaEncodingTest, but instead of semi-surgically producing // specific cases, produce large amount of semi-realistic inputs. class DeltaEncodingFuzzerLikeTest : public ::testing::TestWithParam< - std::tuple> { + std::tuple> { public: DeltaEncodingFuzzerLikeTest() : signedness_(std::get<0>(GetParam())), delta_max_bit_width_(std::get<1>(GetParam())), num_of_values_(std::get<2>(GetParam())), - optional_values_(std::get<3>(GetParam())) { + optional_values_(std::get<3>(GetParam())), + partial_random_seed_(std::get<4>(GetParam())) { MaybeSetSignedness(signedness_); } @@ -541,18 +586,18 @@ class DeltaEncodingFuzzerLikeTest // Running with the same seed for all variants would make all tests start // with the same sequence; avoid this by making the seed different. uint64_t Seed() const { - constexpr uint64_t non_zero_base_seed = 1983; - // Multiply everything but |non_zero_base_seed| by different prime numbers - // to produce unique results. - return non_zero_base_seed + 2 * static_cast(signedness_) + - 3 * delta_max_bit_width_ + 5 * delta_max_bit_width_ + - 7 * num_of_values_ + 11 * static_cast(optional_values_); + // Multiply everything but by different primes to produce unique results. + return 2 * static_cast(signedness_) + 3 * delta_max_bit_width_ + + 5 * delta_max_bit_width_ + 7 * num_of_values_ + + 11 * static_cast(optional_values_) + + 13 * partial_random_seed_; } const DeltaSignedness signedness_; const uint64_t delta_max_bit_width_; const uint64_t num_of_values_; const bool optional_values_; + const uint64_t partial_random_seed_; // Explained where it's used. }; TEST_P(DeltaEncodingFuzzerLikeTest, Test) { @@ -580,7 +625,8 @@ INSTANTIATE_TEST_CASE_P( DeltaSignedness::kForceSigned), ::testing::Values(1, 4, 8, 15, 16, 17, 31, 32, 33, 63, 64), ::testing::Values(1, 2, 100, 10000), - ::testing::Bool())); + ::testing::Bool(), + ::testing::Values(21, 22, 23))); class DeltaEncodingSpecificEdgeCasesTest : public ::testing::TestWithParam< diff --git a/logging/rtc_event_log/encoder/varint.cc b/logging/rtc_event_log/encoder/varint.cc new file mode 100644 index 0000000000..41e29e8480 --- /dev/null +++ b/logging/rtc_event_log/encoder/varint.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "logging/rtc_event_log/encoder/varint.h" + +#include "rtc_base/checks.h" + +// TODO(eladalon): Add unit tests. + +namespace webrtc { + +const size_t kMaxVarIntLengthBytes = 10; // ceil(64 / 7.0) is 10. + +std::string EncodeVarInt(uint64_t input) { + std::string output; + output.reserve(kMaxVarIntLengthBytes); + + do { + uint8_t byte = static_cast(input & 0x7f); + input >>= 7; + if (input > 0) { + byte |= 0x80; + } + output += byte; + } while (input > 0); + + RTC_DCHECK_GE(output.size(), 1u); + RTC_DCHECK_LE(output.size(), kMaxVarIntLengthBytes); + + return output; +} + +// There is some code duplication between the flavors of this function. +// For performance's sake, it's best to just keep it. +size_t DecodeVarInt(absl::string_view input, uint64_t* output) { + RTC_DCHECK(output); + + uint64_t decoded = 0; + for (size_t i = 0; i < input.length() && i < kMaxVarIntLengthBytes; ++i) { + decoded += (static_cast(input[i] & 0x7f) + << static_cast(7 * i)); + if (!(input[i] & 0x80)) { + *output = decoded; + return i + 1; + } + } + + return 0; +} + +// There is some code duplication between the flavors of this function. +// For performance's sake, it's best to just keep it. +size_t DecodeVarInt(rtc::BitBuffer* input, uint64_t* output) { + RTC_DCHECK(output); + + uint64_t decoded = 0; + for (size_t i = 0; i < kMaxVarIntLengthBytes; ++i) { + uint8_t byte; + if (!input->ReadUInt8(&byte)) { + return 0; + } + decoded += + (static_cast(byte & 0x7f) << static_cast(7 * i)); + if (!(byte & 0x80)) { + *output = decoded; + return i + 1; + } + } + + return 0; +} + +} // namespace webrtc diff --git a/logging/rtc_event_log/encoder/varint.h b/logging/rtc_event_log/encoder/varint.h new file mode 100644 index 0000000000..95494ae753 --- /dev/null +++ b/logging/rtc_event_log/encoder/varint.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef LOGGING_RTC_EVENT_LOG_ENCODER_VARINT_H_ +#define LOGGING_RTC_EVENT_LOG_ENCODER_VARINT_H_ + +#include + +#include "absl/strings/string_view.h" +#include "rtc_base/bitbuffer.h" + +namespace webrtc { + +extern const size_t kMaxVarIntLengthBytes; + +// Encode a given uint64_t as a varint. From least to most significant, +// each batch of seven bits are put into the lower bits of a byte, and the last +// remaining bit in that byte (the highest one) marks whether additional bytes +// follow (which happens if and only if there are other bits in |input| which +// are non-zero). +// Notes: If input == 0, one byte is used. If input is uint64_t::max, exactly +// kMaxVarIntLengthBytes are used. +std::string EncodeVarInt(uint64_t input); + +// Inverse of EncodeVarInt(). +// If decoding is successful, a non-zero number is returned, indicating the +// number of bytes read from |input|, and the decoded varint is written +// into |output|. +// If not successful, 0 is returned, and |output| is not modified. +size_t DecodeVarInt(absl::string_view input, uint64_t* output); + +// Same as other version, but uses a rtc::BitBuffer for input. +// Some bits may be consumed even if a varint fails to be read. +size_t DecodeVarInt(rtc::BitBuffer* input, uint64_t* output); + +} // namespace webrtc + +#endif // LOGGING_RTC_EVENT_LOG_ENCODER_VARINT_H_