From b3bb13c572a880ef7ffb427673c81f9f18f1bfeb Mon Sep 17 00:00:00 2001 From: Philipp Hancke Date: Fri, 23 Apr 2021 10:15:42 +0200 Subject: [PATCH] red: make red encoder more generic potentially allowing distances of more than 2. BUG=webrtc:11640 Change-Id: I0d8c831218285d57cf07f0a8e5829810afd4ab3f Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/188383 Reviewed-by: Jesus de Vicente Pena Reviewed-by: Henrik Lundin Commit-Queue: Jesus de Vicente Pena Cr-Commit-Position: refs/heads/master@{#33913} --- .../codecs/red/audio_encoder_copy_red.cc | 151 +++++++++--------- .../codecs/red/audio_encoder_copy_red.h | 19 ++- .../red/audio_encoder_copy_red_unittest.cc | 32 ++-- 3 files changed, 100 insertions(+), 102 deletions(-) diff --git a/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc b/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc index 8f8e328b8c..6aec55bc8e 100644 --- a/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc +++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc @@ -19,10 +19,18 @@ #include "rtc_base/checks.h" namespace webrtc { -// RED packets must be less than 1024 bytes to fit the 10 bit block length. -static constexpr const int kRedMaxPacketSize = 1 << 10; -// The typical MTU is 1200 bytes. -static constexpr const size_t kAudioMaxRtpPacketLen = 1200; +static constexpr const int kRedMaxPacketSize = + 1 << 10; // RED packets must be less than 1024 bytes to fit the 10 bit + // block length. +static constexpr const size_t kAudioMaxRtpPacketLen = + 1200; // The typical MTU is 1200 bytes. + +static constexpr size_t kRedHeaderLength = 4; // 4 bytes RED header. +static constexpr size_t kRedLastHeaderLength = + 1; // reduced size for last RED header. + +static constexpr size_t kRedNumberOfRedundantEncodings = + 2; // The level of redundancy we support. AudioEncoderCopyRed::Config::Config() = default; AudioEncoderCopyRed::Config::Config(Config&&) = default; @@ -30,9 +38,16 @@ AudioEncoderCopyRed::Config::~Config() = default; AudioEncoderCopyRed::AudioEncoderCopyRed(Config&& config) : speech_encoder_(std::move(config.speech_encoder)), + primary_encoded_(0, kAudioMaxRtpPacketLen), max_packet_length_(kAudioMaxRtpPacketLen), red_payload_type_(config.payload_type) { RTC_CHECK(speech_encoder_) << "Speech encoder not provided."; + + for (size_t i = 0; i < kRedNumberOfRedundantEncodings; i++) { + std::pair redundant; + redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen); + redundant_encodings_.push_front(std::move(redundant)); + } } AudioEncoderCopyRed::~AudioEncoderCopyRed() = default; @@ -61,104 +76,86 @@ int AudioEncoderCopyRed::GetTargetBitrate() const { return speech_encoder_->GetTargetBitrate(); } -size_t AudioEncoderCopyRed::CalculateHeaderLength(size_t encoded_bytes) const { - size_t header_size = 1; - size_t bytes_available = max_packet_length_ - encoded_bytes; - if (secondary_info_.encoded_bytes > 0 && - secondary_info_.encoded_bytes < bytes_available) { - header_size += 4; - bytes_available -= secondary_info_.encoded_bytes; - } - if (tertiary_info_.encoded_bytes > 0 && - tertiary_info_.encoded_bytes < bytes_available) { - header_size += 4; - } - return header_size > 1 ? header_size : 0; -} - AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl( uint32_t rtp_timestamp, rtc::ArrayView audio, rtc::Buffer* encoded) { - rtc::Buffer primary_encoded; + primary_encoded_.Clear(); EncodedInfo info = - speech_encoder_->Encode(rtp_timestamp, audio, &primary_encoded); + speech_encoder_->Encode(rtp_timestamp, audio, &primary_encoded_); RTC_CHECK(info.redundant.empty()) << "Cannot use nested redundant encoders."; - RTC_DCHECK_EQ(primary_encoded.size(), info.encoded_bytes); + RTC_DCHECK_EQ(primary_encoded_.size(), info.encoded_bytes); if (info.encoded_bytes == 0 || info.encoded_bytes > kRedMaxPacketSize) { return info; } RTC_DCHECK_GT(max_packet_length_, info.encoded_bytes); + size_t header_length_bytes = kRedLastHeaderLength; + size_t bytes_available = max_packet_length_ - info.encoded_bytes; + auto it = redundant_encodings_.begin(); + + // Determine how much redundancy we can fit into our packet by + // iterating forward. + for (; it != redundant_encodings_.end(); it++) { + if (bytes_available < kRedHeaderLength + it->first.encoded_bytes) { + break; + } + if (it->first.encoded_bytes == 0) { + break; + } + bytes_available -= kRedHeaderLength + it->first.encoded_bytes; + header_length_bytes += kRedHeaderLength; + } + // Allocate room for RFC 2198 header if there is redundant data. // Otherwise this will send the primary payload type without // wrapping in RED. - const size_t header_length_bytes = CalculateHeaderLength(info.encoded_bytes); + if (header_length_bytes == kRedLastHeaderLength) { + header_length_bytes = 0; + } encoded->SetSize(header_length_bytes); + // Iterate backwards and append the data. size_t header_offset = 0; - size_t bytes_available = max_packet_length_ - info.encoded_bytes; - if (tertiary_info_.encoded_bytes > 0 && - tertiary_info_.encoded_bytes + secondary_info_.encoded_bytes < - bytes_available) { - encoded->AppendData(tertiary_encoded_); + while (it-- != redundant_encodings_.begin()) { + encoded->AppendData(it->second); const uint32_t timestamp_delta = - info.encoded_timestamp - tertiary_info_.encoded_timestamp; - - encoded->data()[header_offset] = tertiary_info_.payload_type | 0x80; + info.encoded_timestamp - it->first.encoded_timestamp; + encoded->data()[header_offset] = it->first.payload_type | 0x80; rtc::SetBE16(static_cast(encoded->data()) + header_offset + 1, - (timestamp_delta << 2) | (tertiary_info_.encoded_bytes >> 8)); - encoded->data()[header_offset + 3] = tertiary_info_.encoded_bytes & 0xff; - header_offset += 4; - bytes_available -= tertiary_info_.encoded_bytes; - } - - if (secondary_info_.encoded_bytes > 0 && - secondary_info_.encoded_bytes < bytes_available) { - encoded->AppendData(secondary_encoded_); - - const uint32_t timestamp_delta = - info.encoded_timestamp - secondary_info_.encoded_timestamp; - - encoded->data()[header_offset] = secondary_info_.payload_type | 0x80; - rtc::SetBE16(static_cast(encoded->data()) + header_offset + 1, - (timestamp_delta << 2) | (secondary_info_.encoded_bytes >> 8)); - encoded->data()[header_offset + 3] = secondary_info_.encoded_bytes & 0xff; - header_offset += 4; - bytes_available -= secondary_info_.encoded_bytes; - } - - encoded->AppendData(primary_encoded); - if (header_length_bytes > 0) { - RTC_DCHECK_EQ(header_offset, header_length_bytes - 1); - encoded->data()[header_offset] = info.payload_type; + (timestamp_delta << 2) | (it->first.encoded_bytes >> 8)); + encoded->data()[header_offset + 3] = it->first.encoded_bytes & 0xff; + header_offset += kRedHeaderLength; + info.redundant.push_back(it->first); } // |info| will be implicitly cast to an EncodedInfoLeaf struct, effectively // discarding the (empty) vector of redundant information. This is // intentional. - info.redundant.push_back(info); - RTC_DCHECK_EQ(info.redundant.size(), 1); - RTC_DCHECK_EQ(info.speech, info.redundant[0].speech); - if (secondary_info_.encoded_bytes > 0) { - info.redundant.push_back(secondary_info_); - RTC_DCHECK_EQ(info.redundant.size(), 2); - } - if (tertiary_info_.encoded_bytes > 0) { - info.redundant.push_back(tertiary_info_); - RTC_DCHECK_EQ(info.redundant.size(), - 2 + (secondary_info_.encoded_bytes > 0 ? 1 : 0)); + if (header_length_bytes > 0) { + info.redundant.push_back(info); + RTC_DCHECK_EQ(info.speech, + info.redundant[info.redundant.size() - 1].speech); } - // Save secondary to tertiary. - tertiary_encoded_.SetData(secondary_encoded_); - tertiary_info_ = secondary_info_; + encoded->AppendData(primary_encoded_); + if (header_length_bytes > 0) { + RTC_DCHECK_EQ(header_offset, header_length_bytes - 1); + encoded->data()[header_offset] = info.payload_type; + } - // Save primary to secondary. - secondary_encoded_.SetData(primary_encoded); - secondary_info_ = info; + // Shift the redundant encodings. + it = redundant_encodings_.begin(); + for (auto next = std::next(it); next != redundant_encodings_.end(); + it++, next = std::next(it)) { + next->first = it->first; + next->second.SetData(it->second); + } + it = redundant_encodings_.begin(); + it->first = info; + it->second.SetData(primary_encoded_); // Update main EncodedInfo. if (header_length_bytes > 0) { @@ -170,8 +167,12 @@ AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl( void AudioEncoderCopyRed::Reset() { speech_encoder_->Reset(); - secondary_encoded_.Clear(); - secondary_info_.encoded_bytes = 0; + redundant_encodings_.clear(); + for (size_t i = 0; i < kRedNumberOfRedundantEncodings; i++) { + std::pair redundant; + redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen); + redundant_encodings_.push_front(std::move(redundant)); + } } bool AudioEncoderCopyRed::SetFec(bool enable) { diff --git a/modules/audio_coding/codecs/red/audio_encoder_copy_red.h b/modules/audio_coding/codecs/red/audio_encoder_copy_red.h index 9acb9b842c..d5b1bf6868 100644 --- a/modules/audio_coding/codecs/red/audio_encoder_copy_red.h +++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red.h @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -26,10 +27,12 @@ namespace webrtc { -// This class implements redundant audio coding. The class object will have an -// underlying AudioEncoder object that performs the actual encodings. The -// current class will gather the two latest encodings from the underlying codec -// into one packet. +// This class implements redundant audio coding as described in +// https://tools.ietf.org/html/rfc2198 +// The class object will have an underlying AudioEncoder object that performs +// the actual encodings. The current class will gather the N latest encodings +// from the underlying codec into one packet. Currently N is hard-coded to 2. + class AudioEncoderCopyRed final : public AudioEncoder { public: struct Config { @@ -84,15 +87,11 @@ class AudioEncoderCopyRed final : public AudioEncoder { rtc::Buffer* encoded) override; private: - size_t CalculateHeaderLength(size_t encoded_bytes) const; - std::unique_ptr speech_encoder_; + rtc::Buffer primary_encoded_; size_t max_packet_length_; int red_payload_type_; - rtc::Buffer secondary_encoded_; - EncodedInfoLeaf secondary_info_; - rtc::Buffer tertiary_encoded_; - EncodedInfoLeaf tertiary_info_; + std::list> redundant_encodings_; RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderCopyRed); }; diff --git a/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc b/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc index 33527997b5..ddd82441db 100644 --- a/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc +++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc @@ -152,7 +152,7 @@ TEST_F(AudioEncoderCopyRedTest, CheckNoOutput) { Encode(); // First call is a special case, since it does not include a secondary // payload. - EXPECT_EQ(1u, encoded_info_.redundant.size()); + EXPECT_EQ(0u, encoded_info_.redundant.size()); EXPECT_EQ(kEncodedSize, encoded_info_.encoded_bytes); // Next call to the speech encoder will not produce any output. @@ -180,7 +180,7 @@ TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes) { // First call is a special case, since it does not include a secondary // payload. Encode(); - EXPECT_EQ(1u, encoded_info_.redundant.size()); + EXPECT_EQ(0u, encoded_info_.redundant.size()); EXPECT_EQ(1u, encoded_info_.encoded_bytes); // Second call is also special since it does not include a ternary @@ -192,9 +192,9 @@ TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes) { for (size_t i = 3; i <= kNumPackets; ++i) { Encode(); ASSERT_EQ(3u, encoded_info_.redundant.size()); - EXPECT_EQ(i, encoded_info_.redundant[0].encoded_bytes); + EXPECT_EQ(i, encoded_info_.redundant[2].encoded_bytes); EXPECT_EQ(i - 1, encoded_info_.redundant[1].encoded_bytes); - EXPECT_EQ(i - 2, encoded_info_.redundant[2].encoded_bytes); + EXPECT_EQ(i - 2, encoded_info_.redundant[0].encoded_bytes); EXPECT_EQ(9 + i + (i - 1) + (i - 2), encoded_info_.encoded_bytes); } } @@ -222,8 +222,8 @@ TEST_F(AudioEncoderCopyRedTest, CheckTimestamps) { Encode(); ASSERT_EQ(2u, encoded_info_.redundant.size()); - EXPECT_EQ(primary_timestamp, encoded_info_.redundant[0].encoded_timestamp); - EXPECT_EQ(secondary_timestamp, encoded_info_.redundant[1].encoded_timestamp); + EXPECT_EQ(primary_timestamp, encoded_info_.redundant[1].encoded_timestamp); + EXPECT_EQ(secondary_timestamp, encoded_info_.redundant[0].encoded_timestamp); EXPECT_EQ(primary_timestamp, encoded_info_.encoded_timestamp); } @@ -280,9 +280,7 @@ TEST_F(AudioEncoderCopyRedTest, CheckPayloadType) { // First call is a special case, since it does not include a secondary // payload. Encode(); - ASSERT_EQ(1u, encoded_info_.redundant.size()); - EXPECT_EQ(primary_payload_type, encoded_info_.redundant[0].payload_type); - EXPECT_EQ(primary_payload_type, encoded_info_.payload_type); + ASSERT_EQ(0u, encoded_info_.redundant.size()); const int secondary_payload_type = red_payload_type_ + 2; info.payload_type = secondary_payload_type; @@ -291,8 +289,8 @@ TEST_F(AudioEncoderCopyRedTest, CheckPayloadType) { Encode(); ASSERT_EQ(2u, encoded_info_.redundant.size()); - EXPECT_EQ(secondary_payload_type, encoded_info_.redundant[0].payload_type); - EXPECT_EQ(primary_payload_type, encoded_info_.redundant[1].payload_type); + EXPECT_EQ(secondary_payload_type, encoded_info_.redundant[1].payload_type); + EXPECT_EQ(primary_payload_type, encoded_info_.redundant[0].payload_type); EXPECT_EQ(red_payload_type_, encoded_info_.payload_type); } @@ -316,7 +314,7 @@ TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header) { EXPECT_EQ(encoded_[0], primary_payload_type | 0x80); uint32_t timestamp_delta = encoded_info_.encoded_timestamp - - encoded_info_.redundant[1].encoded_timestamp; + encoded_info_.redundant[0].encoded_timestamp; // Timestamp delta is encoded as a 14 bit value. EXPECT_EQ(encoded_[1], timestamp_delta >> 6); EXPECT_EQ(static_cast(encoded_[2] >> 2), timestamp_delta & 0x3f); @@ -335,13 +333,13 @@ TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header) { EXPECT_EQ(encoded_[0], primary_payload_type | 0x80); timestamp_delta = encoded_info_.encoded_timestamp - - encoded_info_.redundant[2].encoded_timestamp; + encoded_info_.redundant[0].encoded_timestamp; // Timestamp delta is encoded as a 14 bit value. EXPECT_EQ(encoded_[1], timestamp_delta >> 6); EXPECT_EQ(static_cast(encoded_[2] >> 2), timestamp_delta & 0x3f); // Redundant length is encoded as 10 bit value. - EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[2].encoded_bytes >> 8); - EXPECT_EQ(encoded_[3], encoded_info_.redundant[2].encoded_bytes & 0xff); + EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8); + EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff); EXPECT_EQ(encoded_[4], primary_payload_type | 0x80); timestamp_delta = encoded_info_.encoded_timestamp - @@ -350,8 +348,8 @@ TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header) { EXPECT_EQ(encoded_[5], timestamp_delta >> 6); EXPECT_EQ(static_cast(encoded_[6] >> 2), timestamp_delta & 0x3f); // Redundant length is encoded as 10 bit value. - EXPECT_EQ(encoded_[6] & 0x3u, encoded_info_.redundant[2].encoded_bytes >> 8); - EXPECT_EQ(encoded_[7], encoded_info_.redundant[2].encoded_bytes & 0xff); + EXPECT_EQ(encoded_[6] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8); + EXPECT_EQ(encoded_[7], encoded_info_.redundant[1].encoded_bytes & 0xff); EXPECT_EQ(encoded_[8], primary_payload_type); }