From b3bb13c572a880ef7ffb427673c81f9f18f1bfeb Mon Sep 17 00:00:00 2001
From: Philipp Hancke <philipp.hancke@googlemail.com>
Date: Fri, 23 Apr 2021 10:15:42 +0200
Subject: [PATCH] red: make red encoder more generic

This potentially allows redundancy distances of more than 2.

BUG=webrtc:11640

Change-Id: I0d8c831218285d57cf07f0a8e5829810afd4ab3f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/188383
Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Commit-Queue: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33913}
---
 .../codecs/red/audio_encoder_copy_red.cc      | 151 +++++++++---------
 .../codecs/red/audio_encoder_copy_red.h       |  19 ++-
 .../red/audio_encoder_copy_red_unittest.cc    |  32 ++--
 3 files changed, 100 insertions(+), 102 deletions(-)
diff --git a/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc b/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
index 8f8e328b8c..6aec55bc8e 100644
--- a/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
+++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red.cc
@@ -19,10 +19,18 @@
 #include "rtc_base/checks.h"
 
 namespace webrtc {
-// RED packets must be less than 1024 bytes to fit the 10 bit block length.
-static constexpr const int kRedMaxPacketSize = 1 << 10;
-// The typical MTU is 1200 bytes.
-static constexpr const size_t kAudioMaxRtpPacketLen = 1200;
+static constexpr const int kRedMaxPacketSize =
+    1 << 10;  // RED packets must be less than 1024 bytes to fit the 10 bit
+              // block length.
+static constexpr const size_t kAudioMaxRtpPacketLen =
+    1200;  // The typical MTU is 1200 bytes.
+
+static constexpr size_t kRedHeaderLength = 4;  // 4 bytes RED header.
+static constexpr size_t kRedLastHeaderLength =
+    1;  // reduced size for last RED header.
+
+static constexpr size_t kRedNumberOfRedundantEncodings =
+    2;  // The level of redundancy we support.
 
 AudioEncoderCopyRed::Config::Config() = default;
 AudioEncoderCopyRed::Config::Config(Config&&) = default;
@@ -30,9 +38,16 @@ AudioEncoderCopyRed::Config::~Config() = default;
 
 AudioEncoderCopyRed::AudioEncoderCopyRed(Config&& config)
     : speech_encoder_(std::move(config.speech_encoder)),
+      primary_encoded_(0, kAudioMaxRtpPacketLen),
       max_packet_length_(kAudioMaxRtpPacketLen),
       red_payload_type_(config.payload_type) {
   RTC_CHECK(speech_encoder_) << "Speech encoder not provided.";
+
+  for (size_t i = 0; i < kRedNumberOfRedundantEncodings; i++) {
+    std::pair<EncodedInfo, rtc::Buffer> redundant;
+    redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen);
+    redundant_encodings_.push_front(std::move(redundant));
+  }
 }
 
 AudioEncoderCopyRed::~AudioEncoderCopyRed() = default;
@@ -61,104 +76,86 @@ int AudioEncoderCopyRed::GetTargetBitrate() const {
   return speech_encoder_->GetTargetBitrate();
 }
 
-size_t AudioEncoderCopyRed::CalculateHeaderLength(size_t encoded_bytes) const {
-  size_t header_size = 1;
-  size_t bytes_available = max_packet_length_ - encoded_bytes;
-  if (secondary_info_.encoded_bytes > 0 &&
-      secondary_info_.encoded_bytes < bytes_available) {
-    header_size += 4;
-    bytes_available -= secondary_info_.encoded_bytes;
-  }
-  if (tertiary_info_.encoded_bytes > 0 &&
-      tertiary_info_.encoded_bytes < bytes_available) {
-    header_size += 4;
-  }
-  return header_size > 1 ? header_size : 0;
-}
-
 AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl(
     uint32_t rtp_timestamp,
     rtc::ArrayView<const int16_t> audio,
     rtc::Buffer* encoded) {
-  rtc::Buffer primary_encoded;
+  primary_encoded_.Clear();
   EncodedInfo info =
-      speech_encoder_->Encode(rtp_timestamp, audio, &primary_encoded);
+      speech_encoder_->Encode(rtp_timestamp, audio, &primary_encoded_);
   RTC_CHECK(info.redundant.empty()) << "Cannot use nested redundant encoders.";
-  RTC_DCHECK_EQ(primary_encoded.size(), info.encoded_bytes);
+  RTC_DCHECK_EQ(primary_encoded_.size(), info.encoded_bytes);
 
   if (info.encoded_bytes == 0 || info.encoded_bytes > kRedMaxPacketSize) {
     return info;
   }
   RTC_DCHECK_GT(max_packet_length_, info.encoded_bytes);
 
+  size_t header_length_bytes = kRedLastHeaderLength;
+  size_t bytes_available = max_packet_length_ - info.encoded_bytes;
+  auto it = redundant_encodings_.begin();
+
+  // Determine how much redundancy we can fit into our packet by
+  // iterating forward.
+  for (; it != redundant_encodings_.end(); it++) {
+    if (bytes_available < kRedHeaderLength + it->first.encoded_bytes) {
+      break;
+    }
+    if (it->first.encoded_bytes == 0) {
+      break;
+    }
+    bytes_available -= kRedHeaderLength + it->first.encoded_bytes;
+    header_length_bytes += kRedHeaderLength;
+  }
+
   // Allocate room for RFC 2198 header if there is redundant data.
   // Otherwise this will send the primary payload type without
   // wrapping in RED.
-  const size_t header_length_bytes = CalculateHeaderLength(info.encoded_bytes);
+  if (header_length_bytes == kRedLastHeaderLength) {
+    header_length_bytes = 0;
+  }
   encoded->SetSize(header_length_bytes);
 
+  // Iterate backwards and append the data.
   size_t header_offset = 0;
-  size_t bytes_available = max_packet_length_ - info.encoded_bytes;
-  if (tertiary_info_.encoded_bytes > 0 &&
-      tertiary_info_.encoded_bytes + secondary_info_.encoded_bytes <
-          bytes_available) {
-    encoded->AppendData(tertiary_encoded_);
+  while (it-- != redundant_encodings_.begin()) {
+    encoded->AppendData(it->second);
 
     const uint32_t timestamp_delta =
-        info.encoded_timestamp - tertiary_info_.encoded_timestamp;
-
-    encoded->data()[header_offset] = tertiary_info_.payload_type | 0x80;
+        info.encoded_timestamp - it->first.encoded_timestamp;
+    encoded->data()[header_offset] = it->first.payload_type | 0x80;
     rtc::SetBE16(static_cast<uint8_t*>(encoded->data()) + header_offset + 1,
-                 (timestamp_delta << 2) | (tertiary_info_.encoded_bytes >> 8));
-    encoded->data()[header_offset + 3] = tertiary_info_.encoded_bytes & 0xff;
-    header_offset += 4;
-    bytes_available -= tertiary_info_.encoded_bytes;
-  }
-
-  if (secondary_info_.encoded_bytes > 0 &&
-      secondary_info_.encoded_bytes < bytes_available) {
-    encoded->AppendData(secondary_encoded_);
-
-    const uint32_t timestamp_delta =
-        info.encoded_timestamp - secondary_info_.encoded_timestamp;
-
-    encoded->data()[header_offset] = secondary_info_.payload_type | 0x80;
-    rtc::SetBE16(static_cast<uint8_t*>(encoded->data()) + header_offset + 1,
-                 (timestamp_delta << 2) | (secondary_info_.encoded_bytes >> 8));
-    encoded->data()[header_offset + 3] = secondary_info_.encoded_bytes & 0xff;
-    header_offset += 4;
-    bytes_available -= secondary_info_.encoded_bytes;
-  }
-
-  encoded->AppendData(primary_encoded);
-  if (header_length_bytes > 0) {
-    RTC_DCHECK_EQ(header_offset, header_length_bytes - 1);
-    encoded->data()[header_offset] = info.payload_type;
+                 (timestamp_delta << 2) | (it->first.encoded_bytes >> 8));
+    encoded->data()[header_offset + 3] = it->first.encoded_bytes & 0xff;
+    header_offset += kRedHeaderLength;
+    info.redundant.push_back(it->first);
   }
 
   // |info| will be implicitly cast to an EncodedInfoLeaf struct, effectively
   // discarding the (empty) vector of redundant information. This is
   // intentional.
-  info.redundant.push_back(info);
-  RTC_DCHECK_EQ(info.redundant.size(), 1);
-  RTC_DCHECK_EQ(info.speech, info.redundant[0].speech);
-  if (secondary_info_.encoded_bytes > 0) {
-    info.redundant.push_back(secondary_info_);
-    RTC_DCHECK_EQ(info.redundant.size(), 2);
-  }
-  if (tertiary_info_.encoded_bytes > 0) {
-    info.redundant.push_back(tertiary_info_);
-    RTC_DCHECK_EQ(info.redundant.size(),
-                  2 + (secondary_info_.encoded_bytes > 0 ? 1 : 0));
+  if (header_length_bytes > 0) {
+    info.redundant.push_back(info);
+    RTC_DCHECK_EQ(info.speech,
+                  info.redundant[info.redundant.size() - 1].speech);
   }
 
-  // Save secondary to tertiary.
-  tertiary_encoded_.SetData(secondary_encoded_);
-  tertiary_info_ = secondary_info_;
+  encoded->AppendData(primary_encoded_);
+  if (header_length_bytes > 0) {
+    RTC_DCHECK_EQ(header_offset, header_length_bytes - 1);
+    encoded->data()[header_offset] = info.payload_type;
+  }
 
-  // Save primary to secondary.
-  secondary_encoded_.SetData(primary_encoded);
-  secondary_info_ = info;
+  // Shift the redundant encodings.
+  it = redundant_encodings_.begin();
+  for (auto next = std::next(it); next != redundant_encodings_.end();
+       it++, next = std::next(it)) {
+    next->first = it->first;
+    next->second.SetData(it->second);
+  }
+  it = redundant_encodings_.begin();
+  it->first = info;
+  it->second.SetData(primary_encoded_);
 
   // Update main EncodedInfo.
   if (header_length_bytes > 0) {
@@ -170,8 +167,12 @@ AudioEncoder::EncodedInfo AudioEncoderCopyRed::EncodeImpl(
 
 void AudioEncoderCopyRed::Reset() {
   speech_encoder_->Reset();
-  secondary_encoded_.Clear();
-  secondary_info_.encoded_bytes = 0;
+  redundant_encodings_.clear();
+  for (size_t i = 0; i < kRedNumberOfRedundantEncodings; i++) {
+    std::pair<EncodedInfo, rtc::Buffer> redundant;
+    redundant.second.EnsureCapacity(kAudioMaxRtpPacketLen);
+    redundant_encodings_.push_front(std::move(redundant));
+  }
 }
 
 bool AudioEncoderCopyRed::SetFec(bool enable) {
diff --git a/modules/audio_coding/codecs/red/audio_encoder_copy_red.h b/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
index 9acb9b842c..d5b1bf6868 100644
--- a/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
+++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red.h
@@ -14,6 +14,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include <list>
 #include <memory>
 #include <utility>
 
@@ -26,10 +27,12 @@
 
 namespace webrtc {
 
-// This class implements redundant audio coding. The class object will have an
-// underlying AudioEncoder object that performs the actual encodings. The
-// current class will gather the two latest encodings from the underlying codec
-// into one packet.
+// This class implements redundant audio coding as described in
+//   https://tools.ietf.org/html/rfc2198
+// The class object will have an underlying AudioEncoder object that performs
+// the actual encodings. The current class will gather the latest encoding and
+// up to N previous encodings into one packet. Currently N is hard-coded to 2.
+
 class AudioEncoderCopyRed final : public AudioEncoder {
  public:
   struct Config {
@@ -84,15 +87,11 @@ class AudioEncoderCopyRed final : public AudioEncoder {
                          rtc::Buffer* encoded) override;
 
  private:
-  size_t CalculateHeaderLength(size_t encoded_bytes) const;
-
   std::unique_ptr<AudioEncoder> speech_encoder_;
+  rtc::Buffer primary_encoded_;
   size_t max_packet_length_;
   int red_payload_type_;
-  rtc::Buffer secondary_encoded_;
-  EncodedInfoLeaf secondary_info_;
-  rtc::Buffer tertiary_encoded_;
-  EncodedInfoLeaf tertiary_info_;
+  std::list<std::pair<EncodedInfo, rtc::Buffer>> redundant_encodings_;
 
   RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderCopyRed);
 };
diff --git a/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc b/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc
index 33527997b5..ddd82441db 100644
--- a/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc
+++ b/modules/audio_coding/codecs/red/audio_encoder_copy_red_unittest.cc
@@ -152,7 +152,7 @@ TEST_F(AudioEncoderCopyRedTest, CheckNoOutput) {
   Encode();
   // First call is a special case, since it does not include a secondary
   // payload.
-  EXPECT_EQ(1u, encoded_info_.redundant.size());
+  EXPECT_EQ(0u, encoded_info_.redundant.size());
   EXPECT_EQ(kEncodedSize, encoded_info_.encoded_bytes);
 
   // Next call to the speech encoder will not produce any output.
@@ -180,7 +180,7 @@ TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes) {
   // First call is a special case, since it does not include a secondary
   // payload.
   Encode();
-  EXPECT_EQ(1u, encoded_info_.redundant.size());
+  EXPECT_EQ(0u, encoded_info_.redundant.size());
   EXPECT_EQ(1u, encoded_info_.encoded_bytes);
 
   // Second call is also special since it does not include a ternary
@@ -192,9 +192,9 @@ TEST_F(AudioEncoderCopyRedTest, CheckPayloadSizes) {
   for (size_t i = 3; i <= kNumPackets; ++i) {
     Encode();
     ASSERT_EQ(3u, encoded_info_.redundant.size());
-    EXPECT_EQ(i, encoded_info_.redundant[0].encoded_bytes);
+    EXPECT_EQ(i, encoded_info_.redundant[2].encoded_bytes);
     EXPECT_EQ(i - 1, encoded_info_.redundant[1].encoded_bytes);
-    EXPECT_EQ(i - 2, encoded_info_.redundant[2].encoded_bytes);
+    EXPECT_EQ(i - 2, encoded_info_.redundant[0].encoded_bytes);
     EXPECT_EQ(9 + i + (i - 1) + (i - 2), encoded_info_.encoded_bytes);
   }
 }
@@ -222,8 +222,8 @@ TEST_F(AudioEncoderCopyRedTest, CheckTimestamps) {
 
   Encode();
   ASSERT_EQ(2u, encoded_info_.redundant.size());
-  EXPECT_EQ(primary_timestamp, encoded_info_.redundant[0].encoded_timestamp);
-  EXPECT_EQ(secondary_timestamp, encoded_info_.redundant[1].encoded_timestamp);
+  EXPECT_EQ(primary_timestamp, encoded_info_.redundant[1].encoded_timestamp);
+  EXPECT_EQ(secondary_timestamp, encoded_info_.redundant[0].encoded_timestamp);
   EXPECT_EQ(primary_timestamp, encoded_info_.encoded_timestamp);
 }
 
@@ -280,9 +280,7 @@ TEST_F(AudioEncoderCopyRedTest, CheckPayloadType) {
   // First call is a special case, since it does not include a secondary
   // payload.
   Encode();
-  ASSERT_EQ(1u, encoded_info_.redundant.size());
-  EXPECT_EQ(primary_payload_type, encoded_info_.redundant[0].payload_type);
-  EXPECT_EQ(primary_payload_type, encoded_info_.payload_type);
+  ASSERT_EQ(0u, encoded_info_.redundant.size());
 
   const int secondary_payload_type = red_payload_type_ + 2;
   info.payload_type = secondary_payload_type;
@@ -291,8 +289,8 @@ TEST_F(AudioEncoderCopyRedTest, CheckPayloadType) {
 
   Encode();
   ASSERT_EQ(2u, encoded_info_.redundant.size());
-  EXPECT_EQ(secondary_payload_type, encoded_info_.redundant[0].payload_type);
-  EXPECT_EQ(primary_payload_type, encoded_info_.redundant[1].payload_type);
+  EXPECT_EQ(secondary_payload_type, encoded_info_.redundant[1].payload_type);
+  EXPECT_EQ(primary_payload_type, encoded_info_.redundant[0].payload_type);
   EXPECT_EQ(red_payload_type_, encoded_info_.payload_type);
 }
 
@@ -316,7 +314,7 @@ TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header) {
   EXPECT_EQ(encoded_[0], primary_payload_type | 0x80);
 
   uint32_t timestamp_delta = encoded_info_.encoded_timestamp -
-                             encoded_info_.redundant[1].encoded_timestamp;
+                             encoded_info_.redundant[0].encoded_timestamp;
   // Timestamp delta is encoded as a 14 bit value.
   EXPECT_EQ(encoded_[1], timestamp_delta >> 6);
   EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f);
@@ -335,13 +333,13 @@ TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header) {
   EXPECT_EQ(encoded_[0], primary_payload_type | 0x80);
 
   timestamp_delta = encoded_info_.encoded_timestamp -
-                    encoded_info_.redundant[2].encoded_timestamp;
+                    encoded_info_.redundant[0].encoded_timestamp;
   // Timestamp delta is encoded as a 14 bit value.
   EXPECT_EQ(encoded_[1], timestamp_delta >> 6);
   EXPECT_EQ(static_cast<uint8_t>(encoded_[2] >> 2), timestamp_delta & 0x3f);
   // Redundant length is encoded as 10 bit value.
-  EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[2].encoded_bytes >> 8);
-  EXPECT_EQ(encoded_[3], encoded_info_.redundant[2].encoded_bytes & 0xff);
+  EXPECT_EQ(encoded_[2] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8);
+  EXPECT_EQ(encoded_[3], encoded_info_.redundant[1].encoded_bytes & 0xff);
 
   EXPECT_EQ(encoded_[4], primary_payload_type | 0x80);
   timestamp_delta = encoded_info_.encoded_timestamp -
@@ -350,8 +348,8 @@ TEST_F(AudioEncoderCopyRedTest, CheckRFC2198Header) {
   EXPECT_EQ(encoded_[5], timestamp_delta >> 6);
   EXPECT_EQ(static_cast<uint8_t>(encoded_[6] >> 2), timestamp_delta & 0x3f);
   // Redundant length is encoded as 10 bit value.
-  EXPECT_EQ(encoded_[6] & 0x3u, encoded_info_.redundant[2].encoded_bytes >> 8);
-  EXPECT_EQ(encoded_[7], encoded_info_.redundant[2].encoded_bytes & 0xff);
+  EXPECT_EQ(encoded_[6] & 0x3u, encoded_info_.redundant[1].encoded_bytes >> 8);
+  EXPECT_EQ(encoded_[7], encoded_info_.redundant[1].encoded_bytes & 0xff);
   EXPECT_EQ(encoded_[8], primary_payload_type);
 }