Handle padded audio packets correctly

RTP packets can be padded with extra data at the end of the payload. The usable payload length of the packet should then be reduced with the padding length, since the padding must be discarded. This was not the case; instead, the entire payload, including padding data, was forwarded to the audio channel and in the end to the decoder. A special case of padding is packets which are empty except for the padding. That is, they carry no usable payload. These packets are sometimes used for probing the network and were discarded in RTPReceiverAudio::ParseAudioCodecSpecific. The result is that NetEq never sees those empty packets, just the holes in the sequence number series; this can throw off the target buffer calculations. With this change, the empty (after removing the padding) packets are let through, all the way down to NetEq, to a new method called NetEq::InsertEmptyPacket. This method notifies the DelayManager that an empty packet was received. BUG=webrtc:7610, webrtc:7625 Review-Url: https://codereview.webrtc.org/2870043003 Cr-Commit-Position: refs/heads/master@{#18083}
2017-05-10 07:38:01 -07:00
parent 423a288a12
commit b8c55b15a3
11 changed files with 113 additions and 8 deletions
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
@ -77,6 +77,11 @@ int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header,
  uint32_t receive_timestamp = 0;
  const RTPHeader* header = &rtp_header.header;  // Just a shorthand.

+  if (incoming_payload.empty()) {
+    neteq_->InsertEmptyPacket(rtp_header.header);
+    return 0;
+  }
+
  {
    rtc::CritScope lock(&crit_sect_);

--- a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
+++ b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
@ -1079,6 +1079,7 @@ rtc::Optional<SdpAudioFormat> AudioCodingModuleImpl::ReceiveFormat() const {
 int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
                                          const size_t payload_length,
                                          const WebRtcRTPHeader& rtp_header) {
+  RTC_DCHECK_EQ(payload_length == 0, incoming_payload == nullptr);
  return receiver_.InsertPacket(
      rtp_header,
      rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));
--- a/webrtc/modules/audio_coding/neteq/delay_manager.cc
+++ b/webrtc/modules/audio_coding/neteq/delay_manager.cc
@ -374,6 +374,10 @@ void DelayManager::LastDecodedWasCngOrDtmf(bool it_was) {
  }
 }

+void DelayManager::RegisterEmptyPacket() {
+  ++last_seq_no_;
+}
+
 bool DelayManager::SetMinimumDelay(int delay_ms) {
  // Minimum delay shouldn't be more than maximum delay, if any maximum is set.
  // Also, if possible check |delay| to less than 75% of
--- a/webrtc/modules/audio_coding/neteq/delay_manager.h
+++ b/webrtc/modules/audio_coding/neteq/delay_manager.h
@ -95,6 +95,11 @@ class DelayManager {
  // speech.
  virtual void LastDecodedWasCngOrDtmf(bool it_was);

+  // Notify the delay manager that empty packets have been received. These are
+  // packets that are part of the sequence number series, so that an empty
+  // packet will shift the sequence numbers for the following packets.
+  virtual void RegisterEmptyPacket();
+
  // Accessors and mutators.
  // Assuming |delay| is in valid range.
  virtual bool SetMinimumDelay(int delay_ms);
--- a/webrtc/modules/audio_coding/neteq/delay_manager_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/delay_manager_unittest.cc
@ -266,6 +266,58 @@ TEST_F(DelayManagerTest, MinAndRequiredDelay) {
  EXPECT_EQ(kMinDelayPackets << 8, dm_->TargetLevel());
 }

+// Tests that skipped sequence numbers (simulating empty packets) are handled
+// correctly.
+TEST_F(DelayManagerTest, EmptyPacketsReported) {
+  SetPacketAudioLength(kFrameSizeMs);
+  // First packet arrival.
+  InsertNextPacket();
+
+  // Advance time by one frame size.
+  IncreaseTime(kFrameSizeMs);
+
+  // Advance the sequence number by 5, simulating that 5 empty packets were
+  // received, but never inserted.
+  seq_no_ += 10;
+  for (int j = 0; j < 10; ++j) {
+    dm_->RegisterEmptyPacket();
+  }
+
+  // Second packet arrival.
+  // Expect detector update method to be called once with inter-arrival time
+  // equal to 1 packet, and (base) target level equal to 1 as well.
+  // Return false to indicate no peaks found.
+  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
+  InsertNextPacket();
+
+  EXPECT_EQ(1 << 8, dm_->TargetLevel());  // In Q8.
+}
+
+// Same as above, but do not call RegisterEmptyPacket. Observe the target level
+// increase dramatically.
+TEST_F(DelayManagerTest, EmptyPacketsNotReported) {
+  SetPacketAudioLength(kFrameSizeMs);
+  // First packet arrival.
+  InsertNextPacket();
+
+  // Advance time by one frame size.
+  IncreaseTime(kFrameSizeMs);
+
+  // Advance the sequence number by 5, simulating that 5 empty packets were
+  // received, but never inserted.
+  seq_no_ += 10;
+
+  // Second packet arrival.
+  // Expect detector update method to be called once with inter-arrival time
+  // equal to 1 packet, and (base) target level equal to 1 as well.
+  // Return false to indicate no peaks found.
+  EXPECT_CALL(detector_, Update(10, 10)).WillOnce(Return(false));
+  InsertNextPacket();
+
+  // Note 10 times higher target value.
+  EXPECT_EQ(10 * 1 << 8, dm_->TargetLevel());  // In Q8.
+}
+
 TEST_F(DelayManagerTest, Failures) {
  // Wrong sample rate.
  EXPECT_EQ(-1, dm_->Update(0, 0, -1));
--- a/webrtc/modules/audio_coding/neteq/include/neteq.h
+++ b/webrtc/modules/audio_coding/neteq/include/neteq.h
@ -145,6 +145,12 @@ class NetEq {
                           rtc::ArrayView<const uint8_t> payload,
                           uint32_t receive_timestamp) = 0;

+  // Lets NetEq know that a packet arrived with an empty payload. This typically
+  // happens when empty packets are used for probing the network channel, and
+  // these packets use RTP sequence numbers from the same series as the actual
+  // audio packets.
+  virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;
+
  // Instructs NetEq to deliver 10 ms of audio data. The data is written to
  // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
  // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
--- a/webrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h
+++ b/webrtc/modules/audio_coding/neteq/mock/mock_delay_manager.h
@ -45,6 +45,7 @@ class MockDelayManager : public DelayManager {
      void(int* lower_limit, int* higher_limit));
  MOCK_CONST_METHOD0(TargetLevel,
      int());
+  MOCK_METHOD0(RegisterEmptyPacket, void());
  MOCK_METHOD1(set_extra_delay_ms,
      void(int16_t delay));
  MOCK_CONST_METHOD0(base_target_level,
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@ -146,6 +146,14 @@ int NetEqImpl::InsertPacket(const RTPHeader& rtp_header,
  return kOK;
 }

+void NetEqImpl::InsertEmptyPacket(const RTPHeader& /*rtp_header*/) {
+  // TODO(henrik.lundin) Handle NACK as well. This will make use of the
+  // rtp_header parameter.
+  // https://bugs.chromium.org/p/webrtc/issues/detail?id=7611
+  rtc::CritScope lock(&crit_sect_);
+  delay_manager_->RegisterEmptyPacket();
+}
+
 namespace {
 void SetAudioFrameActivityAndType(bool vad_enabled,
                                  NetEqImpl::OutputType type,
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h
@ -109,6 +109,8 @@ class NetEqImpl : public webrtc::NetEq {
                   rtc::ArrayView<const uint8_t> payload,
                   uint32_t receive_timestamp) override;

+  void InsertEmptyPacket(const RTPHeader& rtp_header) override;
+
  int GetAudio(AudioFrame* audio_frame, bool* muted) override;

  void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@ -1284,6 +1284,21 @@ TEST_F(NetEqImplTest, TargetDelayMs) {
  EXPECT_EQ(17 * 30, neteq_->TargetDelayMs());
 }

+TEST_F(NetEqImplTest, InsertEmptyPacket) {
+  UseNoMocks();
+  use_mock_delay_manager_ = true;
+  CreateInstance();
+
+  RTPHeader rtp_header;
+  rtp_header.payloadType = 17;
+  rtp_header.sequenceNumber = 0x1234;
+  rtp_header.timestamp = 0x12345678;
+  rtp_header.ssrc = 0x87654321;
+
+  EXPECT_CALL(*mock_delay_manager_, RegisterEmptyPacket());
+  neteq_->InsertEmptyPacket(rtp_header);
+}
+
 class Decoder120ms : public AudioDecoder {
 public:
  Decoder120ms(int sample_rate_hz, SpeechType speech_type)
--- a/webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.cc
@ -212,9 +212,13 @@ int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
    size_t payload_length,
    const AudioPayload& audio_specific,
    bool is_red) {
-
-  if (payload_length == 0) {
-    return 0;
+  RTC_DCHECK_GE(payload_length, rtp_header->header.paddingLength);
+  const size_t payload_data_length =
+      payload_length - rtp_header->header.paddingLength;
+  if (payload_data_length == 0) {
+    rtp_header->type.Audio.isCNG = false;
+    rtp_header->frameType = kEmptyFrame;
+    return data_callback_->OnReceivedPayloadData(nullptr, 0, rtp_header);
  }

  bool telephone_event_packet =
@ -229,16 +233,17 @@ int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
    // |     event     |E|R| volume    |          duration             |
    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    //
-    if (payload_length % 4 != 0) {
+    if (payload_data_length % 4 != 0) {
      return -1;
    }
-    size_t number_of_events = payload_length / 4;
+    size_t number_of_events = payload_data_length / 4;

    // sanity
    if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
      number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
    }
    for (size_t n = 0; n < number_of_events; ++n) {
+      RTC_DCHECK_GE(payload_data_length, (4 * n) + 2);
      bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;

      std::set<uint8_t>::iterator event =
@ -291,17 +296,18 @@ int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
    }
  }
  // TODO(holmer): Break this out to have RED parsing handled generically.
+  RTC_DCHECK_GT(payload_data_length, 0);
  if (is_red && !(payload_data[0] & 0x80)) {
    // we recive only one frame packed in a RED packet remove the RED wrapper
    rtp_header->header.payloadType = payload_data[0];

    // only one frame in the RED strip the one byte to help NetEq
    return data_callback_->OnReceivedPayloadData(
-        payload_data + 1, payload_length - 1, rtp_header);
+        payload_data + 1, payload_data_length - 1, rtp_header);
  }

  rtp_header->type.Audio.channel = audio_specific.channels;
-  return data_callback_->OnReceivedPayloadData(
-      payload_data, payload_length, rtp_header);
+  return data_callback_->OnReceivedPayloadData(payload_data,
+                                               payload_data_length, rtp_header);
 }
 }  // namespace webrtc