API to control target delay in NetEq jitter buffer. NetEq maintains the given delay unless channel conditions require a higher delay.
TEST=unit-test, manual, trybots.
R=henrik.lundin@webrtc.org, henrika@webrtc.org, mflodman@webrtc.org, mikhal@webrtc.org, stefan@webrtc.org, tina.legrand@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/1384005
git-svn-id: http://webrtc.googlecode.com/svn/trunk@4087 4adac7df-926f-26a2-2b94-8c16560cd09d
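For orientation, a minimal usage sketch of the new receiver-side API follows. It only combines calls that appear in this change and in target_delay_unittest.cc; the 100 ms value and the L16/16 kHz codec choice are illustrative, not prescribed.

#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"

// Sketch: ask NetEq for a minimum (target) playout delay and query the
// delay the channel itself requires. Values here are examples only.
AudioCodingModule* acm = AudioCodingModule::Create(0);  // Module id 0.
CodecInst codec;
AudioCodingModule::Codec("L16", &codec, 16000, 1);
acm->InitializeReceiver();
acm->RegisterReceiveCodec(codec);

// Request at least 100 ms of buffering (valid range 0-10000 ms);
// returns 0 on success, -1 on failure.
if (acm->SetMinimumPlayoutDelay(100) != 0) {
  // Handle the error.
}

// NetEq targets the maximum of the requested minimum and the delay the
// channel conditions require; the latter can be queried at any time.
int least_required_ms = acm->LeastRequiredDelayMs();

AudioCodingModule::Destroy(acm);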
@@ -639,8 +639,9 @@ class AudioCodingModule: public Module {
      const uint32_t timestamp = 0) = 0;

  ///////////////////////////////////////////////////////////////////////////
  // int32_t SetMinimumPlayoutDelay()
  // Set Minimum playout delay, used for lip-sync.
  // int SetMinimumPlayoutDelay()
  // Set a minimum for the playout delay, used for lip-sync. NetEq maintains
  // such a delay unless channel condition yields to a higher delay.
  //
  // Input:
  //   -time_ms : minimum delay in milliseconds.

@@ -649,7 +650,15 @@ class AudioCodingModule: public Module {
  //   -1 if failed to set the delay,
  //   0 if the minimum delay is set.
  //
  virtual int32_t SetMinimumPlayoutDelay(const int32_t time_ms) = 0;
  virtual int SetMinimumPlayoutDelay(int time_ms) = 0;

  //
  // The shortest latency, in milliseconds, required by jitter buffer. This
  // is computed based on inter-arrival times and playout mode of NetEq. The
  // actual delay is the maximum of least-required-delay and the minimum-delay
  // specified by SetMinumumPlayoutDelay() API.
  //
  virtual int LeastRequiredDelayMs() const = 0;

  ///////////////////////////////////////////////////////////////////////////
  // int32_t RegisterIncomingMessagesCallback()

@@ -945,8 +954,9 @@ class AudioCodingModule: public Module {
  // Set an initial delay for playout.
  // An initial delay yields ACM playout silence until equivalent of |delay_ms|
  // audio payload is accumulated in NetEq jitter. Thereafter, ACM pulls audio
  // from NetEq in its regular fashion, and the given delay is maintained as
  // "minimum playout delay."
  // from NetEq in its regular fashion, and the given delay is maintained
  // through out the call, unless channel conditions yield to a higher jitter
  // buffer delay.
  //
  // Input:
  //   -delay_ms : delay in milliseconds.
@@ -44,12 +44,12 @@ ACMNetEQ::ACMNetEQ()
      received_stereo_(false),
      master_slave_info_(NULL),
      previous_audio_activity_(AudioFrame::kVadUnknown),
      extra_delay_(0),
      callback_crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
      min_of_max_num_packets_(0),
      min_of_buffer_size_bytes_(0),
      per_packet_overhead_bytes_(0),
      av_sync_(false) {
      av_sync_(false),
      minimum_delay_ms_(0) {
  for (int n = 0; n < MAX_NUM_SLAVE_NETEQ + 1; n++) {
    is_initialized_[n] = false;
    ptr_vadinst_[n] = NULL;

@@ -270,24 +270,6 @@ int16_t ACMNetEQ::AllocatePacketBufferByIdxSafe(
  return 0;
}

int32_t ACMNetEQ::SetExtraDelay(const int32_t delay_in_ms) {
  CriticalSectionScoped lock(neteq_crit_sect_);

  for (int16_t idx = 0; idx < num_slaves_ + 1; idx++) {
    if (!is_initialized_[idx]) {
      WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                   "SetExtraDelay: NetEq is not initialized.");
      return -1;
    }
    if (WebRtcNetEQ_SetExtraDelay(inst_[idx], delay_in_ms) < 0) {
      LogError("SetExtraDelay", idx);
      return -1;
    }
  }
  extra_delay_ = delay_in_ms;
  return 0;
}

int32_t ACMNetEQ::SetAVTPlayout(const bool enable) {
  CriticalSectionScoped lock(neteq_crit_sect_);
  if (avt_playout_ != enable) {

@@ -1037,14 +1019,6 @@ int16_t ACMNetEQ::AddSlave(const WebRtcNetEQDecoder* used_codecs,
    num_slaves_ = 1;
    is_initialized_[slave_idx] = true;

    // Set Slave delay as all other instances.
    if (WebRtcNetEQ_SetExtraDelay(inst_[slave_idx], extra_delay_) < 0) {
      LogError("SetExtraDelay", slave_idx);
      WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                   "AddSlave: AddSlave Failed, Could not set delay");
      return -1;
    }

    // Set AVT
    if (WebRtcNetEQ_SetAVTPlayout(inst_[slave_idx],
                                  (avt_playout_) ? 1 : 0) < 0) {

@@ -1093,8 +1067,13 @@ int16_t ACMNetEQ::AddSlave(const WebRtcNetEQDecoder* used_codecs,
                   "AddSlave: AddSlave Failed, Could not Set Playout Mode.");
      return -1;
    }

    // Set AV-sync for the slave.
    WebRtcNetEQ_EnableAVSync(inst_[slave_idx], av_sync_ ? 1 : 0);

    // Set minimum delay.
    if (minimum_delay_ms_ > 0)
      WebRtcNetEQ_SetMinimumDelay(inst_[slave_idx], minimum_delay_ms_);
  }

  return 0;

@@ -1119,4 +1098,23 @@ void ACMNetEQ::EnableAVSync(bool enable) {
  }
}

int ACMNetEQ::SetMinimumDelay(int minimum_delay_ms) {
  CriticalSectionScoped lock(neteq_crit_sect_);
  for (int i = 0; i < num_slaves_ + 1; ++i) {
    assert(is_initialized_[i]);
    if (WebRtcNetEQ_SetMinimumDelay(inst_[i], minimum_delay_ms) < 0)
      return -1;
  }
  minimum_delay_ms_ = minimum_delay_ms;
  return 0;
}

int ACMNetEQ::LeastRequiredDelayMs() const {
  CriticalSectionScoped lock(neteq_crit_sect_);
  assert(is_initialized_[0]);

  // Sufficient to query the master.
  return WebRtcNetEQ_GetRequiredDelayMs(inst_[0]);
}

} // namespace webrtc
@@ -129,18 +129,6 @@ class ACMNetEQ {
  int32_t AllocatePacketBuffer(const WebRtcNetEQDecoder* used_codecs,
                               int16_t num_codecs);

  //
  // SetExtraDelay()
  // Sets a |delay_in_ms| milliseconds extra delay in NetEQ.
  //
  // Input:
  //   - delay_in_ms : Extra delay in milliseconds.
  //
  // Return value : 0 if ok.
  //                <0 if NetEQ returned an error.
  //
  int32_t SetExtraDelay(const int32_t delay_in_ms);

  //
  // SetAVTPlayout()
  // Enable/disable playout of AVT payloads.

@@ -301,6 +289,20 @@ class ACMNetEQ {
  //
  void EnableAVSync(bool enable);

  //
  // Set a minimum delay in NetEq. Unless channel condition dictates a longer
  // delay, the given delay is maintained by NetEq.
  //
  int SetMinimumDelay(int minimum_delay_ms);

  //
  // The shortest latency, in milliseconds, required by jitter buffer. This
  // is computed based on inter-arrival times and playout mode of NetEq. The
  // actual delay is the maximum of least-required-delay and the minimum-delay
  // specified by SetMinumumPlayoutDelay() API.
  //
  int LeastRequiredDelayMs() const ;

 private:
  //
  // RTPPack()

@@ -365,7 +367,6 @@ class ACMNetEQ {
  bool received_stereo_;
  void* master_slave_info_;
  AudioFrame::VADActivity previous_audio_activity_;
  int32_t extra_delay_;

  CriticalSectionWrapper* callback_crit_sect_;
  // Minimum of "max number of packets," among all NetEq instances.

@@ -376,6 +377,8 @@ class ACMNetEQ {

  // Keep track of AV-sync. Just used to set the slave when a slave is added.
  bool av_sync_;

  int minimum_delay_ms_;
};

} // namespace webrtc
@@ -137,14 +137,15 @@
        '../test/RTPFile.cc',
        '../test/SpatialAudio.cc',
        '../test/TestAllCodecs.cc',
        '../test/target_delay_unittest.cc',
        '../test/Tester.cc',
        '../test/TestFEC.cc',
        '../test/TestStereo.cc',
        '../test/TestVADDTX.cc',
        '../test/TimedTrace.cc',
        '../test/TwoWayCommunication.cc',
        '../test/utility.cc',
        '../test/initial_delay_unittest.cc',
        '../test/utility.cc',
      ],
    },
    {
@@ -2116,8 +2116,11 @@ int32_t AudioCodingModuleImpl::IncomingPacket(

    if (av_sync_ || track_neteq_buffer_) {
      last_incoming_send_timestamp_ = rtp_info.header.timestamp;
      first_payload_received_ = true;
    }

    // Set the following regardless of tracking NetEq buffer or being in
    // AV-sync mode.
    first_payload_received_ = true;
  }
  return 0;
}

@@ -2192,8 +2195,7 @@ int AudioCodingModuleImpl::InitStereoSlave() {
}

// Minimum playout delay (Used for lip-sync).
int32_t AudioCodingModuleImpl::SetMinimumPlayoutDelay(
    const int32_t time_ms) {
int AudioCodingModuleImpl::SetMinimumPlayoutDelay(int time_ms) {
  if ((time_ms < 0) || (time_ms > 10000)) {
    WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceAudioCoding, id_,
                 "Delay must be in the range of 0-10000 milliseconds.");

@@ -2205,7 +2207,7 @@ int32_t AudioCodingModuleImpl::SetMinimumPlayoutDelay(
    if (track_neteq_buffer_ && first_payload_received_)
      return 0;
  }
  return neteq_.SetExtraDelay(time_ms);
  return neteq_.SetMinimumDelay(time_ms);
}

// Get Dtmf playout status.

@@ -2937,7 +2939,7 @@ int AudioCodingModuleImpl::SetInitialPlayoutDelay(int delay_ms) {
  }
  av_sync_ = true;
  neteq_.EnableAVSync(av_sync_);
  return neteq_.SetExtraDelay(delay_ms);
  return neteq_.SetMinimumDelay(delay_ms);
}

bool AudioCodingModuleImpl::GetSilence(int desired_sample_rate_hz,

@@ -3041,4 +3043,8 @@ void AudioCodingModuleImpl::UpdateBufferingSafe(const WebRtcRTPHeader& rtp_info,
                                     initial_delay_ms_ * in_sample_rate_khz));
}

int AudioCodingModuleImpl::LeastRequiredDelayMs() const {
  return std::max(neteq_.LeastRequiredDelayMs(), initial_delay_ms_);
}

} // namespace webrtc
@@ -167,8 +167,17 @@ class AudioCodingModuleImpl : public AudioCodingModule {
                         const uint8_t payload_type,
                         const uint32_t timestamp = 0);

  // Minimum playout delay (used for lip-sync).
  int32_t SetMinimumPlayoutDelay(const int32_t time_ms);
  // NetEq minimum playout delay (used for lip-sync). The actual target delay
  // is the max of |time_ms| and the required delay dictated by the channel.
  int SetMinimumPlayoutDelay(int time_ms);

  //
  // The shortest latency, in milliseconds, required by jitter buffer. This
  // is computed based on inter-arrival times and playout mode of NetEq. The
  // actual delay is the maximum of least-required-delay and the minimum-delay
  // specified by SetMinumumPlayoutDelay() API.
  //
  int LeastRequiredDelayMs() const ;

  // Configure Dtmf playout status i.e on/off playout the incoming outband Dtmf
  // tone.
webrtc/modules/audio_coding/main/test/target_delay_unittest.cc (new file, 172 lines)
@@ -0,0 +1,172 @@
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "gtest/gtest.h"
#include "testsupport/fileutils.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/sleep.h"

namespace webrtc {
class TargetDelayTest : public ::testing::Test {
 protected:
  static const int kSampleRateHz = 16000;
  static const int kNum10msPerFrame = 2;
  static const int kFrameSizeSamples = 320;  // 20 ms @ 16 kHz.
  // payload-len = frame-samples * 2 bytes/sample.
  static const int kPayloadLenBytes = 320 * 2;
  // Inter-arrival time in number of packets in a jittery channel. One is no
  // jitter.
  static const int kInterarrivalJitterPacket = 2;

  TargetDelayTest()
      : acm_(AudioCodingModule::Create(0)) {}

  ~TargetDelayTest() {
    AudioCodingModule::Destroy(acm_);
  }

  void SetUp() {
    EXPECT_TRUE(acm_ != NULL);

    CodecInst codec;
    ASSERT_EQ(0, AudioCodingModule::Codec("L16", &codec, kSampleRateHz, 1));
    ASSERT_EQ(0, acm_->InitializeReceiver());
    ASSERT_EQ(0, acm_->RegisterReceiveCodec(codec));

    rtp_info_.header.payloadType = codec.pltype;
    rtp_info_.header.timestamp = 0;
    rtp_info_.header.ssrc = 0x12345678;
    rtp_info_.header.markerBit = false;
    rtp_info_.header.sequenceNumber = 0;
    rtp_info_.type.Audio.channel = 1;
    rtp_info_.type.Audio.isCNG = false;
    rtp_info_.frameType = kAudioFrameSpeech;
  }

  void Push() {
    rtp_info_.header.timestamp += kFrameSizeSamples;
    rtp_info_.header.sequenceNumber++;
    uint8_t payload[kPayloadLenBytes];  // Doesn't need to be initialized.
    ASSERT_EQ(0, acm_->IncomingPacket(payload, kFrameSizeSamples * 2,
                                      rtp_info_));
  }

  // Pull audio equivalent to the amount of audio in one RTP packet.
  void Pull() {
    AudioFrame frame;
    for (int k = 0; k < kNum10msPerFrame; ++k) {  // Pull one frame.
      ASSERT_EQ(0, acm_->PlayoutData10Ms(-1, &frame));
      // Had to use ASSERT_TRUE, ASSERT_EQ generated error.
      ASSERT_TRUE(kSampleRateHz == frame.sample_rate_hz_);
      ASSERT_EQ(1, frame.num_channels_);
      ASSERT_TRUE(kSampleRateHz / 100 == frame.samples_per_channel_);
    }
  }

  void Run(bool clean) {
    for (int n = 0; n < 10; ++n) {
      for (int m = 0; m < 5; ++m) {
        Push();
        Pull();
      }

      if (!clean) {
        for (int m = 0; m < 10; ++m) {  // Long enough to trigger delay change.
          Push();
          for (int n = 0; n < kInterarrivalJitterPacket; ++n)
            Pull();
        }
      }
    }
  }

  int SetMinimumDelay(int delay_ms) {
    return acm_->SetMinimumPlayoutDelay(delay_ms);
  }

  int GetCurrentOptimalDelayMs() {
    ACMNetworkStatistics stats;
    acm_->NetworkStatistics(&stats);
    return stats.preferredBufferSize;
  }

  int RequiredDelay() {
    return acm_->LeastRequiredDelayMs();
  }

  AudioCodingModule* acm_;
  WebRtcRTPHeader rtp_info_;
};

TEST_F(TargetDelayTest, OutOfRangeInput) {
  EXPECT_EQ(-1, SetMinimumDelay(-1));
  EXPECT_EQ(-1, SetMinimumDelay(10001));
}

TEST_F(TargetDelayTest, NoTargetDelayBufferSizeChanges) {
  for (int n = 0; n < 30; ++n)  // Run enough iterations.
    Run(true);
  int clean_optimal_delay = GetCurrentOptimalDelayMs();
  Run(false);  // Run with jitter.
  int jittery_optimal_delay = GetCurrentOptimalDelayMs();
  EXPECT_GT(jittery_optimal_delay, clean_optimal_delay);
  int required_delay = RequiredDelay();
  EXPECT_GT(required_delay, 0);
  EXPECT_NEAR(required_delay, jittery_optimal_delay, 1);
}

TEST_F(TargetDelayTest, WithTargetDelayBufferNotChanging) {
  // A target delay that is one packet larger than jitter.
  const int kTargetDelayMs = (kInterarrivalJitterPacket + 1) *
      kNum10msPerFrame * 10;
  ASSERT_EQ(0, SetMinimumDelay(kTargetDelayMs));
  for (int n = 0; n < 30; ++n)  // Run enough iterations to fill up the buffer.
    Run(true);
  int clean_optimal_delay = GetCurrentOptimalDelayMs();
  EXPECT_EQ(kTargetDelayMs, clean_optimal_delay);
  Run(false);  // Run with jitter.
  int jittery_optimal_delay = GetCurrentOptimalDelayMs();
  EXPECT_EQ(jittery_optimal_delay, clean_optimal_delay);
}

TEST_F(TargetDelayTest, RequiredDelayAtCorrectRange) {
  for (int n = 0; n < 30; ++n)  // Run clean and store delay.
    Run(true);
  int clean_optimal_delay = GetCurrentOptimalDelayMs();

  // A relatively large delay.
  const int kTargetDelayMs = (kInterarrivalJitterPacket + 10) *
      kNum10msPerFrame * 10;
  ASSERT_EQ(0, SetMinimumDelay(kTargetDelayMs));
  for (int n = 0; n < 300; ++n)  // Run enough iterations to fill up the buffer.
    Run(true);
  Run(false);  // Run with jitter.

  int jittery_optimal_delay = GetCurrentOptimalDelayMs();
  EXPECT_EQ(kTargetDelayMs, jittery_optimal_delay);

  int required_delay = RequiredDelay();

  // Checking |required_delay| is in correct range.
  EXPECT_GT(required_delay, 0);
  EXPECT_GT(jittery_optimal_delay, required_delay);
  EXPECT_GT(required_delay, clean_optimal_delay);

  // A tighter check for the value of |required_delay|.
  // The jitter forces a delay of
  // |kInterarrivalJitterPacket * kNum10msPerFrame * 10| milliseconds. So we
  // expect |required_delay| be close to that.
  EXPECT_NEAR(kInterarrivalJitterPacket * kNum10msPerFrame * 10,
              required_delay, 1);
}

} // webrtc
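For the constants used in these tests the numbers work out as follows: one packet carries kNum10msPerFrame * 10 = 20 ms of audio, so the jittery channel in Run(false) delays packets by kInterarrivalJitterPacket * 20 = 40 ms; WithTargetDelayBufferNotChanging requests (2 + 1) * 20 = 60 ms and RequiredDelayAtCorrectRange requests (2 + 10) * 20 = 240 ms, while the final EXPECT_NEAR checks that the reported required delay stays within 1 ms of the 40 ms forced by the jitter.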