NetEq fix for repeated audio issue.

This CL implements a fix, gated behind a field trial, for a NetEq issue: NetEq restarts audio playback too soon after a buffer underrun, which in some circumstances quickly leads to another underrun. The fix changes NetEq's behavior so that it postpones restarting playback until sufficient audio is buffered.

Bug: webrtc:9289
Change-Id: I5968c9478ce8d84caf77f00b8d0a39156b47fc8d
Reviewed-on: https://webrtc-review.googlesource.com/77423
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23347}
2018-05-22 13:21:01 +02:00
parent 169005d8c1
commit c7f09ad2e0
12 changed files with 62 additions and 10 deletions
--- a/api/audio_codecs/audio_decoder.cc
+++ b/api/audio_codecs/audio_decoder.cc
@ -51,6 +51,10 @@ class OldStyleEncodedFrame final : public AudioDecoder::EncodedAudioFrame {

 }  // namespace

+bool AudioDecoder::EncodedAudioFrame::IsDtxPacket() const {
+  return false;
+}
+
 AudioDecoder::ParseResult::ParseResult() = default;
 AudioDecoder::ParseResult::ParseResult(ParseResult&& b) = default;
 AudioDecoder::ParseResult::ParseResult(uint32_t timestamp,
--- a/api/audio_codecs/audio_decoder.h
+++ b/api/audio_codecs/audio_decoder.h
@ -48,6 +48,9 @@ class AudioDecoder {
    // If no duration can be ascertained, returns zero.
    virtual size_t Duration() const = 0;

+    // Returns true if this packet contains DTX.
+    virtual bool IsDtxPacket() const;
+
    // Decodes this frame of audio and writes the result in |decoded|.
    // |decoded| must be large enough to store as many samples as indicated by a
    // call to Duration() . On success, returns an rtc::Optional containing the
--- a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
+++ b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc
@ -36,6 +36,8 @@ class OpusFrame : public AudioDecoder::EncodedAudioFrame {
    return (ret < 0) ? 0 : static_cast<size_t>(ret);
  }

+  bool IsDtxPacket() const override { return payload_.size() <= 2; }
+
  rtc::Optional<DecodeResult> Decode(
      rtc::ArrayView<int16_t> decoded) const override {
    AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
--- a/modules/audio_coding/neteq/decision_logic.cc
+++ b/modules/audio_coding/neteq/decision_logic.cc
@ -130,9 +130,9 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,

  FilterBufferLevel(cur_size_samples, prev_mode);

-  return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
-                                next_packet, prev_mode, play_dtmf,
-                                reset_decoder, generated_noise_samples);
+  return GetDecisionSpecialized(
+      sync_buffer, expand, decoder_frame_length, next_packet, prev_mode,
+      play_dtmf, reset_decoder, generated_noise_samples, cur_size_samples);
 }

 void DecisionLogic::ExpandDecision(Operations operation) {
--- a/modules/audio_coding/neteq/decision_logic.h
+++ b/modules/audio_coding/neteq/decision_logic.h
@ -137,7 +137,8 @@ class DecisionLogic {
                                            Modes prev_mode,
                                            bool play_dtmf,
                                            bool* reset_decoder,
-                                            size_t generated_noise_samples) = 0;
+                                            size_t generated_noise_samples,
+                                            size_t cur_size_samples) = 0;

  // Updates the |buffer_level_filter_| with the current buffer level
  // |buffer_size_packets|.
--- a/modules/audio_coding/neteq/decision_logic_fax.cc
+++ b/modules/audio_coding/neteq/decision_logic_fax.cc
@ -27,7 +27,8 @@ Operations DecisionLogicFax::GetDecisionSpecialized(
    Modes prev_mode,
    bool play_dtmf,
    bool* reset_decoder,
-    size_t generated_noise_samples) {
+    size_t generated_noise_samples,
+    size_t /*cur_size_samples*/) {
  assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
  uint32_t target_timestamp = sync_buffer.end_timestamp();
  uint32_t available_timestamp = 0;
--- a/modules/audio_coding/neteq/decision_logic_fax.h
+++ b/modules/audio_coding/neteq/decision_logic_fax.h
@ -47,7 +47,8 @@ class DecisionLogicFax : public DecisionLogic {
                                    Modes prev_mode,
                                    bool play_dtmf,
                                    bool* reset_decoder,
-                                    size_t generated_noise_samples) override;
+                                    size_t generated_noise_samples,
+                                    size_t cur_size_samples) override;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
--- a/modules/audio_coding/neteq/decision_logic_normal.cc
+++ b/modules/audio_coding/neteq/decision_logic_normal.cc
@ -13,6 +13,7 @@
 #include <assert.h>

 #include <algorithm>
+#include <limits>

 #include "modules/audio_coding/neteq/buffer_level_filter.h"
 #include "modules/audio_coding/neteq/decoder_database.h"
@ -31,7 +32,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
    Modes prev_mode,
    bool play_dtmf,
    bool* reset_decoder,
-    size_t generated_noise_samples) {
+    size_t generated_noise_samples,
+    size_t cur_size_samples) {
  assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
  // Guard for errors, to avoid getting stuck in error mode.
  if (prev_mode == kModeError) {
@ -68,6 +70,21 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
    return kNormal;
  }

+  // Make sure we don't restart audio too soon after an expansion to avoid
+  // running out of data right away again. We should only wait if there are no
+  // DTX or CNG packets in the buffer (otherwise we should just play out what we
+  // have, since we cannot know the exact duration of DTX or CNG packets), and
+  // if the mute factor is low enough (otherwise the expansion was short enough
+  // to not be noticeable).
+  // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
+  if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
+      !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
+      cur_size_samples < static_cast<size_t>(delay_manager_->TargetLevel() *
+                                             packet_length_samples_) >> 8 &&
+      expand.MuteFactor(0) < 16384 / 2) {
+    return kExpand;
+  }
+
  const uint32_t five_seconds_samples =
      static_cast<uint32_t>(5 * 8000 * fs_mult_);
  // Check if the required packet is available.
--- a/modules/audio_coding/neteq/decision_logic_normal.h
+++ b/modules/audio_coding/neteq/decision_logic_normal.h
@ -13,6 +13,7 @@

 #include "modules/audio_coding/neteq/decision_logic.h"
 #include "rtc_base/constructormagic.h"
+#include "system_wrappers/include/field_trial.h"
 #include "typedefs.h"  // NOLINT(build/include)

 namespace webrtc {
@ -37,7 +38,9 @@ class DecisionLogicNormal : public DecisionLogic {
                      packet_buffer,
                      delay_manager,
                      buffer_level_filter,
-                      tick_timer) {}
+                      tick_timer),
+        postpone_decoding_after_expand_(field_trial::IsEnabled(
+            "WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {}

 protected:
  static const int kReinitAfterExpands = 100;
@ -50,7 +53,8 @@ class DecisionLogicNormal : public DecisionLogic {
                                    Modes prev_mode,
                                    bool play_dtmf,
                                    bool* reset_decoder,
-                                    size_t generated_noise_samples) override;
+                                    size_t generated_noise_samples,
+                                    size_t cur_size_samples) override;

  // Returns the operation to do given that the expected packet is not
  // available, but a packet further into the future is at hand.
@ -100,6 +104,8 @@ class DecisionLogicNormal : public DecisionLogic {
  // Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
  bool MaxWaitForPacket() const;

+  const bool postpone_decoding_after_expand_;
+
  RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal);
 };

--- a/modules/audio_coding/neteq/expand.h
+++ b/modules/audio_coding/neteq/expand.h
@ -57,7 +57,7 @@ class Expand {
  virtual void SetParametersForMergeAfterExpand();

  // Returns the mute factor for |channel|.
-  int16_t MuteFactor(size_t channel) {
+  int16_t MuteFactor(size_t channel) const {
    assert(channel < num_channels_);
    return channel_parameters_[channel].mute_factor;
  }
--- a/modules/audio_coding/neteq/packet_buffer.cc
+++ b/modules/audio_coding/neteq/packet_buffer.cc
@ -285,6 +285,18 @@ size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const {
  return num_samples;
 }

+bool PacketBuffer::ContainsDtxOrCngPacket(
+    const DecoderDatabase* decoder_database) const {
+  RTC_DCHECK(decoder_database);
+  for (const Packet& packet : buffer_) {
+    if ((packet.frame && packet.frame->IsDtxPacket()) ||
+        decoder_database->IsComfortNoise(packet.payload_type)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 void PacketBuffer::BufferStat(int* num_packets, int* max_num_packets) const {
  *num_packets = static_cast<int>(buffer_.size());
  *max_num_packets = static_cast<int>(max_number_of_packets_);
--- a/modules/audio_coding/neteq/packet_buffer.h
+++ b/modules/audio_coding/neteq/packet_buffer.h
@ -12,6 +12,7 @@
 #define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_

 #include "api/optional.h"
+#include "modules/audio_coding/neteq/decoder_database.h"
 #include "modules/audio_coding/neteq/packet.h"
 #include "modules/include/module_common_types.h"
 #include "rtc_base/constructormagic.h"
@ -121,6 +122,10 @@ class PacketBuffer {
  // duplicate and redundant packets.
  virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;

+  // Returns true if the packet buffer contains any DTX or CNG packets.
+  virtual bool ContainsDtxOrCngPacket(
+      const DecoderDatabase* decoder_database) const;
+
  virtual void BufferStat(int* num_packets, int* max_num_packets) const;

  // Static method returning true if |timestamp| is older than |timestamp_limit|