NetEq fix for repeated audio issue.

This CL implements a fix, behind a field trial, for a NetEq issue. NetEq restarts audio too quickly after a buffer underrun, which in some circumstances can soon lead to another underrun. The fix changes NetEq's behavior to postpone restarting playback until sufficient audio is buffered.

Bug: webrtc:9289
Change-Id: I5968c9478ce8d84caf77f00b8d0a39156b47fc8d
Reviewed-on: https://webrtc-review.googlesource.com/77423
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23347}
This commit is contained in:
Ivo Creusen
2018-05-22 13:21:01 +02:00
committed by Commit Bot
parent 169005d8c1
commit c7f09ad2e0
12 changed files with 62 additions and 10 deletions

View File

@ -51,6 +51,10 @@ class OldStyleEncodedFrame final : public AudioDecoder::EncodedAudioFrame {
} // namespace
// Default implementation of the virtual DTX query: always reports false, so a
// frame is only treated as DTX when a subclass overrides this method.
bool AudioDecoder::EncodedAudioFrame::IsDtxPacket() const {
return false;
}
AudioDecoder::ParseResult::ParseResult() = default;
AudioDecoder::ParseResult::ParseResult(ParseResult&& b) = default;
AudioDecoder::ParseResult::ParseResult(uint32_t timestamp,

View File

@ -48,6 +48,9 @@ class AudioDecoder {
// If no duration can be ascertained, returns zero.
virtual size_t Duration() const = 0;
// Returns true if this packet contains DTX.
virtual bool IsDtxPacket() const;
// Decodes this frame of audio and writes the result in |decoded|.
// |decoded| must be large enough to store as many samples as indicated by a
// call to Duration() . On success, returns an rtc::Optional containing the

View File

@ -36,6 +36,8 @@ class OpusFrame : public AudioDecoder::EncodedAudioFrame {
return (ret < 0) ? 0 : static_cast<size_t>(ret);
}
// Reports whether this frame is a DTX packet. The frame is classified as DTX
// purely by payload size: anything no larger than the threshold below counts.
// NOTE(review): the size is presumably measured in bytes - confirm against the
// type of |payload_|.
bool IsDtxPacket() const override {
  constexpr int kMaxDtxPayloadSize = 2;
  return payload_.size() <= kMaxDtxPayloadSize;
}
rtc::Optional<DecodeResult> Decode(
rtc::ArrayView<int16_t> decoded) const override {
AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;

View File

@ -130,9 +130,9 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
FilterBufferLevel(cur_size_samples, prev_mode);
return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
next_packet, prev_mode, play_dtmf,
reset_decoder, generated_noise_samples);
return GetDecisionSpecialized(
sync_buffer, expand, decoder_frame_length, next_packet, prev_mode,
play_dtmf, reset_decoder, generated_noise_samples, cur_size_samples);
}
void DecisionLogic::ExpandDecision(Operations operation) {

View File

@ -137,7 +137,8 @@ class DecisionLogic {
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
size_t generated_noise_samples) = 0;
size_t generated_noise_samples,
size_t cur_size_samples) = 0;
// Updates the |buffer_level_filter_| with the current buffer level
// |buffer_size_packets|.

View File

@ -27,7 +27,8 @@ Operations DecisionLogicFax::GetDecisionSpecialized(
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
size_t generated_noise_samples) {
size_t generated_noise_samples,
size_t /*cur_size_samples*/) {
assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
uint32_t target_timestamp = sync_buffer.end_timestamp();
uint32_t available_timestamp = 0;

View File

@ -47,7 +47,8 @@ class DecisionLogicFax : public DecisionLogic {
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
size_t generated_noise_samples) override;
size_t generated_noise_samples,
size_t cur_size_samples) override;
private:
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);

View File

@ -13,6 +13,7 @@
#include <assert.h>
#include <algorithm>
#include <limits>
#include "modules/audio_coding/neteq/buffer_level_filter.h"
#include "modules/audio_coding/neteq/decoder_database.h"
@ -31,7 +32,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
size_t generated_noise_samples) {
size_t generated_noise_samples,
size_t cur_size_samples) {
assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
// Guard for errors, to avoid getting stuck in error mode.
if (prev_mode == kModeError) {
@ -68,6 +70,21 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
return kNormal;
}
// Make sure we don't restart audio too soon after an expansion to avoid
// running out of data right away again. We should only wait if there are no
// DTX or CNG packets in the buffer (otherwise we should just play out what we
// have, since we cannot know the exact duration of DTX or CNG packets), and
// if the mute factor is low enough (otherwise the expansion was short enough
// to not be noticeable).
// Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
!packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
cur_size_samples < static_cast<size_t>(delay_manager_->TargetLevel() *
packet_length_samples_) >> 8 &&
expand.MuteFactor(0) < 16384 / 2) {
return kExpand;
}
const uint32_t five_seconds_samples =
static_cast<uint32_t>(5 * 8000 * fs_mult_);
// Check if the required packet is available.

View File

@ -13,6 +13,7 @@
#include "modules/audio_coding/neteq/decision_logic.h"
#include "rtc_base/constructormagic.h"
#include "system_wrappers/include/field_trial.h"
#include "typedefs.h" // NOLINT(build/include)
namespace webrtc {
@ -37,7 +38,9 @@ class DecisionLogicNormal : public DecisionLogic {
packet_buffer,
delay_manager,
buffer_level_filter,
tick_timer) {}
tick_timer),
postpone_decoding_after_expand_(field_trial::IsEnabled(
"WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {}
protected:
static const int kReinitAfterExpands = 100;
@ -50,7 +53,8 @@ class DecisionLogicNormal : public DecisionLogic {
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
size_t generated_noise_samples) override;
size_t generated_noise_samples,
size_t cur_size_samples) override;
// Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand.
@ -100,6 +104,8 @@ class DecisionLogicNormal : public DecisionLogic {
// Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
bool MaxWaitForPacket() const;
const bool postpone_decoding_after_expand_;
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal);
};

View File

@ -57,7 +57,7 @@ class Expand {
virtual void SetParametersForMergeAfterExpand();
// Returns the mute factor for |channel|.
int16_t MuteFactor(size_t channel) {
int16_t MuteFactor(size_t channel) const {
assert(channel < num_channels_);
return channel_parameters_[channel].mute_factor;
}

View File

@ -285,6 +285,18 @@ size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const {
return num_samples;
}
// Scans the buffered packets and returns true as soon as one of them is either
// a DTX packet (according to its frame's IsDtxPacket()) or has a payload type
// that |decoder_database| classifies as comfort noise. Returns false if no
// such packet is found, including when the buffer is empty.
// |decoder_database| must not be null.
bool PacketBuffer::ContainsDtxOrCngPacket(
    const DecoderDatabase* decoder_database) const {
  RTC_DCHECK(decoder_database);
  for (const Packet& candidate : buffer_) {
    // A packet without a frame cannot be queried for DTX; fall through to the
    // comfort-noise check in that case.
    const bool is_dtx = candidate.frame && candidate.frame->IsDtxPacket();
    if (is_dtx) {
      return true;
    }
    if (decoder_database->IsComfortNoise(candidate.payload_type)) {
      return true;
    }
  }
  return false;
}
void PacketBuffer::BufferStat(int* num_packets, int* max_num_packets) const {
*num_packets = static_cast<int>(buffer_.size());
*max_num_packets = static_cast<int>(max_number_of_packets_);

View File

@ -12,6 +12,7 @@
#define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
#include "api/optional.h"
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/packet.h"
#include "modules/include/module_common_types.h"
#include "rtc_base/constructormagic.h"
@ -121,6 +122,10 @@ class PacketBuffer {
// duplicate and redundant packets.
virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;
// Returns true if the packet buffer contains any DTX or CNG packets.
virtual bool ContainsDtxOrCngPacket(
const DecoderDatabase* decoder_database) const;
virtual void BufferStat(int* num_packets, int* max_num_packets) const;
// Static method returning true if |timestamp| is older than |timestamp_limit|