NetEq fix for repeated audio issue.
This CL implements a fix behind a field trial for a NetEq issue. NetEq restarts audio too quickly after a buffer underrun, which can quickly lead to another underrun in some circumstances. The fix changes NetEq's behavior to wait with restarting playback until sufficient audio is buffered. Bug: webrtc:9289 Change-Id: I5968c9478ce8d84caf77f00b8d0a39156b47fc8d Reviewed-on: https://webrtc-review.googlesource.com/77423 Reviewed-by: Minyue Li <minyue@webrtc.org> Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org> Commit-Queue: Ivo Creusen <ivoc@webrtc.org> Cr-Commit-Position: refs/heads/master@{#23347}
This commit is contained in:
@ -51,6 +51,10 @@ class OldStyleEncodedFrame final : public AudioDecoder::EncodedAudioFrame {
|
||||
|
||||
} // namespace
|
||||
|
||||
// Base-class implementation of IsDtxPacket(): always reports that the frame
// is not a DTX packet. The method is declared virtual in the class, so frame
// types that can detect DTX override it.
bool AudioDecoder::EncodedAudioFrame::IsDtxPacket() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
AudioDecoder::ParseResult::ParseResult() = default;
|
||||
AudioDecoder::ParseResult::ParseResult(ParseResult&& b) = default;
|
||||
AudioDecoder::ParseResult::ParseResult(uint32_t timestamp,
|
||||
|
||||
@ -48,6 +48,9 @@ class AudioDecoder {
|
||||
// If no duration can be ascertained, returns zero.
|
||||
virtual size_t Duration() const = 0;
|
||||
|
||||
// Returns true if this packet contains DTX.
|
||||
virtual bool IsDtxPacket() const;
|
||||
|
||||
// Decodes this frame of audio and writes the result in |decoded|.
|
||||
// |decoded| must be large enough to store as many samples as indicated by a
|
||||
// call to Duration() . On success, returns an rtc::Optional containing the
|
||||
|
||||
@ -36,6 +36,8 @@ class OpusFrame : public AudioDecoder::EncodedAudioFrame {
|
||||
return (ret < 0) ? 0 : static_cast<size_t>(ret);
|
||||
}
|
||||
|
||||
// Reports the frame as DTX when its payload is at most two bytes long.
// NOTE(review): the 2-byte threshold presumably reflects how the Opus encoder
// emits DTX frames -- confirm against the codec documentation.
bool IsDtxPacket() const override { return payload_.size() <= 2; }
|
||||
|
||||
rtc::Optional<DecodeResult> Decode(
|
||||
rtc::ArrayView<int16_t> decoded) const override {
|
||||
AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
|
||||
|
||||
@ -130,9 +130,9 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
|
||||
|
||||
FilterBufferLevel(cur_size_samples, prev_mode);
|
||||
|
||||
return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
|
||||
next_packet, prev_mode, play_dtmf,
|
||||
reset_decoder, generated_noise_samples);
|
||||
return GetDecisionSpecialized(
|
||||
sync_buffer, expand, decoder_frame_length, next_packet, prev_mode,
|
||||
play_dtmf, reset_decoder, generated_noise_samples, cur_size_samples);
|
||||
}
|
||||
|
||||
void DecisionLogic::ExpandDecision(Operations operation) {
|
||||
|
||||
@ -137,7 +137,8 @@ class DecisionLogic {
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) = 0;
|
||||
size_t generated_noise_samples,
|
||||
size_t cur_size_samples) = 0;
|
||||
|
||||
// Updates the |buffer_level_filter_| with the current buffer level
|
||||
// |buffer_size_packets|.
|
||||
|
||||
@ -27,7 +27,8 @@ Operations DecisionLogicFax::GetDecisionSpecialized(
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) {
|
||||
size_t generated_noise_samples,
|
||||
size_t /*cur_size_samples*/) {
|
||||
assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
|
||||
uint32_t target_timestamp = sync_buffer.end_timestamp();
|
||||
uint32_t available_timestamp = 0;
|
||||
|
||||
@ -47,7 +47,8 @@ class DecisionLogicFax : public DecisionLogic {
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) override;
|
||||
size_t generated_noise_samples,
|
||||
size_t cur_size_samples) override;
|
||||
|
||||
private:
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
#include <assert.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "modules/audio_coding/neteq/buffer_level_filter.h"
|
||||
#include "modules/audio_coding/neteq/decoder_database.h"
|
||||
@ -31,7 +32,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) {
|
||||
size_t generated_noise_samples,
|
||||
size_t cur_size_samples) {
|
||||
assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
|
||||
// Guard for errors, to avoid getting stuck in error mode.
|
||||
if (prev_mode == kModeError) {
|
||||
@ -68,6 +70,21 @@ Operations DecisionLogicNormal::GetDecisionSpecialized(
|
||||
return kNormal;
|
||||
}
|
||||
|
||||
// Make sure we don't restart audio too soon after an expansion to avoid
|
||||
// running out of data right away again. We should only wait if there are no
|
||||
// DTX or CNG packets in the buffer (otherwise we should just play out what we
|
||||
// have, since we cannot know the exact duration of DTX or CNG packets), and
|
||||
// if the mute factor is low enough (otherwise the expansion was short enough
|
||||
// to not be noticeable).
|
||||
// Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
|
||||
if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
|
||||
!packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
|
||||
cur_size_samples < static_cast<size_t>(delay_manager_->TargetLevel() *
|
||||
packet_length_samples_) >> 8 &&
|
||||
expand.MuteFactor(0) < 16384 / 2) {
|
||||
return kExpand;
|
||||
}
|
||||
|
||||
const uint32_t five_seconds_samples =
|
||||
static_cast<uint32_t>(5 * 8000 * fs_mult_);
|
||||
// Check if the required packet is available.
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
|
||||
#include "modules/audio_coding/neteq/decision_logic.h"
|
||||
#include "rtc_base/constructormagic.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
#include "typedefs.h" // NOLINT(build/include)
|
||||
|
||||
namespace webrtc {
|
||||
@ -37,7 +38,9 @@ class DecisionLogicNormal : public DecisionLogic {
|
||||
packet_buffer,
|
||||
delay_manager,
|
||||
buffer_level_filter,
|
||||
tick_timer) {}
|
||||
tick_timer),
|
||||
postpone_decoding_after_expand_(field_trial::IsEnabled(
|
||||
"WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {}
|
||||
|
||||
protected:
|
||||
static const int kReinitAfterExpands = 100;
|
||||
@ -50,7 +53,8 @@ class DecisionLogicNormal : public DecisionLogic {
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) override;
|
||||
size_t generated_noise_samples,
|
||||
size_t cur_size_samples) override;
|
||||
|
||||
// Returns the operation to do given that the expected packet is not
|
||||
// available, but a packet further into the future is at hand.
|
||||
@ -100,6 +104,8 @@ class DecisionLogicNormal : public DecisionLogic {
|
||||
// Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
|
||||
bool MaxWaitForPacket() const;
|
||||
|
||||
const bool postpone_decoding_after_expand_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal);
|
||||
};
|
||||
|
||||
|
||||
@ -57,7 +57,7 @@ class Expand {
|
||||
virtual void SetParametersForMergeAfterExpand();
|
||||
|
||||
// Returns the mute factor for |channel|.
|
||||
int16_t MuteFactor(size_t channel) {
|
||||
int16_t MuteFactor(size_t channel) const {
|
||||
assert(channel < num_channels_);
|
||||
return channel_parameters_[channel].mute_factor;
|
||||
}
|
||||
|
||||
@ -285,6 +285,18 @@ size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const {
|
||||
return num_samples;
|
||||
}
|
||||
|
||||
// Scans every packet in |buffer_| and returns true as soon as one is found
// that either carries a frame reporting itself as DTX, or has a payload type
// that |decoder_database| classifies as comfort noise. Returns false if no
// such packet exists (including when the buffer is empty).
// |decoder_database| must not be null; this is enforced with RTC_DCHECK.
bool PacketBuffer::ContainsDtxOrCngPacket(
|
||||
const DecoderDatabase* decoder_database) const {
|
||||
RTC_DCHECK(decoder_database);
|
||||
for (const Packet& packet : buffer_) {
|
||||
// |packet.frame| is tested before use, so packets without a frame are
// judged on their payload type alone.
if ((packet.frame && packet.frame->IsDtxPacket()) ||
|
||||
decoder_database->IsComfortNoise(packet.payload_type)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void PacketBuffer::BufferStat(int* num_packets, int* max_num_packets) const {
|
||||
*num_packets = static_cast<int>(buffer_.size());
|
||||
*max_num_packets = static_cast<int>(max_number_of_packets_);
|
||||
|
||||
@ -12,6 +12,7 @@
|
||||
#define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
|
||||
|
||||
#include "api/optional.h"
|
||||
#include "modules/audio_coding/neteq/decoder_database.h"
|
||||
#include "modules/audio_coding/neteq/packet.h"
|
||||
#include "modules/include/module_common_types.h"
|
||||
#include "rtc_base/constructormagic.h"
|
||||
@ -121,6 +122,10 @@ class PacketBuffer {
|
||||
// duplicate and redundant packets.
|
||||
virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;
|
||||
|
||||
// Returns true if the packet buffer contains any DTX or CNG packets.
|
||||
virtual bool ContainsDtxOrCngPacket(
|
||||
const DecoderDatabase* decoder_database) const;
|
||||
|
||||
virtual void BufferStat(int* num_packets, int* max_num_packets) const;
|
||||
|
||||
// Static method returning true if |timestamp| is older than |timestamp_limit|
|
||||
|
||||
Reference in New Issue
Block a user