diff --git a/api/audio_codecs/audio_decoder.cc b/api/audio_codecs/audio_decoder.cc index ddb06d27ee..4903fb63bf 100644 --- a/api/audio_codecs/audio_decoder.cc +++ b/api/audio_codecs/audio_decoder.cc @@ -51,6 +51,10 @@ class OldStyleEncodedFrame final : public AudioDecoder::EncodedAudioFrame { } // namespace +bool AudioDecoder::EncodedAudioFrame::IsDtxPacket() const { + return false; +} + AudioDecoder::ParseResult::ParseResult() = default; AudioDecoder::ParseResult::ParseResult(ParseResult&& b) = default; AudioDecoder::ParseResult::ParseResult(uint32_t timestamp, diff --git a/api/audio_codecs/audio_decoder.h b/api/audio_codecs/audio_decoder.h index 545bdf52cc..021288fc2b 100644 --- a/api/audio_codecs/audio_decoder.h +++ b/api/audio_codecs/audio_decoder.h @@ -48,6 +48,9 @@ class AudioDecoder { // If no duration can be ascertained, returns zero. virtual size_t Duration() const = 0; + // Returns true if this packet contains DTX. + virtual bool IsDtxPacket() const; + // Decodes this frame of audio and writes the result in |decoded|. // |decoded| must be large enough to store as many samples as indicated by a // call to Duration() . On success, returns an rtc::Optional containing the diff --git a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc index 3d10b6fdbe..c784a68a3d 100644 --- a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc +++ b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc @@ -36,6 +36,8 @@ class OpusFrame : public AudioDecoder::EncodedAudioFrame { return (ret < 0) ? 
0 : static_cast(ret); } + bool IsDtxPacket() const override { return payload_.size() <= 2; } + rtc::Optional Decode( rtc::ArrayView decoded) const override { AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech; diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc index 6ab27168bd..279a9e6bee 100644 --- a/modules/audio_coding/neteq/decision_logic.cc +++ b/modules/audio_coding/neteq/decision_logic.cc @@ -130,9 +130,9 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer, FilterBufferLevel(cur_size_samples, prev_mode); - return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length, - next_packet, prev_mode, play_dtmf, - reset_decoder, generated_noise_samples); + return GetDecisionSpecialized( + sync_buffer, expand, decoder_frame_length, next_packet, prev_mode, + play_dtmf, reset_decoder, generated_noise_samples, cur_size_samples); } void DecisionLogic::ExpandDecision(Operations operation) { diff --git a/modules/audio_coding/neteq/decision_logic.h b/modules/audio_coding/neteq/decision_logic.h index 5b67196c43..9d88c4d355 100644 --- a/modules/audio_coding/neteq/decision_logic.h +++ b/modules/audio_coding/neteq/decision_logic.h @@ -137,7 +137,8 @@ class DecisionLogic { Modes prev_mode, bool play_dtmf, bool* reset_decoder, - size_t generated_noise_samples) = 0; + size_t generated_noise_samples, + size_t cur_size_samples) = 0; // Updates the |buffer_level_filter_| with the current buffer level // |buffer_size_packets|. 
diff --git a/modules/audio_coding/neteq/decision_logic_fax.cc b/modules/audio_coding/neteq/decision_logic_fax.cc index cc21ee9deb..22d36ce9cc 100644 --- a/modules/audio_coding/neteq/decision_logic_fax.cc +++ b/modules/audio_coding/neteq/decision_logic_fax.cc @@ -27,7 +27,8 @@ Operations DecisionLogicFax::GetDecisionSpecialized( Modes prev_mode, bool play_dtmf, bool* reset_decoder, - size_t generated_noise_samples) { + size_t generated_noise_samples, + size_t /*cur_size_samples*/) { assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff); uint32_t target_timestamp = sync_buffer.end_timestamp(); uint32_t available_timestamp = 0; diff --git a/modules/audio_coding/neteq/decision_logic_fax.h b/modules/audio_coding/neteq/decision_logic_fax.h index cefd8e4e42..1436f99aaa 100644 --- a/modules/audio_coding/neteq/decision_logic_fax.h +++ b/modules/audio_coding/neteq/decision_logic_fax.h @@ -47,7 +47,8 @@ class DecisionLogicFax : public DecisionLogic { Modes prev_mode, bool play_dtmf, bool* reset_decoder, - size_t generated_noise_samples) override; + size_t generated_noise_samples, + size_t cur_size_samples) override; private: RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax); diff --git a/modules/audio_coding/neteq/decision_logic_normal.cc b/modules/audio_coding/neteq/decision_logic_normal.cc index 1429bb7d13..c163999f58 100644 --- a/modules/audio_coding/neteq/decision_logic_normal.cc +++ b/modules/audio_coding/neteq/decision_logic_normal.cc @@ -13,6 +13,7 @@ #include #include +#include #include "modules/audio_coding/neteq/buffer_level_filter.h" #include "modules/audio_coding/neteq/decoder_database.h" @@ -31,7 +32,8 @@ Operations DecisionLogicNormal::GetDecisionSpecialized( Modes prev_mode, bool play_dtmf, bool* reset_decoder, - size_t generated_noise_samples) { + size_t generated_noise_samples, + size_t cur_size_samples) { assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming); // Guard for errors, to avoid getting stuck in error mode. 
if (prev_mode == kModeError) { @@ -68,6 +70,21 @@ Operations DecisionLogicNormal::GetDecisionSpecialized( return kNormal; } + // Make sure we don't restart audio too soon after an expansion to avoid + // running out of data right away again. We should only wait if there are no + // DTX or CNG packets in the buffer (otherwise we should just play out what we + // have, since we cannot know the exact duration of DTX or CNG packets), and + // if the mute factor is low enough (otherwise the expansion was short enough + // to not be noticeable). + // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. + if (postpone_decoding_after_expand_ && prev_mode == kModeExpand && + !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) && + cur_size_samples < static_cast(delay_manager_->TargetLevel() * + packet_length_samples_) >> 8 && + expand.MuteFactor(0) < 16384 / 2) { + return kExpand; + } + const uint32_t five_seconds_samples = static_cast(5 * 8000 * fs_mult_); // Check if the required packet is available. 
diff --git a/modules/audio_coding/neteq/decision_logic_normal.h b/modules/audio_coding/neteq/decision_logic_normal.h index 366d10380d..a718f99495 100644 --- a/modules/audio_coding/neteq/decision_logic_normal.h +++ b/modules/audio_coding/neteq/decision_logic_normal.h @@ -13,6 +13,7 @@ #include "modules/audio_coding/neteq/decision_logic.h" #include "rtc_base/constructormagic.h" +#include "system_wrappers/include/field_trial.h" #include "typedefs.h" // NOLINT(build/include) namespace webrtc { @@ -37,7 +38,9 @@ class DecisionLogicNormal : public DecisionLogic { packet_buffer, delay_manager, buffer_level_filter, - tick_timer) {} + tick_timer), + postpone_decoding_after_expand_(field_trial::IsEnabled( + "WebRTC-Audio-NetEqPostponeDecodingAfterExpand")) {} protected: static const int kReinitAfterExpands = 100; @@ -50,7 +53,8 @@ class DecisionLogicNormal : public DecisionLogic { Modes prev_mode, bool play_dtmf, bool* reset_decoder, - size_t generated_noise_samples) override; + size_t generated_noise_samples, + size_t cur_size_samples) override; // Returns the operation to do given that the expected packet is not // available, but a packet further into the future is at hand. @@ -100,6 +104,8 @@ class DecisionLogicNormal : public DecisionLogic { // Checks if num_consecutive_expands_ >= kMaxWaitForPacket. bool MaxWaitForPacket() const; + const bool postpone_decoding_after_expand_; + RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal); }; diff --git a/modules/audio_coding/neteq/expand.h b/modules/audio_coding/neteq/expand.h index 39249f13c2..4060bd78bc 100644 --- a/modules/audio_coding/neteq/expand.h +++ b/modules/audio_coding/neteq/expand.h @@ -57,7 +57,7 @@ class Expand { virtual void SetParametersForMergeAfterExpand(); // Returns the mute factor for |channel|. 
- int16_t MuteFactor(size_t channel) { + int16_t MuteFactor(size_t channel) const { assert(channel < num_channels_); return channel_parameters_[channel].mute_factor; } diff --git a/modules/audio_coding/neteq/packet_buffer.cc b/modules/audio_coding/neteq/packet_buffer.cc index dfffebdae6..9752ec6be1 100644 --- a/modules/audio_coding/neteq/packet_buffer.cc +++ b/modules/audio_coding/neteq/packet_buffer.cc @@ -285,6 +285,18 @@ size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const { return num_samples; } +bool PacketBuffer::ContainsDtxOrCngPacket( + const DecoderDatabase* decoder_database) const { + RTC_DCHECK(decoder_database); + for (const Packet& packet : buffer_) { + if ((packet.frame && packet.frame->IsDtxPacket()) || + decoder_database->IsComfortNoise(packet.payload_type)) { + return true; + } + } + return false; +} + void PacketBuffer::BufferStat(int* num_packets, int* max_num_packets) const { *num_packets = static_cast(buffer_.size()); *max_num_packets = static_cast(max_number_of_packets_); diff --git a/modules/audio_coding/neteq/packet_buffer.h b/modules/audio_coding/neteq/packet_buffer.h index c83bf89c7d..c646626f03 100644 --- a/modules/audio_coding/neteq/packet_buffer.h +++ b/modules/audio_coding/neteq/packet_buffer.h @@ -12,6 +12,7 @@ #define MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_ #include "api/optional.h" +#include "modules/audio_coding/neteq/decoder_database.h" #include "modules/audio_coding/neteq/packet.h" #include "modules/include/module_common_types.h" #include "rtc_base/constructormagic.h" @@ -121,6 +122,10 @@ class PacketBuffer { // duplicate and redundant packets. virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const; + // Returns true if the packet buffer contains any DTX or CNG packets. 
+ virtual bool ContainsDtxOrCngPacket( + const DecoderDatabase* decoder_database) const; + virtual void BufferStat(int* num_packets, int* max_num_packets) const; // Static method returning true if |timestamp| is older than |timestamp_limit|