diff --git a/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc b/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc index 6412be5f72..f1953eaacf 100644 --- a/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc +++ b/modules/audio_coding/codecs/opus/audio_encoder_opus_unittest.cc @@ -831,6 +831,7 @@ TEST(AudioEncoderOpusTest, OpusDtxFilteringHighEnergyRefreshPackets) { constexpr size_t kSilenceDurationSamples = kSampleRateHz * 0.2f; std::array silence; uint32_t rtp_timestamp = 0; + bool last_packet_dtx_frame = false; bool opus_entered_dtx = false; bool silence_filled = false; size_t timestamp_start_silence = 0; @@ -850,10 +851,13 @@ TEST(AudioEncoderOpusTest, OpusDtxFilteringHighEnergyRefreshPackets) { } rtp_timestamp += kInputBlockSizeSamples; } - if (info.encoded_bytes < 2 && !opus_entered_dtx) { + EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame); + last_packet_dtx_frame = info.encoded_bytes > 0 ? info.encoded_bytes <= 2 + : last_packet_dtx_frame; + if (info.encoded_bytes <= 2 && !opus_entered_dtx) { timestamp_start_silence = rtp_timestamp; } - opus_entered_dtx = info.encoded_bytes < 2; + opus_entered_dtx = info.encoded_bytes <= 2; } EXPECT_TRUE(silence_filled); @@ -880,6 +884,9 @@ TEST(AudioEncoderOpusTest, OpusDtxFilteringHighEnergyRefreshPackets) { info = encoder->Encode(rtp_timestamp, silence_frame, &encoded); rtp_timestamp += kInputBlockSizeSamples; } + EXPECT_TRUE(info.encoded_bytes > 0 || last_packet_dtx_frame); + last_packet_dtx_frame = info.encoded_bytes > 0 ? info.encoded_bytes <= 2 + : last_packet_dtx_frame; // Tracking the number of non empty packets. 
if (increase_noise && info.encoded_bytes > 2) { number_non_empty_packets_during_increase++; diff --git a/modules/audio_coding/codecs/opus/opus_interface.cc b/modules/audio_coding/codecs/opus/opus_interface.cc index 95c3bb9db1..f684452ad5 100644 --- a/modules/audio_coding/codecs/opus/opus_interface.cc +++ b/modules/audio_coding/codecs/opus/opus_interface.cc @@ -247,9 +247,15 @@ int WebRtcOpus_Encode(OpusEncInst* inst, inst, rtc::MakeArrayView(audio_in, samples), rtc::MakeArrayView(encoded, res))) { // This packet is a high energy refresh DTX packet. For avoiding an increase - // of the energy in the DTX region at the decoder, this packet is dropped. - inst->in_dtx_mode = 0; - return 0; + // of the energy in the DTX region at the decoder, this packet is + // substituted by a TOC byte with one empty frame. + // The number of frames described in the TOC byte + // (https://tools.ietf.org/html/rfc6716#section-3.1) is overwritten to + // always indicate one frame (last two bits equal to 0). + encoded[0] = encoded[0] & 0b11111100; + inst->in_dtx_mode = 1; + // The payload is just the TOC byte and has 1 byte as length. + return 1; } inst->in_dtx_mode = 0; return res;