From 00eb12a20cd29cf249e9ac7cfe0a8037a1e5f903 Mon Sep 17 00:00:00 2001 From: Henrik Lundin Date: Wed, 5 Sep 2018 18:14:52 +0200 Subject: [PATCH] Let NetEq use the PLC output from a decoder This change enables NetEq to use the packet concealment audio (aka PLC) produced by a decoder. The change also includes a new API to the AudioDecoder interface, which lets the decoder implementation generate and deliver concealment audio. Bug: webrtc:9180 Change-Id: Icaacebccf645d4694b0d2d6310f6f2c7132881c4 Reviewed-on: https://webrtc-review.googlesource.com/96340 Commit-Queue: Henrik Lundin Reviewed-by: Minyue Li Reviewed-by: Karl Wiberg Cr-Commit-Position: refs/heads/master@{#24738} --- api/audio_codecs/audio_decoder.cc | 6 + api/audio_codecs/audio_decoder.h | 14 ++ modules/audio_coding/BUILD.gn | 2 + modules/audio_coding/neteq/accelerate.cc | 17 +- .../audio_coding/neteq/audio_multi_vector.cc | 10 +- .../audio_coding/neteq/audio_multi_vector.h | 12 +- .../neteq/audio_multi_vector_unittest.cc | 46 ++-- modules/audio_coding/neteq/decision_logic.cc | 13 +- modules/audio_coding/neteq/defines.h | 1 + modules/audio_coding/neteq/merge.cc | 3 +- .../neteq/neteq_decoder_plc_unittest.cc | 216 ++++++++++++++++++ modules/audio_coding/neteq/neteq_impl.cc | 53 ++++- modules/audio_coding/neteq/neteq_impl.h | 3 + modules/audio_coding/neteq/normal.cc | 2 +- .../audio_coding/neteq/preemptive_expand.cc | 18 +- modules/audio_coding/neteq/sync_buffer.cc | 10 + modules/audio_coding/neteq/sync_buffer.h | 4 + .../neteq/tools/encode_neteq_input.cc | 2 +- 18 files changed, 372 insertions(+), 60 deletions(-) create mode 100644 modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc diff --git a/api/audio_codecs/audio_decoder.cc b/api/audio_codecs/audio_decoder.cc index 00e45d960a..aaba175919 100644 --- a/api/audio_codecs/audio_decoder.cc +++ b/api/audio_codecs/audio_decoder.cc @@ -130,6 +130,12 @@ size_t AudioDecoder::DecodePlc(size_t num_frames, int16_t* decoded) { return 0; } +// 
TODO(bugs.webrtc.org/9676): Remove default implementation. +void AudioDecoder::GeneratePlc(size_t /*requested_samples_per_channel*/, + rtc::BufferT* /*concealment_audio*/) { + return; +} + int AudioDecoder::IncomingPacket(const uint8_t* payload, size_t payload_len, uint16_t rtp_sequence_number, diff --git a/api/audio_codecs/audio_decoder.h b/api/audio_codecs/audio_decoder.h index 9a955a6ecc..b01a66a064 100644 --- a/api/audio_codecs/audio_decoder.h +++ b/api/audio_codecs/audio_decoder.h @@ -119,6 +119,20 @@ class AudioDecoder { // memory allocated in |decoded| should accommodate |num_frames| frames. virtual size_t DecodePlc(size_t num_frames, int16_t* decoded); + // Asks the decoder to generate packet-loss concealment and append it to the + // end of |concealment_audio|. The concealment audio should be in + // channel-interleaved format, with as many channels as the last decoded + // packet produced. The implementation must produce at least + // requested_samples_per_channel, or nothing at all. Producing nothing is a signal to the + // caller to conceal the loss with other means. If the implementation provides + // concealment samples, it is also responsible for "stitching" it together + // with the decoded audio on either side of the concealment. + // Note: The default implementation of GeneratePlc will be deleted soon. All + // implementations must provide their own, which can be as simple as a no-op. + // TODO(bugs.webrtc.org/9676): Remove default implementation. + virtual void GeneratePlc(size_t requested_samples_per_channel, + rtc::BufferT* concealment_audio); + // Resets the decoder state (empty buffers etc.). 
virtual void Reset() = 0; diff --git a/modules/audio_coding/BUILD.gn b/modules/audio_coding/BUILD.gn index ff5ae13872..a3ff4d40bf 100644 --- a/modules/audio_coding/BUILD.gn +++ b/modules/audio_coding/BUILD.gn @@ -1046,6 +1046,7 @@ rtc_static_library("neteq") { ":neteq_decoder_enum", "..:module_api", "../..:webrtc_common", + "../../api:array_view", "../../api:libjingle_peerconnection_api", "../../api/audio:audio_frame_api", "../../api/audio_codecs:audio_codecs_api", @@ -2044,6 +2045,7 @@ if (rtc_include_tests) { "neteq/mock/mock_red_payload_splitter.h", "neteq/mock/mock_statistics_calculator.h", "neteq/nack_tracker_unittest.cc", + "neteq/neteq_decoder_plc_unittest.cc", "neteq/neteq_external_decoder_unittest.cc", "neteq/neteq_impl_unittest.cc", "neteq/neteq_network_stats_unittest.cc", diff --git a/modules/audio_coding/neteq/accelerate.cc b/modules/audio_coding/neteq/accelerate.cc index 183ad7b2b5..18350b0a78 100644 --- a/modules/audio_coding/neteq/accelerate.cc +++ b/modules/audio_coding/neteq/accelerate.cc @@ -25,7 +25,8 @@ Accelerate::ReturnCodes Accelerate::Process(const int16_t* input, input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_) { // Length of input data too short to do accelerate. Simply move all data // from input to output. - output->PushBackInterleaved(input, input_length); + output->PushBackInterleaved( + rtc::ArrayView(input, input_length)); return kError; } return TimeStretch::Process(input, input_length, fast_accelerate, output, @@ -67,17 +68,18 @@ Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch( assert(fs_mult_120 >= peak_index); // Should be handled in Process(). // Copy first part; 0 to 15 ms. - output->PushBackInterleaved(input, fs_mult_120 * num_channels_); + output->PushBackInterleaved( + rtc::ArrayView(input, fs_mult_120 * num_channels_)); // Copy the |peak_index| starting at 15 ms to |temp_vector|. 
AudioMultiVector temp_vector(num_channels_); - temp_vector.PushBackInterleaved(&input[fs_mult_120 * num_channels_], - peak_index * num_channels_); + temp_vector.PushBackInterleaved(rtc::ArrayView( + &input[fs_mult_120 * num_channels_], peak_index * num_channels_)); // Cross-fade |temp_vector| onto the end of |output|. output->CrossFade(temp_vector, peak_index); // Copy the last unmodified part, 15 ms + pitch period until the end. - output->PushBackInterleaved( + output->PushBackInterleaved(rtc::ArrayView( &input[(fs_mult_120 + peak_index) * num_channels_], - input_length - (fs_mult_120 + peak_index) * num_channels_); + input_length - (fs_mult_120 + peak_index) * num_channels_)); if (active_speech) { return kSuccess; @@ -86,7 +88,8 @@ Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch( } } else { // Accelerate not allowed. Simply move all data from decoded to outData. - output->PushBackInterleaved(input, input_length); + output->PushBackInterleaved( + rtc::ArrayView(input, input_length)); return kNoStretch; } } diff --git a/modules/audio_coding/neteq/audio_multi_vector.cc b/modules/audio_coding/neteq/audio_multi_vector.cc index 874633f389..349d75dcdc 100644 --- a/modules/audio_coding/neteq/audio_multi_vector.cc +++ b/modules/audio_coding/neteq/audio_multi_vector.cc @@ -67,15 +67,15 @@ void AudioMultiVector::CopyTo(AudioMultiVector* copy_to) const { } } -void AudioMultiVector::PushBackInterleaved(const int16_t* append_this, - size_t length) { - assert(length % num_channels_ == 0); +void AudioMultiVector::PushBackInterleaved( + rtc::ArrayView append_this) { + RTC_DCHECK_EQ(append_this.size() % num_channels_, 0); if (num_channels_ == 1) { // Special case to avoid extra allocation and data shuffling. 
- channels_[0]->PushBack(append_this, length); + channels_[0]->PushBack(append_this.data(), append_this.size()); return; } - size_t length_per_channel = length / num_channels_; + size_t length_per_channel = append_this.size() / num_channels_; int16_t* temp_array = new int16_t[length_per_channel]; // Temporary storage. for (size_t channel = 0; channel < num_channels_; ++channel) { // Copy elements to |temp_array|. diff --git a/modules/audio_coding/neteq/audio_multi_vector.h b/modules/audio_coding/neteq/audio_multi_vector.h index 4a9ed489e8..86f82820ca 100644 --- a/modules/audio_coding/neteq/audio_multi_vector.h +++ b/modules/audio_coding/neteq/audio_multi_vector.h @@ -15,6 +15,7 @@ #include +#include "api/array_view.h" #include "modules/audio_coding/neteq/audio_vector.h" #include "rtc_base/constructormagic.h" @@ -44,12 +45,11 @@ class AudioMultiVector { // number of channels. virtual void CopyTo(AudioMultiVector* copy_to) const; - // Appends the contents of array |append_this| to the end of this - // object. The array is assumed to be channel-interleaved. |length| must be - // an even multiple of this object's number of channels. - // The length of this object is increased with the |length| divided by the - // number of channels. - virtual void PushBackInterleaved(const int16_t* append_this, size_t length); + // Appends the contents of |append_this| to the end of this object. The array + // is assumed to be channel-interleaved. The length must be an even multiple + // of this object's number of channels. The length of this object is increased + // with the length of the array divided by the number of channels. + void PushBackInterleaved(rtc::ArrayView append_this); // Appends the contents of AudioMultiVector |append_this| to this object. The // length of this object is increased with the length of |append_this|. 
diff --git a/modules/audio_coding/neteq/audio_multi_vector_unittest.cc b/modules/audio_coding/neteq/audio_multi_vector_unittest.cc index 3f3283eb4f..ed4c290fc3 100644 --- a/modules/audio_coding/neteq/audio_multi_vector_unittest.cc +++ b/modules/audio_coding/neteq/audio_multi_vector_unittest.cc @@ -14,6 +14,7 @@ #include #include +#include #include "rtc_base/numerics/safe_conversions.h" #include "test/gtest.h" @@ -32,18 +33,16 @@ class AudioMultiVectorTest : public ::testing::TestWithParam { protected: AudioMultiVectorTest() : num_channels_(GetParam()), // Get the test parameter. - interleaved_length_(num_channels_ * array_length()) { - array_interleaved_ = new int16_t[num_channels_ * array_length()]; - } + array_interleaved_(num_channels_ * array_length()) {} - ~AudioMultiVectorTest() { delete[] array_interleaved_; } + ~AudioMultiVectorTest() = default; virtual void SetUp() { // Populate test arrays. for (size_t i = 0; i < array_length(); ++i) { array_[i] = static_cast(i); } - int16_t* ptr = array_interleaved_; + int16_t* ptr = array_interleaved_.data(); // Write 100, 101, 102, ... for first channel. // Write 200, 201, 202, ... for second channel. // And so on. @@ -58,9 +57,8 @@ class AudioMultiVectorTest : public ::testing::TestWithParam { size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); } const size_t num_channels_; - size_t interleaved_length_; int16_t array_[10]; - int16_t* array_interleaved_; + std::vector array_interleaved_; }; // Create and destroy AudioMultiVector objects, both empty and with a predefined @@ -95,7 +93,7 @@ TEST_P(AudioMultiVectorTest, SubscriptOperator) { // method is also invoked. TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) { AudioMultiVector vec(num_channels_); - vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.PushBackInterleaved(array_interleaved_); AudioMultiVector vec_copy(num_channels_); vec.CopyTo(&vec_copy); // Copy from |vec| to |vec_copy|. 
ASSERT_EQ(num_channels_, vec.Channels()); @@ -122,7 +120,7 @@ TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) { TEST_P(AudioMultiVectorTest, CopyToNull) { AudioMultiVector vec(num_channels_); AudioMultiVector* vec_copy = NULL; - vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.PushBackInterleaved(array_interleaved_); vec.CopyTo(vec_copy); } @@ -154,7 +152,7 @@ TEST_P(AudioMultiVectorTest, PushBackVector) { // Test the PushBackFromIndex method. TEST_P(AudioMultiVectorTest, PushBackFromIndex) { AudioMultiVector vec1(num_channels_); - vec1.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec1.PushBackInterleaved(array_interleaved_); AudioMultiVector vec2(num_channels_); // Append vec1 to the back of vec2 (which is empty). Read vec1 from the second @@ -173,7 +171,7 @@ TEST_P(AudioMultiVectorTest, PushBackFromIndex) { // Starts with pushing some values to the vector, then test the Zeros method. TEST_P(AudioMultiVectorTest, Zeros) { AudioMultiVector vec(num_channels_); - vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.PushBackInterleaved(array_interleaved_); vec.Zeros(2 * array_length()); ASSERT_EQ(num_channels_, vec.Channels()); ASSERT_EQ(2u * array_length(), vec.Size()); @@ -187,20 +185,20 @@ TEST_P(AudioMultiVectorTest, Zeros) { // Test the ReadInterleaved method TEST_P(AudioMultiVectorTest, ReadInterleaved) { AudioMultiVector vec(num_channels_); - vec.PushBackInterleaved(array_interleaved_, interleaved_length_); - int16_t* output = new int16_t[interleaved_length_]; + vec.PushBackInterleaved(array_interleaved_); + int16_t* output = new int16_t[array_interleaved_.size()]; // Read 5 samples. size_t read_samples = 5; EXPECT_EQ(num_channels_ * read_samples, vec.ReadInterleaved(read_samples, output)); - EXPECT_EQ(0, - memcmp(array_interleaved_, output, read_samples * sizeof(int16_t))); + EXPECT_EQ(0, memcmp(array_interleaved_.data(), output, + read_samples * sizeof(int16_t))); // Read too many samples. 
Expect to get all samples from the vector. - EXPECT_EQ(interleaved_length_, + EXPECT_EQ(array_interleaved_.size(), vec.ReadInterleaved(array_length() + 1, output)); - EXPECT_EQ(0, - memcmp(array_interleaved_, output, read_samples * sizeof(int16_t))); + EXPECT_EQ(0, memcmp(array_interleaved_.data(), output, + read_samples * sizeof(int16_t))); delete[] output; } @@ -208,7 +206,7 @@ TEST_P(AudioMultiVectorTest, ReadInterleaved) { // Test the PopFront method. TEST_P(AudioMultiVectorTest, PopFront) { AudioMultiVector vec(num_channels_); - vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.PushBackInterleaved(array_interleaved_); vec.PopFront(1); // Remove one element from each channel. ASSERT_EQ(array_length() - 1u, vec.Size()); // Let |ptr| point to the second element of the first channel in the @@ -227,12 +225,12 @@ TEST_P(AudioMultiVectorTest, PopFront) { // Test the PopBack method. TEST_P(AudioMultiVectorTest, PopBack) { AudioMultiVector vec(num_channels_); - vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.PushBackInterleaved(array_interleaved_); vec.PopBack(1); // Remove one element from each channel. ASSERT_EQ(array_length() - 1u, vec.Size()); // Let |ptr| point to the first element of the first channel in the // interleaved array. - int16_t* ptr = array_interleaved_; + int16_t* ptr = array_interleaved_.data(); for (size_t i = 0; i < array_length() - 1; ++i) { for (size_t channel = 0; channel < num_channels_; ++channel) { EXPECT_EQ(*ptr, vec[channel][i]); @@ -265,7 +263,7 @@ TEST_P(AudioMultiVectorTest, AssertSize) { // Test the PushBack method with another AudioMultiVector as input argument. TEST_P(AudioMultiVectorTest, OverwriteAt) { AudioMultiVector vec1(num_channels_); - vec1.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec1.PushBackInterleaved(array_interleaved_); AudioMultiVector vec2(num_channels_); vec2.Zeros(3); // 3 zeros in each channel. // Overwrite vec2 at position 5. 
@@ -273,7 +271,7 @@ TEST_P(AudioMultiVectorTest, OverwriteAt) { // Verify result. // Length remains the same. ASSERT_EQ(array_length(), vec1.Size()); - int16_t* ptr = array_interleaved_; + int16_t* ptr = array_interleaved_.data(); for (size_t i = 0; i < array_length() - 1; ++i) { for (size_t channel = 0; channel < num_channels_; ++channel) { if (i >= 5 && i <= 7) { @@ -294,7 +292,7 @@ TEST_P(AudioMultiVectorTest, CopyChannel) { return; AudioMultiVector vec(num_channels_); - vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.PushBackInterleaved(array_interleaved_); // Create a reference copy. AudioMultiVector ref(num_channels_); ref.PushBack(vec); diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc index e24ca6283a..6b61555b47 100644 --- a/modules/audio_coding/neteq/decision_logic.cc +++ b/modules/audio_coding/neteq/decision_logic.cc @@ -164,7 +164,8 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer, // if the mute factor is low enough (otherwise the expansion was short enough // to not be noticable). // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. - if (postpone_decoding_after_expand_ && prev_mode == kModeExpand && + if (postpone_decoding_after_expand_ && + (prev_mode == kModeExpand || prev_mode == kModeCodecPlc) && !packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) && cur_size_samples(delay_manager_->TargetLevel() * packet_length_samples_)>> 8 && @@ -302,9 +303,9 @@ Operations DecisionLogic::FuturePacketAvailable( // Check if we should continue with an ongoing expand because the new packet // is too far into the future. 
uint32_t timestamp_leap = available_timestamp - target_timestamp; - if ((prev_mode == kModeExpand) && !ReinitAfterExpands(timestamp_leap) && - !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) && - UnderTargetLevel()) { + if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) && + !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() && + PacketTooEarly(timestamp_leap) && UnderTargetLevel()) { if (play_dtmf) { // Still have DTMF to play, so do not do expand. return kDtmf; @@ -314,6 +315,10 @@ Operations DecisionLogic::FuturePacketAvailable( } } + if (prev_mode == kModeCodecPlc) { + return kNormal; + } + const size_t samples_left = sync_buffer.FutureLength() - expand.overlap_length(); const size_t cur_size_samples = diff --git a/modules/audio_coding/neteq/defines.h b/modules/audio_coding/neteq/defines.h index 768f0b955e..46926fa85e 100644 --- a/modules/audio_coding/neteq/defines.h +++ b/modules/audio_coding/neteq/defines.h @@ -39,6 +39,7 @@ enum Modes { kModePreemptiveExpandFail, kModeRfc3389Cng, kModeCodecInternalCng, + kModeCodecPlc, kModeDtmf, kModeError, kModeUndefined = -1 diff --git a/modules/audio_coding/neteq/merge.cc b/modules/audio_coding/neteq/merge.cc index 3c9ad19d05..357ef8dd92 100644 --- a/modules/audio_coding/neteq/merge.cc +++ b/modules/audio_coding/neteq/merge.cc @@ -58,7 +58,8 @@ size_t Merge::Process(int16_t* input, // Transfer input signal to an AudioMultiVector. 
AudioMultiVector input_vector(num_channels_); - input_vector.PushBackInterleaved(input, input_length); + input_vector.PushBackInterleaved( + rtc::ArrayView(input, input_length)); size_t input_length_per_channel = input_vector.Size(); assert(input_length_per_channel == input_length / num_channels_); diff --git a/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc b/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc new file mode 100644 index 0000000000..8d0972cb54 --- /dev/null +++ b/modules/audio_coding/neteq/neteq_decoder_plc_unittest.cc @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Test to verify correct operation when using the decoder-internal PLC. + +#include +#include +#include + +#include "absl/types/optional.h" +#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h" +#include "modules/audio_coding/neteq/tools/audio_checksum.h" +#include "modules/audio_coding/neteq/tools/audio_sink.h" +#include "modules/audio_coding/neteq/tools/encode_neteq_input.h" +#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h" +#include "modules/audio_coding/neteq/tools/input_audio_file.h" +#include "modules/audio_coding/neteq/tools/neteq_test.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "test/gtest.h" +#include "test/testsupport/fileutils.h" + +namespace webrtc { +namespace test { +namespace { + +// This class implements a fake decoder. The decoder will read audio from a file +// and present as output, both for regular decoding and for PLC. 
+class AudioDecoderPlc : public AudioDecoder { + public: + AudioDecoderPlc(std::unique_ptr input, int sample_rate_hz) + : input_(std::move(input)), sample_rate_hz_(sample_rate_hz) {} + + void Reset() override {} + int SampleRateHz() const override { return sample_rate_hz_; } + size_t Channels() const override { return 1; } + int DecodeInternal(const uint8_t* /*encoded*/, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + RTC_CHECK_EQ(encoded_len / 2, 20 * sample_rate_hz_ / 1000); + RTC_CHECK_EQ(sample_rate_hz, sample_rate_hz_); + RTC_CHECK(decoded); + RTC_CHECK(speech_type); + RTC_CHECK(input_->Read(encoded_len / 2, decoded)); + *speech_type = kSpeech; + last_was_plc_ = false; + return encoded_len / 2; + } + + void GeneratePlc(size_t requested_samples_per_channel, + rtc::BufferT* concealment_audio) override { + // Must keep a local copy of this since DecodeInternal sets it to false. + const bool last_was_plc = last_was_plc_; + SpeechType speech_type; + std::vector decoded(5760); + int dec_len = DecodeInternal(nullptr, 2 * 20 * sample_rate_hz_ / 1000, + sample_rate_hz_, decoded.data(), &speech_type); + // This fake decoder can only generate 20 ms of PLC data each time. Make + // sure the caller didn't ask for more. + RTC_CHECK_GE(dec_len, requested_samples_per_channel); + concealment_audio->AppendData(decoded.data(), dec_len); + concealed_samples_ += rtc::checked_cast(dec_len); + if (!last_was_plc) { + ++concealment_events_; + } + last_was_plc_ = true; + } + + size_t concealed_samples() { return concealed_samples_; } + size_t concealment_events() { return concealment_events_; } + + private: + const std::unique_ptr input_; + const int sample_rate_hz_; + size_t concealed_samples_ = 0; + size_t concealment_events_ = 0; + bool last_was_plc_ = false; +}; + +// An input sample generator which generates only zero-samples. 
+class ZeroSampleGenerator : public EncodeNetEqInput::Generator { + public: + rtc::ArrayView Generate(size_t num_samples) override { + vec.resize(num_samples, 0); + rtc::ArrayView view(vec); + RTC_DCHECK_EQ(view.size(), num_samples); + return view; + } + + private: + std::vector vec; +}; + +// A NetEqInput which connects to another NetEqInput, but drops a number of +// packets on the way. +class LossyInput : public NetEqInput { + public: + LossyInput(int loss_cadence, std::unique_ptr input) + : loss_cadence_(loss_cadence), input_(std::move(input)) {} + + absl::optional NextPacketTime() const override { + return input_->NextPacketTime(); + } + + absl::optional NextOutputEventTime() const override { + return input_->NextOutputEventTime(); + } + + std::unique_ptr PopPacket() override { + if (loss_cadence_ != 0 && (++count_ % loss_cadence_) == 0) { + // Pop one extra packet to create the loss. + input_->PopPacket(); + } + return input_->PopPacket(); + } + + void AdvanceOutputEvent() override { return input_->AdvanceOutputEvent(); } + + bool ended() const override { return input_->ended(); } + + absl::optional NextHeader() const override { + return input_->NextHeader(); + } + + private: + const int loss_cadence_; + int count_ = 0; + const std::unique_ptr input_; +}; + +class AudioChecksumWithOutput : public AudioChecksum { + public: + explicit AudioChecksumWithOutput(std::string* output_str) + : output_str_(*output_str) {} + ~AudioChecksumWithOutput() { output_str_ = Finish(); } + + private: + std::string& output_str_; +}; + +NetEqNetworkStatistics RunTest(int loss_cadence, std::string* checksum) { + NetEq::Config config; + config.for_test_no_time_stretching = true; + + // The input is mostly useless. It sends zero-samples to a PCM16b encoder, + // but the actual encoded samples will never be used by the decoder in the + // test. See below about the decoder. 
+ auto generator = absl::make_unique(); + constexpr int kSampleRateHz = 32000; + constexpr int kPayloadType = 100; + AudioEncoderPcm16B::Config encoder_config; + encoder_config.sample_rate_hz = kSampleRateHz; + encoder_config.payload_type = kPayloadType; + auto encoder = absl::make_unique(encoder_config); + constexpr int kRunTimeMs = 10000; + auto input = absl::make_unique( + std::move(generator), std::move(encoder), kRunTimeMs); + // Wrap the input in a loss function. + auto lossy_input = + absl::make_unique(loss_cadence, std::move(input)); + + // Setting up decoders. + NetEqTest::DecoderMap decoders; + // Using a fake decoder which simply reads the output audio from a file. + auto input_file = absl::make_unique( + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm")); + AudioDecoderPlc dec(std::move(input_file), kSampleRateHz); + // Masquerading as a PCM16b decoder. + NetEqTest::ExternalDecoderInfo dec_info = { + &dec, NetEqDecoder::kDecoderPCM16Bswb32kHz, "pcm16b_PLC"}; + NetEqTest::ExtDecoderMap external_decoders; + external_decoders.insert(std::make_pair(kPayloadType, dec_info)); + + // Output is simply a checksum calculator. + auto output = absl::make_unique(checksum); + + // No callback objects. 
+ NetEqTest::Callbacks callbacks; + + NetEqTest neteq_test(config, decoders, external_decoders, + std::move(lossy_input), std::move(output), callbacks); + EXPECT_LE(kRunTimeMs, neteq_test.Run()); + + auto lifetime_stats = neteq_test.LifetimeStats(); + EXPECT_EQ(dec.concealed_samples(), lifetime_stats.concealed_samples); + EXPECT_EQ(dec.concealment_events(), lifetime_stats.concealment_events); + + return neteq_test.SimulationStats(); +} +} // namespace + +TEST(NetEqDecoderPlc, Test) { + std::string checksum; + auto stats = RunTest(10, &checksum); + + std::string checksum_no_loss; + auto stats_no_loss = RunTest(0, &checksum_no_loss); + + EXPECT_EQ(checksum, checksum_no_loss); + + EXPECT_EQ(stats.preemptive_rate, stats_no_loss.preemptive_rate); + EXPECT_EQ(stats.accelerate_rate, stats_no_loss.accelerate_rate); + EXPECT_EQ(0, stats_no_loss.expand_rate); + EXPECT_GT(stats.expand_rate, 0); +} + +} // namespace test +} // namespace webrtc diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc index 98c2372f68..857a4d7724 100644 --- a/modules/audio_coding/neteq/neteq_impl.cc +++ b/modules/audio_coding/neteq/neteq_impl.cc @@ -885,7 +885,12 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, break; } case kExpand: { - return_value = DoExpand(play_dtmf); + RTC_DCHECK_EQ(return_value, 0); + if (!current_rtp_payload_type_ || !DoCodecPlc()) { + return_value = DoExpand(play_dtmf); + } + RTC_DCHECK_GE(sync_buffer_->FutureLength() - expand_->overlap_length(), + output_size_samples_); break; } case kAccelerate: @@ -997,7 +1002,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, sync_buffer_->set_dtmf_index(sync_buffer_->Size()); } - if (last_mode_ != kModeExpand) { + if (last_mode_ != kModeExpand && last_mode_ != kModeCodecPlc) { // If last operation was not expand, calculate the |playout_timestamp_| from // the |sync_buffer_|. However, do not update the |playout_timestamp_| if it // would be moved "backwards". 
@@ -1022,7 +1027,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, static_cast(audio_frame->samples_per_channel_); if (!(last_mode_ == kModeRfc3389Cng || last_mode_ == kModeCodecInternalCng || - last_mode_ == kModeExpand)) { + last_mode_ == kModeExpand || last_mode_ == kModeCodecPlc)) { generated_noise_stopwatch_.reset(); } @@ -1541,6 +1546,48 @@ void NetEqImpl::DoMerge(int16_t* decoded_buffer, } } +bool NetEqImpl::DoCodecPlc() { + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + if (!decoder) { + return false; + } + const size_t channels = algorithm_buffer_->Channels(); + const size_t requested_samples_per_channel = + output_size_samples_ - + (sync_buffer_->FutureLength() - expand_->overlap_length()); + concealment_audio_.Clear(); + decoder->GeneratePlc(requested_samples_per_channel, &concealment_audio_); + if (concealment_audio_.empty()) { + // Nothing produced. Resort to regular expand. + return false; + } + RTC_CHECK_GE(concealment_audio_.size(), + requested_samples_per_channel * channels); + sync_buffer_->PushBackInterleaved(concealment_audio_); + RTC_DCHECK_NE(algorithm_buffer_->Channels(), 0); + const size_t concealed_samples_per_channel = + concealment_audio_.size() / channels; + + // Update in-call and post-call statistics. + const bool is_new_concealment_event = (last_mode_ != kModeCodecPlc); + if (std::all_of(concealment_audio_.cbegin(), concealment_audio_.cend(), + [](int16_t i) { return i == 0; })) { + // Expand operation generates only noise. + stats_.ExpandedNoiseSamples(concealed_samples_per_channel, + is_new_concealment_event); + } else { + // Expand operation generates more than only noise. + stats_.ExpandedVoiceSamples(concealed_samples_per_channel, + is_new_concealment_event); + } + last_mode_ = kModeCodecPlc; + if (!generated_noise_stopwatch_) { + // Start a new stopwatch since we may be covering for a lost CNG packet. 
+ generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); + } + return true; +} + int NetEqImpl::DoExpand(bool play_dtmf) { while ((sync_buffer_->FutureLength() - expand_->overlap_length()) < output_size_samples_) { diff --git a/modules/audio_coding/neteq/neteq_impl.h b/modules/audio_coding/neteq/neteq_impl.h index 077426b2c3..8ef97ce0af 100644 --- a/modules/audio_coding/neteq/neteq_impl.h +++ b/modules/audio_coding/neteq/neteq_impl.h @@ -290,6 +290,8 @@ class NetEqImpl : public webrtc::NetEq { AudioDecoder::SpeechType speech_type, bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); + bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); + // Sub-method which calls the Expand class to perform the expand operation. int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); @@ -425,6 +427,7 @@ class NetEqImpl : public webrtc::NetEq { ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(crit_sect_); ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(crit_sect_); bool no_time_stretching_ RTC_GUARDED_BY(crit_sect_); // Only used for test. 
+ rtc::BufferT concealment_audio_ RTC_GUARDED_BY(crit_sect_); private: RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl); diff --git a/modules/audio_coding/neteq/normal.cc b/modules/audio_coding/neteq/normal.cc index 83f7616d8e..713bfb6186 100644 --- a/modules/audio_coding/neteq/normal.cc +++ b/modules/audio_coding/neteq/normal.cc @@ -41,7 +41,7 @@ int Normal::Process(const int16_t* input, output->Clear(); return 0; } - output->PushBackInterleaved(input, length); + output->PushBackInterleaved(rtc::ArrayView(input, length)); const int fs_mult = fs_hz_ / 8000; RTC_DCHECK_GT(fs_mult, 0); diff --git a/modules/audio_coding/neteq/preemptive_expand.cc b/modules/audio_coding/neteq/preemptive_expand.cc index 4702078d67..6159a9cb15 100644 --- a/modules/audio_coding/neteq/preemptive_expand.cc +++ b/modules/audio_coding/neteq/preemptive_expand.cc @@ -31,7 +31,8 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::Process( old_data_length >= input_length / num_channels_ - overlap_samples_) { // Length of input data too short to do preemptive expand. Simply move all // data from input to output. - output->PushBackInterleaved(input, input_length); + output->PushBackInterleaved( + rtc::ArrayView(input, input_length)); return kError; } const bool kFastMode = false; // Fast mode is not available for PE Expand. @@ -75,19 +76,19 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch( size_t unmodified_length = std::max(old_data_length_per_channel_, fs_mult_120); // Copy first part, including cross-fade region. - output->PushBackInterleaved( - input, (unmodified_length + peak_index) * num_channels_); + output->PushBackInterleaved(rtc::ArrayView( + input, (unmodified_length + peak_index) * num_channels_)); // Copy the last |peak_index| samples up to 15 ms to |temp_vector|. 
AudioMultiVector temp_vector(num_channels_); - temp_vector.PushBackInterleaved( + temp_vector.PushBackInterleaved(rtc::ArrayView( &input[(unmodified_length - peak_index) * num_channels_], - peak_index * num_channels_); + peak_index * num_channels_)); // Cross-fade |temp_vector| onto the end of |output|. output->CrossFade(temp_vector, peak_index); // Copy the last unmodified part, 15 ms + pitch period until the end. - output->PushBackInterleaved( + output->PushBackInterleaved(rtc::ArrayView( &input[unmodified_length * num_channels_], - input_length - unmodified_length * num_channels_); + input_length - unmodified_length * num_channels_)); if (active_speech) { return kSuccess; @@ -96,7 +97,8 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch( } } else { // Accelerate not allowed. Simply move all data from decoded to outData. - output->PushBackInterleaved(input, input_length); + output->PushBackInterleaved( + rtc::ArrayView(input, input_length)); return kNoStretch; } } diff --git a/modules/audio_coding/neteq/sync_buffer.cc b/modules/audio_coding/neteq/sync_buffer.cc index 82ca16fe60..fee18ccecf 100644 --- a/modules/audio_coding/neteq/sync_buffer.cc +++ b/modules/audio_coding/neteq/sync_buffer.cc @@ -36,6 +36,16 @@ void SyncBuffer::PushBack(const AudioMultiVector& append_this) { dtmf_index_ -= std::min(dtmf_index_, samples_added); } +void SyncBuffer::PushBackInterleaved(const rtc::BufferT& append_this) { + const size_t size_before_adding = Size(); + AudioMultiVector::PushBackInterleaved(append_this); + const size_t samples_added_per_channel = Size() - size_before_adding; + RTC_DCHECK_EQ(samples_added_per_channel * Channels(), append_this.size()); + AudioMultiVector::PopFront(samples_added_per_channel); + next_index_ -= std::min(next_index_, samples_added_per_channel); + dtmf_index_ -= std::min(dtmf_index_, samples_added_per_channel); +} + void SyncBuffer::PushFrontZeros(size_t length) { InsertZerosAtIndex(length, 0); } diff --git 
a/modules/audio_coding/neteq/sync_buffer.h b/modules/audio_coding/neteq/sync_buffer.h index 3833cb2985..72e320c61f 100644 --- a/modules/audio_coding/neteq/sync_buffer.h +++ b/modules/audio_coding/neteq/sync_buffer.h @@ -13,6 +13,7 @@ #include "api/audio/audio_frame.h" #include "modules/audio_coding/neteq/audio_multi_vector.h" +#include "rtc_base/buffer.h" #include "rtc_base/constructormagic.h" namespace webrtc { @@ -34,6 +35,9 @@ class SyncBuffer : public AudioMultiVector { // the move of the beginning of "future" data. void PushBack(const AudioMultiVector& append_this) override; + // Like PushBack, but reads the samples channel-interleaved from the input. + void PushBackInterleaved(const rtc::BufferT& append_this); + // Adds |length| zeros to the beginning of each channel. Removes // the same number of samples from the end of the SyncBuffer, to // maintain a constant buffer size. The |next_index_| is updated to reflect diff --git a/modules/audio_coding/neteq/tools/encode_neteq_input.cc b/modules/audio_coding/neteq/tools/encode_neteq_input.cc index c576670658..87b987ddb6 100644 --- a/modules/audio_coding/neteq/tools/encode_neteq_input.cc +++ b/modules/audio_coding/neteq/tools/encode_neteq_input.cc @@ -53,7 +53,7 @@ void EncodeNetEqInput::AdvanceOutputEvent() { } bool EncodeNetEqInput::ended() const { - return next_output_event_ms_ <= input_duration_ms_; + return next_output_event_ms_ > input_duration_ms_; } absl::optional EncodeNetEqInput::NextHeader() const {