Let NetEq use the PLC output from a decoder

This change enables NetEq to use the packet concealment audio (aka
PLC) produced by a decoder. The change also includes a new API to the
AudioDecoder interface, which lets the decoder implementation generate
and deliver concealment audio.

Bug: webrtc:9180
Change-Id: Icaacebccf645d4694b0d2d6310f6f2c7132881c4
Reviewed-on: https://webrtc-review.googlesource.com/96340
Commit-Queue: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24738}
This commit is contained in:
Henrik Lundin
2018-09-05 18:14:52 +02:00
committed by Commit Bot
parent e899629be4
commit 00eb12a20c
18 changed files with 372 additions and 60 deletions

View File

@ -130,6 +130,12 @@ size_t AudioDecoder::DecodePlc(size_t num_frames, int16_t* decoded) {
return 0;
}
// TODO(bugs.webrtc.org/9676): Remove default implementation.
// Default implementation: produce no concealment audio at all. An empty
// |concealment_audio| tells the caller to conceal the loss by other means.
void AudioDecoder::GeneratePlc(size_t /*requested_samples_per_channel*/,
                               rtc::BufferT<int16_t>* /*concealment_audio*/) {}
int AudioDecoder::IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,

View File

@ -119,6 +119,20 @@ class AudioDecoder {
// memory allocated in |decoded| should accommodate |num_frames| frames.
virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
// Asks the decoder to generate packet-loss concealment and append it to the
// end of |concealment_audio|. The concealment audio should be in
// channel-interleaved format, with as many channels as the last decoded
// packet produced. The implementation must produce at least
// |requested_samples_per_channel| samples, or nothing at all. Producing
// nothing is a signal to the caller to conceal the loss with other means. If
// the implementation provides
// concealment samples, it is also responsible for "stitching" it together
// with the decoded audio on either side of the concealment.
// Note: The default implementation of GeneratePlc will be deleted soon. All
// implementations must provide their own, which can be as simple as a no-op.
// TODO(bugs.webrtc.org/9676): Remove default implementation.
virtual void GeneratePlc(size_t requested_samples_per_channel,
rtc::BufferT<int16_t>* concealment_audio);
// Resets the decoder state (empty buffers etc.).
virtual void Reset() = 0;

View File

@ -1046,6 +1046,7 @@ rtc_static_library("neteq") {
":neteq_decoder_enum",
"..:module_api",
"../..:webrtc_common",
"../../api:array_view",
"../../api:libjingle_peerconnection_api",
"../../api/audio:audio_frame_api",
"../../api/audio_codecs:audio_codecs_api",
@ -2044,6 +2045,7 @@ if (rtc_include_tests) {
"neteq/mock/mock_red_payload_splitter.h",
"neteq/mock/mock_statistics_calculator.h",
"neteq/nack_tracker_unittest.cc",
"neteq/neteq_decoder_plc_unittest.cc",
"neteq/neteq_external_decoder_unittest.cc",
"neteq/neteq_impl_unittest.cc",
"neteq/neteq_network_stats_unittest.cc",

View File

@ -25,7 +25,8 @@ Accelerate::ReturnCodes Accelerate::Process(const int16_t* input,
input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_) {
// Length of input data too short to do accelerate. Simply move all data
// from input to output.
output->PushBackInterleaved(input, input_length);
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
return kError;
}
return TimeStretch::Process(input, input_length, fast_accelerate, output,
@ -67,17 +68,18 @@ Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch(
assert(fs_mult_120 >= peak_index); // Should be handled in Process().
// Copy first part; 0 to 15 ms.
output->PushBackInterleaved(input, fs_mult_120 * num_channels_);
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, fs_mult_120 * num_channels_));
// Copy the |peak_index| starting at 15 ms to |temp_vector|.
AudioMultiVector temp_vector(num_channels_);
temp_vector.PushBackInterleaved(&input[fs_mult_120 * num_channels_],
peak_index * num_channels_);
temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>(
&input[fs_mult_120 * num_channels_], peak_index * num_channels_));
// Cross-fade |temp_vector| onto the end of |output|.
output->CrossFade(temp_vector, peak_index);
// Copy the last unmodified part, 15 ms + pitch period until the end.
output->PushBackInterleaved(
output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
&input[(fs_mult_120 + peak_index) * num_channels_],
input_length - (fs_mult_120 + peak_index) * num_channels_);
input_length - (fs_mult_120 + peak_index) * num_channels_));
if (active_speech) {
return kSuccess;
@ -86,7 +88,8 @@ Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch(
}
} else {
// Accelerate not allowed. Simply move all data from decoded to outData.
output->PushBackInterleaved(input, input_length);
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
return kNoStretch;
}
}

View File

@ -67,15 +67,15 @@ void AudioMultiVector::CopyTo(AudioMultiVector* copy_to) const {
}
}
void AudioMultiVector::PushBackInterleaved(const int16_t* append_this,
size_t length) {
assert(length % num_channels_ == 0);
void AudioMultiVector::PushBackInterleaved(
rtc::ArrayView<const int16_t> append_this) {
RTC_DCHECK_EQ(append_this.size() % num_channels_, 0);
if (num_channels_ == 1) {
// Special case to avoid extra allocation and data shuffling.
channels_[0]->PushBack(append_this, length);
channels_[0]->PushBack(append_this.data(), append_this.size());
return;
}
size_t length_per_channel = length / num_channels_;
size_t length_per_channel = append_this.size() / num_channels_;
int16_t* temp_array = new int16_t[length_per_channel]; // Temporary storage.
for (size_t channel = 0; channel < num_channels_; ++channel) {
// Copy elements to |temp_array|.

View File

@ -15,6 +15,7 @@
#include <vector>
#include "api/array_view.h"
#include "modules/audio_coding/neteq/audio_vector.h"
#include "rtc_base/constructormagic.h"
@ -44,12 +45,11 @@ class AudioMultiVector {
// number of channels.
virtual void CopyTo(AudioMultiVector* copy_to) const;
// Appends the contents of array |append_this| to the end of this
// object. The array is assumed to be channel-interleaved. |length| must be
// an even multiple of this object's number of channels.
// The length of this object is increased with the |length| divided by the
// number of channels.
virtual void PushBackInterleaved(const int16_t* append_this, size_t length);
// Appends the contents of |append_this| to the end of this object. The array
// is assumed to be channel-interleaved. The length must be an even multiple
// of this object's number of channels. The length of this object is increased
// with the length of the array divided by the number of channels.
void PushBackInterleaved(rtc::ArrayView<const int16_t> append_this);
// Appends the contents of AudioMultiVector |append_this| to this object. The
// length of this object is increased with the length of |append_this|.

View File

@ -14,6 +14,7 @@
#include <stdlib.h>
#include <string>
#include <vector>
#include "rtc_base/numerics/safe_conversions.h"
#include "test/gtest.h"
@ -32,18 +33,16 @@ class AudioMultiVectorTest : public ::testing::TestWithParam<size_t> {
protected:
AudioMultiVectorTest()
: num_channels_(GetParam()), // Get the test parameter.
interleaved_length_(num_channels_ * array_length()) {
array_interleaved_ = new int16_t[num_channels_ * array_length()];
}
array_interleaved_(num_channels_ * array_length()) {}
~AudioMultiVectorTest() { delete[] array_interleaved_; }
~AudioMultiVectorTest() = default;
virtual void SetUp() {
// Populate test arrays.
for (size_t i = 0; i < array_length(); ++i) {
array_[i] = static_cast<int16_t>(i);
}
int16_t* ptr = array_interleaved_;
int16_t* ptr = array_interleaved_.data();
// Write 100, 101, 102, ... for first channel.
// Write 200, 201, 202, ... for second channel.
// And so on.
@ -58,9 +57,8 @@ class AudioMultiVectorTest : public ::testing::TestWithParam<size_t> {
size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); }
const size_t num_channels_;
size_t interleaved_length_;
int16_t array_[10];
int16_t* array_interleaved_;
std::vector<int16_t> array_interleaved_;
};
// Create and destroy AudioMultiVector objects, both empty and with a predefined
@ -95,7 +93,7 @@ TEST_P(AudioMultiVectorTest, SubscriptOperator) {
// method is also invoked.
TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
vec.PushBackInterleaved(array_interleaved_);
AudioMultiVector vec_copy(num_channels_);
vec.CopyTo(&vec_copy); // Copy from |vec| to |vec_copy|.
ASSERT_EQ(num_channels_, vec.Channels());
@ -122,7 +120,7 @@ TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) {
TEST_P(AudioMultiVectorTest, CopyToNull) {
AudioMultiVector vec(num_channels_);
AudioMultiVector* vec_copy = NULL;
vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
vec.PushBackInterleaved(array_interleaved_);
vec.CopyTo(vec_copy);
}
@ -154,7 +152,7 @@ TEST_P(AudioMultiVectorTest, PushBackVector) {
// Test the PushBackFromIndex method.
TEST_P(AudioMultiVectorTest, PushBackFromIndex) {
AudioMultiVector vec1(num_channels_);
vec1.PushBackInterleaved(array_interleaved_, interleaved_length_);
vec1.PushBackInterleaved(array_interleaved_);
AudioMultiVector vec2(num_channels_);
// Append vec1 to the back of vec2 (which is empty). Read vec1 from the second
@ -173,7 +171,7 @@ TEST_P(AudioMultiVectorTest, PushBackFromIndex) {
// Starts with pushing some values to the vector, then test the Zeros method.
TEST_P(AudioMultiVectorTest, Zeros) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
vec.PushBackInterleaved(array_interleaved_);
vec.Zeros(2 * array_length());
ASSERT_EQ(num_channels_, vec.Channels());
ASSERT_EQ(2u * array_length(), vec.Size());
@ -187,20 +185,20 @@ TEST_P(AudioMultiVectorTest, Zeros) {
// Test the ReadInterleaved method
TEST_P(AudioMultiVectorTest, ReadInterleaved) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
int16_t* output = new int16_t[interleaved_length_];
vec.PushBackInterleaved(array_interleaved_);
int16_t* output = new int16_t[array_interleaved_.size()];
// Read 5 samples.
size_t read_samples = 5;
EXPECT_EQ(num_channels_ * read_samples,
vec.ReadInterleaved(read_samples, output));
EXPECT_EQ(0,
memcmp(array_interleaved_, output, read_samples * sizeof(int16_t)));
EXPECT_EQ(0, memcmp(array_interleaved_.data(), output,
read_samples * sizeof(int16_t)));
// Read too many samples. Expect to get all samples from the vector.
EXPECT_EQ(interleaved_length_,
EXPECT_EQ(array_interleaved_.size(),
vec.ReadInterleaved(array_length() + 1, output));
EXPECT_EQ(0,
memcmp(array_interleaved_, output, read_samples * sizeof(int16_t)));
EXPECT_EQ(0, memcmp(array_interleaved_.data(), output,
read_samples * sizeof(int16_t)));
delete[] output;
}
@ -208,7 +206,7 @@ TEST_P(AudioMultiVectorTest, ReadInterleaved) {
// Test the PopFront method.
TEST_P(AudioMultiVectorTest, PopFront) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
vec.PushBackInterleaved(array_interleaved_);
vec.PopFront(1); // Remove one element from each channel.
ASSERT_EQ(array_length() - 1u, vec.Size());
// Let |ptr| point to the second element of the first channel in the
@ -227,12 +225,12 @@ TEST_P(AudioMultiVectorTest, PopFront) {
// Test the PopBack method.
TEST_P(AudioMultiVectorTest, PopBack) {
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
vec.PushBackInterleaved(array_interleaved_);
vec.PopBack(1); // Remove one element from each channel.
ASSERT_EQ(array_length() - 1u, vec.Size());
// Let |ptr| point to the first element of the first channel in the
// interleaved array.
int16_t* ptr = array_interleaved_;
int16_t* ptr = array_interleaved_.data();
for (size_t i = 0; i < array_length() - 1; ++i) {
for (size_t channel = 0; channel < num_channels_; ++channel) {
EXPECT_EQ(*ptr, vec[channel][i]);
@ -265,7 +263,7 @@ TEST_P(AudioMultiVectorTest, AssertSize) {
// Test the PushBack method with another AudioMultiVector as input argument.
TEST_P(AudioMultiVectorTest, OverwriteAt) {
AudioMultiVector vec1(num_channels_);
vec1.PushBackInterleaved(array_interleaved_, interleaved_length_);
vec1.PushBackInterleaved(array_interleaved_);
AudioMultiVector vec2(num_channels_);
vec2.Zeros(3); // 3 zeros in each channel.
// Overwrite vec2 at position 5.
@ -273,7 +271,7 @@ TEST_P(AudioMultiVectorTest, OverwriteAt) {
// Verify result.
// Length remains the same.
ASSERT_EQ(array_length(), vec1.Size());
int16_t* ptr = array_interleaved_;
int16_t* ptr = array_interleaved_.data();
for (size_t i = 0; i < array_length() - 1; ++i) {
for (size_t channel = 0; channel < num_channels_; ++channel) {
if (i >= 5 && i <= 7) {
@ -294,7 +292,7 @@ TEST_P(AudioMultiVectorTest, CopyChannel) {
return;
AudioMultiVector vec(num_channels_);
vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
vec.PushBackInterleaved(array_interleaved_);
// Create a reference copy.
AudioMultiVector ref(num_channels_);
ref.PushBack(vec);

View File

@ -164,7 +164,8 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
// if the mute factor is low enough (otherwise the expansion was short enough
// to not be noticeable).
// Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
if (postpone_decoding_after_expand_ && prev_mode == kModeExpand &&
if (postpone_decoding_after_expand_ &&
(prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
!packet_buffer_.ContainsDtxOrCngPacket(decoder_database_) &&
cur_size_samples<static_cast<size_t>(delay_manager_->TargetLevel() *
packet_length_samples_)>> 8 &&
@ -302,9 +303,9 @@ Operations DecisionLogic::FuturePacketAvailable(
// Check if we should continue with an ongoing expand because the new packet
// is too far into the future.
uint32_t timestamp_leap = available_timestamp - target_timestamp;
if ((prev_mode == kModeExpand) && !ReinitAfterExpands(timestamp_leap) &&
!MaxWaitForPacket() && PacketTooEarly(timestamp_leap) &&
UnderTargetLevel()) {
if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
!ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
if (play_dtmf) {
// Still have DTMF to play, so do not do expand.
return kDtmf;
@ -314,6 +315,10 @@ Operations DecisionLogic::FuturePacketAvailable(
}
}
if (prev_mode == kModeCodecPlc) {
return kNormal;
}
const size_t samples_left =
sync_buffer.FutureLength() - expand.overlap_length();
const size_t cur_size_samples =

View File

@ -39,6 +39,7 @@ enum Modes {
kModePreemptiveExpandFail,
kModeRfc3389Cng,
kModeCodecInternalCng,
kModeCodecPlc,
kModeDtmf,
kModeError,
kModeUndefined = -1

View File

@ -58,7 +58,8 @@ size_t Merge::Process(int16_t* input,
// Transfer input signal to an AudioMultiVector.
AudioMultiVector input_vector(num_channels_);
input_vector.PushBackInterleaved(input, input_length);
input_vector.PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
size_t input_length_per_channel = input_vector.Size();
assert(input_length_per_channel == input_length / num_channels_);

View File

@ -0,0 +1,216 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Test to verify correct operation when using the decoder-internal PLC.
#include <algorithm>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
#include "modules/audio_coding/neteq/tools/audio_checksum.h"
#include "modules/audio_coding/neteq/tools/audio_sink.h"
#include "modules/audio_coding/neteq/tools/encode_neteq_input.h"
#include "modules/audio_coding/neteq/tools/fake_decode_from_file.h"
#include "modules/audio_coding/neteq/tools/input_audio_file.h"
#include "modules/audio_coding/neteq/tools/neteq_test.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "test/gtest.h"
#include "test/testsupport/fileutils.h"
namespace webrtc {
namespace test {
namespace {
// This class implements a fake decoder. The decoder will read audio from a file
// and present as output, both for regular decoding and for PLC.
class AudioDecoderPlc : public AudioDecoder {
 public:
  AudioDecoderPlc(std::unique_ptr<InputAudioFile> input, int sample_rate_hz)
      : input_(std::move(input)), sample_rate_hz_(sample_rate_hz) {}

  void Reset() override {}
  int SampleRateHz() const override { return sample_rate_hz_; }
  size_t Channels() const override { return 1; }

  // "Decodes" by reading the next 20 ms of mono audio from the input file.
  // The encoded payload bytes themselves are ignored; only the length is
  // checked.
  int DecodeInternal(const uint8_t* /*encoded*/,
                     size_t encoded_len,
                     int sample_rate_hz,
                     int16_t* decoded,
                     SpeechType* speech_type) override {
    // Payload is 16-bit PCM, so encoded_len / 2 samples; must be exactly
    // one 20 ms frame.
    RTC_CHECK_EQ(encoded_len / 2, 20 * sample_rate_hz_ / 1000);
    RTC_CHECK_EQ(sample_rate_hz, sample_rate_hz_);
    RTC_CHECK(decoded);
    RTC_CHECK(speech_type);
    RTC_CHECK(input_->Read(encoded_len / 2, decoded));
    *speech_type = kSpeech;
    last_was_plc_ = false;
    return encoded_len / 2;
  }

  // Produces "concealment" audio by decoding the next 20 ms from the input
  // file, and keeps local statistics (samples/events) that the test later
  // compares against NetEq's lifetime statistics.
  void GeneratePlc(size_t requested_samples_per_channel,
                   rtc::BufferT<int16_t>* concealment_audio) override {
    // Must keep a local copy of this since DecodeInternal sets it to false.
    const bool last_was_plc = last_was_plc_;
    SpeechType speech_type;
    std::vector<int16_t> decoded(5760);
    int dec_len = DecodeInternal(nullptr, 2 * 20 * sample_rate_hz_ / 1000,
                                 sample_rate_hz_, decoded.data(), &speech_type);
    // This fake decoder can only generate 20 ms of PLC data each time. Make
    // sure the caller didn't ask for more.
    RTC_CHECK_GE(dec_len, requested_samples_per_channel);
    concealment_audio->AppendData(decoded.data(), dec_len);
    concealed_samples_ += rtc::checked_cast<size_t>(dec_len);
    // Consecutive GeneratePlc calls count as one concealment event.
    if (!last_was_plc) {
      ++concealment_events_;
    }
    last_was_plc_ = true;
  }

  size_t concealed_samples() { return concealed_samples_; }
  size_t concealment_events() { return concealment_events_; }

 private:
  const std::unique_ptr<InputAudioFile> input_;
  const int sample_rate_hz_;
  size_t concealed_samples_ = 0;
  size_t concealment_events_ = 0;
  // True iff the most recent output came from GeneratePlc rather than
  // DecodeInternal.
  bool last_was_plc_ = false;
};
// Input sample generator that produces nothing but zero-valued samples.
class ZeroSampleGenerator : public EncodeNetEqInput::Generator {
 public:
  rtc::ArrayView<const int16_t> Generate(size_t num_samples) override {
    zeros_.assign(num_samples, 0);
    const rtc::ArrayView<const int16_t> result(zeros_);
    RTC_DCHECK_EQ(result.size(), num_samples);
    return result;
  }

 private:
  // Backing storage for the returned view; reused across calls.
  std::vector<int16_t> zeros_;
};
// A NetEqInput decorator that forwards everything to a wrapped NetEqInput,
// but silently discards every |loss_cadence|-th packet (no losses when
// |loss_cadence| is 0).
class LossyInput : public NetEqInput {
 public:
  LossyInput(int loss_cadence, std::unique_ptr<NetEqInput> input)
      : loss_cadence_(loss_cadence), input_(std::move(input)) {}

  absl::optional<int64_t> NextPacketTime() const override {
    return input_->NextPacketTime();
  }

  absl::optional<int64_t> NextOutputEventTime() const override {
    return input_->NextOutputEventTime();
  }

  std::unique_ptr<PacketData> PopPacket() override {
    if (loss_cadence_ != 0) {
      ++count_;
      if (count_ % loss_cadence_ == 0) {
        // Discard one packet to create the loss.
        input_->PopPacket();
      }
    }
    return input_->PopPacket();
  }

  void AdvanceOutputEvent() override { return input_->AdvanceOutputEvent(); }

  bool ended() const override { return input_->ended(); }

  absl::optional<RTPHeader> NextHeader() const override {
    return input_->NextHeader();
  }

 private:
  const int loss_cadence_;
  int count_ = 0;  // Packets popped so far (only tracked when losses are on).
  const std::unique_ptr<NetEqInput> input_;
};
// An AudioChecksum which, on destruction, finalizes the checksum and writes
// it to the std::string that was handed to the constructor. The pointed-to
// string must outlive this object.
class AudioChecksumWithOutput : public AudioChecksum {
 public:
  explicit AudioChecksumWithOutput(std::string* output_str)
      : output_str_(*output_str) {}
  // Delivers the final checksum to the output string.
  ~AudioChecksumWithOutput() { output_str_ = Finish(); }

 private:
  std::string& output_str_;
};
// Runs a 10 second NetEq simulation in which every |loss_cadence|-th packet
// is dropped (0 means no losses), using a decoder that supplies its own PLC
// audio. A checksum of the output audio is written to |checksum|, and the
// simulation's network statistics are returned.
NetEqNetworkStatistics RunTest(int loss_cadence, std::string* checksum) {
  NetEq::Config config;
  config.for_test_no_time_stretching = true;

  // The input is mostly useless. It sends zero-samples to a PCM16b encoder,
  // but the actual encoded samples will never be used by the decoder in the
  // test. See below about the decoder.
  auto generator = absl::make_unique<ZeroSampleGenerator>();
  constexpr int kSampleRateHz = 32000;
  constexpr int kPayloadType = 100;
  AudioEncoderPcm16B::Config encoder_config;
  encoder_config.sample_rate_hz = kSampleRateHz;
  encoder_config.payload_type = kPayloadType;
  auto encoder = absl::make_unique<AudioEncoderPcm16B>(encoder_config);
  constexpr int kRunTimeMs = 10000;
  auto input = absl::make_unique<EncodeNetEqInput>(
      std::move(generator), std::move(encoder), kRunTimeMs);
  // Wrap the input in a loss function.
  auto lossy_input =
      absl::make_unique<LossyInput>(loss_cadence, std::move(input));

  // Setting up decoders.
  NetEqTest::DecoderMap decoders;
  // Using a fake decoder which simply reads the output audio from a file.
  auto input_file = absl::make_unique<InputAudioFile>(
      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"));
  AudioDecoderPlc dec(std::move(input_file), kSampleRateHz);
  // Masquerading as a PCM16b decoder.
  NetEqTest::ExternalDecoderInfo dec_info = {
      &dec, NetEqDecoder::kDecoderPCM16Bswb32kHz, "pcm16b_PLC"};
  NetEqTest::ExtDecoderMap external_decoders;
  external_decoders.insert(std::make_pair(kPayloadType, dec_info));

  // Output is simply a checksum calculator.
  auto output = absl::make_unique<AudioChecksumWithOutput>(checksum);

  // No callback objects.
  NetEqTest::Callbacks callbacks;

  NetEqTest neteq_test(config, decoders, external_decoders,
                       std::move(lossy_input), std::move(output), callbacks);
  EXPECT_LE(kRunTimeMs, neteq_test.Run());

  // The decoder's own concealment bookkeeping must agree with what NetEq
  // reports in its lifetime statistics.
  auto lifetime_stats = neteq_test.LifetimeStats();
  EXPECT_EQ(dec.concealed_samples(), lifetime_stats.concealed_samples);
  EXPECT_EQ(dec.concealment_events(), lifetime_stats.concealment_events);
  return neteq_test.SimulationStats();
}
} // namespace
// Runs the simulation once with losses and once without, and verifies that
// the audio checksums are identical: the fake decoder conceals each loss with
// the very same file audio that the lossless run decodes, so the output
// should not change.
TEST(NetEqDecoderPlc, Test) {
  std::string checksum;
  auto stats = RunTest(10, &checksum);

  std::string checksum_no_loss;
  auto stats_no_loss = RunTest(0, &checksum_no_loss);

  EXPECT_EQ(checksum, checksum_no_loss);
  EXPECT_EQ(stats.preemptive_rate, stats_no_loss.preemptive_rate);
  EXPECT_EQ(stats.accelerate_rate, stats_no_loss.accelerate_rate);
  // Expansion (concealment) must happen only in the lossy run.
  EXPECT_EQ(0, stats_no_loss.expand_rate);
  EXPECT_GT(stats.expand_rate, 0);
}
} // namespace test
} // namespace webrtc

View File

@ -885,7 +885,12 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
break;
}
case kExpand: {
return_value = DoExpand(play_dtmf);
RTC_DCHECK_EQ(return_value, 0);
if (!current_rtp_payload_type_ || !DoCodecPlc()) {
return_value = DoExpand(play_dtmf);
}
RTC_DCHECK_GE(sync_buffer_->FutureLength() - expand_->overlap_length(),
output_size_samples_);
break;
}
case kAccelerate:
@ -997,7 +1002,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
sync_buffer_->set_dtmf_index(sync_buffer_->Size());
}
if (last_mode_ != kModeExpand) {
if (last_mode_ != kModeExpand && last_mode_ != kModeCodecPlc) {
// If last operation was not expand, calculate the |playout_timestamp_| from
// the |sync_buffer_|. However, do not update the |playout_timestamp_| if it
// would be moved "backwards".
@ -1022,7 +1027,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
static_cast<uint32_t>(audio_frame->samples_per_channel_);
if (!(last_mode_ == kModeRfc3389Cng || last_mode_ == kModeCodecInternalCng ||
last_mode_ == kModeExpand)) {
last_mode_ == kModeExpand || last_mode_ == kModeCodecPlc)) {
generated_noise_stopwatch_.reset();
}
@ -1541,6 +1546,48 @@ void NetEqImpl::DoMerge(int16_t* decoded_buffer,
}
}
// Asks the active decoder to generate packet-loss concealment audio and, if
// any is produced, pushes it into the sync buffer. Returns false when there
// is no active decoder or when the decoder produced nothing, in which case
// the caller should fall back to the regular expand operation.
bool NetEqImpl::DoCodecPlc() {
  AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
  if (!decoder) {
    return false;
  }
  const size_t channels = algorithm_buffer_->Channels();
  // Request only what is needed to fill up one output frame beyond what the
  // sync buffer already holds as "future" data.
  const size_t requested_samples_per_channel =
      output_size_samples_ -
      (sync_buffer_->FutureLength() - expand_->overlap_length());
  concealment_audio_.Clear();
  decoder->GeneratePlc(requested_samples_per_channel, &concealment_audio_);
  if (concealment_audio_.empty()) {
    // Nothing produced. Resort to regular expand.
    return false;
  }
  // The decoder contract says: at least the requested amount, or nothing.
  RTC_CHECK_GE(concealment_audio_.size(),
               requested_samples_per_channel * channels);
  sync_buffer_->PushBackInterleaved(concealment_audio_);
  RTC_DCHECK_NE(algorithm_buffer_->Channels(), 0);
  const size_t concealed_samples_per_channel =
      concealment_audio_.size() / channels;

  // Update in-call and post-call statistics.
  const bool is_new_concealment_event = (last_mode_ != kModeCodecPlc);
  if (std::all_of(concealment_audio_.cbegin(), concealment_audio_.cend(),
                  [](int16_t i) { return i == 0; })) {
    // Expand operation generates only noise.
    stats_.ExpandedNoiseSamples(concealed_samples_per_channel,
                                is_new_concealment_event);
  } else {
    // Expand operation generates more than only noise.
    stats_.ExpandedVoiceSamples(concealed_samples_per_channel,
                                is_new_concealment_event);
  }
  last_mode_ = kModeCodecPlc;
  if (!generated_noise_stopwatch_) {
    // Start a new stopwatch since we may be covering for a lost CNG packet.
    generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
  }
  return true;
}
int NetEqImpl::DoExpand(bool play_dtmf) {
while ((sync_buffer_->FutureLength() - expand_->overlap_length()) <
output_size_samples_) {

View File

@ -290,6 +290,8 @@ class NetEqImpl : public webrtc::NetEq {
AudioDecoder::SpeechType speech_type,
bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method which calls the Expand class to perform the expand operation.
int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
@ -425,6 +427,7 @@ class NetEqImpl : public webrtc::NetEq {
ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(crit_sect_);
ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(crit_sect_);
bool no_time_stretching_ RTC_GUARDED_BY(crit_sect_); // Only used for test.
rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(crit_sect_);
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);

View File

@ -41,7 +41,7 @@ int Normal::Process(const int16_t* input,
output->Clear();
return 0;
}
output->PushBackInterleaved(input, length);
output->PushBackInterleaved(rtc::ArrayView<const int16_t>(input, length));
const int fs_mult = fs_hz_ / 8000;
RTC_DCHECK_GT(fs_mult, 0);

View File

@ -31,7 +31,8 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(
old_data_length >= input_length / num_channels_ - overlap_samples_) {
// Length of input data too short to do preemptive expand. Simply move all
// data from input to output.
output->PushBackInterleaved(input, input_length);
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
return kError;
}
const bool kFastMode = false; // Fast mode is not available for PE Expand.
@ -75,19 +76,19 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
size_t unmodified_length =
std::max(old_data_length_per_channel_, fs_mult_120);
// Copy first part, including cross-fade region.
output->PushBackInterleaved(
input, (unmodified_length + peak_index) * num_channels_);
output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
input, (unmodified_length + peak_index) * num_channels_));
// Copy the last |peak_index| samples up to 15 ms to |temp_vector|.
AudioMultiVector temp_vector(num_channels_);
temp_vector.PushBackInterleaved(
temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>(
&input[(unmodified_length - peak_index) * num_channels_],
peak_index * num_channels_);
peak_index * num_channels_));
// Cross-fade |temp_vector| onto the end of |output|.
output->CrossFade(temp_vector, peak_index);
// Copy the last unmodified part, 15 ms + pitch period until the end.
output->PushBackInterleaved(
output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
&input[unmodified_length * num_channels_],
input_length - unmodified_length * num_channels_);
input_length - unmodified_length * num_channels_));
if (active_speech) {
return kSuccess;
@ -96,7 +97,8 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
}
} else {
// Preemptive expand not allowed. Simply move all data from decoded to outData.
output->PushBackInterleaved(input, input_length);
output->PushBackInterleaved(
rtc::ArrayView<const int16_t>(input, input_length));
return kNoStretch;
}
}

View File

@ -36,6 +36,16 @@ void SyncBuffer::PushBack(const AudioMultiVector& append_this) {
dtmf_index_ -= std::min(dtmf_index_, samples_added);
}
// Appends the channel-interleaved samples in |append_this| to the back of the
// buffer, then pops the same number of samples per channel from the front, so
// that the total buffer size is unchanged. |next_index_| and |dtmf_index_|
// are shifted down to track the samples removed from the front (mirroring
// PushBack above).
void SyncBuffer::PushBackInterleaved(const rtc::BufferT<int16_t>& append_this) {
  const size_t size_before_adding = Size();
  AudioMultiVector::PushBackInterleaved(append_this);
  const size_t samples_added_per_channel = Size() - size_before_adding;
  RTC_DCHECK_EQ(samples_added_per_channel * Channels(), append_this.size());
  AudioMultiVector::PopFront(samples_added_per_channel);
  next_index_ -= std::min(next_index_, samples_added_per_channel);
  dtmf_index_ -= std::min(dtmf_index_, samples_added_per_channel);
}
void SyncBuffer::PushFrontZeros(size_t length) {
InsertZerosAtIndex(length, 0);
}

View File

@ -13,6 +13,7 @@
#include "api/audio/audio_frame.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "rtc_base/buffer.h"
#include "rtc_base/constructormagic.h"
namespace webrtc {
@ -34,6 +35,9 @@ class SyncBuffer : public AudioMultiVector {
// the move of the beginning of "future" data.
void PushBack(const AudioMultiVector& append_this) override;
// Like PushBack, but reads the samples channel-interleaved from the input.
void PushBackInterleaved(const rtc::BufferT<int16_t>& append_this);
// Adds |length| zeros to the beginning of each channel. Removes
// the same number of samples from the end of the SyncBuffer, to
// maintain a constant buffer size. The |next_index_| is updated to reflect

View File

@ -53,7 +53,7 @@ void EncodeNetEqInput::AdvanceOutputEvent() {
}
bool EncodeNetEqInput::ended() const {
return next_output_event_ms_ <= input_duration_ms_;
return next_output_event_ms_ > input_duration_ms_;
}
absl::optional<RTPHeader> EncodeNetEqInput::NextHeader() const {