Moving src/webrtc into src/.

In order to eliminate the WebRTC Subtree mirror in Chromium, 
WebRTC is moving the content of the src/webrtc directory up
to the src/ directory.

NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
TBR=tommi@webrtc.org

Bug: chromium:611808
Change-Id: Iac59c5b51b950f174119565bac87955a7994bc38
Reviewed-on: https://webrtc-review.googlesource.com/1560
Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Henrik Kjellander <kjellander@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#19845}
This commit is contained in:
Mirko Bonadei
2017-09-15 06:15:48 +02:00
committed by Commit Bot
parent 6674846b4a
commit bb547203bf
4576 changed files with 1092 additions and 1196 deletions

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/accelerate.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
// Runs the accelerate operation on |input| (|input_length| elements) and
// appends the result to |output|. When there are no channels, or the input is
// shorter per channel than (2 * 120 - 1) * |fs_mult_| samples, the input is
// copied to |output| untouched and kError is returned. Otherwise the work is
// delegated to TimeStretch::Process().
Accelerate::ReturnCodes Accelerate::Process(const int16_t* input,
                                            size_t input_length,
                                            bool fast_accelerate,
                                            AudioMultiVector* output,
                                            size_t* length_change_samples) {
  // 15 ms corresponds to 120 samples at an 8 kHz sample rate. The input must
  // be (almost) 30 ms long, i.e. two such periods minus one sample.
  static const size_t kSamplesPer15Ms = 120;
  const bool long_enough =
      num_channels_ != 0 &&
      input_length / num_channels_ >= (2 * kSamplesPer15Ms - 1) * fs_mult_;
  if (long_enough) {
    return TimeStretch::Process(input, input_length, fast_accelerate, output,
                                length_change_samples);
  }
  // Too little data to accelerate: pass everything through unchanged.
  output->PushBackInterleaved(input, input_length);
  return kError;
}
// Reports a correlation of zero for signals without active speech; in that
// case the correlation value is irrelevant. The length and peak-index
// arguments are intentionally unused.
void Accelerate::SetParametersForPassiveSpeech(size_t /*len*/,
                                               int16_t* best_correlation,
                                               size_t* /*peak_index*/) const {
  const int16_t kNoCorrelation = 0;
  *best_correlation = kNoCorrelation;
}
// Decides whether |input| may be accelerated and, if so, removes |peak_index|
// samples per channel (or a multiple of it in fast mode) by overlap-add.
// Returns kNoStretch when the input was copied unchanged, kSuccess when
// active speech was stretched, and kSuccessLowEnergy when passive speech was
// stretched. The result is always appended to |output|.
Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch(
    const int16_t* input,
    size_t input_length,
    size_t peak_index,
    int16_t best_correlation,
    bool active_speech,
    bool fast_mode,
    AudioMultiVector* output) const {
  // Fast mode lowers the requirement to 8192 (0.5 in Q14).
  const int correlation_threshold = fast_mode ? 8192 : kCorrelationThreshold;

  // Stretching needs a strong correlation, unless the speech is passive.
  const bool strong_correlation = best_correlation > correlation_threshold;
  if (!strong_correlation && active_speech) {
    // Accelerate not allowed. Simply move all data from input to output.
    output->PushBackInterleaved(input, input_length);
    return kNoStretch;
  }

  // 120 corresponds to 15 ms; pre-multiply it with |fs_mult_| once.
  size_t samples_15ms = fs_mult_ * 120;
  if (fast_mode) {
    // Fit as many multiples of |peak_index| as possible in 15 ms.
    // TODO(henrik.lundin) Consider finding multiple correlation peaks and
    // pick the one with the longest correlation lag in this case.
    peak_index = (samples_15ms / peak_index) * peak_index;
  }
  assert(samples_15ms >= peak_index);  // Should be handled in Process().

  const size_t head_length = samples_15ms * num_channels_;
  const size_t overlap_length = peak_index * num_channels_;
  const size_t tail_start = (samples_15ms + peak_index) * num_channels_;

  // First part, 0 to 15 ms, is copied as is.
  output->PushBackInterleaved(input, head_length);

  // The |peak_index| samples that start at 15 ms are cross-faded onto the end
  // of what is already in |output|.
  AudioMultiVector overlap(num_channels_);
  overlap.PushBackInterleaved(&input[head_length], overlap_length);
  output->CrossFade(overlap, peak_index);

  // Remaining unmodified part: from 15 ms + pitch period until the end.
  output->PushBackInterleaved(&input[tail_start], input_length - tail_start);

  return active_speech ? kSuccess : kSuccessLowEnergy;
}
// Builds a new Accelerate object from the given parameters. The object is
// allocated with new and nothing here releases it, so presumably the caller
// takes ownership (confirm against the call sites).
Accelerate* AccelerateFactory::Create(
    int sample_rate_hz,
    size_t num_channels,
    const BackgroundNoise& background_noise) const {
  Accelerate* const accelerate =
      new Accelerate(sample_rate_hz, num_channels, background_noise);
  return accelerate;
}
} // namespace webrtc

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/time_stretch.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
// Accelerate operation. The bulk of the algorithm lives in the TimeStretch
// base class, which is shared with the PreemptiveExpand operation; only the
// parts that are specific to Accelerate are implemented here.
class Accelerate : public TimeStretch {
 public:
  Accelerate(int sample_rate_hz,
             size_t num_channels,
             const BackgroundNoise& background_noise)
      : TimeStretch(sample_rate_hz, num_channels, background_noise) {}

  // Performs the actual Accelerate operation. Samples are read from |input|
  // (|input_length| elements) and written to |output|. The number of samples
  // removed through time-stretching is provided in |length_change_samples|.
  // The outcome of the operation is returned as an enumerator value. With
  // |fast_accelerate| set, the requirements on finding strong correlations
  // are relaxed and multiple pitch periods may be removed if possible.
  ReturnCodes Process(const int16_t* input,
                      size_t input_length,
                      bool fast_accelerate,
                      AudioMultiVector* output,
                      size_t* length_change_samples);

 protected:
  // Assigns suitable values to |best_correlation| and |peak_index| for a
  // signal that contains no active speech.
  void SetParametersForPassiveSpeech(size_t len,
                                     int16_t* best_correlation,
                                     size_t* peak_index) const override;

  // Evaluates the criteria for time-stretching and, when they are met,
  // carries out the time-stretching.
  ReturnCodes CheckCriteriaAndStretch(const int16_t* input,
                                      size_t input_length,
                                      size_t peak_index,
                                      int16_t best_correlation,
                                      bool active_speech,
                                      bool fast_mode,
                                      AudioMultiVector* output) const override;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(Accelerate);
};
// Factory for Accelerate objects. Create() is virtual, so a derived factory
// can substitute its own implementation.
struct AccelerateFactory {
  AccelerateFactory() {}
  virtual ~AccelerateFactory() {}

  // Returns an Accelerate instance built from the given parameters.
  virtual Accelerate* Create(int sample_rate_hz,
                             size_t num_channels,
                             const BackgroundNoise& background_noise) const;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include <assert.h>
#include "webrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
#include "webrtc/rtc_base/checks.h"
#ifdef WEBRTC_CODEC_G722
#include "webrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h"
#endif
#ifdef WEBRTC_CODEC_ILBC
#include "webrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h"
#endif
#ifdef WEBRTC_CODEC_ISACFX
#include "webrtc/modules/audio_coding/codecs/isac/fix/include/audio_decoder_isacfix.h" // nogncheck
#include "webrtc/modules/audio_coding/codecs/isac/fix/include/audio_encoder_isacfix.h" // nogncheck
#endif
#ifdef WEBRTC_CODEC_ISAC
#include "webrtc/modules/audio_coding/codecs/isac/main/include/audio_decoder_isac.h" // nogncheck
#include "webrtc/modules/audio_coding/codecs/isac/main/include/audio_encoder_isac.h" // nogncheck
#endif
#ifdef WEBRTC_CODEC_OPUS
#include "webrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h"
#endif
#include "webrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h"
namespace webrtc {
// Returns true if |codec_type| is one of the decoder types listed below.
// Several entries only exist when the matching WEBRTC_CODEC_* macro is
// defined at build time; every other value yields false.
bool CodecSupported(NetEqDecoder codec_type) {
  bool supported = false;
  switch (codec_type) {
    case NetEqDecoder::kDecoderPCMu:
    case NetEqDecoder::kDecoderPCMa:
    case NetEqDecoder::kDecoderPCMu_2ch:
    case NetEqDecoder::kDecoderPCMa_2ch:
#if defined(WEBRTC_CODEC_ILBC)
    case NetEqDecoder::kDecoderILBC:
#endif
#if defined(WEBRTC_CODEC_ISACFX) || defined(WEBRTC_CODEC_ISAC)
    case NetEqDecoder::kDecoderISAC:
#endif
#if defined(WEBRTC_CODEC_ISAC)
    case NetEqDecoder::kDecoderISACswb:
#endif
    case NetEqDecoder::kDecoderPCM16B:
    case NetEqDecoder::kDecoderPCM16Bwb:
    case NetEqDecoder::kDecoderPCM16Bswb32kHz:
    case NetEqDecoder::kDecoderPCM16Bswb48kHz:
    case NetEqDecoder::kDecoderPCM16B_2ch:
    case NetEqDecoder::kDecoderPCM16Bwb_2ch:
    case NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch:
    case NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch:
    case NetEqDecoder::kDecoderPCM16B_5ch:
#if defined(WEBRTC_CODEC_G722)
    case NetEqDecoder::kDecoderG722:
    case NetEqDecoder::kDecoderG722_2ch:
#endif
#if defined(WEBRTC_CODEC_OPUS)
    case NetEqDecoder::kDecoderOpus:
    case NetEqDecoder::kDecoderOpus_2ch:
#endif
    case NetEqDecoder::kDecoderRED:
    case NetEqDecoder::kDecoderAVT:
    case NetEqDecoder::kDecoderAVT16kHz:
    case NetEqDecoder::kDecoderAVT32kHz:
    case NetEqDecoder::kDecoderAVT48kHz:
    case NetEqDecoder::kDecoderCNGnb:
    case NetEqDecoder::kDecoderCNGwb:
    case NetEqDecoder::kDecoderCNGswb32kHz:
    case NetEqDecoder::kDecoderCNGswb48kHz:
    case NetEqDecoder::kDecoderArbitrary:
      supported = true;
      break;
    default:
      break;
  }
  return supported;
}
} // namespace webrtc

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_DECODER_IMPL_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_DECODER_IMPL_H_
#include <assert.h>
#include "webrtc/api/audio_codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq/neteq_decoder_enum.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
#ifdef WEBRTC_CODEC_G722
#include "webrtc/modules/audio_coding/codecs/g722/g722_interface.h"
#endif
namespace webrtc {
// Returns true if |codec_type| is supported.
bool CodecSupported(NetEqDecoder codec_type);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_DECODER_IMPL_H_

View File

@ -0,0 +1,693 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include <assert.h>
#include <stdlib.h>
#include <memory>
#include <string>
#include <vector>
#include "webrtc/api/audio_codecs/opus/audio_encoder_opus.h"
#include "webrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
#include "webrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h"
#include "webrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h"
#include "webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h"
#include "webrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h"
#include "webrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/include/audio_decoder_isacfix.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/include/audio_encoder_isacfix.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/include/audio_decoder_isac.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/include/audio_encoder_isac.h"
#include "webrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h"
#include "webrtc/test/gtest.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
namespace {
// Checks that |input| and the first channel of |output| differ by at most
// |tolerance| per sample. |output| is interleaved with |channels| channels.
// |delay| corrects for codec delay: input sample n is compared with
// output[channels * n + delay], and the last |delay| samples are skipped.
// |delay| must lie in [0, num_samples]; anything else is reported as a test
// failure rather than wrapping the unsigned loop bound and reading out of
// bounds. Stops at the first mismatch.
void CompareInputOutput(const std::vector<int16_t>& input,
                        const std::vector<int16_t>& output,
                        size_t num_samples,
                        size_t channels,
                        int tolerance,
                        int delay) {
  ASSERT_LE(num_samples, input.size());
  ASSERT_LE(num_samples * channels, output.size());
  // |num_samples - delay| is an unsigned subtraction; validate |delay| first
  // so that it cannot wrap around.
  ASSERT_LE(0, delay);
  ASSERT_LE(static_cast<size_t>(delay), num_samples);
  const size_t compared_samples = num_samples - static_cast<size_t>(delay);
  for (size_t n = 0; n < compared_samples; ++n) {
    ASSERT_NEAR(input[n], output[channels * n + delay], tolerance)
        << "Exit test on first diff; n = " << n;
  }
}
// Verifies that the first two channels of the interleaved |output| agree to
// within |tolerance| for each of the |samples_per_channel| sample frames.
// Requires at least two channels.
void CompareTwoChannels(const std::vector<int16_t>& output,
                        size_t samples_per_channel,
                        size_t channels,
                        int tolerance) {
  ASSERT_GE(channels, 2u);
  ASSERT_LE(samples_per_channel * channels, output.size());
  for (unsigned int frame = 0; frame < samples_per_channel; ++frame) {
    // Index of the first-channel sample in this frame; the second channel
    // follows directly after it.
    const size_t first = channels * frame;
    ASSERT_NEAR(output[first], output[first + 1], tolerance)
        << "Stereo samples differ.";
  }
}
// Calculates the mean-squared error between |input| and the first channel of
// the interleaved |output| (|channels| channels). |delay| corrects for codec
// delay: input sample n is compared with output[channels * n + delay], and
// the mean is taken over the num_samples - delay compared samples.
// Returns 0.0 when |num_samples| is zero. Requires 0 <= delay < num_samples
// otherwise (checked with assert).
double MseInputOutput(const std::vector<int16_t>& input,
                      const std::vector<int16_t>& output,
                      size_t num_samples,
                      size_t channels,
                      int delay) {
  // Handle the empty case before the asserts: |delay < num_samples| can never
  // hold for zero samples.
  if (num_samples == 0)
    return 0.0;
  assert(delay >= 0);
  assert(delay < static_cast<int>(num_samples));
  assert(num_samples <= input.size());
  assert(num_samples * channels <= output.size());
  const size_t compared_samples = num_samples - static_cast<size_t>(delay);
  double squared_sum = 0.0;
  for (size_t n = 0; n < compared_samples; ++n) {
    // Work in double: two int16_t values can differ by up to 65535, and the
    // square of that does not fit in a 32-bit int.
    const double diff = static_cast<double>(input[n]) -
                        static_cast<double>(output[channels * n + delay]);
    squared_sum += diff * diff;
  }
  return squared_sum / static_cast<double>(compared_samples);
}
} // namespace
// Base fixture for the codec-specific fixtures that derive from it. It holds
// the input audio file, an encoder and a decoder, and provides helpers that
// encode audio, decode it again and compare the result with the input.
// Derived constructors fill in frame_size_, data_length_, decoder_ and
// audio_encoder_; SetUp() fails the test if data_length_ was left at zero.
class AudioDecoderTest : public ::testing::Test {
protected:
// Initializes the members to defaults. The input audio is the resource
// "audio_coding/testfile32kHz" with extension "pcm", opened with 32000 as the
// second argument.
AudioDecoderTest()
: input_audio_(
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
32000),
codec_input_rate_hz_(32000), // Legacy default value.
frame_size_(0),
data_length_(0),
channels_(1),
payload_type_(17),
decoder_(NULL) {}
virtual ~AudioDecoderTest() {}
// If an encoder has been created, adopts its sample rate as the codec input
// rate, then checks that the derived fixture set data_length_.
virtual void SetUp() {
if (audio_encoder_)
codec_input_rate_hz_ = audio_encoder_->SampleRateHz();
// Create arrays.
ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0";
}
// Releases the decoder, which is held through a raw pointer.
virtual void TearDown() {
delete decoder_;
decoder_ = NULL;
}
// Hook called at the start of each test helper; does nothing by default.
virtual void InitEncoder() { }
// Encodes |input_len_samples| samples from |input|, appending to |output|.
// The input is fed to the encoder in chunks of SampleRateHz() / 100 samples,
// and its length must equal that chunk size times
// Num10MsFramesInNextPacket() (enforced with RTC_CHECK_EQ). Every Encode()
// call before the last one is expected to report 0 encoded bytes. Returns the
// encoded_bytes value reported by the last Encode() call.
// TODO(henrik.lundin) Change return type to size_t once most/all overriding
// implementations are gone.
virtual int EncodeFrame(const int16_t* input,
size_t input_len_samples,
rtc::Buffer* output) {
AudioEncoder::EncodedInfo encoded_info;
const size_t samples_per_10ms = audio_encoder_->SampleRateHz() / 100;
RTC_CHECK_EQ(samples_per_10ms * audio_encoder_->Num10MsFramesInNextPacket(),
input_len_samples);
std::unique_ptr<int16_t[]> interleaved_input(
new int16_t[channels_ * samples_per_10ms]);
for (size_t i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) {
// Checks the result of the previous iteration (or the default-constructed
// value on the first pass).
EXPECT_EQ(0u, encoded_info.encoded_bytes);
// Duplicate the mono input signal to however many channels the test
// wants.
test::InputAudioFile::DuplicateInterleaved(input + i * samples_per_10ms,
samples_per_10ms, channels_,
interleaved_input.get());
// NOTE(review): the view length is computed from the encoder's
// NumChannels(), while the buffer was sized with channels_; the two are
// presumably equal in every fixture -- confirm.
encoded_info = audio_encoder_->Encode(
0, rtc::ArrayView<const int16_t>(interleaved_input.get(),
audio_encoder_->NumChannels() *
audio_encoder_->SampleRateHz() /
100),
output);
}
EXPECT_EQ(payload_type_, encoded_info.payload_type);
return static_cast<int>(encoded_info.encoded_bytes);
}
// Encodes and decodes audio. The absolute difference between the input and
// output is compared vs |tolerance|, and the mean-squared error is compared
// with |mse|. The encoded stream should contain |expected_bytes|. For stereo
// audio, the absolute difference between the two channels is compared vs
// |channel_diff_tolerance|.
void EncodeDecodeTest(size_t expected_bytes, int tolerance, double mse,
int delay = 0, int channel_diff_tolerance = 0) {
ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0";
ASSERT_GE(channel_diff_tolerance, 0) <<
"Test must define a channel_diff_tolerance >= 0";
size_t processed_samples = 0u;
rtc::Buffer encoded;
size_t encoded_bytes = 0u;
InitEncoder();
std::vector<int16_t> input;
std::vector<int16_t> decoded;
// Process whole frames until fewer than frame_size_ samples of data_length_
// remain.
// NOTE(review): nothing here checks frame_size_ > 0; a zero frame size would
// never advance processed_samples.
while (processed_samples + frame_size_ <= data_length_) {
// Extend input vector with |frame_size_|.
input.resize(input.size() + frame_size_, 0);
// Read from input file.
ASSERT_GE(input.size() - processed_samples, frame_size_);
ASSERT_TRUE(input_audio_.Read(
frame_size_, codec_input_rate_hz_, &input[processed_samples]));
size_t enc_len = EncodeFrame(
&input[processed_samples], frame_size_, &encoded);
// Make sure that frame_size_ * channels_ samples are allocated and free.
decoded.resize((processed_samples + frame_size_) * channels_, 0);
AudioDecoder::SpeechType speech_type;
// The payload for this frame starts at offset |encoded_bytes| in |encoded|,
// i.e. after the bytes accumulated from earlier frames.
size_t dec_len = decoder_->Decode(
&encoded.data()[encoded_bytes], enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
&decoded[processed_samples * channels_], &speech_type);
EXPECT_EQ(frame_size_ * channels_, dec_len);
encoded_bytes += enc_len;
processed_samples += frame_size_;
}
// For some codecs it doesn't make sense to check expected number of bytes,
// since the number can vary for different platforms. Opus and iSAC are
// such codecs. In this case expected_bytes is set to 0.
if (expected_bytes) {
EXPECT_EQ(expected_bytes, encoded_bytes);
}
CompareInputOutput(
input, decoded, processed_samples, channels_, tolerance, delay);
if (channels_ == 2)
CompareTwoChannels(
decoded, processed_samples, channels_, channel_diff_tolerance);
EXPECT_LE(
MseInputOutput(input, decoded, processed_samples, channels_, delay),
mse);
}
// Encodes a payload and decodes it twice with decoder re-init before each
// decode. Verifies that the decoded result is the same.
void ReInitTest() {
InitEncoder();
std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
rtc::Buffer encoded;
size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded);
size_t dec_len;
AudioDecoder::SpeechType speech_type1, speech_type2;
decoder_->Reset();
std::unique_ptr<int16_t[]> output1(new int16_t[frame_size_ * channels_]);
dec_len = decoder_->Decode(encoded.data(), enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
output1.get(), &speech_type1);
ASSERT_LE(dec_len, frame_size_ * channels_);
EXPECT_EQ(frame_size_ * channels_, dec_len);
// Re-init decoder and decode again.
decoder_->Reset();
std::unique_ptr<int16_t[]> output2(new int16_t[frame_size_ * channels_]);
dec_len = decoder_->Decode(encoded.data(), enc_len, codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
output2.get(), &speech_type2);
ASSERT_LE(dec_len, frame_size_ * channels_);
EXPECT_EQ(frame_size_ * channels_, dec_len);
// NOTE(review): only the first frame_size_ elements are compared, although
// each buffer holds frame_size_ * channels_ samples.
for (unsigned int n = 0; n < frame_size_; ++n) {
ASSERT_EQ(output1[n], output2[n]) << "Exit test on first diff; n = " << n;
}
EXPECT_EQ(speech_type1, speech_type2);
}
// Call DecodePlc and verify that the correct number of samples is produced.
void DecodePlcTest() {
InitEncoder();
std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
rtc::Buffer encoded;
size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded);
AudioDecoder::SpeechType speech_type;
decoder_->Reset();
std::unique_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
size_t dec_len = decoder_->Decode(encoded.data(), enc_len,
codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
output.get(), &speech_type);
EXPECT_EQ(frame_size_ * channels_, dec_len);
// Call DecodePlc and verify that we get one frame of data.
// (Overwrite the output from the above Decode call, but that does not
// matter.)
dec_len = decoder_->DecodePlc(1, output.get());
EXPECT_EQ(frame_size_ * channels_, dec_len);
}
// Source of the input audio read by the helpers above.
test::ResampleInputAudioFile input_audio_;
// Rate passed to input_audio_.Read() and to Decode(); overwritten in SetUp()
// with the encoder's sample rate when an encoder exists.
int codec_input_rate_hz_;
// Number of samples per channel handled per encode/decode call.
size_t frame_size_;
// Total number of input samples EncodeDecodeTest() may consume.
size_t data_length_;
// Number of channels the mono input is duplicated to before encoding.
size_t channels_;
// Payload type that EncodeFrame() expects the encoder to report.
const int payload_type_;
// Decoder under test; deleted in TearDown().
AudioDecoder* decoder_;
// Encoder that produces the payloads fed to decoder_.
std::unique_ptr<AudioEncoder> audio_encoder_;
};
class AudioDecoderPcmUTest : public AudioDecoderTest {
protected:
AudioDecoderPcmUTest() : AudioDecoderTest() {
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderPcmU(1);
AudioEncoderPcmU::Config config;
config.frame_size_ms = static_cast<int>(frame_size_ / 8);
config.payload_type = payload_type_;
audio_encoder_.reset(new AudioEncoderPcmU(config));
}
};
class AudioDecoderPcmATest : public AudioDecoderTest {
protected:
AudioDecoderPcmATest() : AudioDecoderTest() {
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderPcmA(1);
AudioEncoderPcmA::Config config;
config.frame_size_ms = static_cast<int>(frame_size_ / 8);
config.payload_type = payload_type_;
audio_encoder_.reset(new AudioEncoderPcmA(config));
}
};
class AudioDecoderPcm16BTest : public AudioDecoderTest {
protected:
AudioDecoderPcm16BTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
frame_size_ = 20 * codec_input_rate_hz_ / 1000;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderPcm16B(codec_input_rate_hz_, 1);
assert(decoder_);
AudioEncoderPcm16B::Config config;
config.sample_rate_hz = codec_input_rate_hz_;
config.frame_size_ms =
static_cast<int>(frame_size_ / (config.sample_rate_hz / 1000));
config.payload_type = payload_type_;
audio_encoder_.reset(new AudioEncoderPcm16B(config));
}
};
class AudioDecoderIlbcTest : public AudioDecoderTest {
protected:
AudioDecoderIlbcTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 8000;
frame_size_ = 240;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderIlbcImpl;
assert(decoder_);
AudioEncoderIlbcConfig config;
config.frame_size_ms = 30;
audio_encoder_.reset(new AudioEncoderIlbcImpl(config, payload_type_));
}
// Overload the default test since iLBC's function WebRtcIlbcfix_NetEqPlc does
// not return any data. It simply resets a few states and returns 0.
void DecodePlcTest() {
InitEncoder();
std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
ASSERT_TRUE(
input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
rtc::Buffer encoded;
size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded);
AudioDecoder::SpeechType speech_type;
decoder_->Reset();
std::unique_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
size_t dec_len = decoder_->Decode(encoded.data(), enc_len,
codec_input_rate_hz_,
frame_size_ * channels_ * sizeof(int16_t),
output.get(), &speech_type);
EXPECT_EQ(frame_size_, dec_len);
// Simply call DecodePlc and verify that we get 0 as return value.
EXPECT_EQ(0U, decoder_->DecodePlc(1, output.get()));
}
};
class AudioDecoderIsacFloatTest : public AudioDecoderTest {
protected:
AudioDecoderIsacFloatTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
frame_size_ = 480;
data_length_ = 10 * frame_size_;
AudioEncoderIsacFloatImpl::Config config;
config.payload_type = payload_type_;
config.sample_rate_hz = codec_input_rate_hz_;
config.adaptive_mode = false;
config.frame_size_ms =
1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
audio_encoder_.reset(new AudioEncoderIsacFloatImpl(config));
decoder_ = new AudioDecoderIsacFloatImpl(codec_input_rate_hz_);
}
};
class AudioDecoderIsacSwbTest : public AudioDecoderTest {
protected:
AudioDecoderIsacSwbTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 32000;
frame_size_ = 960;
data_length_ = 10 * frame_size_;
AudioEncoderIsacFloatImpl::Config config;
config.payload_type = payload_type_;
config.sample_rate_hz = codec_input_rate_hz_;
config.adaptive_mode = false;
config.frame_size_ms =
1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
audio_encoder_.reset(new AudioEncoderIsacFloatImpl(config));
decoder_ = new AudioDecoderIsacFloatImpl(codec_input_rate_hz_);
}
};
class AudioDecoderIsacFixTest : public AudioDecoderTest {
protected:
AudioDecoderIsacFixTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
frame_size_ = 480;
data_length_ = 10 * frame_size_;
AudioEncoderIsacFixImpl::Config config;
config.payload_type = payload_type_;
config.sample_rate_hz = codec_input_rate_hz_;
config.adaptive_mode = false;
config.frame_size_ms =
1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
audio_encoder_.reset(new AudioEncoderIsacFixImpl(config));
decoder_ = new AudioDecoderIsacFixImpl(codec_input_rate_hz_);
}
};
class AudioDecoderG722Test : public AudioDecoderTest {
protected:
AudioDecoderG722Test() : AudioDecoderTest() {
codec_input_rate_hz_ = 16000;
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderG722Impl;
assert(decoder_);
AudioEncoderG722Config config;
config.frame_size_ms = 10;
config.num_channels = 1;
audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_));
}
};
class AudioDecoderG722StereoTest : public AudioDecoderTest {
protected:
AudioDecoderG722StereoTest() : AudioDecoderTest() {
channels_ = 2;
codec_input_rate_hz_ = 16000;
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderG722StereoImpl;
assert(decoder_);
AudioEncoderG722Config config;
config.frame_size_ms = 10;
config.num_channels = 2;
audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_));
}
};
class AudioDecoderOpusTest : public AudioDecoderTest {
protected:
AudioDecoderOpusTest() : AudioDecoderTest() {
codec_input_rate_hz_ = 48000;
frame_size_ = 480;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderOpusImpl(1);
AudioEncoderOpusConfig config;
config.frame_size_ms = static_cast<int>(frame_size_) / 48;
config.application = AudioEncoderOpusConfig::ApplicationMode::kVoip;
audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder(config, payload_type_);
}
};
class AudioDecoderOpusStereoTest : public AudioDecoderOpusTest {
protected:
AudioDecoderOpusStereoTest() : AudioDecoderOpusTest() {
channels_ = 2;
delete decoder_;
decoder_ = new AudioDecoderOpusImpl(2);
AudioEncoderOpusConfig config;
config.frame_size_ms = static_cast<int>(frame_size_) / 48;
config.num_channels = 2;
config.application = AudioEncoderOpusConfig::ApplicationMode::kAudio;
audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder(config, payload_type_);
}
};
// Round-trips audio through the PcmU codec, re-runs the decode after a reset,
// and checks that the decoder reports no PLC support.
TEST_F(AudioDecoderPcmUTest, EncodeDecode) {
  const int kTolerance = 251;
  const double kMaxMse = 1734.0;
  // The encoded stream is expected to hold data_length_ bytes.
  EncodeDecodeTest(data_length_, kTolerance, kMaxMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
namespace {
// Reports |rate| to |audio_encoder| as the uplink bandwidth (with an empty
// optional as second argument) and returns the target bitrate the encoder
// reports afterwards.
int SetAndGetTargetBitrate(AudioEncoder* audio_encoder, int rate) {
  const rtc::Optional<int64_t> no_value;
  audio_encoder->OnReceivedUplinkBandwidth(rate, no_value);
  const int target_bitrate = audio_encoder->GetTargetBitrate();
  return target_bitrate;
}
// Expects |audio_encoder| to keep reporting |fixed_rate| as its target
// bitrate regardless of the requested rate. Tries 32000 and the values just
// below, at and just above |fixed_rate|, in that order.
void TestSetAndGetTargetBitratesWithFixedCodec(AudioEncoder* audio_encoder,
                                               int fixed_rate) {
  const int requested_rates[] = {32000, fixed_rate - 1, fixed_rate,
                                 fixed_rate + 1};
  for (const int requested : requested_rates) {
    EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, requested));
  }
}
} // namespace
// The PcmU encoder is expected to stay at a target bitrate of 64000.
TEST_F(AudioDecoderPcmUTest, SetTargetBitrate) {
  const int kFixedRate = 64000;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), kFixedRate);
}
// Round-trips audio through the PcmA codec, re-runs the decode after a reset,
// and checks that the decoder reports no PLC support.
TEST_F(AudioDecoderPcmATest, EncodeDecode) {
  const int kTolerance = 308;
  const double kMaxMse = 1931.0;
  // The encoded stream is expected to hold data_length_ bytes.
  EncodeDecodeTest(data_length_, kTolerance, kMaxMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// The PcmA encoder is expected to stay at a target bitrate of 64000.
TEST_F(AudioDecoderPcmATest, SetTargetBitrate) {
  const int kFixedRate = 64000;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), kFixedRate);
}
// Round-trips audio through the Pcm16B codec with zero tolerance and zero
// allowed mean-squared error, re-runs the decode after a reset, and checks
// that the decoder reports no PLC support.
TEST_F(AudioDecoderPcm16BTest, EncodeDecode) {
  const int kTolerance = 0;
  const double kMaxMse = 0.0;
  // The encoded stream is expected to hold two bytes per input sample.
  EncodeDecodeTest(2 * data_length_, kTolerance, kMaxMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// The Pcm16B encoder is expected to stay at a target bitrate of 16 times the
// codec input rate.
TEST_F(AudioDecoderPcm16BTest, SetTargetBitrate) {
  const int fixed_rate = codec_input_rate_hz_ * 16;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), fixed_rate);
}
// Round-trips audio through the iLBC codec (expecting 500 encoded bytes),
// re-runs the decode after a reset, and exercises the PLC path, which this
// decoder is expected to support.
TEST_F(AudioDecoderIlbcTest, EncodeDecode) {
  const int kTolerance = 6808;
  const double kMaxMse = 2.13e6;
  const int kDelay = 80;  // Delay from input to output.
  EncodeDecodeTest(500, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_TRUE(decoder_->HasDecodePlc());
  DecodePlcTest();
}
// The iLBC encoder is expected to stay at a target bitrate of 13333.
TEST_F(AudioDecoderIlbcTest, SetTargetBitrate) {
  const int kFixedRate = 13333;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), kFixedRate);
}
// Round-trips audio through the floating-point iSAC codec without checking
// the encoded size (0 disables that check), re-runs the decode after a reset,
// and checks that the decoder reports no PLC support.
TEST_F(AudioDecoderIsacFloatTest, EncodeDecode) {
  const int kTolerance = 3399;
  const double kMaxMse = 434951.0;
  const int kDelay = 48;  // Delay from input to output.
  EncodeDecodeTest(0, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// This encoder is expected to stay at a target bitrate of 32000.
TEST_F(AudioDecoderIsacFloatTest, SetTargetBitrate) {
  const int kFixedRate = 32000;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), kFixedRate);
}
// Round-trips audio through the 32000 Hz iSAC configuration without checking
// the encoded size (0 disables that check), re-runs the decode after a reset,
// and checks that the decoder reports no PLC support.
TEST_F(AudioDecoderIsacSwbTest, EncodeDecode) {
  const int kTolerance = 19757;
  const double kMaxMse = 8.18e6;
  const int kDelay = 160;  // Delay from input to output.
  EncodeDecodeTest(0, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// This encoder is expected to stay at a target bitrate of 32000.
TEST_F(AudioDecoderIsacSwbTest, SetTargetBitrate) {
  const int kFixedRate = 32000;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), kFixedRate);
}
// Round-trips audio through the fixed-point iSAC codec, re-runs the decode
// after a reset, and checks that the decoder reports no PLC support. The
// expected encoded size differs between Android ARM, Android ARM64 and all
// other builds.
TEST_F(AudioDecoderIsacFixTest, EncodeDecode) {
#if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM)
  static const int kEncodedBytes = 685;
#elif defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM64)
  static const int kEncodedBytes = 673;
#else
  static const int kEncodedBytes = 671;
#endif
  const int kTolerance = 11034;
  const double kMaxMse = 3.46e6;
  const int kDelay = 54;  // Delay from input to output.
  EncodeDecodeTest(kEncodedBytes, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// This encoder is expected to stay at a target bitrate of 32000.
TEST_F(AudioDecoderIsacFixTest, SetTargetBitrate) {
  const int kFixedRate = 32000;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), kFixedRate);
}
// Round-trips audio through the G.722 codec, re-runs the decode after a
// reset, and checks that the decoder reports no PLC support.
TEST_F(AudioDecoderG722Test, EncodeDecode) {
  const int kTolerance = 6176;
  const double kMaxMse = 238630.0;
  const int kDelay = 22;  // Delay from input to output.
  // The encoded stream is expected to hold data_length_ / 2 bytes.
  EncodeDecodeTest(data_length_ / 2, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// The one-channel G.722 encoder is expected to stay at a target bitrate of
// 64000.
TEST_F(AudioDecoderG722Test, SetTargetBitrate) {
  const int kFixedRate = 64000;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), kFixedRate);
}
// Round-trips audio through the two-channel G.722 codec, requiring the two
// decoded channels to be identical (channel tolerance 0). Then re-runs the
// decode after a reset and checks that the decoder reports no PLC support.
TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
  const int kTolerance = 6176;
  const int kChannelDiffTolerance = 0;
  const double kMaxMse = 238630.0;
  const int kDelay = 22;  // Delay from input to output.
  // The encoded stream is expected to hold data_length_ bytes.
  EncodeDecodeTest(data_length_, kTolerance, kMaxMse, kDelay,
                   kChannelDiffTolerance);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// The two-channel G.722 encoder is expected to stay at a target bitrate of
// 128000.
TEST_F(AudioDecoderG722StereoTest, SetTargetBitrate) {
  const int kFixedRate = 128000;
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), kFixedRate);
}
// Round-trips audio through the Opus codec without checking the encoded size
// (0 disables that check), re-runs the decode after a reset, and checks that
// the decoder reports no PLC support.
TEST_F(AudioDecoderOpusTest, EncodeDecode) {
  const int kTolerance = 6176;
  const double kMaxMse = 238630.0;
  const int kDelay = 22;  // Delay from input to output.
  EncodeDecodeTest(0, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
namespace {
// Requests a series of bitrates from |audio_encoder| and checks the target
// bitrate reported after each. Requests below 6000 are expected to yield
// 6000, and requests above 510000 are expected to yield 510000.
void TestOpusSetTargetBitrates(AudioEncoder* audio_encoder) {
  struct BitrateCase {
    int requested;
    int expected;
  };
  const BitrateCase cases[] = {
      {5999, 6000},
      {6000, 6000},
      {32000, 32000},
      {510000, 510000},
      {511000, 510000},
  };
  for (const BitrateCase& bitrate_case : cases) {
    EXPECT_EQ(bitrate_case.expected,
              SetAndGetTargetBitrate(audio_encoder, bitrate_case.requested));
  }
}
} // namespace
// Runs the Opus target-bitrate checks against the one-channel encoder.
TEST_F(AudioDecoderOpusTest, SetTargetBitrate) {
  AudioEncoder* const encoder = audio_encoder_.get();
  TestOpusSetTargetBitrates(encoder);
}
// Round-trips audio through the two-channel Opus codec without checking the
// encoded size (0 disables that check), requiring the two decoded channels to
// be identical (channel tolerance 0). Then re-runs the decode after a reset
// and checks that the decoder reports no PLC support.
TEST_F(AudioDecoderOpusStereoTest, EncodeDecode) {
  const int kTolerance = 6176;
  const int kChannelDiffTolerance = 0;
  const double kMaxMse = 238630.0;
  const int kDelay = 22;  // Delay from input to output.
  EncodeDecodeTest(0, kTolerance, kMaxMse, kDelay, kChannelDiffTolerance);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// Applies the shared Opus bitrate checks to the stereo encoder.
TEST_F(AudioDecoderOpusStereoTest, SetTargetBitrate) {
  AudioEncoder* const encoder = audio_encoder_.get();
  TestOpusSetTargetBitrates(encoder);
}
namespace {
// Compile-time codec availability, derived from the WEBRTC_CODEC_* build
// macros. Each flag is true exactly when the corresponding macro is defined.

// iLBC.
#if defined(WEBRTC_CODEC_ILBC)
const bool has_ilbc = true;
#else
const bool has_ilbc = false;
#endif

// iSAC, in either the floating-point or the fixed-point flavour.
#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
const bool has_isac = true;
#else
const bool has_isac = false;
#endif

// iSAC super-wideband; only tied to WEBRTC_CODEC_ISAC.
#if defined(WEBRTC_CODEC_ISAC)
const bool has_isac_swb = true;
#else
const bool has_isac_swb = false;
#endif

// G.722.
#if defined(WEBRTC_CODEC_G722)
const bool has_g722 = true;
#else
const bool has_g722 = false;
#endif

// Opus.
#if defined(WEBRTC_CODEC_OPUS)
const bool has_opus = true;
#else
const bool has_opus = false;
#endif
}  // namespace
// Checks CodecSupported() for every decoder type: a fixed set must always be
// reported as supported, while the optional codecs must match the has_* flags.
TEST(AudioDecoder, CodecSupported) {
  // Decoders expected to be supported regardless of build configuration.
  const NetEqDecoder kAlwaysSupported[] = {
      NetEqDecoder::kDecoderPCMu,
      NetEqDecoder::kDecoderPCMa,
      NetEqDecoder::kDecoderPCMu_2ch,
      NetEqDecoder::kDecoderPCMa_2ch,
      NetEqDecoder::kDecoderPCM16B,
      NetEqDecoder::kDecoderPCM16Bwb,
      NetEqDecoder::kDecoderPCM16Bswb32kHz,
      NetEqDecoder::kDecoderPCM16Bswb48kHz,
      NetEqDecoder::kDecoderPCM16B_2ch,
      NetEqDecoder::kDecoderPCM16Bwb_2ch,
      NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch,
      NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch,
      NetEqDecoder::kDecoderPCM16B_5ch,
      NetEqDecoder::kDecoderRED,
      NetEqDecoder::kDecoderAVT,
      NetEqDecoder::kDecoderAVT16kHz,
      NetEqDecoder::kDecoderAVT32kHz,
      NetEqDecoder::kDecoderAVT48kHz,
      NetEqDecoder::kDecoderCNGnb,
      NetEqDecoder::kDecoderCNGwb,
      NetEqDecoder::kDecoderCNGswb32kHz,
      NetEqDecoder::kDecoderCNGswb48kHz,
      NetEqDecoder::kDecoderArbitrary,
  };
  for (const NetEqDecoder decoder : kAlwaysSupported) {
    EXPECT_TRUE(CodecSupported(decoder))
        << "decoder value " << static_cast<int>(decoder);
  }

  // Decoders whose availability follows the build flags.
  struct OptionalCodec {
    bool compiled_in;
    NetEqDecoder decoder;
  };
  const OptionalCodec kOptionalCodecs[] = {
      {has_ilbc, NetEqDecoder::kDecoderILBC},
      {has_isac, NetEqDecoder::kDecoderISAC},
      {has_isac_swb, NetEqDecoder::kDecoderISACswb},
      {has_g722, NetEqDecoder::kDecoderG722},
      {has_g722, NetEqDecoder::kDecoderG722_2ch},
      {has_opus, NetEqDecoder::kDecoderOpus},
      {has_opus, NetEqDecoder::kDecoderOpus_2ch},
  };
  for (const OptionalCodec& entry : kOptionalCodecs) {
    EXPECT_EQ(entry.compiled_in, CodecSupported(entry.decoder))
        << "decoder value " << static_cast<int>(entry.decoder);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,222 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include <assert.h>
#include <algorithm>
#include "webrtc/rtc_base/checks.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Builds |N| empty channels. |N| == 0 trips the assert; when asserts are
// compiled out a single channel is created instead.
AudioMultiVector::AudioMultiVector(size_t N) {
  assert(N > 0);
  const size_t channel_count = (N < 1) ? 1 : N;
  for (size_t created = 0; created != channel_count; ++created) {
    channels_.push_back(new AudioVector);
  }
  num_channels_ = channel_count;
}
// Builds |N| channels, each constructed with |initial_size|. |N| == 0 trips
// the assert; when asserts are compiled out a single channel is created.
AudioMultiVector::AudioMultiVector(size_t N, size_t initial_size) {
  assert(N > 0);
  const size_t channel_count = (N < 1) ? 1 : N;
  for (size_t created = 0; created != channel_count; ++created) {
    channels_.push_back(new AudioVector(initial_size));
  }
  num_channels_ = channel_count;
}
// Frees every channel object allocated by the constructors.
AudioMultiVector::~AudioMultiVector() {
  for (AudioVector* channel : channels_) {
    delete channel;
  }
}
void AudioMultiVector::Clear() {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->Clear();
}
}
void AudioMultiVector::Zeros(size_t length) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->Clear();
channels_[i]->Extend(length);
}
}
// Copies each channel of this object into the channel with the same index in
// |copy_to|. A null |copy_to| makes the call a no-op.
void AudioMultiVector::CopyTo(AudioMultiVector* copy_to) const {
  if (!copy_to) {
    return;
  }
  for (size_t ch = 0; ch != num_channels_; ++ch) {
    AudioVector& target = (*copy_to)[ch];
    channels_[ch]->CopyTo(&target);
  }
}
// Appends channel-interleaved samples from |append_this| to the channels of
// this object. |length| is the total number of samples in |append_this| and
// must be a multiple of the channel count (asserted); each channel grows by
// |length| / channel count samples.
void AudioMultiVector::PushBackInterleaved(const int16_t* append_this,
                                           size_t length) {
  assert(length % num_channels_ == 0);
  if (num_channels_ == 1) {
    // Special case to avoid extra allocation and data shuffling.
    channels_[0]->PushBack(append_this, length);
    return;
  }
  const size_t length_per_channel = length / num_channels_;
  // Scratch buffer holding one de-interleaved channel at a time. Owned by a
  // std::vector so that it is released on every exit path.
  std::vector<int16_t> temp_array(length_per_channel);
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    // Sample |i| of this channel sits at |channel| + |i| * num_channels_ in
    // the interleaved input. Indexing (rather than stepping a pointer) never
    // forms an address beyond the end of the input.
    for (size_t i = 0; i < length_per_channel; ++i) {
      temp_array[i] = append_this[channel + i * num_channels_];
    }
    channels_[channel]->PushBack(temp_array.data(), length_per_channel);
  }
}
// Appends each channel of |append_this| to the matching channel of this
// object. A channel-count mismatch trips the assert; with asserts compiled
// out the call does nothing.
void AudioMultiVector::PushBack(const AudioMultiVector& append_this) {
  assert(num_channels_ == append_this.num_channels_);
  if (num_channels_ != append_this.num_channels_) {
    return;
  }
  for (size_t ch = 0; ch != num_channels_; ++ch) {
    channels_[ch]->PushBack(append_this[ch]);
  }
}
// Appends the samples of |append_this| from |index| to its end onto the
// matching channels of this object. |index| must be smaller than the size of
// |append_this| (asserted); with asserts compiled out it is clamped to the
// last valid position. A channel-count mismatch (also asserted) or an empty
// |append_this| makes the call a no-op.
void AudioMultiVector::PushBackFromIndex(const AudioMultiVector& append_this,
                                         size_t index) {
  assert(index < append_this.Size());
  assert(num_channels_ == append_this.num_channels_);
  const size_t source_size = append_this.Size();
  if (source_size == 0) {
    // Nothing to append. Returning here also keeps |source_size - 1| below
    // from wrapping around, which would leave |index| unclamped and turn
    // |length| into a huge bogus value.
    return;
  }
  if (num_channels_ != append_this.num_channels_) {
    return;
  }
  index = std::min(index, source_size - 1);
  const size_t length = source_size - index;
  for (size_t i = 0; i < num_channels_; ++i) {
    channels_[i]->PushBack(append_this[i], length, index);
  }
}
void AudioMultiVector::PopFront(size_t length) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->PopFront(length);
}
}
void AudioMultiVector::PopBack(size_t length) {
for (size_t i = 0; i < num_channels_; ++i) {
channels_[i]->PopBack(length);
}
}
// Reads |length| samples per channel, starting at the first sample, into
// |destination| in interleaved order. Returns what
// ReadInterleavedFromIndex() returns.
size_t AudioMultiVector::ReadInterleaved(size_t length,
                                         int16_t* destination) const {
  const size_t first_sample = 0;
  return ReadInterleavedFromIndex(first_sample, length, destination);
}
// Reads up to |length| samples per channel, starting at |start_index|, and
// writes them channel-interleaved to |destination| (which must not be null).
// |start_index| is expected to be at most Size() and is clamped to that; the
// read never goes past the end of the stored data. Returns the number of
// elements written to |destination|.
size_t AudioMultiVector::ReadInterleavedFromIndex(size_t start_index,
                                                  size_t length,
                                                  int16_t* destination) const {
  RTC_DCHECK(destination);
  RTC_DCHECK_LE(start_index, Size());
  const size_t size = Size();
  start_index = std::min(start_index, size);
  // Clamp by comparing against the remaining samples. Testing
  // |length + start_index > size| instead would wrap around for very large
  // |length| values and skip the clamp.
  length = std::min(length, size - start_index);
  if (num_channels_ == 1) {
    // Special case to avoid the nested for loop below.
    (*this)[0].CopyTo(length, start_index, destination);
    return length;
  }
  size_t index = 0;  // Number of elements written to |destination| so far.
  for (size_t i = 0; i < length; ++i) {
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      destination[index] = (*this)[channel][i + start_index];
      ++index;
    }
  }
  return index;
}
// Reads the last |length| samples of each channel (capped at Size()) into
// |destination| in interleaved order.
size_t AudioMultiVector::ReadInterleavedFromEnd(size_t length,
                                                int16_t* destination) const {
  // Cannot read more than Size() elements.
  const size_t samples_to_read = std::min(length, Size());
  const size_t first_sample = Size() - samples_to_read;
  return ReadInterleavedFromIndex(first_sample, samples_to_read, destination);
}
// Overwrites every channel, starting at |position|, with the first |length|
// samples of the matching channel in |insert_this|. |length| is capped at the
// size of |insert_this|. A channel-count mismatch trips the assert; with
// asserts compiled out the call does nothing.
void AudioMultiVector::OverwriteAt(const AudioMultiVector& insert_this,
                                   size_t length,
                                   size_t position) {
  assert(num_channels_ == insert_this.num_channels_);
  assert(length <= insert_this.Size());
  const size_t capped_length = std::min(length, insert_this.Size());
  if (num_channels_ != insert_this.num_channels_) {
    return;
  }
  for (size_t ch = 0; ch != num_channels_; ++ch) {
    channels_[ch]->OverwriteAt(insert_this[ch], capped_length, position);
  }
}
// Cross-fades each channel with the matching channel of |append_this| using
// |fade_length|. A channel-count mismatch trips the assert; with asserts
// compiled out the call does nothing.
void AudioMultiVector::CrossFade(const AudioMultiVector& append_this,
                                 size_t fade_length) {
  assert(num_channels_ == append_this.num_channels_);
  if (num_channels_ != append_this.num_channels_) {
    return;
  }
  for (size_t ch = 0; ch != num_channels_; ++ch) {
    channels_[ch]->CrossFade(append_this[ch], fade_length);
  }
}
// Reports the channel count fixed at construction.
size_t AudioMultiVector::Channels() const {
  const size_t channel_count = num_channels_;
  return channel_count;
}
size_t AudioMultiVector::Size() const {
assert(channels_[0]);
return channels_[0]->Size();
}
void AudioMultiVector::AssertSize(size_t required_size) {
if (Size() < required_size) {
size_t extend_length = required_size - Size();
for (size_t channel = 0; channel < num_channels_; ++channel) {
channels_[channel]->Extend(extend_length);
}
}
}
bool AudioMultiVector::Empty() const {
assert(channels_[0]);
return channels_[0]->Empty();
}
void AudioMultiVector::CopyChannel(size_t from_channel, size_t to_channel) {
assert(from_channel < num_channels_);
assert(to_channel < num_channels_);
channels_[from_channel]->CopyTo(channels_[to_channel]);
}
// Read-only access to channel |index|. No bounds checking is performed.
const AudioVector& AudioMultiVector::operator[](size_t index) const {
  const AudioVector* const channel = channels_[index];
  return *channel;
}
// Mutable access to channel |index|. No bounds checking is performed.
AudioVector& AudioMultiVector::operator[](size_t index) {
  AudioVector* const channel = channels_[index];
  return *channel;
}
} // namespace webrtc

View File

@ -0,0 +1,139 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_
#include <string.h> // Access to size_t.
#include <vector>
#include "webrtc/modules/audio_coding/neteq/audio_vector.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// A multi-channel audio buffer: a fixed number of channels, each stored in
// its own AudioVector. The channel objects are owned by this class (allocated
// in the constructors, deleted in the destructor). Copying and assignment are
// disallowed.
class AudioMultiVector {
public:
// Creates an empty AudioMultiVector with |N| audio channels. |N| must be
// larger than 0 (asserted; treated as 1 when asserts are compiled out).
explicit AudioMultiVector(size_t N);
// Creates an AudioMultiVector with |N| audio channels, each channel having
// an initial size. |N| must be larger than 0 (asserted; treated as 1 when
// asserts are compiled out).
AudioMultiVector(size_t N, size_t initial_size);
virtual ~AudioMultiVector();
// Deletes all values and makes the vector empty.
virtual void Clear();
// Clears the vector and inserts |length| zeros into each channel.
virtual void Zeros(size_t length);
// Copies all values from this vector to |copy_to|. Any contents in |copy_to|
// are deleted. After the operation is done, |copy_to| will be an exact
// replica of this object. The source and the destination must have the same
// number of channels. A null |copy_to| is ignored.
virtual void CopyTo(AudioMultiVector* copy_to) const;
// Appends the contents of array |append_this| to the end of this
// object. The array is assumed to be channel-interleaved. |length| is the
// total number of samples in the array and must be an integer multiple of
// this object's number of channels.
// The length of this object is increased with the |length| divided by the
// number of channels.
virtual void PushBackInterleaved(const int16_t* append_this, size_t length);
// Appends the contents of AudioMultiVector |append_this| to this object. The
// length of this object is increased with the length of |append_this|. Both
// objects must have the same number of channels.
virtual void PushBack(const AudioMultiVector& append_this);
// Appends the contents of AudioMultiVector |append_this| to this object,
// taken from |index| up until the end of |append_this|. The length of this
// object is increased. |index| must be smaller than the size of
// |append_this|, and both objects must have the same number of channels.
virtual void PushBackFromIndex(const AudioMultiVector& append_this,
size_t index);
// Removes |length| elements from the beginning of this object, from each
// channel.
virtual void PopFront(size_t length);
// Removes |length| elements from the end of this object, from each
// channel.
virtual void PopBack(size_t length);
// Reads |length| samples from each channel and writes them interleaved to
// |destination|. The total number of elements written to |destination| is
// returned, i.e., |length| * number of channels. If the AudioMultiVector
// contains less than |length| samples per channel, this is reflected in the
// return value.
virtual size_t ReadInterleaved(size_t length, int16_t* destination) const;
// Like ReadInterleaved() above, but reads from |start_index| instead of from
// the beginning. |start_index| must not exceed Size().
virtual size_t ReadInterleavedFromIndex(size_t start_index,
size_t length,
int16_t* destination) const;
// Like ReadInterleaved() above, but reads the last |length| samples of each
// channel instead of the first ones.
virtual size_t ReadInterleavedFromEnd(size_t length,
int16_t* destination) const;
// Overwrites each channel in this AudioMultiVector with values taken from
// |insert_this|. The values are taken from the beginning of |insert_this| and
// are inserted starting at |position|. |length| values are written into each
// channel. If |length| and |position| are selected such that the new data
// extends beyond the end of the current AudioVector, the vector is extended
// to accommodate the new data. |length| is limited to the length of
// |insert_this|.
virtual void OverwriteAt(const AudioMultiVector& insert_this,
size_t length,
size_t position);
// Appends |append_this| to the end of the current vector. Lets the two
// vectors overlap by |fade_length| samples (per channel), and cross-fade
// linearly in this region.
virtual void CrossFade(const AudioMultiVector& append_this,
size_t fade_length);
// Returns the number of channels.
virtual size_t Channels() const;
// Returns the number of elements per channel in this AudioMultiVector (read
// from the first channel).
virtual size_t Size() const;
// Verify that each channel can hold at least |required_size| elements. If
// not, extend accordingly.
virtual void AssertSize(size_t required_size);
// Returns true if the vector holds no samples (judged from the first
// channel).
virtual bool Empty() const;
// Copies the data between two channels in the AudioMultiVector. The method
// does not add any new channel. Thus, |from_channel| and |to_channel| must
// both be valid channel numbers.
virtual void CopyChannel(size_t from_channel, size_t to_channel);
// Accesses and modifies a channel (i.e., an AudioVector object) of this
// AudioMultiVector. |index| is not range-checked.
const AudioVector& operator[](size_t index) const;
AudioVector& operator[](size_t index);
protected:
// One heap-allocated AudioVector per channel.
std::vector<AudioVector*> channels_;
// Number of channels, set once by the constructors.
size_t num_channels_;
private:
RTC_DISALLOW_COPY_AND_ASSIGN(AudioMultiVector);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_

View File

@ -0,0 +1,323 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include <assert.h>
#include <stdlib.h>
#include <string>
#include "webrtc/test/gtest.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Value-parameterized fixture. The test parameter selects the number of
// channels in the AudioMultiVector under test. Typed testing cannot be
// combined with value-parameterized testing, and the AudioVector tests
// already cover the per-channel storage, so these tests keep the sample type
// fixed and vary only the channel count.
class AudioMultiVectorTest : public ::testing::TestWithParam<size_t> {
 protected:
  AudioMultiVectorTest()
      : num_channels_(GetParam()),  // Channel count comes from the parameter.
        interleaved_length_(num_channels_ * array_length()),
        array_interleaved_(new int16_t[num_channels_ * array_length()]) {}

  ~AudioMultiVectorTest() override { delete[] array_interleaved_; }

  void SetUp() override {
    // |array_| holds 0, 1, 2, ...
    for (size_t i = 0; i < array_length(); ++i) {
      array_[i] = static_cast<int16_t>(i);
    }
    // |array_interleaved_| holds, sample by sample, 100 + i for the first
    // channel, 200 + i for the second channel, and so on.
    int16_t* write_pos = array_interleaved_;
    for (size_t i = 0; i < array_length(); ++i) {
      for (size_t channel_number = 1; channel_number <= num_channels_;
           ++channel_number) {
        *write_pos++ = static_cast<int16_t>(channel_number * 100 + i);
      }
    }
  }

  // Number of samples per channel in the test data.
  size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); }

  const size_t num_channels_;
  size_t interleaved_length_;   // Total sample count in |array_interleaved_|.
  int16_t array_[10];
  int16_t* array_interleaved_;  // Owned; released in the destructor.
};
// Constructs one empty vector and one with a preset length, and checks the
// reported channel count, size and emptiness of each.
TEST_P(AudioMultiVectorTest, CreateAndDestroy) {
  {
    AudioMultiVector empty_vec(num_channels_);
    EXPECT_TRUE(empty_vec.Empty());
    EXPECT_EQ(num_channels_, empty_vec.Channels());
    EXPECT_EQ(0u, empty_vec.Size());
  }
  {
    const size_t initial_size = 17;
    AudioMultiVector sized_vec(num_channels_, initial_size);
    EXPECT_FALSE(sized_vec.Empty());
    EXPECT_EQ(num_channels_, sized_vec.Channels());
    EXPECT_EQ(initial_size, sized_vec.Size());
  }
}
// Writes through the non-const operator[] and reads each value back through
// the const overload.
TEST_P(AudioMultiVectorTest, SubscriptOperator) {
  AudioMultiVector vec(num_channels_, array_length());
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    for (size_t pos = 0; pos < array_length(); ++pos) {
      const int16_t value = static_cast<int16_t>(pos);
      vec[ch][pos] = value;
      // Binding to a const reference selects the const overload.
      const AudioVector& read_only = vec[ch];
      EXPECT_EQ(value, read_only[pos]);
    }
  }
}
// Tests PushBackInterleaved together with CopyTo; Clear is exercised as well.
TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector vec_copy(num_channels_);
  vec.CopyTo(&vec_copy);  // |vec_copy| now mirrors |vec|.
  ASSERT_EQ(num_channels_, vec.Channels());
  ASSERT_EQ(array_length(), vec.Size());
  ASSERT_EQ(num_channels_, vec_copy.Channels());
  ASSERT_EQ(array_length(), vec_copy.Size());
  for (size_t ch = 0; ch < vec.Channels(); ++ch) {
    for (size_t pos = 0; pos < array_length(); ++pos) {
      const int16_t expected = static_cast<int16_t>((ch + 1) * 100 + pos);
      EXPECT_EQ(expected, vec[ch][pos]);
      EXPECT_EQ(vec[ch][pos], vec_copy[ch][pos]);
    }
  }
  // An emptied source must produce an empty copy.
  vec.Clear();
  EXPECT_TRUE(vec.Empty());
  vec.CopyTo(&vec_copy);
  EXPECT_TRUE(vec_copy.Empty());
}
// Copying to a null destination must be harmless.
TEST_P(AudioMultiVectorTest, CopyToNull) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector* const no_destination = nullptr;
  vec.CopyTo(no_destination);
}
// Tests PushBack with another AudioMultiVector as the input.
TEST_P(AudioMultiVectorTest, PushBackVector) {
  const size_t len = array_length();
  AudioMultiVector vec1(num_channels_, len);
  AudioMultiVector vec2(num_channels_, len);
  // Per channel, |vec1| gets [0, len) and |vec2| gets [len, 2 * len), both
  // offset by 100 * channel index, so that the concatenation is one
  // continuous ramp.
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    const size_t channel_offset = 100 * ch;
    for (size_t pos = 0; pos < len; ++pos) {
      vec1[ch][pos] = static_cast<int16_t>(pos + channel_offset);
      vec2[ch][pos] = static_cast<int16_t>(pos + channel_offset + len);
    }
  }
  vec1.PushBack(vec2);
  ASSERT_EQ(2u * len, vec1.Size());
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    for (size_t pos = 0; pos < 2 * len; ++pos) {
      EXPECT_EQ(static_cast<int16_t>(pos + 100 * ch), vec1[ch][pos]);
    }
  }
}
// Tests PushBackFromIndex by appending the last two samples of a filled
// vector to an empty one.
TEST_P(AudioMultiVectorTest, PushBackFromIndex) {
  AudioMultiVector source(num_channels_);
  source.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector target(num_channels_);
  const size_t first_copied = array_length() - 2;  // Second-last sample.
  target.PushBackFromIndex(source, first_copied);
  ASSERT_EQ(2u, target.Size());
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    for (size_t pos = 0; pos < 2; ++pos) {
      const size_t interleaved_pos = ch + num_channels_ * (first_copied + pos);
      EXPECT_EQ(array_interleaved_[interleaved_pos], target[ch][pos]);
    }
  }
}
// Fills the vector with data first, then checks that Zeros replaces the
// contents with the requested number of zeros per channel.
TEST_P(AudioMultiVectorTest, Zeros) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  const size_t zero_count = 2 * array_length();
  vec.Zeros(zero_count);
  ASSERT_EQ(num_channels_, vec.Channels());
  ASSERT_EQ(2u * array_length(), vec.Size());
  for (size_t ch = 0; ch < num_channels_; ++ch) {
    for (size_t pos = 0; pos < zero_count; ++pos) {
      EXPECT_EQ(0, vec[ch][pos]);
    }
  }
}
// Tests ReadInterleaved: a partial read and a read that asks for more samples
// than the vector holds. In both cases every element written to the output
// buffer is compared against the interleaved source data.
TEST_P(AudioMultiVectorTest, ReadInterleaved) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  std::vector<int16_t> output(interleaved_length_);
  // Read 5 samples per channel.
  const size_t read_samples = 5;
  const size_t read_elements = num_channels_ * read_samples;
  EXPECT_EQ(read_elements, vec.ReadInterleaved(read_samples, output.data()));
  // Compare all |read_elements| values, not just the first |read_samples|.
  EXPECT_EQ(0, memcmp(array_interleaved_, output.data(),
                      read_elements * sizeof(int16_t)));
  // Read too many samples. Expect to get all samples from the vector.
  EXPECT_EQ(interleaved_length_,
            vec.ReadInterleaved(array_length() + 1, output.data()));
  EXPECT_EQ(0, memcmp(array_interleaved_, output.data(),
                      interleaved_length_ * sizeof(int16_t)));
}
// Tests PopFront, including a request to remove more than is stored.
TEST_P(AudioMultiVectorTest, PopFront) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  vec.PopFront(1);  // Drops the first sample of every channel.
  ASSERT_EQ(array_length() - 1u, vec.Size());
  // Sample |pos| of the shortened vector is sample |pos| + 1 of the
  // interleaved source.
  for (size_t pos = 0; pos < array_length() - 1; ++pos) {
    for (size_t ch = 0; ch < num_channels_; ++ch) {
      EXPECT_EQ(array_interleaved_[(pos + 1) * num_channels_ + ch],
                vec[ch][pos]);
    }
  }
  vec.PopFront(array_length());  // More than what is left.
  EXPECT_EQ(0u, vec.Size());
}
// Tests PopBack, including a request to remove more than is stored.
TEST_P(AudioMultiVectorTest, PopBack) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  vec.PopBack(1);  // Drops the last sample of every channel.
  ASSERT_EQ(array_length() - 1u, vec.Size());
  // The remaining samples line up with the start of the interleaved source.
  for (size_t pos = 0; pos < array_length() - 1; ++pos) {
    for (size_t ch = 0; ch < num_channels_; ++ch) {
      EXPECT_EQ(array_interleaved_[pos * num_channels_ + ch], vec[ch][pos]);
    }
  }
  vec.PopBack(array_length());  // More than what is left.
  EXPECT_EQ(0u, vec.Size());
}
// Tests AssertSize: smaller requests leave the size alone, a larger request
// grows every channel.
TEST_P(AudioMultiVectorTest, AssertSize) {
  const size_t len = array_length();
  AudioMultiVector vec(num_channels_, len);
  EXPECT_EQ(len, vec.Size());
  // Requests that do not exceed the current size must not change anything.
  vec.AssertSize(0);
  vec.AssertSize(len - 1);
  EXPECT_EQ(len, vec.Size());
  // Asking for one more element must grow the vector ...
  vec.AssertSize(len + 1);
  EXPECT_EQ(len + 1, vec.Size());
  // ... and every individual channel along with it.
  for (size_t ch = 0; ch < vec.Channels(); ++ch) {
    EXPECT_EQ(len + 1u, vec[ch].Size());
  }
}
// Test the OverwriteAt method with another AudioMultiVector as input
// argument.
TEST_P(AudioMultiVectorTest, OverwriteAt) {
  AudioMultiVector vec1(num_channels_);
  vec1.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector vec2(num_channels_);
  vec2.Zeros(3);  // 3 zeros in each channel.
  // Overwrite vec1 with the contents of vec2, starting at position 5.
  vec1.OverwriteAt(vec2, 3, 5);
  // Verify result.
  // Length remains the same.
  ASSERT_EQ(array_length(), vec1.Size());
  const int16_t* ptr = array_interleaved_;
  // Check every element, including the last one, so that a write past the
  // intended region would be caught.
  for (size_t i = 0; i < array_length(); ++i) {
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      if (i >= 5 && i <= 7) {
        // Elements 5, 6, 7 should have been replaced with zeros.
        EXPECT_EQ(0, vec1[channel][i]);
      } else {
        EXPECT_EQ(*ptr, vec1[channel][i]);
      }
      ++ptr;
    }
  }
}
// Tests CopyChannel. Only meaningful with at least two channels, so the
// single-channel instantiation returns immediately.
TEST_P(AudioMultiVectorTest, CopyChannel) {
  if (num_channels_ < 2) {
    return;
  }
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  // Keep an untouched reference copy for comparison.
  AudioMultiVector ref(num_channels_);
  ref.PushBack(vec);
  const size_t last_channel = num_channels_ - 1;
  vec.CopyChannel(0, last_channel);  // First channel -> last channel.
  for (size_t pos = 0; pos < array_length(); ++pos) {
    // Every channel except the last must be unchanged.
    for (size_t ch = 0; ch < last_channel; ++ch) {
      EXPECT_EQ(ref[ch][pos], vec[ch][pos]);
    }
    // The last channel must now match the first.
    EXPECT_EQ(vec[0][pos], vec[last_channel][pos]);
  }
}
INSTANTIATE_TEST_CASE_P(TestNumChannels,
AudioMultiVectorTest,
::testing::Values(static_cast<size_t>(1),
static_cast<size_t>(2),
static_cast<size_t>(5)));
} // namespace webrtc

View File

@ -0,0 +1,386 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/audio_vector.h"
#include <assert.h>
#include <algorithm>
#include <memory>
#include "webrtc/rtc_base/checks.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Default construction: allocate storage for kDefaultInitialSize elements via
// the sizing constructor, then reset both indices so the vector starts empty.
AudioVector::AudioVector() : AudioVector(kDefaultInitialSize) {
  begin_index_ = 0;
  end_index_ = 0;
}
// Creates a vector holding |initial_size| zero-valued elements. The backing
// array is allocated with one element more than |initial_size|.
AudioVector::AudioVector(size_t initial_size)
    : array_(new int16_t[initial_size + 1]),
      capacity_(initial_size + 1),
      begin_index_(0),
      end_index_(capacity_ - 1) {
  std::fill_n(array_.get(), capacity_, static_cast<int16_t>(0));
}
// Compiler-generated destructor; nothing is released by hand here.
// NOTE(review): presumably |array_| is a smart pointer that frees the sample
// buffer on destruction; confirm against audio_vector.h.
AudioVector::~AudioVector() = default;
// Makes the vector empty by resetting both indices; the allocated storage is
// left untouched.
void AudioVector::Clear() {
  begin_index_ = 0;
  end_index_ = 0;
}
// Replaces the contents of |copy_to| with a copy of this vector's contents.
// |copy_to| must not be null. Copying a vector onto itself is a no-op.
void AudioVector::CopyTo(AudioVector* copy_to) const {
  RTC_DCHECK(copy_to);
  if (copy_to == this) {
    // Source and destination share one buffer. The chunked memcpy below would
    // operate on overlapping ranges (undefined behaviour) and could clobber
    // samples before they are read, so leave the contents as they are.
    return;
  }
  copy_to->Reserve(Size());
  // Linearize the (possibly wrapped) contents into the start of the
  // destination buffer.
  CopyTo(Size(), 0, copy_to->array_.get());
  copy_to->begin_index_ = 0;
  copy_to->end_index_ = Size();
}
void AudioVector::CopyTo(
size_t length, size_t position, int16_t* copy_to) const {
if (length == 0)
return;
length = std::min(length, Size() - position);
const size_t copy_index = (begin_index_ + position) % capacity_;
const size_t first_chunk_length =
std::min(length, capacity_ - copy_index);
memcpy(copy_to, &array_[copy_index],
first_chunk_length * sizeof(int16_t));
const size_t remaining_length = length - first_chunk_length;
if (remaining_length > 0) {
memcpy(&copy_to[first_chunk_length], array_.get(),
remaining_length * sizeof(int16_t));
}
}
// Prepends the whole contents of |prepend_this| to this vector.
void AudioVector::PushFront(const AudioVector& prepend_this) {
  const size_t total = prepend_this.Size();
  if (total == 0) {
    return;
  }
  // The array overload reserves too, but one large reservation up front is
  // cheaper than growing twice.
  Reserve(Size() + total);
  // The source data may wrap around the end of its backing array. Prepend the
  // wrapped part (which comes last in logical order) first, so that the part
  // starting at the source's begin index ends up in front of it.
  const size_t contiguous_to_end =
      prepend_this.capacity_ - prepend_this.begin_index_;
  const size_t head_length = std::min(total, contiguous_to_end);
  const size_t wrapped_length = total - head_length;
  if (wrapped_length > 0) {
    PushFront(prepend_this.array_.get(), wrapped_length);
  }
  PushFront(&prepend_this.array_[prepend_this.begin_index_], head_length);
}
// Prepends |length| elements from the array |prepend_this| to this vector.
void AudioVector::PushFront(const int16_t* prepend_this, size_t length) {
  if (length == 0) {
    return;
  }
  Reserve(Size() + length);
  // As many of the trailing input elements as fit go directly in front of
  // |begin_index_|; whatever is left over wraps to the end of the array.
  const size_t fits_before_begin = std::min(length, begin_index_);
  const size_t wrapped_length = length - fits_before_begin;
  memcpy(&array_[begin_index_ - fits_before_begin],
         &prepend_this[wrapped_length],
         fits_before_begin * sizeof(int16_t));
  if (wrapped_length > 0) {
    memcpy(&array_[capacity_ - wrapped_length], prepend_this,
           wrapped_length * sizeof(int16_t));
  }
  begin_index_ = (begin_index_ + capacity_ - length) % capacity_;
}
void AudioVector::PushBack(const AudioVector& append_this) {
PushBack(append_this, append_this.Size(), 0);
}
// Appends |length| elements of |append_this|, starting at its element
// |position|, to this vector. The requested range must lie within
// |append_this| (DCHECKed).
void AudioVector::PushBack(
    const AudioVector& append_this, size_t length, size_t position) {
  RTC_DCHECK_LE(position, append_this.Size());
  RTC_DCHECK_LE(length, append_this.Size() - position);
  if (length == 0) {
    return;
  }
  // The array overload reserves too, but one large reservation up front is
  // cheaper than growing twice.
  Reserve(Size() + length);
  // The requested range may wrap around the end of the source's backing
  // array: append the part up to the array end, then the part from its start.
  const size_t source_start =
      (append_this.begin_index_ + position) % append_this.capacity_;
  const size_t contiguous_to_end = append_this.capacity_ - source_start;
  const size_t head_length = std::min(length, contiguous_to_end);
  PushBack(&append_this.array_[source_start], head_length);
  const size_t wrapped_length = length - head_length;
  if (wrapped_length > 0) {
    PushBack(append_this.array_.get(), wrapped_length);
  }
}
// Appends |length| elements from the array |append_this| to this vector.
void AudioVector::PushBack(const int16_t* append_this, size_t length) {
  if (length == 0) {
    return;
  }
  Reserve(Size() + length);
  // Fill up to the end of the backing array first; anything left over wraps
  // around to the start of the array.
  const size_t room_to_end = capacity_ - end_index_;
  const size_t head_length = std::min(length, room_to_end);
  memcpy(&array_[end_index_], append_this, head_length * sizeof(int16_t));
  const size_t wrapped_length = length - head_length;
  if (wrapped_length > 0) {
    memcpy(array_.get(), &append_this[head_length],
           wrapped_length * sizeof(int16_t));
  }
  end_index_ = (end_index_ + length) % capacity_;
}
// Removes up to |length| elements from the front; never more than are stored.
void AudioVector::PopFront(size_t length) {
  if (length == 0) {
    return;
  }
  const size_t removed = std::min(length, Size());
  begin_index_ = (begin_index_ + removed) % capacity_;
}
// Removes up to |length| elements from the back; never more than are stored.
void AudioVector::PopBack(size_t length) {
  if (length == 0) {
    return;
  }
  const size_t removed = std::min(length, Size());
  // Adding |capacity_| before subtracting keeps the unsigned arithmetic from
  // wrapping below zero.
  end_index_ = (end_index_ + capacity_ - removed) % capacity_;
}
// Grows the vector by |extra_length| elements, delegating to
// InsertZerosByPushBack at the current end of the vector.
void AudioVector::Extend(size_t extra_length) {
  if (extra_length > 0) {
    InsertZerosByPushBack(extra_length, Size());
  }
}
// Inserts |length| elements from |insert_this| at |position|. A position
// beyond the end is treated as the end.
void AudioVector::InsertAt(const int16_t* insert_this,
                           size_t length,
                           size_t position) {
  if (length == 0) {
    return;
  }
  // Cap the insert position at the current array length.
  const size_t insert_pos = std::min(Size(), position);
  // Move whichever side of the insertion point holds fewer elements: push
  // from the front when the point is in the first half, otherwise from the
  // back.
  const size_t elements_after = Size() - insert_pos;
  if (insert_pos <= elements_after) {
    InsertByPushFront(insert_this, length, insert_pos);
  } else {
    InsertByPushBack(insert_this, length, insert_pos);
  }
}
// Inserts |length| zeros at |position|. A position beyond the end is treated
// as the end.
void AudioVector::InsertZerosAt(size_t length,
                                size_t position) {
  if (length == 0) {
    return;
  }
  // Cap the insert position at the current array length.
  const size_t insert_pos = std::min(Size(), position);
  // Move whichever side of the insertion point holds fewer elements: push
  // from the front when the point is in the first half, otherwise from the
  // back.
  const size_t elements_after = Size() - insert_pos;
  if (insert_pos <= elements_after) {
    InsertZerosByPushFront(length, insert_pos);
  } else {
    InsertZerosByPushBack(length, insert_pos);
  }
}
// Overwrites this vector, starting at |position|, with the first |length|
// elements of |insert_this|. |length| must not exceed the size of
// |insert_this| (DCHECKed). A position beyond the end is treated as the end.
void AudioVector::OverwriteAt(const AudioVector& insert_this,
                              size_t length,
                              size_t position) {
  RTC_DCHECK_LE(length, insert_this.Size());
  if (length == 0) {
    return;
  }
  // Cap the insert position at the current array length.
  const size_t write_pos = std::min(Size(), position);
  // The array overload reserves too, but one large reservation up front is
  // cheaper than growing twice.
  const size_t new_size = std::max(Size(), write_pos + length);
  Reserve(new_size);
  // The source data may wrap around the end of its backing array: write the
  // part up to the array end, then the part from its start.
  const size_t contiguous_to_end =
      insert_this.capacity_ - insert_this.begin_index_;
  const size_t head_length = std::min(length, contiguous_to_end);
  OverwriteAt(&insert_this.array_[insert_this.begin_index_], head_length,
              write_pos);
  const size_t wrapped_length = length - head_length;
  if (wrapped_length > 0) {
    OverwriteAt(insert_this.array_.get(), wrapped_length,
                write_pos + head_length);
  }
}
// Overwrites this vector, starting at |position|, with |length| elements from
// the array |insert_this|. The vector grows if the written range extends past
// its current end. A position beyond the end is treated as the end.
void AudioVector::OverwriteAt(const int16_t* insert_this,
                              size_t length,
                              size_t position) {
  if (length == 0) {
    return;
  }
  // Cap the insert position at the current array length.
  const size_t write_pos = std::min(Size(), position);
  const size_t new_size = std::max(Size(), write_pos + length);
  Reserve(new_size);
  // The destination range may wrap around the end of the backing array:
  // write up to the array end first, then continue from the array start.
  const size_t write_index = (begin_index_ + write_pos) % capacity_;
  const size_t room_to_end = capacity_ - write_index;
  const size_t head_length = std::min(length, room_to_end);
  memcpy(&array_[write_index], insert_this, head_length * sizeof(int16_t));
  const size_t wrapped_length = length - head_length;
  if (wrapped_length > 0) {
    memcpy(array_.get(), &insert_this[head_length],
           wrapped_length * sizeof(int16_t));
  }
  end_index_ = (begin_index_ + new_size) % capacity_;
}
// Appends |append_this| to this vector with an overlap of |fade_length|
// samples. Inside the overlap the last |fade_length| samples of this vector
// are linearly mixed with the first |fade_length| samples of |append_this|;
// the rest of |append_this| is then pushed back unchanged.
void AudioVector::CrossFade(const AudioVector& append_this,
                            size_t fade_length) {
  // The overlap cannot be longer than either vector. This is asserted in
  // debug builds and clamped otherwise.
  assert(fade_length <= Size());
  assert(fade_length <= append_this.Size());
  fade_length = std::min(std::min(fade_length, Size()), append_this.Size());

  // Unwrapped physical index of the first sample in the overlap region.
  const size_t fade_start = begin_index_ + Size() - fade_length;

  // |alpha| is the weight (Q14, 16384 == 1.0) applied to this vector's own
  // sample; the appended vector gets the complementary weight.
  // TODO(hlundin): Consider skipping +1 in the denominator to produce a
  // smoother cross-fade, in particular at the end of the fade.
  const int kOneQ14 = 16384;
  const int kRoundingQ14 = 8192;
  const int alpha_step = kOneQ14 / (static_cast<int>(fade_length) + 1);
  int alpha = kOneQ14;
  for (size_t i = 0; i < fade_length; ++i) {
    alpha -= alpha_step;
    int16_t& faded = array_[(fade_start + i) % capacity_];
    faded = (alpha * faded + (kOneQ14 - alpha) * append_this[i] +
             kRoundingQ14) >> 14;
  }
  assert(alpha >= 0);  // Verify that the slope was correct.

  // Append the part of |append_this| that lies after the overlap.
  const size_t tail_length = append_this.Size() - fade_length;
  if (tail_length > 0)
    PushBack(append_this, tail_length, fade_length);
}
// Returns how many samples the vector currently holds, i.e. the circular
// distance from |begin_index_| to |end_index_|.
size_t AudioVector::Size() const {
  // Adding |capacity_| first keeps the subtraction from going below zero when
  // the end marker has wrapped around to a lower index than the beginning.
  const size_t unwrapped_end = end_index_ + capacity_;
  return (unwrapped_end - begin_index_) % capacity_;
}
// Reports whether the vector holds no samples. The begin and end markers
// coincide only for an empty vector.
bool AudioVector::Empty() const {
  return end_index_ == begin_index_;
}
// Makes sure the storage can hold at least |n| samples. When it has to grow,
// the current samples are copied to the start of a new array, so the vector is
// un-wrapped afterwards (|begin_index_| becomes 0).
void AudioVector::Reserve(size_t n) {
  if (n < capacity_)
    return;  // Already large enough.

  // One slot more than requested is allocated and always left unused. That
  // keeps "empty" (|begin_index_| == |end_index_|) distinguishable from
  // "full" (|begin_index_| == (|end_index_| + 1) % capacity).
  const size_t grown_capacity = n + 1;
  const size_t sample_count = Size();
  std::unique_ptr<int16_t[]> grown_array(new int16_t[grown_capacity]);
  // Copy before touching the indices, since CopyTo() reads the current state.
  CopyTo(sample_count, 0, grown_array.get());
  array_.swap(grown_array);
  capacity_ = grown_capacity;
  begin_index_ = 0;
  end_index_ = sample_count;
}
// Inserts |length| samples from |insert_this| at |position| by temporarily
// removing the samples after |position|, appending the new samples, and then
// appending the removed samples again. |position| must not exceed Size().
void AudioVector::InsertByPushBack(const int16_t* insert_this,
                                   size_t length,
                                   size_t position) {
  const size_t tail_length = Size() - position;
  std::unique_ptr<int16_t[]> saved_tail;
  if (tail_length > 0) {
    // TODO(minyue): see if it is possible to avoid copying to a buffer.
    saved_tail.reset(new int16_t[tail_length]);
    CopyTo(tail_length, position, saved_tail.get());
    PopBack(tail_length);
  }

  // Grow once for the final size instead of once per PushBack() below.
  Reserve(Size() + length + tail_length);
  PushBack(insert_this, length);
  if (tail_length > 0)
    PushBack(saved_tail.get(), tail_length);
}
// Inserts |length| samples from |insert_this| at |position| by temporarily
// removing the first |position| samples, prepending the new samples, and then
// prepending the removed samples again.
void AudioVector::InsertByPushFront(const int16_t* insert_this,
                                    size_t length,
                                    size_t position) {
  std::unique_ptr<int16_t[]> saved_head;
  const bool has_head = position > 0;
  if (has_head) {
    // TODO(minyue): see if it is possible to avoid copying to a buffer.
    saved_head.reset(new int16_t[position]);
    CopyTo(position, 0, saved_head.get());
    PopFront(position);
  }

  // Grow once for the final size instead of once per PushFront() below.
  Reserve(Size() + length + position);
  PushFront(insert_this, length);
  if (has_head)
    PushFront(saved_head.get(), position);
}
// Inserts |length| zeros at |position| by temporarily removing the samples
// after |position|, writing the zeros directly at the end of the storage, and
// then appending the removed samples again. |position| must not exceed Size().
void AudioVector::InsertZerosByPushBack(size_t length,
                                        size_t position) {
  const size_t tail_length = Size() - position;
  std::unique_ptr<int16_t[]> saved_tail;
  if (tail_length > 0) {
    saved_tail.reset(new int16_t[tail_length]);
    CopyTo(tail_length, position, saved_tail.get());
    PopBack(tail_length);
  }
  Reserve(Size() + length + tail_length);

  // Zero the region starting at |end_index_|. It may cross the physical end
  // of the storage, in which case the remainder continues from index 0.
  const size_t zeros_before_wrap = std::min(length, capacity_ - end_index_);
  memset(array_.get() + end_index_, 0, zeros_before_wrap * sizeof(int16_t));
  const size_t zeros_after_wrap = length - zeros_before_wrap;
  if (zeros_after_wrap > 0)
    memset(array_.get(), 0, zeros_after_wrap * sizeof(int16_t));
  end_index_ = (end_index_ + length) % capacity_;

  if (tail_length > 0)
    PushBack(saved_tail.get(), tail_length);
}
// Inserts |length| zeros at |position| by temporarily removing the first
// |position| samples, writing the zeros directly in front of the remaining
// samples, and then prepending the removed samples again.
void AudioVector::InsertZerosByPushFront(size_t length,
                                         size_t position) {
  std::unique_ptr<int16_t[]> saved_head;
  const bool has_head = position > 0;
  if (has_head) {
    saved_head.reset(new int16_t[position]);
    CopyTo(position, 0, saved_head.get());
    PopFront(position);
  }
  Reserve(Size() + length + position);

  // Zero the region that ends just before |begin_index_|. If fewer than
  // |length| slots are available in front of it, the remainder wraps around
  // to the physical end of the storage.
  const size_t zeros_before_begin = std::min(length, begin_index_);
  memset(array_.get() + (begin_index_ - zeros_before_begin), 0,
         zeros_before_begin * sizeof(int16_t));
  const size_t zeros_at_storage_end = length - zeros_before_begin;
  if (zeros_at_storage_end > 0)
    memset(array_.get() + (capacity_ - zeros_at_storage_end), 0,
           zeros_at_storage_end * sizeof(int16_t));
  begin_index_ = (begin_index_ + capacity_ - length) % capacity_;

  if (has_head)
    PushFront(saved_head.get(), position);
}
} // namespace webrtc

View File

@ -0,0 +1,168 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_
#include <string.h> // Access to size_t.
#include <memory>
#include "webrtc/rtc_base/checks.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// A resizable sequence of 16-bit audio samples. The samples live in a circular
// buffer (|array_|, |begin_index_|, |end_index_|), which lets samples be added
// or removed at either end without shifting the rest.
class AudioVector {
 public:
  // Constructs a vector holding no samples.
  AudioVector();

  // Constructs a vector whose Size() is |initial_size|.
  explicit AudioVector(size_t initial_size);

  virtual ~AudioVector();

  // Removes every sample, leaving the vector empty.
  virtual void Clear();

  // Makes |copy_to| an exact replica of this vector. Whatever |copy_to| held
  // before the call is discarded.
  virtual void CopyTo(AudioVector* copy_to) const;

  // Writes |length| samples, starting at index |position| of this vector, to
  // the array |copy_to|.
  virtual void CopyTo(size_t length, size_t position, int16_t* copy_to) const;

  // Puts all samples of |prepend_this| in front of the current contents. The
  // size grows by the size of |prepend_this|.
  virtual void PushFront(const AudioVector& prepend_this);

  // As above, but the source is the array |prepend_this| holding |length|
  // samples.
  virtual void PushFront(const int16_t* prepend_this, size_t length);

  // Puts all samples of |append_this| after the current contents.
  virtual void PushBack(const AudioVector& append_this);

  // Appends only a segment of |append_this|: |length| samples starting at
  // index |position| of |append_this|.
  virtual void PushBack(const AudioVector& append_this,
                        size_t length,
                        size_t position);

  // Appends |length| samples from the array |append_this|.
  virtual void PushBack(const int16_t* append_this, size_t length);

  // Drops |length| samples from the beginning.
  virtual void PopFront(size_t length);

  // Drops |length| samples from the end.
  virtual void PopBack(size_t length);

  // Grows the vector by |extra_length| samples at the end. The added samples
  // are zero.
  virtual void Extend(size_t extra_length);

  // Inserts |length| samples from the array |insert_this| at |position|,
  // growing the vector by |length|. |position| == 0 prepends and
  // |position| == Size() appends; larger positions are capped at Size().
  virtual void InsertAt(const int16_t* insert_this, size_t length,
                        size_t position);

  // Same as InsertAt(), except that the |length| inserted samples are zeros.
  virtual void InsertZerosAt(size_t length, size_t position);

  // Replaces samples starting at |position| with the first |length| samples of
  // |insert_this|. |position| has the same meaning as for InsertAt(). When the
  // replaced range reaches beyond the current end, the vector is extended to
  // hold the new data.
  virtual void OverwriteAt(const AudioVector& insert_this,
                           size_t length,
                           size_t position);

  // Replaces samples starting at |position| with |length| samples from the
  // array |insert_this|. |position| has the same meaning as for InsertAt().
  // When the replaced range reaches beyond the current end, the vector is
  // extended to hold the new data.
  virtual void OverwriteAt(const int16_t* insert_this,
                           size_t length,
                           size_t position);

  // Appends |append_this| while letting the end of this vector and the start
  // of |append_this| overlap by |fade_length| samples. The overlap is mixed
  // with a linear cross-fade.
  virtual void CrossFade(const AudioVector& append_this, size_t fade_length);

  // Number of samples currently held.
  virtual size_t Size() const;

  // True when the vector holds no samples.
  virtual bool Empty() const;

  // Element access by logical index (0 is the first sample). The const
  // overload is read-only; the non-const overload allows modification.
  inline const int16_t& operator[](size_t index) const {
    return array_[WrapIndex(index, begin_index_, capacity_)];
  }

  inline int16_t& operator[](size_t index) {
    return array_[WrapIndex(index, begin_index_, capacity_)];
  }

 private:
  static const size_t kDefaultInitialSize = 10;

  // Maps a logical |index| to a physical index in the storage, given where the
  // first sample is (|begin_index|) and the storage size (|capacity|). Used by
  // the [] operators. Both inputs must be below |capacity|, so one conditional
  // subtraction does the job of a modulo operation (%).
  static inline size_t WrapIndex(size_t index,
                                 size_t begin_index,
                                 size_t capacity) {
    RTC_DCHECK_LT(index, capacity);
    RTC_DCHECK_LT(begin_index, capacity);
    const size_t unwrapped = begin_index + index;
    RTC_DCHECK_GE(unwrapped, index);  // Check for overflow.
    const size_t wrapped =
        (unwrapped >= capacity) ? unwrapped - capacity : unwrapped;
    RTC_DCHECK_LT(wrapped, capacity);
    return wrapped;
  }

  // Ensures the storage has room for at least |n| samples.
  void Reserve(size_t n);

  // Helpers for InsertAt(): insert by temporarily removing the samples after
  // (PushBack) or before (PushFront) the insertion point.
  void InsertByPushBack(const int16_t* insert_this, size_t length,
                        size_t position);

  void InsertByPushFront(const int16_t* insert_this, size_t length,
                         size_t position);

  // The corresponding helpers for InsertZerosAt().
  void InsertZerosByPushBack(size_t length, size_t position);

  void InsertZerosByPushFront(size_t length, size_t position);

  std::unique_ptr<int16_t[]> array_;

  size_t capacity_;  // Allocated number of samples in the array.

  // Physical index in |array_| of the first sample. When it equals
  // |end_index_| the buffer is empty and there is no first sample.
  size_t begin_index_;

  // Physical index in |array_| one past the last sample.
  size_t end_index_;

  RTC_DISALLOW_COPY_AND_ASSIGN(AudioVector);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_

View File

@ -0,0 +1,386 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/audio_vector.h"
#include <assert.h>
#include <stdlib.h>
#include <string>
#include "webrtc/test/gtest.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Fixture that provides a small reference array, |array_|, filled with the
// values 0, 1, 2, ... before each test.
class AudioVectorTest : public ::testing::Test {
 protected:
  virtual void SetUp() {
    // Each element holds its own index.
    int16_t next_value = 0;
    for (int16_t& sample : array_) {
      sample = next_value;
      ++next_value;
    }
  }

  // Number of elements in |array_|.
  size_t array_length() const {
    return sizeof(array_) / sizeof(array_[0]);
  }

  int16_t array_[10];
};
// Constructs and destroys AudioVector objects: one default-constructed (and
// therefore empty), and one with a given initial size.
TEST_F(AudioVectorTest, CreateAndDestroy) {
  AudioVector empty_vec;
  EXPECT_TRUE(empty_vec.Empty());
  EXPECT_EQ(0u, empty_vec.Size());

  const size_t kInitialSize = 17;
  AudioVector sized_vec(kInitialSize);
  EXPECT_FALSE(sized_vec.Empty());
  EXPECT_EQ(kInitialSize, sized_vec.Size());
}
// Tests both overloads of the subscript operator []: the non-const one for
// writing and the const one for reading back.
TEST_F(AudioVectorTest, SubscriptOperator) {
  AudioVector vec(array_length());
  // Overload resolution picks the const operator[] only when the object
  // expression itself is const. Binding the result of |vec[i]| to a const
  // reference is not enough, so reads go through this const view of |vec|.
  const AudioVector& const_vec = vec;
  for (size_t i = 0; i < array_length(); ++i) {
    vec[i] = static_cast<int16_t>(i);  // Non-const overload (write).
    const int16_t& value = const_vec[i];  // Const overload (read).
    EXPECT_EQ(static_cast<int16_t>(i), value);
  }
}
// Tests PushBack() from a raw array together with CopyTo() into another
// AudioVector. Clear() is exercised as well.
TEST_F(AudioVectorTest, PushBackAndCopy) {
  AudioVector source;
  AudioVector replica;
  source.PushBack(array_, array_length());
  source.CopyTo(&replica);  // |replica| should now mirror |source|.
  ASSERT_EQ(array_length(), source.Size());
  ASSERT_EQ(array_length(), replica.Size());
  for (size_t i = 0; i < array_length(); ++i) {
    EXPECT_EQ(array_[i], source[i]);
    EXPECT_EQ(array_[i], replica[i]);
  }

  // Empty the source and check that it reports being empty.
  source.Clear();
  EXPECT_TRUE(source.Empty());

  // Copying an empty vector must leave the destination empty too.
  source.CopyTo(&replica);
  EXPECT_TRUE(replica.Empty());
}
// Tests the PushBack() overload that takes a whole AudioVector.
TEST_F(AudioVectorTest, PushBackVector) {
  static const size_t kLength = 10;
  AudioVector front_part(kLength);
  AudioVector back_part(kLength);
  // |front_part| becomes [0, 1, ..., kLength - 1] and |back_part| becomes
  // [kLength, kLength + 1, ..., 2 * kLength - 1], so the concatenation is a
  // plain ramp.
  for (size_t i = 0; i < kLength; ++i) {
    front_part[i] = static_cast<int16_t>(i);
    back_part[i] = static_cast<int16_t>(i + kLength);
  }

  front_part.PushBack(back_part);

  ASSERT_EQ(2 * kLength, front_part.Size());
  for (size_t i = 0; i < 2 * kLength; ++i) {
    EXPECT_EQ(static_cast<int16_t>(i), front_part[i]);
  }
}
// Tests PushFront() from a raw array into an empty vector. The samples must
// keep the order they had in the array.
TEST_F(AudioVectorTest, PushFront) {
  AudioVector vec;
  vec.PushFront(array_, array_length());
  ASSERT_EQ(array_length(), vec.Size());
  for (size_t index = 0; index < array_length(); ++index) {
    EXPECT_EQ(array_[index], vec[index]);
  }
}
// Tests the PushFront() overload that takes a whole AudioVector.
TEST_F(AudioVectorTest, PushFrontVector) {
  static const size_t kLength = 10;
  AudioVector front_part(kLength);
  AudioVector back_part(kLength);
  // |front_part| becomes [0, 1, ..., kLength - 1] and |back_part| becomes
  // [kLength, kLength + 1, ..., 2 * kLength - 1], so the concatenation is a
  // plain ramp.
  for (size_t i = 0; i < kLength; ++i) {
    front_part[i] = static_cast<int16_t>(i);
    back_part[i] = static_cast<int16_t>(i + kLength);
  }

  back_part.PushFront(front_part);

  ASSERT_EQ(2 * kLength, back_part.Size());
  for (size_t i = 0; i < 2 * kLength; ++i) {
    EXPECT_EQ(static_cast<int16_t>(i), back_part[i]);
  }
}
// Tests PopFront(): first removing a single sample, then asking for more
// samples than the vector holds.
TEST_F(AudioVectorTest, PopFront) {
  AudioVector vec;
  vec.PushBack(array_, array_length());

  vec.PopFront(1);
  EXPECT_EQ(array_length() - 1u, vec.Size());
  // Every remaining sample has moved one index towards the front.
  for (size_t index = 0; index < array_length() - 1; ++index) {
    EXPECT_EQ(static_cast<int16_t>(index + 1), vec[index]);
  }

  // Requesting more samples than are left must simply empty the vector.
  vec.PopFront(array_length());
  EXPECT_EQ(0u, vec.Size());
}
// Tests PopBack(): first removing a single sample, then asking for more
// samples than the vector holds.
TEST_F(AudioVectorTest, PopBack) {
  AudioVector vec;
  vec.PushBack(array_, array_length());

  vec.PopBack(1);
  EXPECT_EQ(array_length() - 1u, vec.Size());
  // The remaining samples keep their positions.
  for (size_t index = 0; index < array_length() - 1; ++index) {
    EXPECT_EQ(static_cast<int16_t>(index), vec[index]);
  }

  // Requesting more samples than are left must simply empty the vector.
  vec.PopBack(array_length());
  EXPECT_EQ(0u, vec.Size());
}
// Tests Extend(): the vector must grow by the requested amount and the added
// samples must all be zero.
TEST_F(AudioVectorTest, Extend) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  vec.Extend(5);
  ASSERT_EQ(array_length() + 5u, vec.Size());
  // Only the five samples added by Extend() are inspected.
  for (size_t index = array_length(); index < array_length() + 5; ++index) {
    EXPECT_EQ(0, vec[index]);
  }
}
// Tests InsertAt() with an insert position in the middle of the vector.
TEST_F(AudioVectorTest, InsertAt) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t inserted[kNewLength];
  // Use {100, 101, 102, ...} so inserted samples cannot be mistaken for the
  // original ones.
  for (int k = 0; k < kNewLength; ++k) {
    inserted[k] = 100 + k;
  }
  int insert_position = 5;
  vec.InsertAt(inserted, kNewLength, insert_position);

  // Expected layout:
  // {0, 1, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1,
  //  |insert_position|, |insert_position| + 1, ..., kLength - 1}.
  size_t pos = 0;
  for (int k = 0; k < insert_position; ++k, ++pos) {
    EXPECT_EQ(array_[k], vec[pos]);
  }
  for (int k = 0; k < kNewLength; ++k, ++pos) {
    EXPECT_EQ(inserted[k], vec[pos]);
  }
  for (size_t k = insert_position; k < array_length(); ++k, ++pos) {
    EXPECT_EQ(array_[k], vec[pos]);
  }
}
// Tests InsertZerosAt() with an insert position in the middle of the vector.
// The reference result is produced by InsertAt() with an all-zero array.
TEST_F(AudioVectorTest, InsertZerosAt) {
  static const int kNewLength = 5;
  int insert_position = 5;

  AudioVector vec;
  vec.PushBack(array_, array_length());
  vec.InsertZerosAt(kNewLength, insert_position);

  AudioVector vec_ref;
  vec_ref.PushBack(array_, array_length());
  int16_t zeros[kNewLength] = {0};  // All zero elements.
  vec_ref.InsertAt(zeros, kNewLength, insert_position);

  // Both vectors must have the same length and contents.
  ASSERT_EQ(vec_ref.Size(), vec.Size());
  for (size_t index = 0; index < vec.Size(); ++index) {
    EXPECT_EQ(vec_ref[index], vec[index]);
  }
}
// Tests InsertAt() with an insert position at the very start of the vector.
TEST_F(AudioVectorTest, InsertAtBeginning) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t inserted[kNewLength];
  // Use {100, 101, 102, ...} so inserted samples cannot be mistaken for the
  // original ones.
  for (int k = 0; k < kNewLength; ++k) {
    inserted[k] = 100 + k;
  }
  int insert_position = 0;
  vec.InsertAt(inserted, kNewLength, insert_position);

  // Expected layout:
  // {100, 101, ..., 100 + kNewLength - 1, 0, 1, ..., kLength - 1}.
  size_t pos = 0;
  for (int k = 0; k < kNewLength; ++k, ++pos) {
    EXPECT_EQ(inserted[k], vec[pos]);
  }
  for (size_t k = insert_position; k < array_length(); ++k, ++pos) {
    EXPECT_EQ(array_[k], vec[pos]);
  }
}
// Tests InsertAt() with an insert position equal to the vector length, which
// amounts to an append.
TEST_F(AudioVectorTest, InsertAtEnd) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t inserted[kNewLength];
  // Use {100, 101, 102, ...} so inserted samples cannot be mistaken for the
  // original ones.
  for (int k = 0; k < kNewLength; ++k) {
    inserted[k] = 100 + k;
  }
  int insert_position = array_length();
  vec.InsertAt(inserted, kNewLength, insert_position);

  // Expected layout:
  // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1}.
  size_t pos = 0;
  for (size_t k = 0; k < array_length(); ++k, ++pos) {
    EXPECT_EQ(array_[k], vec[pos]);
  }
  for (int k = 0; k < kNewLength; ++k, ++pos) {
    EXPECT_EQ(inserted[k], vec[pos]);
  }
}
// Tests InsertAt() with an insert position past the end of the vector. This
// must not be an error: the position is expected to be capped at the vector
// length, so the outcome equals that of inserting at the end.
TEST_F(AudioVectorTest, InsertBeyondEnd) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t inserted[kNewLength];
  // Use {100, 101, 102, ...} so inserted samples cannot be mistaken for the
  // original ones.
  for (int k = 0; k < kNewLength; ++k) {
    inserted[k] = 100 + k;
  }
  int insert_position = array_length() + 10;  // Too large.
  vec.InsertAt(inserted, kNewLength, insert_position);

  // Expected layout:
  // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1}.
  size_t pos = 0;
  for (size_t k = 0; k < array_length(); ++k, ++pos) {
    EXPECT_EQ(array_[k], vec[pos]);
  }
  for (int k = 0; k < kNewLength; ++k, ++pos) {
    EXPECT_EQ(inserted[k], vec[pos]);
  }
}
// Tests OverwriteAt() with a position and length such that all new values fit
// inside the existing vector.
TEST_F(AudioVectorTest, OverwriteAt) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t replacement[kNewLength];
  // Use {100, 101, 102, ...} so overwritten samples are easy to tell apart
  // from the original ones.
  for (int k = 0; k < kNewLength; ++k) {
    replacement[k] = 100 + k;
  }
  size_t insert_position = 2;
  vec.OverwriteAt(replacement, kNewLength, insert_position);

  // Expected layout (samples are replaced, not shifted):
  // {0, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1,
  //  |insert_position| + kNewLength, ..., kLength - 1}.
  size_t pos = 0;
  while (pos < insert_position) {
    EXPECT_EQ(array_[pos], vec[pos]);
    ++pos;
  }
  for (int k = 0; k < kNewLength; ++k, ++pos) {
    EXPECT_EQ(replacement[k], vec[pos]);
  }
  while (pos < array_length()) {
    EXPECT_EQ(array_[pos], vec[pos]);
    ++pos;
  }
}
// Tests OverwriteAt() with a position such that some of the new values land
// beyond the end of the current vector. That is valid, and the vector is
// expected to grow to hold the new values.
TEST_F(AudioVectorTest, OverwriteBeyondEnd) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t replacement[kNewLength];
  // Use {100, 101, 102, ...} so overwritten samples are easy to tell apart
  // from the original ones.
  for (int k = 0; k < kNewLength; ++k) {
    replacement[k] = 100 + k;
  }
  int insert_position = array_length() - 2;
  vec.OverwriteAt(replacement, kNewLength, insert_position);
  ASSERT_EQ(array_length() - 2u + kNewLength, vec.Size());

  // Expected layout (nothing of the original remains after the new data):
  // {0, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1}.
  int pos = 0;
  while (pos < insert_position) {
    EXPECT_EQ(array_[pos], vec[pos]);
    ++pos;
  }
  for (int k = 0; k < kNewLength; ++k, ++pos) {
    EXPECT_EQ(replacement[k], vec[pos]);
  }
  // The two loops above must have covered all of |vec|.
  EXPECT_EQ(vec.Size(), static_cast<size_t>(pos));
}
// Tests CrossFade() by fading from a constant-0 vector into a constant-100
// vector. Samples outside the overlap must be untouched, and samples inside it
// must follow a linear ramp.
TEST_F(AudioVectorTest, CrossFade) {
  static const size_t kLength = 100;
  static const size_t kFadeLength = 10;
  // Index in the result where the overlap region starts.
  const size_t fade_start = kLength - kFadeLength;

  AudioVector faded(kLength);
  AudioVector appended(kLength);
  // |faded| starts out as all zeros and |appended| as all 100.
  for (size_t i = 0; i < kLength; ++i) {
    faded[i] = 0;
    appended[i] = 100;
  }

  faded.CrossFade(appended, kFadeLength);
  ASSERT_EQ(2 * kLength - kFadeLength, faded.Size());

  // Before the overlap: still the original zeros.
  for (size_t i = 0; i < fade_start; ++i) {
    EXPECT_EQ(0, faded[i]);
  }
  // Inside the overlap: a ramp from 0 towards 100, within a tolerance of 1.
  for (size_t i = 0; i < kFadeLength; ++i) {
    EXPECT_NEAR((i + 1) * 100 / (kFadeLength + 1), faded[fade_start + i], 1);
  }
  // After the overlap: the appended samples, unchanged.
  for (size_t i = kLength; i < faded.Size(); ++i) {
    EXPECT_EQ(100, faded[i]);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,255 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
#include <assert.h>
#include <string.h> // memcpy
#include <algorithm> // min, max
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
#include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
namespace webrtc {
// static
// Out-of-class definition of the class constant. The value is given in the
// class body; this definition provides storage for uses that bind the constant
// to a reference, such as the std::min() call in SetFilterState().
const size_t BackgroundNoise::kMaxLpcOrder;
// Creates a background-noise estimator with one parameter set per channel.
// The mode starts as NetEq::kBgnOn and all per-channel parameters are reset.
BackgroundNoise::BackgroundNoise(size_t num_channels)
    : num_channels_(num_channels),
      // Sized from the constructor argument, so it does not depend on
      // |num_channels_| having been initialized first.
      channel_parameters_(new ChannelParameters[num_channels]),
      mode_(NetEq::kBgnOn) {
  Reset();
}
// Nothing to release explicitly.
BackgroundNoise::~BackgroundNoise() = default;
// Marks the estimator as uninitialized and resets the parameters of every
// channel. |mode_| is deliberately left as it is.
void BackgroundNoise::Reset() {
  size_t channel = 0;
  initialized_ = false;
  while (channel < num_channels_) {
    channel_parameters_[channel].Reset();
    ++channel;
  }
}
void BackgroundNoise::Update(const AudioMultiVector& input,
const PostDecodeVad& vad) {
if (vad.running() && vad.active_speech()) {
// Do not update the background noise parameters if we know that the signal
// is active speech.
return;
}
int32_t auto_correlation[kMaxLpcOrder + 1];
int16_t fiter_output[kMaxLpcOrder + kResidualLength];
int16_t reflection_coefficients[kMaxLpcOrder];
int16_t lpc_coefficients[kMaxLpcOrder + 1];
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
input[channel_ix].CopyTo(kVecLen, input.Size() - kVecLen, temp_signal);
int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen,
auto_correlation);
if ((!vad.running() &&
sample_energy < parameters.energy_update_threshold) ||
(vad.running() && !vad.active_speech())) {
// Generate LPC coefficients.
if (auto_correlation[0] > 0) {
// Regardless of whether the filter is actually updated or not,
// update energy threshold levels, since we have in fact observed
// a low energy signal.
if (sample_energy < parameters.energy_update_threshold) {
// Never go under 1.0 in average sample energy.
parameters.energy_update_threshold = std::max(sample_energy, 1);
parameters.low_energy_update_threshold = 0;
}
// Only update BGN if filter is stable, i.e., if return value from
// Levinson-Durbin function is 1.
if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients,
reflection_coefficients,
kMaxLpcOrder) != 1) {
return;
}
} else {
// Center value in auto-correlation is not positive. Do not update.
return;
}
// Generate the CNG gain factor by looking at the energy of the residual.
WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
fiter_output, lpc_coefficients,
kMaxLpcOrder + 1, kResidualLength);
int32_t residual_energy = WebRtcSpl_DotProductWithScale(fiter_output,
fiter_output,
kResidualLength,
0);
// Check spectral flatness.
// Comparing the residual variance with the input signal variance tells
// if the spectrum is flat or not.
// If 5 * residual_energy >= 16 * sample_energy, the spectrum is flat
// enough. Also ensure that the energy is non-zero.
if ((sample_energy > 0) &&
(int64_t{5} * residual_energy >= int64_t{16} * sample_energy)) {
// Spectrum is flat enough; save filter parameters.
// |temp_signal| + |kVecLen| - |kMaxLpcOrder| points at the first of the
// |kMaxLpcOrder| samples in the residual signal, which will form the
// filter state for the next noise generation.
SaveParameters(channel_ix, lpc_coefficients,
temp_signal + kVecLen - kMaxLpcOrder, sample_energy,
residual_energy);
}
} else {
// Will only happen if post-decode VAD is disabled and |sample_energy| is
// not low enough. Increase the threshold for update so that it increases
// by a factor 4 in 4 seconds.
IncrementEnergyThreshold(channel_ix, sample_energy);
}
}
return;
}
int32_t BackgroundNoise::Energy(size_t channel) const {
assert(channel < num_channels_);
return channel_parameters_[channel].energy;
}
void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) {
assert(channel < num_channels_);
channel_parameters_[channel].mute_factor = value;
}
int16_t BackgroundNoise::MuteFactor(size_t channel) const {
assert(channel < num_channels_);
return channel_parameters_[channel].mute_factor;
}
const int16_t* BackgroundNoise::Filter(size_t channel) const {
assert(channel < num_channels_);
return channel_parameters_[channel].filter;
}
const int16_t* BackgroundNoise::FilterState(size_t channel) const {
assert(channel < num_channels_);
return channel_parameters_[channel].filter_state;
}
void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input,
size_t length) {
assert(channel < num_channels_);
length = std::min(length, kMaxLpcOrder);
memcpy(channel_parameters_[channel].filter_state, input,
length * sizeof(int16_t));
}
int16_t BackgroundNoise::Scale(size_t channel) const {
assert(channel < num_channels_);
return channel_parameters_[channel].scale;
}
int16_t BackgroundNoise::ScaleShift(size_t channel) const {
assert(channel < num_channels_);
return channel_parameters_[channel].scale_shift;
}
// Computes |kMaxLpcOrder| + 1 auto-correlation values of |signal| (|length|
// samples) into |auto_correlation|, and returns the zero-lag value normalized
// to energy per sample.
int32_t BackgroundNoise::CalculateAutoCorrelation(
    const int16_t* signal, size_t length, int32_t* auto_correlation) const {
  const int kCorrelationStep = -1;
  // Correlating the signal with itself gives the auto-correlation. The return
  // value is the scaling that the helper applied to its output.
  const int correlation_scale =
      CrossCorrelationWithAutoShift(signal, signal, length, kMaxLpcOrder + 1,
                                    kCorrelationStep, auto_correlation);

  // Number of shifts to normalize energy to energy/sample. The scaling already
  // applied by the correlation is deducted from it.
  const int energy_sample_shift = kLogVecLen - correlation_scale;
  const int32_t zero_lag = auto_correlation[0];
  return zero_lag >> energy_sample_shift;
}
// Raises the energy update threshold of |channel| by a small fraction of its
// current value, and tracks the maximum energy seen so far, using
// |sample_energy| as the latest observation. The threshold is kept as two
// parts: |energy_update_threshold| and a 16-bit low part,
// |low_energy_update_threshold|, which collects what does not yet amount to a
// whole step of the former.
void BackgroundNoise::IncrementEnergyThreshold(size_t channel,
int32_t sample_energy) {
// TODO(hlundin): Simplify the below threshold update. What this code
// does is simply "threshold += (increment * threshold) >> 16", but due
// to the limited-width operations, it is not exactly the same. The
// difference should be inaudible, but bit-exactness would not be
// maintained.
assert(channel < num_channels_);
ChannelParameters& parameters = channel_parameters_[channel];
// The product increment * threshold is built up piecewise: one term from the
// low part, one from bits 0-7 and one from bits 8-15 of the threshold. The
// sum is accumulated in the low part.
int32_t temp_energy =
(kThresholdIncrement * parameters.low_energy_update_threshold) >> 16;
temp_energy += kThresholdIncrement *
(parameters.energy_update_threshold & 0xFF);
temp_energy += (kThresholdIncrement *
((parameters.energy_update_threshold>>8) & 0xFF)) << 8;
parameters.low_energy_update_threshold += temp_energy;
// The term from bits 16 and up of the threshold goes straight into the
// threshold itself.
parameters.energy_update_threshold += kThresholdIncrement *
(parameters.energy_update_threshold>>16);
// Carry whatever overflowed the 16-bit low part into the threshold, and keep
// only the lowest 16 bits in the low part.
parameters.energy_update_threshold +=
parameters.low_energy_update_threshold >> 16;
parameters.low_energy_update_threshold =
parameters.low_energy_update_threshold & 0x0FFFF;
// Update maximum energy.
// Decrease by a factor 1/1024 each time.
parameters.max_energy = parameters.max_energy -
(parameters.max_energy >> 10);
if (sample_energy > parameters.max_energy) {
parameters.max_energy = sample_energy;
}
// Set |energy_update_threshold| to no less than 60 dB lower than
// |max_energy_|. Adding 524288 assures proper rounding.
// (The shift by 20 bits divides by 2^20, and 524288 is 2^19, i.e. half of
// that divisor.)
int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20;
if (energy_update_threshold > parameters.energy_update_threshold) {
parameters.energy_update_threshold = energy_update_threshold;
}
}
// Stores a new noise model for |channel|: the LPC filter (|kMaxLpcOrder| + 1
// coefficients), the filter state (|kMaxLpcOrder| samples), the energy level,
// and a scale/shift pair derived from |residual_energy|. The estimator is
// marked as initialized afterwards.
void BackgroundNoise::SaveParameters(size_t channel,
                                     const int16_t* lpc_coefficients,
                                     const int16_t* filter_state,
                                     int32_t sample_energy,
                                     int32_t residual_energy) {
  assert(channel < num_channels_);
  ChannelParameters& parameters = channel_parameters_[channel];

  const size_t kNumFilterCoefficients = kMaxLpcOrder + 1;
  memcpy(parameters.filter, lpc_coefficients,
         kNumFilterCoefficients * sizeof(int16_t));
  memcpy(parameters.filter_state, filter_state,
         kMaxLpcOrder * sizeof(int16_t));

  // Save energy level and update energy threshold levels.
  // Never get under 1.0 in average sample energy.
  parameters.energy = (sample_energy < 1) ? 1 : sample_energy;
  parameters.energy_update_threshold = parameters.energy;
  parameters.low_energy_update_threshold = 0;

  // Normalize residual_energy to 29 or 30 bits before sqrt. The shift has to
  // be even, since the square root halves it.
  int16_t norm_shift = WebRtcSpl_NormW32(residual_energy) - 1;
  if ((norm_shift & 0x1) != 0) {
    --norm_shift;
  }
  residual_energy = WEBRTC_SPL_SHIFT_W32(residual_energy, norm_shift);

  // Calculate scale and shift factor.
  parameters.scale = static_cast<int16_t>(WebRtcSpl_SqrtFloor(residual_energy));
  // Add 13 to the |scale_shift_|, since the random numbers table is in
  // Q13.
  // TODO(hlundin): Move the "13" to where the |scale_shift_| is used?
  parameters.scale_shift =
      static_cast<int16_t>(13 + ((kLogResidualLength + norm_shift) / 2));

  initialized_ = true;
}
} // namespace webrtc

View File

@ -0,0 +1,137 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_
#include <string.h> // size_t
#include <memory>
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class PostDecodeVad;
// This class handles estimation of background noise parameters. One set of
// parameters (energy levels, LPC filter, filter state, mute factor and
// scaling) is kept per channel.
class BackgroundNoise {
 public:
  // TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10.
  // Will work anyway, but probably sound a little worse.
  static const size_t kMaxLpcOrder = 8;  // 32000 / 8000 + 4.

  explicit BackgroundNoise(size_t num_channels);
  virtual ~BackgroundNoise();

  void Reset();

  // Updates the parameter estimates based on the signal currently in the
  // |sync_buffer|, and on the latest decision in |vad| if it is running.
  void Update(const AudioMultiVector& sync_buffer,
              const PostDecodeVad& vad);

  // Returns |energy_| for |channel|.
  int32_t Energy(size_t channel) const;

  // Sets the value of |mute_factor_| for |channel| to |value|.
  void SetMuteFactor(size_t channel, int16_t value);

  // Returns |mute_factor_| for |channel|.
  int16_t MuteFactor(size_t channel) const;

  // Returns a pointer to |filter_| for |channel|.
  const int16_t* Filter(size_t channel) const;

  // Returns a pointer to |filter_state_| for |channel|.
  const int16_t* FilterState(size_t channel) const;

  // Copies |length| elements from |input| to the filter state. Will not copy
  // more than |kMaxLpcOrder| elements.
  void SetFilterState(size_t channel, const int16_t* input, size_t length);

  // Returns |scale_| for |channel|.
  int16_t Scale(size_t channel) const;

  // Returns |scale_shift_| for |channel|.
  int16_t ScaleShift(size_t channel) const;

  // Accessors.
  bool initialized() const { return initialized_; }
  NetEq::BackgroundNoiseMode mode() const { return mode_; }

  // Sets the mode of the background noise playout for cases when there is long
  // duration of packet loss.
  void set_mode(NetEq::BackgroundNoiseMode mode) { mode_ = mode; }

 private:
  static const int kThresholdIncrement = 229;  // 0.0035 in Q16.
  static const size_t kVecLen = 256;
  static const int kLogVecLen = 8;  // log2(kVecLen).
  static const size_t kResidualLength = 64;
  static const int16_t kLogResidualLength = 6;  // log2(kResidualLength)

  // The complete set of background noise parameters for one channel.
  struct ChannelParameters {
    // Constructor. Puts all parameters in their default state.
    ChannelParameters() {
      Reset();
    }

    // Restores the default parameter values: zeroed filter state, a filter
    // whose only non-zero coefficient is the first one (4096), and the
    // default energy, mute and scaling values.
    void Reset() {
      energy = 2500;
      max_energy = 0;
      energy_update_threshold = 500000;
      low_energy_update_threshold = 0;
      memset(filter_state, 0, sizeof(filter_state));
      memset(filter, 0, sizeof(filter));
      filter[0] = 4096;
      // Each assignment is its own statement; this one previously ended with
      // a comma and was chained into the next one by the comma operator.
      mute_factor = 0;
      scale = 20000;
      scale_shift = 24;
    }

    int32_t energy;
    int32_t max_energy;
    int32_t energy_update_threshold;
    int32_t low_energy_update_threshold;
    int16_t filter_state[kMaxLpcOrder];
    int16_t filter[kMaxLpcOrder + 1];
    int16_t mute_factor;
    int16_t scale;
    int16_t scale_shift;
  };

  int32_t CalculateAutoCorrelation(const int16_t* signal,
                                   size_t length,
                                   int32_t* auto_correlation) const;

  // Increments the energy threshold by a factor 1 + |kThresholdIncrement|.
  void IncrementEnergyThreshold(size_t channel, int32_t sample_energy);

  // Updates the filter parameters.
  void SaveParameters(size_t channel,
                      const int16_t* lpc_coefficients,
                      const int16_t* filter_state,
                      int32_t sample_energy,
                      int32_t residual_energy);

  size_t num_channels_;
  std::unique_ptr<ChannelParameters[]> channel_parameters_;
  bool initialized_;
  NetEq::BackgroundNoiseMode mode_;

  RTC_DISALLOW_COPY_AND_ASSIGN(BackgroundNoise);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_

View File

@ -0,0 +1,26 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for BackgroundNoise class.
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
// Smoke test: a single-channel BackgroundNoise object can be constructed and
// destroyed.
TEST(BackgroundNoise, CreateAndDestroy) {
  const size_t kNumChannels = 1;
  BackgroundNoise bgn(kNumChannels);
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
#include <algorithm> // Provide access to std::max.
namespace webrtc {
// Constructs the filter in its reset state (see Reset()).
BufferLevelFilter::BufferLevelFilter() {
Reset();
}
// Clears the filtered buffer level and restores the filter factor to 253
// (the factor is in Q8, i.e. 253/256).
void BufferLevelFilter::Reset() {
filtered_current_level_ = 0;
level_factor_ = 253;
}
// Feeds a new buffer size observation into the filter.
//  - buffer_size_packets    : current buffer size in packets (Q0).
//  - time_stretched_samples : samples added or removed by time-scaling since
//                             the last update; may be negative. Ignored when
//                             zero or when |packet_len_samples| is zero.
//  - packet_len_samples     : number of samples per packet, used to convert
//                             |time_stretched_samples| to packets.
void BufferLevelFilter::Update(size_t buffer_size_packets,
                               int time_stretched_samples,
                               size_t packet_len_samples) {
  // Filter:
  // |filtered_current_level_| = |level_factor_| * |filtered_current_level_| +
  //                            (1 - |level_factor_|) * |buffer_size_packets|
  // |level_factor_| and |filtered_current_level_| are in Q8.
  // |buffer_size_packets| is in Q0.
  filtered_current_level_ =
      ((level_factor_ * filtered_current_level_) >> 8) +
      ((256 - level_factor_) * static_cast<int>(buffer_size_packets));

  // Account for time-scale operations (accelerate and pre-emptive expand).
  if (time_stretched_samples && packet_len_samples > 0) {
    // Time-scaling has been performed since last filter update. Subtract the
    // value of |time_stretched_samples| from |filtered_current_level_| after
    // converting |time_stretched_samples| from samples to packets in Q8.
    // The conversion to Q8 multiplies by (1 << 8) instead of left-shifting,
    // because |time_stretched_samples| can be negative and left-shifting a
    // negative value is undefined behavior.
    // Make sure that the filtered value remains non-negative.
    filtered_current_level_ = std::max(
        0,
        filtered_current_level_ -
            (time_stretched_samples * (1 << 8)) /
                static_cast<int>(packet_len_samples));
  }
}
// Selects the filter factor (Q8) from the target buffer level: 251 for
// levels up to 1, 252 up to 3, 253 up to 7 and 254 for anything larger.
void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level) {
  int factor = 254;
  if (target_buffer_level <= 1) {
    factor = 251;
  } else if (target_buffer_level <= 3) {
    factor = 252;
  } else if (target_buffer_level <= 7) {
    factor = 253;
  }
  level_factor_ = factor;
}
// Returns the filtered buffer level. The value is in Q8.
int BufferLevelFilter::filtered_current_level() const {
return filtered_current_level_;
}
} // namespace webrtc

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
#include <stddef.h>
#include "webrtc/rtc_base/constructormagic.h"
namespace webrtc {
// Keeps a filtered (smoothed) estimate of the buffer level, in packets (Q8).
class BufferLevelFilter {
public:
// Constructs the filter in its reset state.
BufferLevelFilter();
virtual ~BufferLevelFilter() {}
// Sets the filtered level to zero and restores the default filter factor.
virtual void Reset();
// Updates the filter. Current buffer size is |buffer_size_packets| (Q0).
// If |time_stretched_samples| is non-zero, the value is converted to the
// corresponding number of packets, and is subtracted from the filtered
// value (thus bypassing the filter operation). |packet_len_samples| is the
// number of audio samples carried in each incoming packet; if it is zero,
// |time_stretched_samples| has no effect.
virtual void Update(size_t buffer_size_packets, int time_stretched_samples,
size_t packet_len_samples);
// Set the current target buffer level (obtained from
// DelayManager::base_target_level()). Used to select the appropriate
// filter coefficient.
virtual void SetTargetBufferLevel(int target_buffer_level);
// Returns the filtered current buffer level in Q8.
virtual int filtered_current_level() const;
private:
int level_factor_; // Filter factor for the buffer level filter in Q8.
int filtered_current_level_; // Filtered current buffer level in Q8.
RTC_DISALLOW_COPY_AND_ASSIGN(BufferLevelFilter);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_

View File

@ -0,0 +1,162 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for BufferLevelFilter class.
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
#include <math.h> // Access to pow function.
#include "webrtc/test/gtest.h"
namespace webrtc {
// A newly created filter reports a filtered level of zero and can be
// destroyed through a pointer.
TEST(BufferLevelFilter, CreateAndDestroy) {
  BufferLevelFilter* level_filter = new BufferLevelFilter();
  EXPECT_EQ(0, level_filter->filtered_current_level());
  delete level_filter;
}
// Feeds a constant buffer size into the filter a varying number of times and
// checks the filtered level against the theoretical step response.
TEST(BufferLevelFilter, ConvergenceTest) {
  BufferLevelFilter filter;
  for (int num_updates = 10; num_updates <= 50; num_updates += 10) {
    for (int buffer_size = 100; buffer_size <= 200; buffer_size += 10) {
      filter.Reset();
      filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.

      // Print out the parameter values on failure.
      std::ostringstream trace;
      trace << "times = " << num_updates << ", value = " << buffer_size;
      SCOPED_TRACE(trace.str());

      for (int update = 0; update < num_updates; ++update) {
        filter.Update(buffer_size, 0 /* time_stretched_samples */,
                      160 /* packet_len_samples */);
      }

      // The theoretical filtered value is
      // (1 - (251/256) ^ |num_updates|) * |buffer_size|.
      const double theoretical_level =
          (1 - pow(251.0 / 256.0, num_updates)) * buffer_size;
      const int expected_level = static_cast<int>(theoretical_level);

      // filtered_current_level() returns the value in Q8. Intermediate-stage
      // rounding errors in the filter implementation may make the actual
      // value differ slightly from the theoretical one, hence EXPECT_NEAR
      // with a tolerance of +/-1.
      EXPECT_NEAR(expected_level, filter.filtered_current_level() >> 8, 1);
    }
  }
}
// Verify that the target buffer level affects the filter convergence: a
// higher target level selects a larger filter coefficient, so the filtered
// value rises more slowly.
TEST(BufferLevelFilter, FilterFactor) {
  BufferLevelFilter filter;
  // Each section below updates the filter 10 times with value 100.
  const int kTimes = 10;
  const int kValue = 100;
  const auto feed_filter = [&]() {
    for (int i = 0; i < kTimes; ++i) {
      filter.Update(kValue, 0 /* time_stretched_samples */,
                    160 /* packet_len_samples */);
    }
  };

  // Filter coefficient 252/256: expect
  // (1 - (252/256) ^ |kTimes|) * |kValue|.
  filter.SetTargetBufferLevel(3);
  feed_filter();
  int expected_value = 14;
  // filtered_current_level() returns the value in Q8.
  EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8);

  // Filter coefficient 253/256: expect
  // (1 - (253/256) ^ |kTimes|) * |kValue|.
  filter.Reset();
  filter.SetTargetBufferLevel(7);
  feed_filter();
  expected_value = 11;
  // filtered_current_level() returns the value in Q8.
  EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8);

  // Filter coefficient 254/256: expect
  // (1 - (254/256) ^ |kTimes|) * |kValue|.
  filter.Reset();
  filter.SetTargetBufferLevel(8);
  feed_filter();
  expected_value = 7;
  // filtered_current_level() returns the value in Q8.
  EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8);
}
// Checks that time-stretched samples are ignored while the packet length is
// zero, and that they shift the filtered level by a whole number of packets
// once a packet length is given.
TEST(BufferLevelFilter, TimeStretchedSamples) {
  BufferLevelFilter filter;
  filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.

  const int kTimes = 10;
  const int kValue = 100;
  const int kPacketSizeSamples = 160;
  const int kNumPacketsStretched = 2;
  const int kTimeStretchedSamples = kNumPacketsStretched * kPacketSizeSamples;

  // Update 10 times with value 100 and packet size 0. With a zero packet
  // size, |kTimeStretchedSamples| is not expected to have any effect.
  for (int i = 0; i < kTimes; ++i) {
    filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */);
  }
  // Expect the filtered value to be
  // (1 - (251/256) ^ |kTimes|) * |kValue|.
  // filtered_current_level() returns the value in Q8.
  const int kExpectedValue = 17;
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);

  // Update again, now with a non-zero packet length. The current filtered
  // value is used as input in order to isolate the impact of
  // |kTimeStretchedSamples|.
  int current_level = filter.filtered_current_level() >> 8;
  filter.Update(current_level, kTimeStretchedSamples, kPacketSizeSamples);
  EXPECT_EQ(kExpectedValue - kNumPacketsStretched,
            filter.filtered_current_level() >> 8);

  // Apply the negated value and verify that we come back to the previous
  // result.
  current_level = filter.filtered_current_level() >> 8;
  filter.Update(current_level, -kTimeStretchedSamples, kPacketSizeSamples);
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
}
// Same as TimeStretchedSamples, but with a negative number of stretched
// samples that is not a whole multiple of the packet size.
TEST(BufferLevelFilter, TimeStretchedSamplesNegativeUnevenFrames) {
  BufferLevelFilter filter;
  filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.

  const int kTimes = 10;
  const int kValue = 100;
  const int kPacketSizeSamples = 160;
  // Truncated towards zero when converted to int.
  const int kTimeStretchedSamples =
      static_cast<int>(-3.1415 * kPacketSizeSamples);

  // Update 10 times with value 100 and packet size 0. With a zero packet
  // size, |kTimeStretchedSamples| is not expected to have any effect.
  for (int i = 0; i < kTimes; ++i) {
    filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */);
  }
  // Expect the filtered value to be
  // (1 - (251/256) ^ |kTimes|) * |kValue|.
  // filtered_current_level() returns the value in Q8.
  const int kExpectedValue = 17;
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);

  // Update again, now with a non-zero packet length. The current filtered
  // value is used as input in order to isolate the impact of
  // |kTimeStretchedSamples|.
  int current_level = filter.filtered_current_level() >> 8;
  filter.Update(current_level, kTimeStretchedSamples, kPacketSizeSamples);
  EXPECT_EQ(21, filter.filtered_current_level() >> 8);

  // Apply the opposite (positive) value and verify that we come back to the
  // previous result.
  current_level = filter.filtered_current_level() >> 8;
  filter.Update(current_level, -kTimeStretchedSamples, kPacketSizeSamples);
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
}
} // namespace webrtc

View File

@ -0,0 +1,124 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/comfort_noise.h"
#include <assert.h>
#include "webrtc/api/audio_codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/rtc_base/logging.h"
namespace webrtc {
// Marks the next call to Generate() as the first one of a new comfort noise
// period.
void ComfortNoise::Reset() {
first_call_ = true;
}
int ComfortNoise::UpdateParameters(const Packet& packet) {
// Get comfort noise decoder.
if (decoder_database_->SetActiveCngDecoder(packet.payload_type) != kOK) {
return kUnknownPayloadType;
}
ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
RTC_DCHECK(cng_decoder);
cng_decoder->UpdateSid(packet.payload);
return kOK;
}
// Generates |requested_length| samples of comfort noise into |output|. On the
// first call of a comfort noise period, |overlap_length_| extra samples are
// generated and cross-faded into the end of |sync_buffer_|. Returns kOK on
// success, otherwise one of the error codes in ReturnCodes.
int ComfortNoise::Generate(size_t requested_length,
                           AudioMultiVector* output) {
  // TODO(hlundin): Change to an enumerator and skip assert.
  assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
         fs_hz_ == 48000);
  // Not adapted for multi-channel yet.
  if (output->Channels() != 1) {
    LOG(LS_ERROR) << "No multi-channel support";
    return kMultiChannelNotSupported;
  }

  // On the first call, generate extra noise that overlaps slightly with the
  // old data.
  const bool new_period = first_call_;
  const size_t number_of_samples =
      new_period ? requested_length + overlap_length_ : requested_length;
  output->AssertSize(number_of_samples);

  // Get the decoder from the database.
  ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
  if (!cng_decoder) {
    LOG(LS_ERROR) << "Unknwown payload type";
    return kUnknownPayloadType;
  }

  std::unique_ptr<int16_t[]> temp(new int16_t[number_of_samples]);
  const bool generated = cng_decoder->Generate(
      rtc::ArrayView<int16_t>(temp.get(), number_of_samples), new_period);
  if (!generated) {
    // Error returned.
    output->Zeros(requested_length);
    LOG(LS_ERROR)
        << "ComfortNoiseDecoder::Genererate failed to generate comfort noise";
    return kInternalError;
  }
  (*output)[0].OverwriteAt(temp.get(), number_of_samples, 0);

  if (first_call_) {
    // Tapering window parameters. Values are in Q15.
    int16_t muting_window;              // Mixing factor for overlap data.
    int16_t muting_window_increment;    // Mixing factor increment (negative).
    int16_t unmuting_window;            // Mixing factor for comfort noise.
    int16_t unmuting_window_increment;  // Mixing factor increment.
    switch (fs_hz_) {
      case 8000:
        muting_window = DspHelper::kMuteFactorStart8kHz;
        muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
        unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
        unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
        break;
      case 16000:
        muting_window = DspHelper::kMuteFactorStart16kHz;
        muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
        unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
        unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
        break;
      case 32000:
        muting_window = DspHelper::kMuteFactorStart32kHz;
        muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
        unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
        unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
        break;
      default:  // fs_hz_ == 48000
        muting_window = DspHelper::kMuteFactorStart48kHz;
        muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
        unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
        unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
        break;
    }

    // Do overlap-add between the new vector and the overlap region at the end
    // of the sync buffer:
    //   overlap[i] = muting_window * overlap[i] + unmuting_window * noise[i]
    auto& overlap_channel = (*sync_buffer_)[0];
    auto& noise_channel = (*output)[0];
    const size_t start_ix = sync_buffer_->Size() - overlap_length_;
    for (size_t i = 0; i < overlap_length_; ++i) {
      const size_t overlap_ix = start_ix + i;
      overlap_channel[overlap_ix] =
          ((overlap_channel[overlap_ix] * muting_window) +
           (noise_channel[i] * unmuting_window) + 16384) >> 15;
      muting_window += muting_window_increment;
      unmuting_window += unmuting_window_increment;
    }

    // Remove |overlap_length_| samples from the front of |output| since they
    // were mixed into |sync_buffer_| above.
    output->PopFront(overlap_length_);
  }
  first_call_ = false;
  return kOK;
}
} // namespace webrtc

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class DecoderDatabase;
class SyncBuffer;
struct Packet;
// This class acts as an interface to the CNG generator.
class ComfortNoise {
 public:
  enum ReturnCodes {
    kOK = 0,
    kUnknownPayloadType,
    kInternalError,
    kMultiChannelNotSupported
  };

  // |fs_hz| is the sample rate in Hz. The overlap length is derived from it
  // as 5 samples per 8000 Hz. The |decoder_database| and |sync_buffer|
  // pointers are stored as-is, without taking ownership.
  ComfortNoise(int fs_hz, DecoderDatabase* decoder_database,
               SyncBuffer* sync_buffer)
      : fs_hz_(fs_hz),
        first_call_(true),
        overlap_length_(5 * fs_hz_ / 8000),
        decoder_database_(decoder_database),
        sync_buffer_(sync_buffer),
        // Initialized so that internal_error_code() returns a defined value
        // (0, i.e. no error) instead of reading an indeterminate member.
        internal_error_code_(0) {
  }

  // Resets the state. Should be called before each new comfort noise period.
  void Reset();

  // Update the comfort noise generator with the parameters in |packet|.
  int UpdateParameters(const Packet& packet);

  // Generates |requested_length| samples of comfort noise and writes to
  // |output|. If this is the first call after Reset (or first after creating
  // the object), it will also mix in comfort noise at the end of the
  // SyncBuffer object provided in the constructor.
  int Generate(size_t requested_length, AudioMultiVector* output);

  // Returns the last error code that was produced by the comfort noise
  // decoder. Returns 0 if no error has been encountered since the last reset.
  int internal_error_code() const { return internal_error_code_; }

 private:
  int fs_hz_;
  bool first_call_;
  size_t overlap_length_;
  DecoderDatabase* decoder_database_;
  SyncBuffer* sync_buffer_;
  int internal_error_code_;
  RTC_DISALLOW_COPY_AND_ASSIGN(ComfortNoise);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for ComfortNoise class.
#include "webrtc/modules/audio_coding/neteq/comfort_noise.h"
#include "webrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
// Smoke test: a ComfortNoise object can be created on top of a mock decoder
// database and a sync buffer, and then destroyed.
TEST(ComfortNoise, CreateAndDestroy) {
  const int kSampleRateHz = 8000;
  MockDecoderDatabase decoder_database;
  SyncBuffer sync_buffer(1, 1000);
  ComfortNoise comfort_noise(kSampleRateHz, &decoder_database, &sync_buffer);
  // Die() is called when |decoder_database| goes out of scope.
  EXPECT_CALL(decoder_database, Die());
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
#include <cstdlib>
#include <limits>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
// Picks a right-shift that protects the correlation sums from overflow, runs
// WebRtcSpl_CrossCorrelation with that shift, and returns the shift used.
int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
                                  const int16_t* sequence_2,
                                  size_t sequence_1_length,
                                  size_t cross_correlation_length,
                                  int cross_correlation_step,
                                  int32_t* cross_correlation) {
  // Largest absolute sample value in |sequence_1|.
  const int16_t peak_1 =
      WebRtcSpl_MaxAbsValueW16(sequence_1, sequence_1_length);

  // Largest absolute sample value in the part of |sequence_2| that is covered
  // while sliding. The total lag may be negative, in which case the covered
  // span starts before |sequence_2|.
  const int total_lag =
      cross_correlation_step * (static_cast<int>(cross_correlation_length) - 1);
  const int16_t* covered_start = sequence_2;
  if (total_lag < 0) {
    covered_start = sequence_2 + total_lag;
  }
  const size_t covered_length = sequence_1_length + std::abs(total_lag);
  const int16_t peak_2 =
      WebRtcSpl_MaxAbsValueW16(covered_start, covered_length);

  // To avoid overflow when computing the sum, the samples are scaled so that
  // (sequence_1_length * peak_1 * peak_2) does not overflow.
  // The expected scaling fulfills
  // 1) sufficient:
  //    sequence_1_length * (peak_1 * peak_2 >> scaling) <= 0x7fffffff;
  // 2) necessary:
  //    if (scaling > 0)
  //      sequence_1_length * (peak_1 * peak_2 >> (scaling - 1)) > 0x7fffffff;
  // The calculation below fulfills 1) and almost fulfills 2). In some corner
  // cases 2) is not satisfied, e.g., for peak_1 = 17, peak_2 = 30848 and
  // sequence_1_length = 4095 the optimal scaling is 0, while this calculation
  // results in 1.
  const int32_t headroom_per_sample =
      std::numeric_limits<int32_t>::max() /
      static_cast<int32_t>(sequence_1_length);
  const int32_t factor = (peak_1 * peak_2) / headroom_per_sample;
  int scaling = 0;
  if (factor != 0) {
    scaling = 31 - WebRtcSpl_NormW32(factor);
  }

  WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2,
                             sequence_1_length, cross_correlation_length,
                             scaling, cross_correlation_step);
  return scaling;
}
} // namespace webrtc

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
#include "webrtc/common_types.h"
namespace webrtc {
// The function calculates the cross-correlation between two sequences
// |sequence_1| and |sequence_2|. |sequence_1| is taken as reference, with
// |sequence_1_length| as its length. |sequence_2| slides for the calculation of
// cross-correlation. The result will be saved in |cross_correlation|.
// |cross_correlation_length| correlation points are calculated.
// The corresponding lag starts from 0, and increases with a step of
// |cross_correlation_step|. The result is not normalized. To avoid
// overflow, the result is right shifted. The number of shifts is chosen
// automatically from the maximum absolute values of the two sequences and is
// returned.
//
// Input:
// - sequence_1 : First sequence (reference).
// - sequence_2 : Second sequence (sliding during calculation).
// - sequence_1_length : Length of |sequence_1|.
// - cross_correlation_length : Number of cross-correlations to calculate.
// - cross_correlation_step : Step in the lag for the cross-correlation.
//
// Output:
// - cross_correlation : The cross-correlation in Q(-right_shifts)
//
// Return:
// Number of right shifts in cross_correlation.
int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
const int16_t* sequence_2,
size_t sequence_1_length,
size_t cross_correlation_length,
int cross_correlation_step,
int32_t* cross_correlation);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_

View File

@ -0,0 +1,170 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
#include <algorithm>
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
#include "webrtc/modules/audio_coding/neteq/decision_logic_fax.h"
#include "webrtc/modules/audio_coding/neteq/decision_logic_normal.h"
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
namespace webrtc {
// Factory: returns a new DecisionLogicNormal for the kPlayoutOn and
// kPlayoutStreaming modes, and a new DecisionLogicFax for the kPlayoutFax and
// kPlayoutOff modes. All arguments are forwarded to the chosen constructor.
DecisionLogic* DecisionLogic::Create(int fs_hz,
                                     size_t output_size_samples,
                                     NetEqPlayoutMode playout_mode,
                                     DecoderDatabase* decoder_database,
                                     const PacketBuffer& packet_buffer,
                                     DelayManager* delay_manager,
                                     BufferLevelFilter* buffer_level_filter,
                                     const TickTimer* tick_timer) {
  switch (playout_mode) {
    case kPlayoutOn:
    case kPlayoutStreaming: {
      return new DecisionLogicNormal(
          fs_hz, output_size_samples, playout_mode, decoder_database,
          packet_buffer, delay_manager, buffer_level_filter, tick_timer);
    }
    case kPlayoutFax:
    case kPlayoutOff: {
      return new DecisionLogicFax(
          fs_hz, output_size_samples, playout_mode, decoder_database,
          packet_buffer, delay_manager, buffer_level_filter, tick_timer);
    }
  }
  // Not reachable for valid |playout_mode| values; the return statement is
  // needed to avoid compiler errors.
  assert(false);
  return nullptr;
}
// Stores the collaborator pointers/references, puts the decision state in its
// initial state (CNG off, no time-scale history, no consecutive expands) and
// starts a time-scale countdown of |kMinTimescaleInterval| + 1 ticks.
// The delay manager is put in streaming mode if and only if |playout_mode| is
// kPlayoutStreaming, and the sample rate is applied through SetSampleRate().
DecisionLogic::DecisionLogic(int fs_hz,
size_t output_size_samples,
NetEqPlayoutMode playout_mode,
DecoderDatabase* decoder_database,
const PacketBuffer& packet_buffer,
DelayManager* delay_manager,
BufferLevelFilter* buffer_level_filter,
const TickTimer* tick_timer)
: decoder_database_(decoder_database),
packet_buffer_(packet_buffer),
delay_manager_(delay_manager),
buffer_level_filter_(buffer_level_filter),
tick_timer_(tick_timer),
cng_state_(kCngOff),
packet_length_samples_(0),
sample_memory_(0),
prev_time_scale_(false),
// Uses the |tick_timer_| member, which is initialized a few lines above.
timescale_countdown_(
tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
num_consecutive_expands_(0),
playout_mode_(playout_mode) {
delay_manager_->set_streaming_mode(playout_mode_ == kPlayoutStreaming);
SetSampleRate(fs_hz, output_size_samples);
}
// Defaulted destructor, defined out of line in this translation unit.
DecisionLogic::~DecisionLogic() = default;
// Full reset: turns CNG off, clears the noise fast-forward counter, the
// packet length, the time-scale history and the expand counter, and drops the
// time-scale countdown.
void DecisionLogic::Reset() {
  // Time-scale state.
  timescale_countdown_.reset();
  prev_time_scale_ = false;
  sample_memory_ = 0;

  // Packet and expand bookkeeping.
  packet_length_samples_ = 0;
  num_consecutive_expands_ = 0;

  // Comfort noise state.
  noise_fast_forward_ = 0;
  cng_state_ = kCngOff;
}
// Partial reset: clears the packet length and the time-scale history, and
// starts a new time-scale countdown of |kMinTimescaleInterval| + 1 ticks.
// The CNG state and the expand counter are left untouched.
void DecisionLogic::SoftReset() {
  timescale_countdown_ =
      tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
  prev_time_scale_ = false;
  sample_memory_ = 0;
  packet_length_samples_ = 0;
}
// Stores the output block size and the sample rate expressed as a multiple of
// 8000 Hz. |fs_hz| must be 8000, 16000, 32000 or 48000.
void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
  // TODO(hlundin): Change to an enumerator and skip assert.
  assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
  output_size_samples_ = output_size_samples;
  fs_mult_ = fs_hz / 8000;
}
// Updates the shared decision state (CNG state, time-scale flag and filtered
// buffer level) and then delegates the actual decision to
// GetDecisionSpecialized().
Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
                                      const Expand& expand,
                                      size_t decoder_frame_length,
                                      const Packet* next_packet,
                                      Modes prev_mode,
                                      bool play_dtmf,
                                      size_t generated_noise_samples,
                                      bool* reset_decoder) {
  // If the last mode was CNG, remember that CNG is on. This is needed if
  // comfort noise is interrupted by DTMF.
  switch (prev_mode) {
    case kModeRfc3389Cng:
      cng_state_ = kCngRfc3389On;
      break;
    case kModeCodecInternalCng:
      cng_state_ = kCngInternalOn;
      break;
    default:
      break;
  }

  // Current buffer size: samples not yet played in the sync buffer (excluding
  // the expand overlap) plus the samples waiting in the packet buffer.
  const size_t samples_left =
      sync_buffer.FutureLength() - expand.overlap_length();
  const size_t cur_size_samples =
      samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);

  // Keep the time-scale flag only if the previous mode really was a
  // time-scaling operation.
  const bool prev_mode_was_time_scale =
      prev_mode == kModeAccelerateSuccess ||
      prev_mode == kModeAccelerateLowEnergy ||
      prev_mode == kModePreemptiveExpandSuccess ||
      prev_mode == kModePreemptiveExpandLowEnergy;
  prev_time_scale_ = prev_time_scale_ && prev_mode_was_time_scale;

  FilterBufferLevel(cur_size_samples, prev_mode);

  return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
                                next_packet, prev_mode, play_dtmf,
                                reset_decoder, generated_noise_samples);
}
// Tracks the number of consecutive expand operations: the counter grows by
// one for kExpand and is cleared for any other operation.
void DecisionLogic::ExpandDecision(Operations operation) {
  if (operation != kExpand) {
    num_consecutive_expands_ = 0;
    return;
  }
  ++num_consecutive_expands_;
}
void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
Modes prev_mode) {
// Do not update buffer history if currently playing CNG since it will bias
// the filtered buffer level.
if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
buffer_level_filter_->SetTargetBufferLevel(
delay_manager_->base_target_level());
size_t buffer_size_packets = 0;
if (packet_length_samples_ > 0) {
// Calculate size in packets.
buffer_size_packets = buffer_size_samples / packet_length_samples_;
}
int sample_memory_local = 0;
if (prev_time_scale_) {
sample_memory_local = sample_memory_;
timescale_countdown_ =
tick_timer_->GetNewCountdown(kMinTimescaleInterval);
}
buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
packet_length_samples_);
prev_time_scale_ = false;
}
}
} // namespace webrtc

View File

@ -0,0 +1,168 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
#include "webrtc/modules/audio_coding/neteq/defines.h"
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BufferLevelFilter;
class DecoderDatabase;
class DelayManager;
class Expand;
class PacketBuffer;
class SyncBuffer;
struct Packet;
// This is the base class for the decision tree implementations. Derived classes
// must implement the method GetDecisionSpecialized().
class DecisionLogic {
 public:
  // Static factory function which creates different types of objects depending
  // on the |playout_mode|. The object is returned as a raw pointer; the unit
  // test in this module releases it with delete.
  static DecisionLogic* Create(int fs_hz,
                               size_t output_size_samples,
                               NetEqPlayoutMode playout_mode,
                               DecoderDatabase* decoder_database,
                               const PacketBuffer& packet_buffer,
                               DelayManager* delay_manager,
                               BufferLevelFilter* buffer_level_filter,
                               const TickTimer* tick_timer);

  // Constructor. The pointers and the |packet_buffer| reference are stored as
  // members (see below).
  // NOTE(review): presumably they must outlive this object -- confirm against
  // the constructor definition.
  DecisionLogic(int fs_hz,
                size_t output_size_samples,
                NetEqPlayoutMode playout_mode,
                DecoderDatabase* decoder_database,
                const PacketBuffer& packet_buffer,
                DelayManager* delay_manager,
                BufferLevelFilter* buffer_level_filter,
                const TickTimer* tick_timer);

  virtual ~DecisionLogic();

  // Resets object to a clean state.
  void Reset();

  // Resets parts of the state. Typically done when switching codecs.
  void SoftReset();

  // Sets the sample rate and the output block size. |fs_hz| must be 8000,
  // 16000, 32000 or 48000.
  void SetSampleRate(int fs_hz, size_t output_size_samples);

  // Returns the operation that should be done next. |sync_buffer| and |expand|
  // are provided for reference. |decoder_frame_length| is the number of samples
  // obtained from the last decoded frame. If there is a packet available, it
  // should be supplied in |next_packet|; otherwise it should be NULL. The mode
  // resulting from the last call to NetEqImpl::GetAudio is supplied in
  // |prev_mode|. If there is a DTMF event to play, |play_dtmf| should be set to
  // true. The output variable |reset_decoder| will be set to true if a reset is
  // required; otherwise it is left unchanged (i.e., it can remain true if it
  // was true before the call). This method ends by calling
  // GetDecisionSpecialized to get the actual return value.
  Operations GetDecision(const SyncBuffer& sync_buffer,
                         const Expand& expand,
                         size_t decoder_frame_length,
                         const Packet* next_packet,
                         Modes prev_mode,
                         bool play_dtmf,
                         size_t generated_noise_samples,
                         bool* reset_decoder);

  // These methods test the |cng_state_| for different conditions.
  bool CngRfc3389On() const { return cng_state_ == kCngRfc3389On; }
  bool CngOff() const { return cng_state_ == kCngOff; }

  // Resets the |cng_state_| to kCngOff.
  void SetCngOff() { cng_state_ = kCngOff; }

  // Reports back to DecisionLogic whether the decision to do expand remains or
  // not. Note that this is necessary, since an expand decision can be changed
  // to kNormal in NetEqImpl::GetDecision if there is still enough data in the
  // sync buffer.
  virtual void ExpandDecision(Operations operation);

  // Adds |value| to |sample_memory_|.
  void AddSampleMemory(int32_t value) {
    sample_memory_ += value;
  }

  // Accessors and mutators.
  void set_sample_memory(int32_t value) { sample_memory_ = value; }
  size_t noise_fast_forward() const { return noise_fast_forward_; }
  size_t packet_length_samples() const { return packet_length_samples_; }
  void set_packet_length_samples(size_t value) {
    packet_length_samples_ = value;
  }
  void set_prev_time_scale(bool value) { prev_time_scale_ = value; }
  NetEqPlayoutMode playout_mode() const { return playout_mode_; }

 protected:
  // The value 5 sets maximum time-stretch rate to about 100 ms/s.
  static const int kMinTimescaleInterval = 5;

  enum CngState {
    kCngOff,
    kCngRfc3389On,
    kCngInternalOn
  };

  // Returns the operation that should be done next. |sync_buffer| and |expand|
  // are provided for reference. |decoder_frame_length| is the number of samples
  // obtained from the last decoded frame. If there is a packet available, it
  // should be supplied in |next_packet|; otherwise it should be NULL. The mode
  // resulting from the last call to NetEqImpl::GetAudio is supplied in
  // |prev_mode|. If there is a DTMF event to play, |play_dtmf| should be set to
  // true. The output variable |reset_decoder| will be set to true if a reset is
  // required; otherwise it is left unchanged (i.e., it can remain true if it
  // was true before the call). Should be implemented by derived classes.
  // Note that the last two parameters come in the opposite order compared to
  // GetDecision().
  virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
                                            const Expand& expand,
                                            size_t decoder_frame_length,
                                            const Packet* next_packet,
                                            Modes prev_mode,
                                            bool play_dtmf,
                                            bool* reset_decoder,
                                            size_t generated_noise_samples) = 0;

  // Updates the |buffer_level_filter_| with the current buffer level.
  // |buffer_size_samples| is the buffer size in samples; it is converted to a
  // number of packets using |packet_length_samples_| before being handed to
  // the filter. Nothing is updated if |prev_mode| is one of the comfort noise
  // modes.
  void FilterBufferLevel(size_t buffer_size_samples, Modes prev_mode);

  DecoderDatabase* decoder_database_;
  const PacketBuffer& packet_buffer_;
  DelayManager* delay_manager_;
  BufferLevelFilter* buffer_level_filter_;
  const TickTimer* tick_timer_;
  int fs_mult_;  // Sample rate divided by 8000 Hz.
  size_t output_size_samples_;
  CngState cng_state_;  // Remember if comfort noise is interrupted by other
                        // event (e.g., DTMF).
  size_t noise_fast_forward_ = 0;
  size_t packet_length_samples_;
  int sample_memory_;
  bool prev_time_scale_;
  // Countdown that is restarted in FilterBufferLevel() after a time-scale
  // operation.
  std::unique_ptr<TickTimer::Countdown> timescale_countdown_;
  int num_consecutive_expands_;
  const NetEqPlayoutMode playout_mode_;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogic);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_

View File

@ -0,0 +1,103 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/decision_logic_fax.h"
#include <assert.h>
#include <algorithm>
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
namespace webrtc {
Operations DecisionLogicFax::GetDecisionSpecialized(
    const SyncBuffer& sync_buffer,
    const Expand& expand,
    size_t decoder_frame_length,
    const Packet* next_packet,
    Modes prev_mode,
    bool play_dtmf,
    bool* reset_decoder,
    size_t generated_noise_samples) {
  assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
  const uint32_t target_timestamp = sync_buffer.end_timestamp();

  if (!next_packet) {
    // No packet. If in CNG mode, play as usual. Otherwise, use other method to
    // generate data.
    if (cng_state_ == kCngRfc3389On) {
      // Continue playing comfort noise.
      return kRfc3389CngNoPacket;
    }
    if (cng_state_ == kCngInternalOn) {
      // Continue playing codec-internal comfort noise.
      return kCodecInternalCng;
    }
    // Nothing to play. Generate some data to play out.
    switch (playout_mode_) {
      case kPlayoutOff:
        return kAlternativePlc;
      case kPlayoutFax:
        return kAudioRepetition;
      default:
        assert(false);
        return kUndefined;
    }
  }

  const uint32_t available_timestamp = next_packet->timestamp;
  const bool is_cng_packet =
      decoder_database_->IsComfortNoise(next_packet->payload_type);

  // True when the packet's timestamp has been reached, counting the noise
  // samples generated so far. The difference is evaluated as a signed 32-bit
  // value.
  const bool packet_is_due =
      static_cast<int32_t>((generated_noise_samples + target_timestamp) -
                           available_timestamp) >= 0;

  if (is_cng_packet) {
    // Either play the CNG packet now, or wait before playing it.
    return packet_is_due ? kRfc3389Cng : kRfc3389CngNoPacket;
  }

  if (target_timestamp == available_timestamp || packet_is_due) {
    return kNormal;
  }

  // If currently playing comfort noise, continue with that. Do not
  // increase the timestamp counter since generated_noise_stopwatch_ in
  // NetEqImpl will take care of the time-keeping.
  if (cng_state_ == kCngRfc3389On) {
    return kRfc3389CngNoPacket;
  }
  if (cng_state_ == kCngInternalOn) {
    return kCodecInternalCng;
  }

  // Otherwise, do packet-loss concealment and increase the
  // timestamp while waiting for the time to play this packet.
  switch (playout_mode_) {
    case kPlayoutOff:
      return kAlternativePlcIncreaseTimestamp;
    case kPlayoutFax:
      return kAudioRepetitionIncreaseTimestamp;
    default:
      assert(0);
      return kUndefined;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Implementation of the DecisionLogic class for playout modes kPlayoutFax and
// kPlayoutOff.
class DecisionLogicFax : public DecisionLogic {
public:
// Constructor. All arguments are forwarded unchanged to the DecisionLogic
// base class; this class adds no state of its own.
DecisionLogicFax(int fs_hz,
size_t output_size_samples,
NetEqPlayoutMode playout_mode,
DecoderDatabase* decoder_database,
const PacketBuffer& packet_buffer,
DelayManager* delay_manager,
BufferLevelFilter* buffer_level_filter,
const TickTimer* tick_timer)
: DecisionLogic(fs_hz,
output_size_samples,
playout_mode,
decoder_database,
packet_buffer,
delay_manager,
buffer_level_filter,
tick_timer) {}
protected:
// Decision tree for the kPlayoutFax and kPlayoutOff modes; see
// DecisionLogic::GetDecisionSpecialized for the meaning of the parameters.
// The implementation asserts that |playout_mode_| is one of those two modes.
Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
const Expand& expand,
size_t decoder_frame_length,
const Packet* next_packet,
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
size_t generated_noise_samples) override;
private:
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_

View File

@ -0,0 +1,242 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/decision_logic_normal.h"
#include <assert.h>
#include <algorithm>
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/modules/include/module_common_types.h"
namespace webrtc {
Operations DecisionLogicNormal::GetDecisionSpecialized(
    const SyncBuffer& sync_buffer,
    const Expand& expand,
    size_t decoder_frame_length,
    const Packet* next_packet,
    Modes prev_mode,
    bool play_dtmf,
    bool* reset_decoder,
    size_t generated_noise_samples) {
  assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);

  // Guard for errors, to avoid getting stuck in error mode. With a packet at
  // hand, kUndefined is used to flag for a reset; without one, expand.
  if (prev_mode == kModeError) {
    return next_packet ? kUndefined : kExpand;
  }

  const uint32_t target_timestamp = sync_buffer.end_timestamp();

  // Handle the case with no packet at all available (except maybe DTMF).
  if (!next_packet) {
    return NoPacket(play_dtmf);
  }

  const uint32_t available_timestamp = next_packet->timestamp;
  if (decoder_database_->IsComfortNoise(next_packet->payload_type)) {
    return CngOperation(prev_mode, target_timestamp, available_timestamp,
                        generated_noise_samples);
  }

  // If the expand period was very long, reset NetEQ since it is likely that the
  // sender was restarted.
  if (num_consecutive_expands_ > kReinitAfterExpands) {
    *reset_decoder = true;
    return kNormal;
  }

  // Check if the required packet is available.
  if (target_timestamp == available_timestamp) {
    return ExpectedPacketAvailable(prev_mode, play_dtmf);
  }

  const uint32_t five_seconds_samples =
      static_cast<uint32_t>(5 * 8000 * fs_mult_);
  if (PacketBuffer::IsObsoleteTimestamp(available_timestamp, target_timestamp,
                                        five_seconds_samples)) {
    // This implies that available_timestamp < target_timestamp, which can
    // happen when a new stream or codec is received. Signal for a reset.
    return kUndefined;
  }

  return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length,
                               prev_mode, target_timestamp,
                               available_timestamp, play_dtmf,
                               generated_noise_samples);
}
Operations DecisionLogicNormal::CngOperation(Modes prev_mode,
                                             uint32_t target_timestamp,
                                             uint32_t available_timestamp,
                                             size_t generated_noise_samples) {
  // Signed difference between target and available timestamp, with the noise
  // generated so far added to the target.
  int32_t timestamp_diff = static_cast<int32_t>(
      static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
      available_timestamp);

  // Target level converted to samples (the product is shifted down by 8 bits).
  const int32_t target_level_samples = static_cast<int32_t>(
      (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);

  const int64_t surplus_wait_samples =
      -static_cast<int64_t>(timestamp_diff) - target_level_samples;
  if (surplus_wait_samples > target_level_samples / 2) {
    // The waiting time for this packet will be longer than 1.5
    // times the wanted buffer delay. Apply fast-forward to cut the
    // waiting time down to the optimal.
    noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
                                                     surplus_wait_samples);
    timestamp_diff =
        rtc::saturated_cast<int32_t>(timestamp_diff + surplus_wait_samples);
  }

  const bool keep_waiting =
      timestamp_diff < 0 && prev_mode == kModeRfc3389Cng;
  if (keep_waiting) {
    // Not time to play this packet yet. Wait another round before using this
    // packet. Keep on playing CNG from previous CNG parameters.
    return kRfc3389CngNoPacket;
  }

  // Otherwise, go for the CNG packet now.
  noise_fast_forward_ = 0;
  return kRfc3389Cng;
}
Operations DecisionLogicNormal::NoPacket(bool play_dtmf) {
  // Ongoing comfort noise takes precedence over DTMF and expand.
  if (cng_state_ == kCngRfc3389On) {
    // Keep on playing comfort noise.
    return kRfc3389CngNoPacket;
  }
  if (cng_state_ == kCngInternalOn) {
    // Keep on playing codec internal comfort noise.
    return kCodecInternalCng;
  }
  // Play DTMF if there is any; otherwise nothing to play, do expand.
  return play_dtmf ? kDtmf : kExpand;
}
Operations DecisionLogicNormal::ExpectedPacketAvailable(Modes prev_mode,
                                                        bool play_dtmf) {
  // Time-stretching is not considered right after an expand or while DTMF is
  // to be played.
  if (prev_mode == kModeExpand || play_dtmf) {
    return kNormal;
  }

  // Check criterion for time-stretching.
  int low_limit;
  int high_limit;
  delay_manager_->BufferLimits(&low_limit, &high_limit);

  // A buffer level of at least four times the high limit triggers fast
  // accelerate, regardless of the timescale countdown.
  if (buffer_level_filter_->filtered_current_level() >= (high_limit << 2)) {
    return kFastAccelerate;
  }

  if (!TimescaleAllowed()) {
    return kNormal;
  }

  if (buffer_level_filter_->filtered_current_level() >= high_limit) {
    return kAccelerate;
  }
  if (buffer_level_filter_->filtered_current_level() < low_limit) {
    return kPreemptiveExpand;
  }
  return kNormal;
}
Operations DecisionLogicNormal::FuturePacketAvailable(
    const SyncBuffer& sync_buffer,
    const Expand& expand,
    size_t decoder_frame_length,
    Modes prev_mode,
    uint32_t target_timestamp,
    uint32_t available_timestamp,
    bool play_dtmf,
    size_t generated_noise_samples) {
  // Required packet is not available, but a future packet is.
  // Check if we should continue with an ongoing expand because the new packet
  // is too far into the future.
  const uint32_t timestamp_leap = available_timestamp - target_timestamp;
  const bool continue_expanding = (prev_mode == kModeExpand) &&
                                  !ReinitAfterExpands(timestamp_leap) &&
                                  !MaxWaitForPacket() &&
                                  PacketTooEarly(timestamp_leap) &&
                                  UnderTargetLevel();
  if (continue_expanding) {
    // If there still is DTMF to play, do that instead of expand.
    return play_dtmf ? kDtmf : kExpand;
  }

  const size_t sync_buffer_samples =
      sync_buffer.FutureLength() - expand.overlap_length();
  const size_t buffered_samples =
      sync_buffer_samples +
      packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;

  // If previous was comfort noise, then no merge is needed.
  const bool prev_was_cng =
      prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng;
  if (prev_was_cng) {
    // Keep the same delay as before the CNG (or maximum 70 ms in buffer as
    // safety precaution), but make sure that the number of samples in buffer
    // is no higher than 4 times the optimal level. (Note that TargetLevel()
    // is in Q8.)
    const bool time_to_play =
        static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
            available_timestamp ||
        buffered_samples >
            ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
                4;
    if (time_to_play) {
      // Time to play this new packet.
      return kNormal;
    }
    // Too early to play this new packet; keep on playing the same kind of
    // comfort noise as before.
    return prev_mode == kModeRfc3389Cng ? kRfc3389CngNoPacket
                                        : kCodecInternalCng;
  }

  // Do not merge unless we have done an expand before.
  if (prev_mode == kModeExpand) {
    return kMerge;
  }
  // Play DTMF instead of expand, if there is any.
  return play_dtmf ? kDtmf : kExpand;
}
bool DecisionLogicNormal::UnderTargetLevel() const {
  // True when the filtered buffer level does not exceed the delay manager's
  // target level.
  const auto filtered_level = buffer_level_filter_->filtered_current_level();
  const auto target_level = delay_manager_->TargetLevel();
  return filtered_level <= target_level;
}
bool DecisionLogicNormal::ReinitAfterExpands(uint32_t timestamp_leap) const {
  // Number of samples covered by kReinitAfterExpands output blocks.
  const uint32_t reinit_threshold =
      static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
  return timestamp_leap >= reinit_threshold;
}
bool DecisionLogicNormal::PacketTooEarly(uint32_t timestamp_leap) const {
  // Number of samples covered by the consecutive expands done so far.
  const uint32_t expanded_samples =
      static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
  return timestamp_leap > expanded_samples;
}
bool DecisionLogicNormal::MaxWaitForPacket() const {
  // True once the number of consecutive expands has reached the limit.
  return !(num_consecutive_expands_ < kMaxWaitForPacket);
}
} // namespace webrtc

View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Implementation of the DecisionLogic class for playout modes kPlayoutOn and
// kPlayoutStreaming.
class DecisionLogicNormal : public DecisionLogic {
public:
// Constructor. All arguments are forwarded unchanged to the DecisionLogic
// base class; this class adds no state of its own.
DecisionLogicNormal(int fs_hz,
size_t output_size_samples,
NetEqPlayoutMode playout_mode,
DecoderDatabase* decoder_database,
const PacketBuffer& packet_buffer,
DelayManager* delay_manager,
BufferLevelFilter* buffer_level_filter,
const TickTimer* tick_timer)
: DecisionLogic(fs_hz,
output_size_samples,
playout_mode,
decoder_database,
packet_buffer,
delay_manager,
buffer_level_filter,
tick_timer) {}
protected:
// When more than this many consecutive expands have been done and a packet
// then becomes available, GetDecisionSpecialized() requests a decoder reset.
static const int kReinitAfterExpands = 100;
// Limit on the number of consecutive expands checked by MaxWaitForPacket().
static const int kMaxWaitForPacket = 10;
// Decision tree for the kPlayoutOn and kPlayoutStreaming modes; see
// DecisionLogic::GetDecisionSpecialized for the meaning of the parameters.
Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
const Expand& expand,
size_t decoder_frame_length,
const Packet* next_packet,
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder,
size_t generated_noise_samples) override;
// Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand.
virtual Operations FuturePacketAvailable(
const SyncBuffer& sync_buffer,
const Expand& expand,
size_t decoder_frame_length,
Modes prev_mode,
uint32_t target_timestamp,
uint32_t available_timestamp,
bool play_dtmf,
size_t generated_noise_samples);
// Returns the operation to do given that the expected packet is available.
virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
// Returns the operation given that no packets are available (except maybe
// a DTMF event, flagged by setting |play_dtmf| true).
virtual Operations NoPacket(bool play_dtmf);
private:
// Returns the operation given that the next available packet is a comfort
// noise payload (RFC 3389 only, not codec-internal).
Operations CngOperation(Modes prev_mode,
uint32_t target_timestamp,
uint32_t available_timestamp,
size_t generated_noise_samples);
// Checks if enough time has elapsed since the last successful timescale
// operation was done (i.e., accelerate or preemptive expand). Also returns
// true when no countdown has been created yet.
bool TimescaleAllowed() const {
return !timescale_countdown_ || timescale_countdown_->Finished();
}
// Checks if the current (filtered) buffer level is under (or equal to) the
// target level.
bool UnderTargetLevel() const;
// Checks if |timestamp_leap| is so long into the future that a reset due
// to exceeding kReinitAfterExpands will be done.
bool ReinitAfterExpands(uint32_t timestamp_leap) const;
// Checks if we still have not done enough expands to cover the distance from
// the last decoded packet to the next available packet, the distance being
// conveyed in |timestamp_leap|.
bool PacketTooEarly(uint32_t timestamp_leap) const;
// Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
bool MaxWaitForPacket() const;
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DecisionLogic class and derived classes.
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
#include "webrtc/test/gtest.h"
#include "webrtc/test/mock_audio_decoder_factory.h"
namespace webrtc {
TEST(DecisionLogic, CreateAndDestroy) {
  const int fs_hz = 8000;
  const int output_size_samples = fs_hz / 100;  // Samples per 10 ms.

  // Collaborators handed to the factory function.
  DecoderDatabase decoder_database(
      new rtc::RefCountedObject<MockAudioDecoderFactory>);
  TickTimer tick_timer;
  PacketBuffer packet_buffer(10, &tick_timer);
  DelayPeakDetector delay_peak_detector(&tick_timer);
  DelayManager delay_manager(240, &delay_peak_detector, &tick_timer);
  BufferLevelFilter buffer_level_filter;

  // Create and immediately destroy one object per playout mode.
  const NetEqPlayoutMode playout_modes[] = {kPlayoutOn, kPlayoutStreaming,
                                            kPlayoutFax, kPlayoutOff};
  for (NetEqPlayoutMode playout_mode : playout_modes) {
    DecisionLogic* logic = DecisionLogic::Create(
        fs_hz, output_size_samples, playout_mode, &decoder_database,
        packet_buffer, &delay_manager, &buffer_level_filter, &tick_timer);
    delete logic;
  }
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,355 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include <utility> // pair
#include "webrtc/api/audio_codecs/audio_decoder.h"
#include "webrtc/rtc_base/checks.h"
#include "webrtc/rtc_base/logging.h"
namespace webrtc {
// Creates an empty database. Both active decoder types start at -1, which the
// rest of this file treats as "no active decoder". |decoder_factory| is kept
// and later passed to the DecoderInfo objects created on registration.
DecoderDatabase::DecoderDatabase(
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory)
: active_decoder_type_(-1),
active_cng_decoder_type_(-1),
decoder_factory_(decoder_factory) {}
// Nothing to clean up explicitly; the members take care of themselves.
DecoderDatabase::~DecoderDatabase() = default;
// Creates an entry for a decoder that will be produced by |factory|. The
// entry is named |codec_name|; the CNG information and the subtype are both
// derived from |audio_format|. No external decoder is associated.
DecoderDatabase::DecoderInfo::DecoderInfo(const SdpAudioFormat& audio_format,
AudioDecoderFactory* factory,
const std::string& codec_name)
: name_(codec_name),
audio_format_(audio_format),
factory_(factory),
external_decoder_(nullptr),
cng_decoder_(CngDecoder::Create(audio_format)),
subtype_(SubtypeFromFormat(audio_format)) {}
// Same as above, using the name of |audio_format| as the codec name.
DecoderDatabase::DecoderInfo::DecoderInfo(const SdpAudioFormat& audio_format,
AudioDecoderFactory* factory)
: DecoderInfo(audio_format, factory, audio_format.name) {}
// Creates an entry from a NetEqDecoder enumerator by converting it to an
// SdpAudioFormat.
// NOTE(review): the result of NetEqDecoderToSdpAudioFormat() is dereferenced
// without a check here, whereas RegisterPayload() tests it first -- confirm
// that callers only pass codec types that have a format.
DecoderDatabase::DecoderInfo::DecoderInfo(NetEqDecoder ct,
AudioDecoderFactory* factory)
: DecoderInfo(*NetEqDecoderToSdpAudioFormat(ct), factory) {}
// Creates an entry for an externally provided decoder |ext_dec|, which must
// not be null (enforced with RTC_CHECK). Such entries have no factory and
// always get the subtype kNormal.
DecoderDatabase::DecoderInfo::DecoderInfo(const SdpAudioFormat& audio_format,
AudioDecoder* ext_dec,
const std::string& codec_name)
: name_(codec_name),
audio_format_(audio_format),
factory_(nullptr),
external_decoder_(ext_dec),
subtype_(Subtype::kNormal) {
RTC_CHECK(ext_dec);
}
// Defaulted move constructor and destructor.
DecoderDatabase::DecoderInfo::DecoderInfo(DecoderInfo&&) = default;
DecoderDatabase::DecoderInfo::~DecoderInfo() = default;
AudioDecoder* DecoderDatabase::DecoderInfo::GetDecoder() const {
  if (subtype_ == Subtype::kNormal) {
    // An external decoder, if there is one, is returned as-is.
    if (external_decoder_) {
      RTC_DCHECK(!decoder_);
      RTC_DCHECK(!cng_decoder_);
      return external_decoder_;
    }
    // Otherwise the decoder is created by the factory the first time it is
    // asked for, and then reused.
    if (!decoder_) {
      // TODO(ossu): Keep a check here for now, since a number of tests create
      // DecoderInfos without factories.
      RTC_DCHECK(factory_);
      decoder_ = factory_->MakeAudioDecoder(audio_format_);
    }
    RTC_DCHECK(decoder_) << "Failed to create: " << audio_format_;
    return decoder_.get();
  }
  // The other subtypes are handled internally, so they have no AudioDecoder
  // objects.
  return nullptr;
}
bool DecoderDatabase::DecoderInfo::IsType(const char* name) const {
  // The format name is compared with |name| using STR_CASE_CMP.
  const bool names_match =
      STR_CASE_CMP(audio_format_.name.c_str(), name) == 0;
  return names_match;
}
bool DecoderDatabase::DecoderInfo::IsType(const std::string& name) const {
  // Convenience overload; defers to the C-string version.
  const char* const c_name = name.c_str();
  return IsType(c_name);
}
rtc::Optional<DecoderDatabase::DecoderInfo::CngDecoder>
DecoderDatabase::DecoderInfo::CngDecoder::Create(const SdpAudioFormat& format) {
  // Only the "CN" format yields a CngDecoder; everything else gives an empty
  // Optional.
  const bool is_comfort_noise = STR_CASE_CMP(format.name.c_str(), "CN") == 0;
  if (!is_comfort_noise) {
    return rtc::Optional<CngDecoder>();
  }

  // CN has a 1:1 RTP clock rate to sample rate ratio.
  const int sample_rate_hz = format.clockrate_hz;
  RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
             sample_rate_hz == 32000 || sample_rate_hz == 48000);
  return rtc::Optional<DecoderDatabase::DecoderInfo::CngDecoder>(
      {sample_rate_hz});
}
DecoderDatabase::DecoderInfo::Subtype
DecoderDatabase::DecoderInfo::SubtypeFromFormat(const SdpAudioFormat& format) {
  // Map the special format names to their subtypes; any other name is a
  // normal decoder.
  const char* const format_name = format.name.c_str();
  if (STR_CASE_CMP(format_name, "CN") == 0) {
    return Subtype::kComfortNoise;
  }
  if (STR_CASE_CMP(format_name, "telephone-event") == 0) {
    return Subtype::kDtmf;
  }
  if (STR_CASE_CMP(format_name, "red") == 0) {
    return Subtype::kRed;
  }
  return Subtype::kNormal;
}
// Returns true if no decoders are registered.
bool DecoderDatabase::Empty() const {
  return decoders_.empty();
}
// Returns the number of registered decoders.
int DecoderDatabase::Size() const {
  const auto num_decoders = decoders_.size();
  return static_cast<int>(num_decoders);
}
void DecoderDatabase::Reset() {
  // Forget which decoders were active, then drop all registered decoders.
  active_cng_decoder_type_ = -1;
  active_decoder_type_ = -1;
  decoders_.clear();
}
std::vector<int> DecoderDatabase::SetCodecs(
    const std::map<int, SdpAudioFormat>& codecs) {
  // First collect all payload types that we'll remove or reassign, then remove
  // them from the database.
  std::vector<int> changed_payload_types;
  for (const auto& registered : decoders_) {
    const auto wanted = codecs.find(registered.first);
    const bool stale = wanted == codecs.end() ||
                       wanted->second != registered.second.GetFormat();
    if (stale) {
      changed_payload_types.push_back(registered.first);
    }
  }
  for (int stale_payload_type : changed_payload_types) {
    Remove(stale_payload_type);
  }

  // Enter the new and changed payload type mappings into the database.
  for (const auto& codec : codecs) {
    const int rtp_payload_type = codec.first;
    const SdpAudioFormat& audio_format = codec.second;
    RTC_DCHECK_GE(rtp_payload_type, 0);
    RTC_DCHECK_LE(rtp_payload_type, 0x7f);
    if (decoders_.find(rtp_payload_type) != decoders_.end()) {
      // The mapping for this payload type hasn't changed.
      continue;
    }
    decoders_.insert(std::make_pair(
        rtp_payload_type, DecoderInfo(audio_format, decoder_factory_.get())));
  }

  return changed_payload_types;
}
int DecoderDatabase::RegisterPayload(uint8_t rtp_payload_type,
                                     NetEqDecoder codec_type,
                                     const std::string& name) {
  // Payload types above 0x7F are rejected.
  if (rtp_payload_type > 0x7F) {
    return kInvalidRtpPayloadType;
  }

  // kCodecArbitrary is only supported through InsertExternal.
  const bool unsupported_codec =
      codec_type == NetEqDecoder::kDecoderArbitrary ||
      !CodecSupported(codec_type);
  if (unsupported_codec) {
    return kCodecNotSupported;
  }

  const auto opt_format = NetEqDecoderToSdpAudioFormat(codec_type);
  if (!opt_format) {
    return kCodecNotSupported;
  }

  DecoderInfo info(*opt_format, decoder_factory_.get(), name);
  const bool inserted =
      decoders_.insert(std::make_pair(rtp_payload_type, std::move(info)))
          .second;
  // A failed insertion means that the database already contains a decoder
  // with type |rtp_payload_type|.
  return inserted ? kOK : kDecoderExists;
}
int DecoderDatabase::RegisterPayload(int rtp_payload_type,
const SdpAudioFormat& audio_format) {
if (rtp_payload_type < 0 || rtp_payload_type > 0x7f) {
return kInvalidRtpPayloadType;
}
const auto ret = decoders_.insert(std::make_pair(
rtp_payload_type, DecoderInfo(audio_format, decoder_factory_.get())));
if (ret.second == false) {
// Database already contains a decoder with type |rtp_payload_type|.
return kDecoderExists;
}
return kOK;
}
int DecoderDatabase::InsertExternal(uint8_t rtp_payload_type,
NetEqDecoder codec_type,
const std::string& codec_name,
AudioDecoder* decoder) {
if (rtp_payload_type > 0x7F) {
return kInvalidRtpPayloadType;
}
if (!decoder) {
return kInvalidPointer;
}
const auto opt_db_format = NetEqDecoderToSdpAudioFormat(codec_type);
const SdpAudioFormat format = opt_db_format.value_or({"arbitrary", 0, 0});
std::pair<DecoderMap::iterator, bool> ret;
DecoderInfo info(format, decoder, codec_name);
ret = decoders_.insert(std::make_pair(rtp_payload_type, std::move(info)));
if (ret.second == false) {
// Database already contains a decoder with type |rtp_payload_type|.
return kDecoderExists;
}
return kOK;
}
int DecoderDatabase::Remove(uint8_t rtp_payload_type) {
  const bool erased = decoders_.erase(rtp_payload_type) != 0;
  if (!erased) {
    // No decoder with that |rtp_payload_type|.
    return kDecoderNotFound;
  }

  // If the removed payload type was active, there is no longer an active
  // decoder of that kind.
  if (active_cng_decoder_type_ == rtp_payload_type) {
    active_cng_decoder_type_ = -1;  // No active CNG decoder.
  }
  if (active_decoder_type_ == rtp_payload_type) {
    active_decoder_type_ = -1;  // No active decoder.
  }
  return kOK;
}
void DecoderDatabase::RemoveAll() {
  // With every decoder gone, nothing can be active any longer.
  active_cng_decoder_type_ = -1;  // No active CNG decoder.
  active_decoder_type_ = -1;      // No active decoder.
  decoders_.clear();
}
// Looks up the entry registered for |rtp_payload_type|. Returns a pointer to
// the stored DecoderInfo, or a null pointer if the payload type is unknown.
const DecoderDatabase::DecoderInfo* DecoderDatabase::GetDecoderInfo(
    uint8_t rtp_payload_type) const {
  const auto entry = decoders_.find(rtp_payload_type);
  return entry == decoders_.end() ? nullptr : &entry->second;
}
int DecoderDatabase::SetActiveDecoder(uint8_t rtp_payload_type,
bool* new_decoder) {
// Check that |rtp_payload_type| exists in the database.
const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
if (!info) {
// Decoder not found.
return kDecoderNotFound;
}
RTC_CHECK(!info->IsComfortNoise());
RTC_DCHECK(new_decoder);
*new_decoder = false;
if (active_decoder_type_ < 0) {
// This is the first active decoder.
*new_decoder = true;
} else if (active_decoder_type_ != rtp_payload_type) {
// Moving from one active decoder to another. Delete the first one.
const DecoderInfo *old_info = GetDecoderInfo(active_decoder_type_);
RTC_DCHECK(old_info);
old_info->DropDecoder();
*new_decoder = true;
}
active_decoder_type_ = rtp_payload_type;
return kOK;
}
// Returns the AudioDecoder of the active payload type, or a null pointer when
// no decoder is active (active type is negative).
AudioDecoder* DecoderDatabase::GetActiveDecoder() const {
  const bool has_active = active_decoder_type_ >= 0;
  return has_active ? GetDecoder(active_decoder_type_) : nullptr;
}
int DecoderDatabase::SetActiveCngDecoder(uint8_t rtp_payload_type) {
// Check that |rtp_payload_type| exists in the database.
const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
if (!info) {
// Decoder not found.
return kDecoderNotFound;
}
if (active_cng_decoder_type_ >= 0 &&
active_cng_decoder_type_ != rtp_payload_type) {
// Moving from one active CNG decoder to another. Delete the first one.
RTC_DCHECK(active_cng_decoder_);
active_cng_decoder_.reset();
}
active_cng_decoder_type_ = rtp_payload_type;
return kOK;
}
// Returns the active ComfortNoiseDecoder, or a null pointer when no CNG
// payload type is active. The decoder object is created on first use.
ComfortNoiseDecoder* DecoderDatabase::GetActiveCngDecoder() const {
  const bool has_active_cng = active_cng_decoder_type_ >= 0;
  if (!has_active_cng) {
    return nullptr;
  }
  if (active_cng_decoder_ == nullptr) {
    active_cng_decoder_.reset(new ComfortNoiseDecoder);
  }
  return active_cng_decoder_.get();
}
// Returns the AudioDecoder of the entry registered for |rtp_payload_type|
// (via DecoderInfo::GetDecoder()), or a null pointer if nothing is registered.
AudioDecoder* DecoderDatabase::GetDecoder(uint8_t rtp_payload_type) const {
  const DecoderInfo* const entry = GetDecoderInfo(rtp_payload_type);
  if (entry == nullptr) {
    return nullptr;
  }
  return entry->GetDecoder();
}
bool DecoderDatabase::IsType(uint8_t rtp_payload_type, const char* name) const {
const DecoderInfo* info = GetDecoderInfo(rtp_payload_type);
return info && info->IsType(name);
}
// std::string convenience overload; forwards to the const char* overload.
bool DecoderDatabase::IsType(uint8_t rtp_payload_type,
                             const std::string& name) const {
  const char* const c_name = name.c_str();
  return IsType(rtp_payload_type, c_name);
}
bool DecoderDatabase::IsComfortNoise(uint8_t rtp_payload_type) const {
const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
return info && info->IsComfortNoise();
}
bool DecoderDatabase::IsDtmf(uint8_t rtp_payload_type) const {
const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
return info && info->IsDtmf();
}
bool DecoderDatabase::IsRed(uint8_t rtp_payload_type) const {
const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
return info && info->IsRed();
}
int DecoderDatabase::CheckPayloadTypes(const PacketList& packet_list) const {
PacketList::const_iterator it;
for (it = packet_list.begin(); it != packet_list.end(); ++it) {
if (!GetDecoderInfo(it->payload_type)) {
// Payload type is not found.
LOG(LS_WARNING) << "CheckPayloadTypes: unknown RTP payload type "
<< static_cast<int>(it->payload_type);
return kDecoderNotFound;
}
}
return kOK;
}
} // namespace webrtc

View File

@ -0,0 +1,246 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
#include <map>
#include <memory>
#include <string>
#include "webrtc/api/audio_codecs/audio_decoder_factory.h"
#include "webrtc/api/audio_codecs/audio_format.h"
#include "webrtc/common_types.h" // NULL
#include "webrtc/modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/modules/audio_coding/neteq/packet.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/rtc_base/scoped_ref_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Database that associates RTP payload types with decoder entries
// (DecoderInfo) and keeps track of which payload type is currently the active
// speech decoder and which is the active comfort noise decoder.
class DecoderDatabase {
public:
// Status codes returned by the database methods. kOK is zero; every error
// code is negative.
enum DatabaseReturnCodes {
kOK = 0,
kInvalidRtpPayloadType = -1,
kCodecNotSupported = -2,
kInvalidSampleRate = -3,
kDecoderExists = -4,
kDecoderNotFound = -5,
kInvalidPointer = -6
};
// Class that stores decoder info in the database.
class DecoderInfo {
public:
// The constructors are defined out of line. The overloads taking |factory|
// are presumably the ones whose AudioDecoder is created on demand (see
// GetDecoder()) -- confirm against the definitions.
DecoderInfo(const SdpAudioFormat& audio_format,
AudioDecoderFactory* factory,
const std::string& codec_name);
explicit DecoderInfo(const SdpAudioFormat& audio_format,
AudioDecoderFactory* factory = nullptr);
explicit DecoderInfo(NetEqDecoder ct,
AudioDecoderFactory* factory = nullptr);
// Overload for an externally created decoder |ext_dec|.
DecoderInfo(const SdpAudioFormat& audio_format,
AudioDecoder* ext_dec,
const std::string& codec_name);
DecoderInfo(DecoderInfo&&);
~DecoderInfo();
// Get the AudioDecoder object, creating it first if necessary.
AudioDecoder* GetDecoder() const;
// Delete the AudioDecoder object, unless it's external. (This means we can
// always recreate it later if we need it.)
void DropDecoder() const { decoder_.reset(); }
// Returns the sample rate in Hz. DTMF entries report the format's clock
// rate. Otherwise the value comes from the AudioDecoder when GetDecoder()
// returns one, and from the comfort noise info when it does not; exactly one
// of the two is expected to be available (DCHECKed).
int SampleRateHz() const {
if (IsDtmf()) {
// DTMF has a 1:1 mapping between clock rate and sample rate.
return audio_format_.clockrate_hz;
}
const AudioDecoder* decoder = GetDecoder();
RTC_DCHECK_EQ(1, !!decoder + !!cng_decoder_);
return decoder ? decoder->SampleRateHz() : cng_decoder_->sample_rate_hz;
}
// Returns the SDP audio format stored for this entry.
const SdpAudioFormat& GetFormat() const { return audio_format_; }
// Returns true if the decoder's format is comfort noise.
bool IsComfortNoise() const {
RTC_DCHECK_EQ(!!cng_decoder_, subtype_ == Subtype::kComfortNoise);
return subtype_ == Subtype::kComfortNoise;
}
// Returns true if the decoder's format is DTMF.
bool IsDtmf() const {
return subtype_ == Subtype::kDtmf;
}
// Returns true if the decoder's format is RED.
bool IsRed() const {
return subtype_ == Subtype::kRed;
}
// Returns true if the decoder's format is named |name|.
bool IsType(const char* name) const;
// Returns true if the decoder's format is named |name|.
bool IsType(const std::string& name) const;
// Returns the stored codec name (|name_|).
const std::string& get_name() const { return name_; }
private:
// TODO(ossu): |name_| is kept here while we retain the old external
// decoder interface. Remove this once using an
// AudioDecoderFactory has supplanted the old functionality.
const std::string name_;
const SdpAudioFormat audio_format_;
AudioDecoderFactory* const factory_;
// Mutable so that the const methods GetDecoder()/DropDecoder() can create
// and release the decoder object.
mutable std::unique_ptr<AudioDecoder> decoder_;
// Set iff this is an external decoder.
AudioDecoder* const external_decoder_;
// Set iff this is a comfort noise decoder.
struct CngDecoder {
static rtc::Optional<CngDecoder> Create(const SdpAudioFormat& format);
int sample_rate_hz;
};
const rtc::Optional<CngDecoder> cng_decoder_;
// Coarse classification of the entry, queried by IsComfortNoise(), IsDtmf()
// and IsRed().
enum class Subtype : int8_t {
kNormal,
kComfortNoise,
kDtmf,
kRed
};
static Subtype SubtypeFromFormat(const SdpAudioFormat& format);
const Subtype subtype_;
};
// Maximum value for 8 bits, and an invalid RTP payload type (since it is
// only 7 bits).
static const uint8_t kRtpPayloadTypeError = 0xFF;
// NOTE(review): |decoder_factory| is presumably stored in |decoder_factory_|;
// the definition is not in this header.
DecoderDatabase(
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
virtual ~DecoderDatabase();
// Returns true if the database is empty.
virtual bool Empty() const;
// Returns the number of decoders registered in the database.
virtual int Size() const;
// Resets the database, erasing all registered payload types, and deleting
// any AudioDecoder objects that were not externally created and inserted
// using InsertExternal().
virtual void Reset();
// Replaces the existing set of decoders with the given set. Returns the
// payload types that were reassigned or removed while doing so.
virtual std::vector<int> SetCodecs(
const std::map<int, SdpAudioFormat>& codecs);
// Registers |rtp_payload_type| as a decoder of type |codec_type|. The |name|
// is only used to populate the name field in the DecoderInfo struct in the
// database, and can be arbitrary (including empty). Returns kOK on success;
// otherwise an error code.
virtual int RegisterPayload(uint8_t rtp_payload_type,
NetEqDecoder codec_type,
const std::string& name);
// Registers a decoder for the given payload type. Returns kOK on success;
// otherwise an error code.
virtual int RegisterPayload(int rtp_payload_type,
const SdpAudioFormat& audio_format);
// Registers an externally created AudioDecoder object, and associates it
// as a decoder of type |codec_type| with |rtp_payload_type|.
virtual int InsertExternal(uint8_t rtp_payload_type,
NetEqDecoder codec_type,
const std::string& codec_name,
AudioDecoder* decoder);
// Removes the entry for |rtp_payload_type| from the database.
// Returns kDecoderNotFound or kOK depending on the outcome of the operation.
virtual int Remove(uint8_t rtp_payload_type);
// Remove all entries.
virtual void RemoveAll();
// Returns a pointer to the DecoderInfo struct for |rtp_payload_type|. If
// no decoder is registered with that |rtp_payload_type|, NULL is returned.
virtual const DecoderInfo* GetDecoderInfo(uint8_t rtp_payload_type) const;
// Sets the active decoder to be |rtp_payload_type|. If this call results in a
// change of active decoder, |new_decoder| is set to true. The previous active
// decoder's AudioDecoder object is deleted.
virtual int SetActiveDecoder(uint8_t rtp_payload_type, bool* new_decoder);
// Returns the current active decoder, or NULL if no active decoder exists.
virtual AudioDecoder* GetActiveDecoder() const;
// Sets the active comfort noise decoder to be |rtp_payload_type|. If this
// call results in a change of active comfort noise decoder, the previous
// active decoder's AudioDecoder object is deleted.
virtual int SetActiveCngDecoder(uint8_t rtp_payload_type);
// Returns the current active comfort noise decoder, or NULL if no active
// comfort noise decoder exists.
virtual ComfortNoiseDecoder* GetActiveCngDecoder() const;
// The following are utility methods: they will look up DecoderInfo through
// GetDecoderInfo and call the respective method on that info object, if it
// exists.
// Returns a pointer to the AudioDecoder object associated with
// |rtp_payload_type|, or NULL if none is registered. If the AudioDecoder
// object does not exist for that decoder, the object is created.
AudioDecoder* GetDecoder(uint8_t rtp_payload_type) const;
// Returns if |rtp_payload_type| is registered with a format named |name|.
bool IsType(uint8_t rtp_payload_type, const char* name) const;
// Returns if |rtp_payload_type| is registered with a format named |name|.
bool IsType(uint8_t rtp_payload_type, const std::string& name) const;
// Returns true if |rtp_payload_type| is registered as comfort noise.
bool IsComfortNoise(uint8_t rtp_payload_type) const;
// Returns true if |rtp_payload_type| is registered as DTMF.
bool IsDtmf(uint8_t rtp_payload_type) const;
// Returns true if |rtp_payload_type| is registered as RED.
bool IsRed(uint8_t rtp_payload_type) const;
// Returns kOK if all packets in |packet_list| carry payload types that are
// registered in the database. Otherwise, returns kDecoderNotFound.
int CheckPayloadTypes(const PacketList& packet_list) const;
private:
// Registered entries, keyed by RTP payload type.
typedef std::map<uint8_t, DecoderInfo> DecoderMap;
DecoderMap decoders_;
// Payload type of the active speech decoder; negative when none is active.
int active_decoder_type_;
// Payload type of the active comfort noise decoder; negative when none is
// active.
int active_cng_decoder_type_;
// Mutable because the const accessor GetActiveCngDecoder() creates the
// object on first use.
mutable std::unique_ptr<ComfortNoiseDecoder> active_cng_decoder_;
// Factory handed to factory-backed DecoderInfo entries.
rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_;
RTC_DISALLOW_COPY_AND_ASSIGN(DecoderDatabase);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_

View File

@ -0,0 +1,256 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include <assert.h>
#include <stdlib.h>
#include <string>
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
#include "webrtc/test/gmock.h"
#include "webrtc/test/gtest.h"
#include "webrtc/test/mock_audio_decoder.h"
#include "webrtc/test/mock_audio_decoder_factory.h"
using testing::_;
using testing::Invoke;
namespace webrtc {
// A freshly constructed database reports zero entries and is empty.
TEST(DecoderDatabase, CreateAndDestroy) {
  DecoderDatabase database(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  EXPECT_EQ(0, database.Size());
  EXPECT_TRUE(database.Empty());
}
// Registers one payload type and removes it again, checking Size() and
// Empty() after each step.
TEST(DecoderDatabase, InsertAndRemove) {
  DecoderDatabase database(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  const uint8_t kPayloadType = 0;
  const std::string kCodecName = "Robert\'); DROP TABLE Students;";

  // Insert.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPayloadType, NetEqDecoder::kDecoderPCMu,
                                     kCodecName));
  EXPECT_EQ(1, database.Size());
  EXPECT_FALSE(database.Empty());

  // Remove.
  EXPECT_EQ(DecoderDatabase::kOK, database.Remove(kPayloadType));
  EXPECT_EQ(0, database.Size());
  EXPECT_TRUE(database.Empty());
}
// Registers two payload types and verifies that RemoveAll() leaves the
// database empty.
TEST(DecoderDatabase, InsertAndRemoveAll) {
  DecoderDatabase database(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  const std::string kCodecName1 = "Robert\'); DROP TABLE Students;";
  const std::string kCodecName2 = "https://xkcd.com/327/";

  EXPECT_EQ(
      DecoderDatabase::kOK,
      database.RegisterPayload(0, NetEqDecoder::kDecoderPCMu, kCodecName1));
  EXPECT_EQ(
      DecoderDatabase::kOK,
      database.RegisterPayload(1, NetEqDecoder::kDecoderPCMa, kCodecName2));
  EXPECT_EQ(2, database.Size());
  EXPECT_FALSE(database.Empty());

  database.RemoveAll();
  EXPECT_EQ(0, database.Size());
  EXPECT_TRUE(database.Empty());
}
// Verifies that GetDecoderInfo() returns the registered entry, that the
// entry's decoder is the one produced by the (mocked) factory, and that an
// unregistered payload type yields a null pointer.
TEST(DecoderDatabase, GetDecoderInfo) {
  rtc::scoped_refptr<MockAudioDecoderFactory> factory(
      new rtc::RefCountedObject<MockAudioDecoderFactory>);
  // The mocked factory call below hands |mock_decoder| over to the
  // unique_ptr it is asked to fill.
  auto* mock_decoder = new MockAudioDecoder;
  EXPECT_CALL(*factory, MakeAudioDecoderMock(_, _))
      .WillOnce(Invoke([mock_decoder](const SdpAudioFormat& format,
                                      std::unique_ptr<AudioDecoder>* dec) {
        EXPECT_EQ("pcmu", format.name);
        dec->reset(mock_decoder);
      }));
  DecoderDatabase database(factory);
  const uint8_t kPayloadType = 0;
  const std::string kCodecName = "Robert\'); DROP TABLE Students;";
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPayloadType, NetEqDecoder::kDecoderPCMu,
                                     kCodecName));

  const DecoderDatabase::DecoderInfo* const registered =
      database.GetDecoderInfo(kPayloadType);
  ASSERT_TRUE(registered != nullptr);
  EXPECT_TRUE(registered->IsType("pcmu"));
  EXPECT_EQ(kCodecName, registered->get_name());
  EXPECT_EQ(mock_decoder, database.GetDecoder(kPayloadType));

  // A payload type that was never registered must not be found.
  const DecoderDatabase::DecoderInfo* const unregistered =
      database.GetDecoderInfo(kPayloadType + 1);
  EXPECT_TRUE(unregistered == nullptr);
}
// With the built-in factory, GetDecoder() returns a non-null decoder for a
// registered PCM16B payload type.
TEST(DecoderDatabase, GetDecoder) {
  DecoderDatabase database(CreateBuiltinAudioDecoderFactory());
  const uint8_t kPayloadType = 0;
  const std::string kCodecName = "Robert\'); DROP TABLE Students;";
  EXPECT_EQ(
      DecoderDatabase::kOK,
      database.RegisterPayload(kPayloadType, NetEqDecoder::kDecoderPCM16B,
                               kCodecName));
  AudioDecoder* const pcm16b_decoder = database.GetDecoder(kPayloadType);
  ASSERT_TRUE(pcm16b_decoder != nullptr);
}
// Registers a PCMu, a CNG, a DTMF (AVT) and a RED payload type and checks the
// IsComfortNoise()/IsDtmf()/IsRed()/IsType() queries for each of them, as
// well as for a payload type that is not registered.
TEST(DecoderDatabase, TypeTests) {
  DecoderDatabase database(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  const uint8_t kPayloadTypePcmU = 0;
  const uint8_t kPayloadTypeCng = 13;
  const uint8_t kPayloadTypeDtmf = 100;
  const uint8_t kPayloadTypeRed = 101;
  const uint8_t kPayloadNotUsed = 102;

  // Populate the database.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPayloadTypePcmU,
                                     NetEqDecoder::kDecoderPCMu, "pcmu"));
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPayloadTypeCng,
                                     NetEqDecoder::kDecoderCNGnb, "cng-nb"));
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPayloadTypeDtmf,
                                     NetEqDecoder::kDecoderAVT, "avt"));
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPayloadTypeRed,
                                     NetEqDecoder::kDecoderRED, "red"));
  EXPECT_EQ(4, database.Size());

  // Unregistered payload type: every query is false.
  EXPECT_FALSE(database.IsComfortNoise(kPayloadNotUsed));
  EXPECT_FALSE(database.IsDtmf(kPayloadNotUsed));
  EXPECT_FALSE(database.IsRed(kPayloadNotUsed));

  // PCMu is none of the special types and only matches its own name.
  EXPECT_FALSE(database.IsComfortNoise(kPayloadTypePcmU));
  EXPECT_FALSE(database.IsDtmf(kPayloadTypePcmU));
  EXPECT_FALSE(database.IsRed(kPayloadTypePcmU));
  EXPECT_FALSE(database.IsType(kPayloadTypePcmU, "isac"));
  EXPECT_TRUE(database.IsType(kPayloadTypePcmU, "pcmu"));

  // Each special payload type is recognized as such.
  EXPECT_TRUE(database.IsComfortNoise(kPayloadTypeCng));
  EXPECT_TRUE(database.IsDtmf(kPayloadTypeDtmf));
  EXPECT_TRUE(database.IsRed(kPayloadTypeRed));
}
// Inserts an externally owned decoder, verifies that the database hands back
// that very object, and checks that removing the entry does not destroy it.
TEST(DecoderDatabase, ExternalDecoder) {
  DecoderDatabase database(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  const uint8_t kPayloadType = 0;
  const std::string kCodecName = "Robert\'); DROP TABLE Students;";
  MockAudioDecoder external_decoder;

  // Register the external decoder.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.InsertExternal(kPayloadType, NetEqDecoder::kDecoderPCMu,
                                    kCodecName, &external_decoder));
  EXPECT_EQ(1, database.Size());

  // The database must return the external object itself.
  EXPECT_EQ(&external_decoder, database.GetDecoder(kPayloadType));

  // The info entry must describe the same codec. The name is compared with
  // both argument orders.
  const DecoderDatabase::DecoderInfo* const entry =
      database.GetDecoderInfo(kPayloadType);
  ASSERT_TRUE(entry != nullptr);
  EXPECT_TRUE(entry->IsType("pcmu"));
  EXPECT_EQ(entry->get_name(), kCodecName);
  EXPECT_EQ(kCodecName, entry->get_name());

  // Removing the entry must not delete the decoder, since it was declared
  // externally.
  EXPECT_CALL(external_decoder, Die()).Times(0);
  EXPECT_EQ(DecoderDatabase::kOK, database.Remove(kPayloadType));
  EXPECT_TRUE(database.Empty());

  // Die() is expected exactly once, when |external_decoder| goes out of scope.
  EXPECT_CALL(external_decoder, Die()).Times(1);
}
// CheckPayloadTypes() must report kDecoderNotFound as long as the list holds
// a packet with an unregistered payload type, and kOK once that packet is
// removed.
TEST(DecoderDatabase, CheckPayloadTypes) {
  DecoderDatabase database(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  // Register payload types 0, 1, ..., kNumPayloads - 1. They all use the same
  // decoder type, which is irrelevant for this test.
  const int kNumPayloads = 10;
  for (uint8_t payload_type = 0; payload_type < kNumPayloads; ++payload_type) {
    EXPECT_EQ(
        DecoderDatabase::kOK,
        database.RegisterPayload(payload_type, NetEqDecoder::kDecoderPCMu, ""));
  }

  // Build one packet per payload type 0, ..., kNumPayloads. The last one has
  // a payload type that is not in the database.
  PacketList packets;
  for (int type = 0; type <= kNumPayloads; ++type) {
    Packet packet;
    packet.payload_type = type;
    packets.push_back(std::move(packet));
  }

  // The unknown last packet makes the check fail.
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            database.CheckPayloadTypes(packets));

  // Without the unknown packet the check succeeds.
  packets.pop_back();
  EXPECT_EQ(DecoderDatabase::kOK, database.CheckPayloadTypes(packets));

  // Delete all packets.
  packets.erase(packets.begin(), packets.end());
}
// IF_ISAC(x) expands to |x| when either iSAC codec variant is compiled in
// (WEBRTC_CODEC_ISAC or WEBRTC_CODEC_ISACFX is defined). Otherwise it expands
// to DISABLED_x, the test-name prefix that googletest uses to skip a test.
#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
#define IF_ISAC(x) x
#else
#define IF_ISAC(x) DISABLED_##x
#endif
// Exercises the setters and getters for the active speech decoder and the
// active comfort noise decoder, including removal of the active entries and
// attempts to activate payload types that are not registered.
TEST(DecoderDatabase, IF_ISAC(ActiveDecoders)) {
  DecoderDatabase database(CreateBuiltinAudioDecoderFactory());
  // Register the payload types used below.
  ASSERT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(0, NetEqDecoder::kDecoderPCMu, "pcmu"));
  ASSERT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(103, NetEqDecoder::kDecoderISAC, "isac"));
  ASSERT_EQ(
      DecoderDatabase::kOK,
      database.RegisterPayload(13, NetEqDecoder::kDecoderCNGnb, "cng-nb"));

  // Nothing is active initially.
  EXPECT_EQ(NULL, database.GetActiveDecoder());
  EXPECT_EQ(NULL, database.GetActiveCngDecoder());

  // Activate a speech decoder for the first time.
  bool decoder_changed;  // Set to true when the active decoder changed.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.SetActiveDecoder(0, &decoder_changed));
  EXPECT_TRUE(decoder_changed);
  AudioDecoder* active = database.GetActiveDecoder();
  ASSERT_FALSE(active == nullptr);  // A decoder must be available.

  // Activating the same payload type again is not a change.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.SetActiveDecoder(0, &decoder_changed));
  EXPECT_FALSE(decoder_changed);
  active = database.GetActiveDecoder();
  ASSERT_FALSE(active == nullptr);  // A decoder must be available.

  // Switch to another speech decoder.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.SetActiveDecoder(103, &decoder_changed));
  EXPECT_TRUE(decoder_changed);
  active = database.GetActiveDecoder();
  ASSERT_FALSE(active == nullptr);  // A decoder must be available.

  // Removing the active decoder leaves no active decoder.
  EXPECT_EQ(DecoderDatabase::kOK, database.Remove(103));
  EXPECT_EQ(NULL, database.GetActiveDecoder());

  // Activate the CNG decoder.
  EXPECT_EQ(DecoderDatabase::kOK, database.SetActiveCngDecoder(13));
  ComfortNoiseDecoder* active_cng = database.GetActiveCngDecoder();
  ASSERT_FALSE(active_cng == nullptr);  // A decoder must be available.

  // Removing the active CNG decoder leaves no active CNG decoder.
  EXPECT_EQ(DecoderDatabase::kOK, database.Remove(13));
  EXPECT_EQ(NULL, database.GetActiveCngDecoder());

  // Payload types that are not registered cannot be activated.
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            database.SetActiveDecoder(17, &decoder_changed));
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            database.SetActiveCngDecoder(17));
}
} // namespace webrtc

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DEFINES_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DEFINES_H_
namespace webrtc {
// Operation identifiers. The values run consecutively from 0 in declaration
// order; kUndefined is the out-of-band value -1.
enum Operations {
  kNormal = 0,
  kMerge = 1,
  kExpand = 2,
  kAccelerate = 3,
  kFastAccelerate = 4,
  kPreemptiveExpand = 5,
  kRfc3389Cng = 6,
  kRfc3389CngNoPacket = 7,
  kCodecInternalCng = 8,
  kDtmf = 9,
  kAlternativePlc = 10,
  kAlternativePlcIncreaseTimestamp = 11,
  kAudioRepetition = 12,
  kAudioRepetitionIncreaseTimestamp = 13,
  kUndefined = -1
};
// Mode identifiers. The values run consecutively from 0 in declaration
// order; kModeUndefined is the out-of-band value -1.
enum Modes {
  kModeNormal = 0,
  kModeExpand = 1,
  kModeMerge = 2,
  kModeAccelerateSuccess = 3,
  kModeAccelerateLowEnergy = 4,
  kModeAccelerateFail = 5,
  kModePreemptiveExpandSuccess = 6,
  kModePreemptiveExpandLowEnergy = 7,
  kModePreemptiveExpandFail = 8,
  kModeRfc3389Cng = 9,
  kModeCodecInternalCng = 10,
  kModeDtmf = 11,
  kModeError = 12,
  kModeUndefined = -1
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DEFINES_H_

View File

@ -0,0 +1,421 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
#include <assert.h>
#include <math.h>
#include <algorithm> // max, min
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/rtc_base/logging.h"
#include "webrtc/rtc_base/safe_conversions.h"
namespace webrtc {
// Constructs a DelayManager.
// |max_packets_in_buffer| is stored and later used by LimitTargetLevel() to
// cap the target level. |peak_detector| must not be NULL; the manager keeps a
// reference to it. |tick_timer| is stored as a pointer and used to create
// stopwatches. The constructor finishes by calling Reset().
DelayManager::DelayManager(size_t max_packets_in_buffer,
DelayPeakDetector* peak_detector,
const TickTimer* tick_timer)
: first_packet_received_(false),
max_packets_in_buffer_(max_packets_in_buffer),
iat_vector_(kMaxIat + 1, 0),
iat_factor_(0),
tick_timer_(tick_timer),
base_target_level_(4), // In Q0 domain.
target_level_(base_target_level_ << 8), // In Q8 domain.
packet_len_ms_(0),
streaming_mode_(false),
last_seq_no_(0),
last_timestamp_(0),
minimum_delay_ms_(0),
// NOTE(review): the two members below are seeded from |target_level_|, which
// the comment above marks as a Q8 value, although their names say ms. This
// also relies on |target_level_| being declared before them in the class --
// TODO confirm both against the header.
least_required_delay_ms_(target_level_),
maximum_delay_ms_(target_level_),
iat_cumulative_sum_(0),
max_iat_cumulative_sum_(0),
peak_detector_(*peak_detector),
last_pack_cng_or_dtmf_(1) {
// NOTE(review): |peak_detector| has already been dereferenced in the
// initializer list above by the time this assert runs.
assert(peak_detector); // Should never be NULL.
Reset();
}
// Nothing to release explicitly; members clean up after themselves.
DelayManager::~DelayManager() = default;
// Read-only access to the inter-arrival time histogram.
const DelayManager::IATVector& DelayManager::iat_vector() const {
  return iat_vector_;
}
// Re-initializes the histogram to an exponentially decaying distribution,
//   iat_vector_[i] = 0.5^(i+1), i = 0, 1, 2, ...
// with the elements stored in Q30, and restores the default target levels.
void DelayManager::ResetHistogram() {
  // Start at (slightly more than) 1 in Q14. This ensures that the sum of
  // iat_vector_ is 1.
  uint16_t probability_q14 = 0x4002;  // 16384 + 2 = 100000000000010 binary.
  for (auto& bin : iat_vector_) {
    // Halve for every bin, then shift 16 steps from Q14 to Q30.
    probability_q14 >>= 1;
    bin = probability_q14 << 16;
  }
  base_target_level_ = 4;
  target_level_ = base_target_level_ << 8;
}
// Updates the inter-arrival time statistics and the target level with a newly
// arrived packet identified by |sequence_number| and |timestamp|.
// Returns -1 if |sample_rate_hz| is not positive, otherwise 0. The very first
// packet only primes the stored sequence number, timestamp and stopwatch; no
// statistics are updated for it.
int DelayManager::Update(uint16_t sequence_number,
uint32_t timestamp,
int sample_rate_hz) {
if (sample_rate_hz <= 0) {
return -1;
}
if (!first_packet_received_) {
// Prepare for next packet arrival.
packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
last_seq_no_ = sequence_number;
last_timestamp_ = timestamp;
first_packet_received_ = true;
return 0;
}
// Try calculating packet length from current and previous timestamps.
int packet_len_ms;
if (!IsNewerTimestamp(timestamp, last_timestamp_) ||
!IsNewerSequenceNumber(sequence_number, last_seq_no_)) {
// Wrong timestamp or sequence order; use stored value.
packet_len_ms = packet_len_ms_;
} else {
// Calculate timestamps per packet and derive packet length in ms.
// The uint16_t cast reduces the sequence number difference modulo 2^16.
int64_t packet_len_samp =
static_cast<uint32_t>(timestamp - last_timestamp_) /
static_cast<uint16_t>(sequence_number - last_seq_no_);
packet_len_ms =
rtc::saturated_cast<int>(1000 * packet_len_samp / sample_rate_hz);
}
if (packet_len_ms > 0) {
// Cannot update statistics unless |packet_len_ms| is valid.
// Calculate inter-arrival time (IAT) in integer "packet times"
// (rounding down). This is the value used as index to the histogram
// vector |iat_vector_|.
int iat_packets = packet_iat_stopwatch_->ElapsedMs() / packet_len_ms;
// UpdateCumulativeSums() reads |last_seq_no_| and the stopwatch, so it has to
// run before they are refreshed at the end of this function.
if (streaming_mode_) {
UpdateCumulativeSums(packet_len_ms, sequence_number);
}
// Check for discontinuous packet sequence and re-ordering.
if (IsNewerSequenceNumber(sequence_number, last_seq_no_ + 1)) {
// Compensate for gap in the sequence numbers. Reduce IAT with the
// expected extra time due to lost packets, but ensure that the IAT is
// not negative.
iat_packets -= static_cast<uint16_t>(sequence_number - last_seq_no_ - 1);
iat_packets = std::max(iat_packets, 0);
} else if (!IsNewerSequenceNumber(sequence_number, last_seq_no_)) {
// Not newer than the previous packet: increase the IAT by the distance
// from the expected next sequence number.
iat_packets += static_cast<uint16_t>(last_seq_no_ + 1 - sequence_number);
}
// Saturate IAT at maximum value.
// |kMaxIat| is copied into a local first; std::min() takes its arguments by
// reference.
const int max_iat = kMaxIat;
iat_packets = std::min(iat_packets, max_iat);
UpdateHistogram(iat_packets);
// Calculate new |target_level_| based on updated statistics.
target_level_ = CalculateTargetLevel(iat_packets);
if (streaming_mode_) {
target_level_ = std::max(target_level_, max_iat_cumulative_sum_);
}
LimitTargetLevel();
} // End if (packet_len_ms > 0).
// Prepare for next packet arrival.
packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
last_seq_no_ = sequence_number;
last_timestamp_ = timestamp;
return 0;
}
// Updates the cumulative sum of inter-arrival time deviations.
// |packet_len_ms| is the packet duration used to express the elapsed time in
// "packet times"; the caller passes a strictly positive value.
// |sequence_number| is the sequence number of the packet that just arrived;
// it is compared against |last_seq_no_|, which still holds the previous
// packet's number when this method runs.
void DelayManager::UpdateCumulativeSums(int packet_len_ms,
                                        uint16_t sequence_number) {
  // Calculate IAT in Q8, including fractions of a packet (i.e., more
  // accurate than |iat_packets|).
  int iat_packets_q8 =
      (packet_iat_stopwatch_->ElapsedMs() << 8) / packet_len_ms;
  // Sequence number advance since the previous packet. This is negative when
  // the packet arrives out of order.
  // NOTE(review): assumes |last_seq_no_| is a 16-bit unsigned like
  // |sequence_number|, so the subtraction is carried out in int -- its
  // declaration is not visible here.
  const int seq_no_diff = static_cast<int>(sequence_number - last_seq_no_);
  // Scale to Q8 with a multiplication rather than "<< 8": left-shifting a
  // negative signed value is undefined behavior before C++20, while the
  // multiplication is well defined and yields the same value.
  const int seq_no_diff_q8 = seq_no_diff * 256;
  // Calculate cumulative sum IAT with sequence number compensation. The sum
  // is zero if there is no clock-drift.
  iat_cumulative_sum_ += (iat_packets_q8 - seq_no_diff_q8);
  // Subtract drift term.
  iat_cumulative_sum_ -= kCumulativeSumDrift;
  // Ensure not negative.
  iat_cumulative_sum_ = std::max(iat_cumulative_sum_, 0);
  if (iat_cumulative_sum_ > max_iat_cumulative_sum_) {
    // Found a new maximum.
    max_iat_cumulative_sum_ = iat_cumulative_sum_;
    max_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
  }
  if (max_iat_stopwatch_->ElapsedMs() > kMaxStreamingPeakPeriodMs) {
    // Too long since the last maximum was observed; decrease max value.
    max_iat_cumulative_sum_ -= kCumulativeSumDrift;
  }
}
// Each element in the vector is first multiplied by the forgetting factor
// |iat_factor_|. Then the vector element indicated by |iat_packets| is then
// increased (additive) by 1 - |iat_factor_|. This way, the probability of
// |iat_packets| is slightly increased, while the sum of the histogram remains
// constant (=1).
// Due to inaccuracies in the fixed-point arithmetic, the histogram may no
// longer sum up to 1 (in Q30) after the update. To correct this, a correction
// term is added or subtracted from the first element (or elements) of the
// vector.
// The forgetting factor |iat_factor_| is also updated. When the DelayManager
// is reset, the factor is set to 0 to facilitate rapid convergence in the
// beginning. With each update of the histogram, the factor is increased towards
// the steady-state value |kIatFactor_|.
void DelayManager::UpdateHistogram(size_t iat_packets) {
assert(iat_packets < iat_vector_.size());
int vector_sum = 0; // Sum up the vector elements as they are processed.
// Multiply each element in |iat_vector_| with |iat_factor_|.
for (IATVector::iterator it = iat_vector_.begin();
it != iat_vector_.end(); ++it) {
*it = (static_cast<int64_t>(*it) * iat_factor_) >> 15;
vector_sum += *it;
}
// Increase the probability for the currently observed inter-arrival time
// by 1 - |iat_factor_|. The factor is in Q15, |iat_vector_| in Q30.
// Thus, left-shift 15 steps to obtain result in Q30.
iat_vector_[iat_packets] += (32768 - iat_factor_) << 15;
vector_sum += (32768 - iat_factor_) << 15; // Add to vector sum.
// |iat_vector_| should sum up to 1 (in Q30), but it may not due to
// fixed-point rounding errors.
vector_sum -= 1 << 30; // Should be zero. Compensate if not.
if (vector_sum != 0) {
// Modify a few values early in |iat_vector_|.
int flip_sign = vector_sum > 0 ? -1 : 1;
IATVector::iterator it = iat_vector_.begin();
while (it != iat_vector_.end() && abs(vector_sum) > 0) {
// Add/subtract 1/16 of the element, but not more than |vector_sum|.
int correction = flip_sign * std::min(abs(vector_sum), (*it) >> 4);
*it += correction;
vector_sum += correction;
++it;
}
}
assert(vector_sum == 0); // Verify that the above is correct.
// Update |iat_factor_| (changes only during the first seconds after a reset).
// The factor converges to |kIatFactor_|.
iat_factor_ += (kIatFactor_ - iat_factor_ + 3) >> 2;
}
// Clamps |target_level_| (Q8, in packets).
// Lower bound: the equivalent of |minimum_delay_ms_| in packets, when set.
// Upper bound: the smaller of (i) the equivalent of |maximum_delay_ms_| in
// packets, when set, and (ii) 75% of |max_packets_in_buffer_|, which leaves
// headroom for natural fluctuations around the target.
// The result is never below one packet. Before any limit is applied, the
// unclamped requirement is stored in |least_required_delay_ms_|.
// TODO(hlundin): Move this check to the buffer logistics class.
void DelayManager::LimitTargetLevel() {
  least_required_delay_ms_ = (target_level_ * packet_len_ms_) >> 8;

  // The delay limits are given in ms and can only be converted to packets
  // once the packet length is known.
  if (packet_len_ms_ > 0) {
    if (minimum_delay_ms_ > 0) {
      const int min_level_q8 = (minimum_delay_ms_ << 8) / packet_len_ms_;
      target_level_ = std::max(target_level_, min_level_q8);
    }
    if (maximum_delay_ms_ > 0) {
      const int max_level_q8 = (maximum_delay_ms_ << 8) / packet_len_ms_;
      target_level_ = std::min(target_level_, max_level_q8);
    }
  }

  // Buffer capacity shifted to Q8, then 75% of it.
  const int buffer_cap_q8 =
      static_cast<int>((3 * (max_packets_in_buffer_ << 8)) / 4);
  target_level_ = std::min(target_level_, buffer_cap_q8);

  // Sanity check: at least 1 packet (in Q8).
  target_level_ = std::max(target_level_, 1 << 8);
}
// Derives a new target buffer level from the inter-arrival time histogram and
// the delay peak detector. |iat_packets| is the most recent inter-arrival
// time in packets and is forwarded to the peak detector. Updates
// |base_target_level_| (Q0) and |target_level_| (Q8) and returns the latter.
int DelayManager::CalculateTargetLevel(int iat_packets) {
  // The class constants are copied into a local with plain assignments.
  int limit_probability = kLimitProbability;
  if (streaming_mode_) {
    limit_probability = kLimitProbabilityStreaming;
  }

  // Find the index for which the probability of observing an inter-arrival
  // time larger than or equal to that index is at most |limit_probability|.
  // That probability is the tail sum of the histogram. Since the whole
  // histogram sums to 1 (in Q30) and the answer is usually a low index, it is
  // cheaper to start at 1 and subtract elements from the front.
  // Elements 0 and 1 are always subtracted, so the resulting level is >= 1.
  const size_t last_index = iat_vector_.size() - 1;
  size_t index = 1;
  int sum = (1 << 30) - iat_vector_[0] - iat_vector_[1];
  while ((sum > limit_probability) && (index < last_index)) {
    ++index;
    sum -= iat_vector_[index];
  }

  // |index| is the base value for the target buffer level.
  int target_level = static_cast<int>(index);
  base_target_level_ = static_cast<int>(index);

  // Feed the peak detector. If it reports a delay peak, the target must
  // cover the peak height as well.
  if (peak_detector_.Update(iat_packets, target_level)) {
    target_level = std::max(target_level, peak_detector_.MaxPeakHeight());
  }

  // Sanity check. |target_level| must be strictly positive.
  target_level = std::max(target_level, 1);

  // Scale to Q8 and store in the member variable.
  target_level_ = target_level << 8;
  return target_level_;
}
int DelayManager::SetPacketAudioLength(int length_ms) {
  // Only strictly positive packet lengths are accepted.
  const bool valid_length = length_ms > 0;
  if (!valid_length) {
    LOG_F(LS_ERROR) << "length_ms = " << length_ms;
    return -1;
  }

  // Store the new length and forward it to the peak detector.
  packet_len_ms_ = length_ms;
  peak_detector_.SetPacketAudioLength(packet_len_ms_);

  // Restart the time-since-last-packet measurement.
  packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
  last_pack_cng_or_dtmf_ = 1;  // TODO(hlundin): Legacy. Remove?
  return 0;
}
void DelayManager::Reset() {
  // Forget the packet size and leave streaming mode.
  packet_len_ms_ = 0;  // Packet size unknown.
  streaming_mode_ = false;

  // Reset the peak detector and the histogram (the latter resets the target
  // levels too).
  peak_detector_.Reset();
  ResetHistogram();

  // A zero forgetting factor makes the histogram adapt faster for the first
  // few packets.
  iat_factor_ = 0;

  // Clear the cumulative-sum statistics and the CNG/DTMF state.
  iat_cumulative_sum_ = 0;
  max_iat_cumulative_sum_ = 0;
  last_pack_cng_or_dtmf_ = 1;

  // Restart both stopwatches.
  packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
  max_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
}
double DelayManager::EstimatedClockDriftPpm() const {
  // Expected inter-arrival time: every histogram probability (Q30) weighted
  // by its bin number.
  double weighted_sum = 0.0;
  size_t bin = 0;
  for (int probability_q30 : iat_vector_) {
    weighted_sum += static_cast<double>(probability_q30) * bin;
    ++bin;
  }
  // Dividing by 1 << 30 converts from Q30 to Q0. Subtracting the nominal
  // inter-arrival time (1) makes zero drift come out as 0, and multiplying by
  // 1000000 expresses the result in parts-per-million (ppm).
  const double expected_iat = weighted_sum / (1 << 30);
  return (expected_iat - 1) * 1e6;
}
// Reports the peak detector's current peak-mode state.
bool DelayManager::PeakFound() const {
  const bool in_peak_mode = peak_detector_.peak_found();
  return in_peak_mode;
}
// Restarts the measurement of the time elapsed since the last packet by
// replacing the stopwatch with a fresh one.
void DelayManager::ResetPacketIatCount() {
  packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
}
// Note that |lower_limit| and |higher_limit| are not related to the
// |minimum_delay_ms_| and |maximum_delay_ms_| set by the client of this
// class. They are computed from |target_level_| and used for decision making.
void DelayManager::BufferLimits(int* lower_limit, int* higher_limit) const {
  // Both output pointers are required.
  if (lower_limit == nullptr || higher_limit == nullptr) {
    LOG_F(LS_ERROR) << "NULL pointers supplied as input";
    assert(false);
    return;
  }

  // 20 ms expressed in Q8 packets. With an unknown packet length a large
  // default is used for legacy bit-exactness.
  int window_20ms;
  if (packet_len_ms_ > 0) {
    window_20ms = (20 << 8) / packet_len_ms_;
  } else {
    window_20ms = 0x7FFF;
  }

  // |target_level_| is already in Q8; the lower limit is 75% of it.
  const int lower = (target_level_ * 3) / 4;
  *lower_limit = lower;

  // The higher limit is the target level itself, but never less than 20 ms
  // above the lower limit.
  *higher_limit = std::max(target_level_, lower + window_20ms);
}
// Returns the current target buffer level, in (fractions of) packets in Q8.
int DelayManager::TargetLevel() const {
  return target_level_;
}
void DelayManager::LastDecodedWasCngOrDtmf(bool it_was) {
  if (!it_was) {
    // Not CNG/DTMF: move any non-zero state to -1; a state of 0 is kept.
    if (last_pack_cng_or_dtmf_ != 0) {
      last_pack_cng_or_dtmf_ = -1;
    }
    return;
  }
  // CNG or DTMF was decoded.
  last_pack_cng_or_dtmf_ = 1;
}
// Accounts for one empty packet by advancing the last seen sequence number.
void DelayManager::RegisterEmptyPacket() {
  last_seq_no_ += 1;
}
bool DelayManager::SetMinimumDelay(int delay_ms) {
  // The minimum may not exceed the maximum delay, if a maximum is set.
  if (maximum_delay_ms_ > 0 && delay_ms > maximum_delay_ms_) {
    return false;
  }
  // When the packet length is known, the minimum must also fit within 75% of
  // the packet buffer capacity, expressed in ms.
  if (packet_len_ms_ > 0) {
    const int buffer_cap_ms =
        static_cast<int>(3 * max_packets_in_buffer_ * packet_len_ms_ / 4);
    if (delay_ms > buffer_cap_ms) {
      return false;
    }
  }
  minimum_delay_ms_ = delay_ms;
  return true;
}
bool DelayManager::SetMaximumDelay(int delay_ms) {
  // Zero input unsets the maximum delay.
  if (delay_ms == 0) {
    maximum_delay_ms_ = 0;
    return true;
  }
  // The maximum may be neither below the minimum delay nor shorter than one
  // packet.
  const bool below_minimum = delay_ms < minimum_delay_ms_;
  const bool shorter_than_packet = delay_ms < packet_len_ms_;
  if (below_minimum || shorter_than_packet) {
    return false;
  }
  maximum_delay_ms_ = delay_ms;
  return true;
}
// Returns the delay (in ms) recorded by LimitTargetLevel() before it applied
// any limits to the target level.
int DelayManager::least_required_delay_ms() const {
  return least_required_delay_ms_;
}
// Returns the target level (Q0) derived from the histogram alone, i.e. before
// delay peaks are taken into account.
int DelayManager::base_target_level() const {
  return base_target_level_;
}
// Turns streaming mode on or off.
void DelayManager::set_streaming_mode(bool value) {
  streaming_mode_ = value;
}
// Returns the raw CNG/DTMF state value.
int DelayManager::last_pack_cng_or_dtmf() const {
  return last_pack_cng_or_dtmf_;
}
// Overwrites the raw CNG/DTMF state value.
void DelayManager::set_last_pack_cng_or_dtmf(int value) {
  last_pack_cng_or_dtmf_ = value;
}
} // namespace webrtc

View File

@ -0,0 +1,174 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
#include <string.h> // Provide access to size_t.
#include <memory>
#include <vector>
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declaration.
class DelayPeakDetector;
// Keeps a histogram of packet inter-arrival times and derives from it the
// target level for the packet buffer, optionally raised by delay peaks
// reported by a DelayPeakDetector and limited by externally set minimum and
// maximum delays.
class DelayManager {
public:
typedef std::vector<int> IATVector;
// Create a DelayManager object. Notify the delay manager that the packet
// buffer can hold no more than |max_packets_in_buffer| packets (i.e., this
// is the number of packet slots in the buffer). Supply a PeakDetector
// object to the DelayManager.
DelayManager(size_t max_packets_in_buffer,
DelayPeakDetector* peak_detector,
const TickTimer* tick_timer);
virtual ~DelayManager();
// Read the inter-arrival time histogram. Mainly for testing purposes.
virtual const IATVector& iat_vector() const;
// Updates the delay manager with a new incoming packet, with
// |sequence_number| and |timestamp| from the RTP header. This updates the
// inter-arrival time histogram and other statistics, as well as the
// associated DelayPeakDetector. A new target buffer level is calculated.
// Returns 0 on success, -1 on failure (invalid sample rate).
virtual int Update(uint16_t sequence_number,
uint32_t timestamp,
int sample_rate_hz);
// Calculates a new target buffer level. Called from the Update() method.
// Sets target_level_ (in Q8) and returns the same value. Also calculates
// and updates base_target_level_, which is the target buffer level before
// taking delay peaks into account.
virtual int CalculateTargetLevel(int iat_packets);
// Notifies the DelayManager of how much audio data is carried in each packet.
// The method updates the DelayPeakDetector too, and resets the inter-arrival
// time counter. Returns 0 on success, -1 on failure (|length_ms| <= 0).
virtual int SetPacketAudioLength(int length_ms);
// Resets the DelayManager and the associated DelayPeakDetector.
virtual void Reset();
// Calculates the average inter-arrival time deviation from the histogram.
// The result is returned as parts-per-million deviation from the nominal
// inter-arrival time. That is, if the average inter-arrival time is equal to
// the nominal frame time, the return value is zero. A positive value
// corresponds to packet spacing being too large, while a negative value means
// that the packets arrive with less spacing than expected.
virtual double EstimatedClockDriftPpm() const;
// Returns true if peak-mode is active. That is, delay peaks were observed
// recently. This method simply asks for the same information from the
// DelayPeakDetector object.
virtual bool PeakFound() const;
// Restarts the stopwatch that measures the time elapsed since the last
// packet, i.e., resets the inter-arrival time counter to 0.
virtual void ResetPacketIatCount();
// Writes the lower and higher limits which the buffer level should stay
// within to the corresponding pointers. The values are in (fractions of)
// packets in Q8. Both pointers must be non-null.
virtual void BufferLimits(int* lower_limit, int* higher_limit) const;
// Gets the target buffer level, in (fractions of) packets in Q8.
// NOTE(review): an earlier version of this comment referred to a
// set_extra_delay_ms() method; no such method exists in this class.
virtual int TargetLevel() const;
// Informs the delay manager whether or not the last decoded packet was
// comfort noise (CNG) or DTMF, as opposed to speech.
virtual void LastDecodedWasCngOrDtmf(bool it_was);
// Notify the delay manager that empty packets have been received. These are
// packets that are part of the sequence number series, so that an empty
// packet will shift the sequence numbers for the following packets.
virtual void RegisterEmptyPacket();
// Accessors and mutators.
// SetMinimumDelay() returns false, leaving the minimum unchanged, if
// |delay_ms| exceeds a set maximum delay or (when the packet length is known)
// 75% of the packet buffer capacity expressed in ms.
virtual bool SetMinimumDelay(int delay_ms);
// SetMaximumDelay() with |delay_ms| == 0 unsets the maximum. It returns
// false, leaving the maximum unchanged, if |delay_ms| is below the minimum
// delay or shorter than one packet.
virtual bool SetMaximumDelay(int delay_ms);
virtual int least_required_delay_ms() const;
virtual int base_target_level() const;
virtual void set_streaming_mode(bool value);
virtual int last_pack_cng_or_dtmf() const;
virtual void set_last_pack_cng_or_dtmf(int value);
private:
static const int kLimitProbability = 53687091; // 1/20 in Q30.
static const int kLimitProbabilityStreaming = 536871; // 1/2000 in Q30.
static const int kMaxStreamingPeakPeriodMs = 600000; // 10 minutes in ms.
static const int kCumulativeSumDrift = 2; // Drift term for cumulative sum
// |iat_cumulative_sum_|.
// Steady-state forgetting factor for |iat_vector_|, 0.9993 in Q15.
static const int kIatFactor_ = 32745;
static const int kMaxIat = 64; // Max inter-arrival time to register.
// Sets |iat_vector_| to the default start distribution and sets the
// |base_target_level_| and |target_level_| to the corresponding values.
void ResetHistogram();
// Updates |iat_cumulative_sum_| and |max_iat_cumulative_sum_|. (These are
// used by the streaming mode.) This method is called by Update().
void UpdateCumulativeSums(int packet_len_ms, uint16_t sequence_number);
// Updates the histogram |iat_vector_|. The probability for inter-arrival time
// equal to |iat_packets| (in integer packets) is increased slightly, while
// all other entries are decreased. This method is called by Update().
void UpdateHistogram(size_t iat_packets);
// Keeps |target_level_| within bounds: at least |minimum_delay_ms_|, at most
// |maximum_delay_ms_| and at most 75% of |max_packets_in_buffer_|, and never
// below one packet. Also records |least_required_delay_ms_| from the target
// level as it was before limiting. This method is called by Update().
void LimitTargetLevel();
bool first_packet_received_;
const size_t max_packets_in_buffer_; // Capacity of the packet buffer.
IATVector iat_vector_; // Histogram of inter-arrival times.
int iat_factor_; // Forgetting factor for updating the IAT histogram (Q15).
const TickTimer* tick_timer_;
// Time elapsed since last packet.
std::unique_ptr<TickTimer::Stopwatch> packet_iat_stopwatch_;
int base_target_level_; // Currently preferred buffer level before peak
// detection and streaming mode (Q0).
// TODO(turajs) change the comment according to the implementation of
// minimum-delay.
int target_level_; // Currently preferred buffer level in (fractions)
// of packets (Q8), before adding any extra delay.
int packet_len_ms_; // Length of audio in each incoming packet [ms].
bool streaming_mode_;
uint16_t last_seq_no_; // Sequence number for last received packet.
uint32_t last_timestamp_; // Timestamp for the last received packet.
int minimum_delay_ms_; // Externally set minimum delay.
int least_required_delay_ms_; // Smallest preferred buffer level, in ms
// (|target_level_| times the packet length), before applying
// |minimum_delay_ms_| and/or |maximum_delay_ms_|.
int maximum_delay_ms_; // Externally set maximum allowed delay.
int iat_cumulative_sum_; // Cumulative sum of delta inter-arrival times.
int max_iat_cumulative_sum_; // Max of |iat_cumulative_sum_|.
// Time elapsed since maximum was observed.
std::unique_ptr<TickTimer::Stopwatch> max_iat_stopwatch_;
DelayPeakDetector& peak_detector_;
// CNG/DTMF state. Set to 1 when CNG or DTMF was decoded, and moved to -1 by
// a non-CNG/DTMF decode unless the value is 0.
// NOTE(review): the meaning of the value 0 is not visible in this class;
// presumably it is written by the owner through set_last_pack_cng_or_dtmf().
int last_pack_cng_or_dtmf_;
RTC_DISALLOW_COPY_AND_ASSIGN(DelayManager);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_

View File

@ -0,0 +1,338 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DelayManager class.
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
#include <math.h>
#include "webrtc/modules/audio_coding/neteq/mock/mock_delay_peak_detector.h"
#include "webrtc/test/gmock.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
using ::testing::Return;
using ::testing::_;
class DelayManagerTest : public ::testing::Test {
protected:
static const int kMaxNumberOfPackets = 240;
static const int kTimeStepMs = 10;
static const int kFs = 8000;
static const int kFrameSizeMs = 20;
static const int kTsIncrement = kFrameSizeMs * kFs / 1000;
DelayManagerTest();
virtual void SetUp();
virtual void TearDown();
void SetPacketAudioLength(int lengt_ms);
void InsertNextPacket();
void IncreaseTime(int inc_ms);
DelayManager* dm_;
TickTimer tick_timer_;
MockDelayPeakDetector detector_;
uint16_t seq_no_;
uint32_t ts_;
};
// Starts without a DelayManager (SetUp() creates it), hands the tick timer to
// the mock detector and picks arbitrary start values for the sequence number
// and the timestamp.
DelayManagerTest::DelayManagerTest()
    : dm_(nullptr),
      detector_(&tick_timer_),
      seq_no_(0x1234),
      ts_(0x12345678) {
}
void DelayManagerTest::SetUp() {
  // Creating the DelayManager must reset the detector exactly once.
  EXPECT_CALL(detector_, Reset()).Times(1);
  dm_ = new DelayManager(kMaxNumberOfPackets, &detector_, &tick_timer_);
}
void DelayManagerTest::SetPacketAudioLength(int length_ms) {
  // The DelayManager is expected to forward the length to the detector.
  EXPECT_CALL(detector_, SetPacketAudioLength(length_ms));
  dm_->SetPacketAudioLength(length_ms);
}
void DelayManagerTest::InsertNextPacket() {
  // Feed the current packet, then move on to the next sequence number and
  // timestamp.
  const int update_result = dm_->Update(seq_no_, ts_, kFs);
  EXPECT_EQ(0, update_result);
  ++seq_no_;
  ts_ += kTsIncrement;
}
void DelayManagerTest::IncreaseTime(int inc_ms) {
  // One tick per started |kTimeStepMs| interval.
  int advanced_ms = 0;
  while (advanced_ms < inc_ms) {
    tick_timer_.Increment();
    advanced_ms += kTimeStepMs;
  }
}
void DelayManagerTest::TearDown() {
  // Register the expectation for the mock detector's Die() call before the
  // DelayManager is destroyed.
  EXPECT_CALL(detector_, Die());
  delete dm_;
}
TEST_F(DelayManagerTest, CreateAndDestroy) {
  // Intentionally empty: SetUp() and TearDown() of the fixture create and
  // destroy the DelayManager object.
}
TEST_F(DelayManagerTest, VectorInitialization) {
  const DelayManager::IATVector& histogram = dm_->iat_vector();
  double total = 0.0;
  for (size_t i = 0; i < histogram.size(); i++) {
    // Bin i is expected to hold 0.5^(i + 1), expressed in Q30.
    const double expected_q30 = ldexp(pow(0.5, static_cast<int>(i + 1)), 30);
    // Tolerance 65537 in Q30 corresponds to a delta of approximately 0.00006.
    EXPECT_NEAR(expected_q30, histogram[i], 65537);
    total += histogram[i];
  }
  // All bins together should be 1 in Q30.
  EXPECT_EQ(1 << 30, static_cast<int>(total));
}
TEST_F(DelayManagerTest, SetPacketAudioLength) {
  const int kLengthMs = 30;
  // A valid length must be handed on to the detector object exactly once.
  EXPECT_CALL(detector_, SetPacketAudioLength(kLengthMs)).Times(1);
  const int valid_result = dm_->SetPacketAudioLength(kLengthMs);
  EXPECT_EQ(0, valid_result);
  // A negative length is an illegal parameter value.
  const int invalid_result = dm_->SetPacketAudioLength(-1);
  EXPECT_EQ(-1, invalid_result);
}
TEST_F(DelayManagerTest, PeakFound) {
  // The DelayManager should hand the question on to the detector. Ask twice;
  // the detector answers true first and false second.
  EXPECT_CALL(detector_, peak_found())
      .WillOnce(Return(true))
      .WillOnce(Return(false));
  const bool first_answer = dm_->PeakFound();
  const bool second_answer = dm_->PeakFound();
  EXPECT_TRUE(first_answer);
  EXPECT_FALSE(second_answer);
}
TEST_F(DelayManagerTest, UpdateNormal) {
  SetPacketAudioLength(kFrameSizeMs);
  // The first packet arrives.
  InsertNextPacket();
  // One frame size later ...
  IncreaseTime(kFrameSizeMs);
  // ... the second packet arrives. The detector should be updated once, with
  // an inter-arrival time of 1 packet and a (base) target level of 1. It
  // answers false, meaning no peaks found.
  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
  InsertNextPacket();

  const int kTargetQ8 = 1 << 8;
  EXPECT_EQ(kTargetQ8, dm_->TargetLevel());  // In Q8.
  EXPECT_EQ(1, dm_->base_target_level());

  int lower = 0;
  int higher = 0;
  dm_->BufferLimits(&lower, &higher);
  // |lower| is 75% of the target level. |higher| is the target level, but at
  // least 20 ms above |lower|; the latter is the limiting case here.
  EXPECT_EQ(kTargetQ8 * 3 / 4, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
TEST_F(DelayManagerTest, UpdateLongInterArrivalTime) {
  SetPacketAudioLength(kFrameSizeMs);
  // The first packet arrives.
  InsertNextPacket();
  // Two frame sizes later ...
  IncreaseTime(2 * kFrameSizeMs);
  // ... the second packet arrives. The detector should be updated once, with
  // an inter-arrival time of 2 packets and a (base) target level of 2. It
  // answers false, meaning no peaks found.
  EXPECT_CALL(detector_, Update(2, 2)).WillOnce(Return(false));
  InsertNextPacket();

  const int kTargetQ8 = 2 << 8;
  EXPECT_EQ(kTargetQ8, dm_->TargetLevel());  // In Q8.
  EXPECT_EQ(2, dm_->base_target_level());

  int lower = 0;
  int higher = 0;
  dm_->BufferLimits(&lower, &higher);
  // |lower| is 75% of the target level. |higher| is the target level, but at
  // least 20 ms above |lower|; the latter is the limiting case here.
  EXPECT_EQ(kTargetQ8 * 3 / 4, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
TEST_F(DelayManagerTest, UpdatePeakFound) {
  SetPacketAudioLength(kFrameSizeMs);
  // The first packet arrives.
  InsertNextPacket();
  // One frame size later ...
  IncreaseTime(kFrameSizeMs);
  // ... the second packet arrives. The detector should be updated once, with
  // an inter-arrival time of 1 packet and a (base) target level of 1. This
  // time it answers true, meaning peaks are found, with a peak height of 5.
  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(true));
  EXPECT_CALL(detector_, MaxPeakHeight()).WillOnce(Return(5));
  InsertNextPacket();

  const int kPeakTargetQ8 = 5 << 8;
  EXPECT_EQ(kPeakTargetQ8, dm_->TargetLevel());
  // The base target level does not include the peak.
  EXPECT_EQ(1, dm_->base_target_level());

  int lower = 0;
  int higher = 0;
  dm_->BufferLimits(&lower, &higher);
  // |lower| is 75% of the target level and |higher| is the target level.
  EXPECT_EQ(kPeakTargetQ8 * 3 / 4, lower);
  EXPECT_EQ(kPeakTargetQ8, higher);
}
TEST_F(DelayManagerTest, TargetDelay) {
  SetPacketAudioLength(kFrameSizeMs);
  // The first packet arrives.
  InsertNextPacket();
  // One frame size later ...
  IncreaseTime(kFrameSizeMs);
  // ... the second packet arrives. The detector should be updated once, with
  // an inter-arrival time of 1 packet and a (base) target level of 1. It
  // answers false, meaning no peaks found.
  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
  InsertNextPacket();

  const int kExpectedTarget = 1;
  const int target_q8 = dm_->TargetLevel();
  EXPECT_EQ(kExpectedTarget << 8, target_q8);  // In Q8.
  EXPECT_EQ(1, dm_->base_target_level());

  int lower = 0;
  int higher = 0;
  dm_->BufferLimits(&lower, &higher);
  // |lower| is 75% of the base target level, and |higher| is |lower| plus
  // 20 ms of headroom.
  EXPECT_EQ((1 << 8) * 3 / 4, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
TEST_F(DelayManagerTest, MaxAndRequiredDelay) {
  const int kExpectedTarget = 5;
  const int kTimeIncrement = kExpectedTarget * kFrameSizeMs;
  SetPacketAudioLength(kFrameSizeMs);
  // The first packet arrives.
  InsertNextPacket();

  // From the second packet on, the detector should be updated with an
  // inter-arrival time of |kExpectedTarget| packets. It always answers true
  // (peaks found) and reports |kExpectedTarget| as the peak height.
  EXPECT_CALL(detector_, Update(kExpectedTarget, _))
      .WillRepeatedly(Return(true));
  EXPECT_CALL(detector_, MaxPeakHeight())
      .WillRepeatedly(Return(kExpectedTarget));
  IncreaseTime(kTimeIncrement);
  InsertNextPacket();

  // Nothing limits the target yet.
  EXPECT_EQ(kExpectedTarget << 8, dm_->TargetLevel());

  // Set a maximum delay two packets below the expected target.
  const int max_delay_packets = kExpectedTarget - 2;
  const int max_delay_ms = max_delay_packets * kFrameSizeMs;
  EXPECT_TRUE(dm_->SetMaximumDelay(max_delay_ms));
  IncreaseTime(kTimeIncrement);
  InsertNextPacket();
  // The required delay still reflects the unlimited target, while the target
  // level itself is capped at the maximum.
  EXPECT_EQ(kExpectedTarget * kFrameSizeMs, dm_->least_required_delay_ms());
  EXPECT_EQ(max_delay_packets << 8, dm_->TargetLevel());

  // A maximum delay shorter than one packet is refused.
  EXPECT_FALSE(dm_->SetMaximumDelay(kFrameSizeMs - 1));
}
TEST_F(DelayManagerTest, MinAndRequiredDelay) {
  const int kExpectedTarget = 5;
  const int kTimeIncrement = kExpectedTarget * kFrameSizeMs;
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();
  // Second packet arrival.
  // Expect detector update method to be called with inter-arrival time
  // equal to |kExpectedTarget| packets. Return true to indicate peaks found.
  EXPECT_CALL(detector_, Update(kExpectedTarget, _))
      .WillRepeatedly(Return(true));
  EXPECT_CALL(detector_, MaxPeakHeight())
      .WillRepeatedly(Return(kExpectedTarget));
  IncreaseTime(kTimeIncrement);
  InsertNextPacket();
  // No limit is applied.
  EXPECT_EQ(kExpectedTarget << 8, dm_->TargetLevel());

  const int kMinDelayPackets = kExpectedTarget + 2;
  const int kMinDelayMs = kMinDelayPackets * kFrameSizeMs;
  // SetMinimumDelay() reports a rejected value through its return value.
  // Check it, so that a rejection fails here rather than only showing up as a
  // wrong target level below.
  EXPECT_TRUE(dm_->SetMinimumDelay(kMinDelayMs));
  IncreaseTime(kTimeIncrement);
  InsertNextPacket();
  // The required delay reflects the target before the minimum was applied,
  // while the target level itself is raised to the minimum.
  EXPECT_EQ(kExpectedTarget * kFrameSizeMs, dm_->least_required_delay_ms());
  EXPECT_EQ(kMinDelayPackets << 8, dm_->TargetLevel());
}
// Tests that skipped sequence numbers (simulating empty packets) are handled
// correctly.
TEST_F(DelayManagerTest, EmptyPacketsReported) {
  SetPacketAudioLength(kFrameSizeMs);
  // The first packet arrives.
  InsertNextPacket();
  // One frame size later ...
  IncreaseTime(kFrameSizeMs);

  // ... skip 10 sequence numbers, simulating that 10 empty packets were
  // received but never inserted, and report each of them.
  seq_no_ += 10;
  int reported = 0;
  while (reported < 10) {
    dm_->RegisterEmptyPacket();
    ++reported;
  }

  // The second packet arrives. The detector should be updated once, with an
  // inter-arrival time of 1 packet and a (base) target level of 1. It answers
  // false, meaning no peaks found.
  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
  InsertNextPacket();
  const int kTargetQ8 = 1 << 8;
  EXPECT_EQ(kTargetQ8, dm_->TargetLevel());  // In Q8.
}
// Same as above, but do not call RegisterEmptyPacket. Observe the target level
// increase dramatically.
TEST_F(DelayManagerTest, EmptyPacketsNotReported) {
  SetPacketAudioLength(kFrameSizeMs);
  // The first packet arrives.
  InsertNextPacket();
  // One frame size later ...
  IncreaseTime(kFrameSizeMs);

  // ... skip 10 sequence numbers, simulating that 10 empty packets were
  // received but never inserted. They are not reported this time.
  seq_no_ += 10;

  // The second packet arrives. The detector should be updated once, now with
  // an inter-arrival time of 10 packets and a (base) target level of 10. It
  // answers false, meaning no peaks found.
  EXPECT_CALL(detector_, Update(10, 10)).WillOnce(Return(false));
  InsertNextPacket();

  // The target level is 10 times higher than when the packets are reported.
  const int kTargetQ8 = 10 * (1 << 8);
  EXPECT_EQ(kTargetQ8, dm_->TargetLevel());  // In Q8.
}
TEST_F(DelayManagerTest, Failures) {
  // An invalid sample rate is refused.
  const int update_result = dm_->Update(0, 0, -1);
  EXPECT_EQ(-1, update_result);

  // Zero and negative packet sizes are refused.
  EXPECT_EQ(-1, dm_->SetPacketAudioLength(0));
  EXPECT_EQ(-1, dm_->SetPacketAudioLength(-1));

  // A minimum delay above the maximum delay is refused.
  EXPECT_TRUE(dm_->SetMaximumDelay(10));
  EXPECT_FALSE(dm_->SetMinimumDelay(20));

  // A maximum delay below the minimum delay is refused.
  EXPECT_TRUE(dm_->SetMaximumDelay(100));
  EXPECT_TRUE(dm_->SetMinimumDelay(80));
  EXPECT_FALSE(dm_->SetMaximumDelay(60));
}
} // namespace webrtc

View File

@ -0,0 +1,118 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
#include <algorithm> // max
#include "webrtc/rtc_base/checks.h"
#include "webrtc/rtc_base/safe_conversions.h"
namespace webrtc {
// The DelayPeakDetector keeps track of severe inter-arrival times, called
// delay peaks. When a peak is observed, the "height" (the time elapsed since
// the previous packet arrival) and the peak "period" (the time since the last
// observed peak) are recorded in a list. When enough peaks have been observed,
// peak-mode is engaged and the DelayManager asks the DelayPeakDetector for
// the worst peak height.
// Out-of-line defaulted destructor; nothing to release beyond the members.
DelayPeakDetector::~DelayPeakDetector() = default;
// Starts outside peak mode, with a zero detection threshold and an empty peak
// history. |tick_timer| is stored and later used to create stopwatches.
DelayPeakDetector::DelayPeakDetector(const TickTimer* tick_timer)
    : peak_found_(false), peak_detection_threshold_(0), tick_timer_(tick_timer) {
  // No peak period is being timed until the first peak is observed.
  RTC_DCHECK(!peak_period_stopwatch_);
}
// Forgets all recorded peaks, leaves peak mode and stops timing the peak
// period.
void DelayPeakDetector::Reset() {
  peak_history_.clear();
  peak_found_ = false;
  peak_period_stopwatch_.reset();
}
// Calculates the threshold in number of packets.
void DelayPeakDetector::SetPacketAudioLength(int length_ms) {
  // Non-positive lengths are ignored and leave the threshold untouched.
  if (length_ms <= 0) {
    return;
  }
  // The threshold is the number of whole packets that fit in |kPeakHeightMs|.
  peak_detection_threshold_ = kPeakHeightMs / length_ms;
}
// Returns the peak-mode state computed by the latest CheckPeakConditions().
bool DelayPeakDetector::peak_found() {
  return peak_found_;
}
int DelayPeakDetector::MaxPeakHeight() const {
int max_height = -1; // Returns -1 for an empty history.
std::list<Peak>::const_iterator it;
for (it = peak_history_.begin(); it != peak_history_.end(); ++it) {
max_height = std::max(max_height, it->peak_height_packets);
}
return max_height;
}
uint64_t DelayPeakDetector::MaxPeakPeriod() const {
auto max_period_element = std::max_element(
peak_history_.begin(), peak_history_.end(),
[](Peak a, Peak b) { return a.period_ms < b.period_ms; });
if (max_period_element == peak_history_.end()) {
return 0; // |peak_history_| is empty.
}
RTC_DCHECK_GT(max_period_element->period_ms, 0);
return max_period_element->period_ms;
}
bool DelayPeakDetector::Update(int inter_arrival_time, int target_level) {
  // An inter-arrival time counts as a delay peak when it exceeds the target
  // level by more than the detection threshold, or when it is more than twice
  // the target level.
  const bool is_peak =
      inter_arrival_time > target_level + peak_detection_threshold_ ||
      inter_arrival_time > 2 * target_level;
  if (!is_peak) {
    return CheckPeakConditions();
  }

  if (!peak_period_stopwatch_) {
    // This is the first peak. Start timing the peak period.
    peak_period_stopwatch_ = tick_timer_->GetNewStopwatch();
    return CheckPeakConditions();
  }

  const auto elapsed_ms = peak_period_stopwatch_->ElapsedMs();
  if (elapsed_ms > 0) {
    if (elapsed_ms <= kMaxPeakPeriodMs) {
      // Not the first peak, and the period is valid: record the peak.
      Peak peak_data;
      peak_data.period_ms = elapsed_ms;
      peak_data.peak_height_packets = inter_arrival_time;
      peak_history_.push_back(peak_data);
      // Drop the oldest entries once the history is over its size limit.
      while (peak_history_.size() > kMaxNumPeaks) {
        peak_history_.pop_front();
      }
      peak_period_stopwatch_ = tick_timer_->GetNewStopwatch();
    } else if (elapsed_ms <= 2 * kMaxPeakPeriodMs) {
      // Invalid peak because the period is too long. Restart the period timer
      // and start looking for the next peak.
      peak_period_stopwatch_ = tick_timer_->GetNewStopwatch();
    } else {
      // More than 2 times the maximum period has elapsed since the last peak
      // was registered. It seems that the network conditions have changed.
      // Reset the peak statistics.
      Reset();
    }
  }
  return CheckPeakConditions();
}
// Recomputes and returns the peak-mode state. Peak mode is on when enough
// peaks are recorded and the time since the latest one is at most twice the
// longest recorded peak period.
bool DelayPeakDetector::CheckPeakConditions() {
  const bool enough_peaks = peak_history_.size() >= kMinPeaksToTrigger;
  // The stopwatch is only consulted when there are enough peaks.
  peak_found_ =
      enough_peaks &&
      peak_period_stopwatch_->ElapsedMs() <= 2 * MaxPeakPeriod();
  return peak_found_;
}
} // namespace webrtc

View File

@ -0,0 +1,74 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_PEAK_DETECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_PEAK_DETECTOR_H_
#include <string.h> // size_t
#include <list>
#include <memory>
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
#include "webrtc/rtc_base/constructormagic.h"
namespace webrtc {
// Keeps track of delay peaks, i.e., unusually long packet inter-arrival
// times. For each peak the height (in packets) and the period since the
// previous peak (in ms) are stored in a bounded history, which decides
// whether peak mode is active.
class DelayPeakDetector {
 public:
  // |tick_timer| is stored and used to create the stopwatch that measures
  // the peak period. Explicit, so that a TickTimer pointer is never silently
  // converted into a detector.
  explicit DelayPeakDetector(const TickTimer* tick_timer);
  virtual ~DelayPeakDetector();

  // Clears the peak history and leaves peak mode.
  virtual void Reset();

  // Notifies the DelayPeakDetector of how much audio data is carried in each
  // packet. Non-positive values are ignored.
  virtual void SetPacketAudioLength(int length_ms);

  // Returns true if peak-mode is active. That is, delay peaks were observed
  // recently.
  virtual bool peak_found();

  // Calculates and returns the maximum delay peak height. Returns -1 if no
  // delay peaks have been observed recently. The unit is number of packets.
  virtual int MaxPeakHeight() const;

  // Calculates and returns the maximum delay peak distance in ms (strictly
  // larger than 0), or 0 if no delay peaks have been observed recently.
  virtual uint64_t MaxPeakPeriod() const;

  // Updates the DelayPeakDetector with a new inter-arrival time (in packets)
  // and the current target buffer level (needed to decide if a peak is observed
  // or not). Returns true if peak-mode is active, false if not.
  virtual bool Update(int inter_arrival_time, int target_level);

 private:
  // Maximum number of peaks kept in |peak_history_|.
  static const size_t kMaxNumPeaks = 8;
  // Number of recorded peaks needed before peak mode can be entered.
  static const size_t kMinPeaksToTrigger = 2;
  // Divided by the packet length to get |peak_detection_threshold_|.
  static const int kPeakHeightMs = 78;
  // Peaks with a longer period than this are not recorded.
  static const int kMaxPeakPeriodMs = 10000;

  // One recorded delay peak.
  struct Peak {
    uint64_t period_ms;       // Time since the previous peak.
    int peak_height_packets;  // Inter-arrival time of the peak.
  };

  // Recomputes |peak_found_| from the history and the time since the latest
  // peak, and returns it.
  bool CheckPeakConditions();

  std::list<Peak> peak_history_;
  bool peak_found_;
  int peak_detection_threshold_;  // In packets.
  const TickTimer* tick_timer_;
  // Measures the time since the latest peak; null until a first peak has been
  // observed and after Reset().
  std::unique_ptr<TickTimer::Stopwatch> peak_period_stopwatch_;

  RTC_DISALLOW_COPY_AND_ASSIGN(DelayPeakDetector);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_PEAK_DETECTOR_H_

View File

@ -0,0 +1,143 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DelayPeakDetector class.
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
TEST(DelayPeakDetector, CreateAndDestroy) {
  TickTimer tick_timer;
  // Heap-allocate the detector so that creation and destruction are explicit.
  DelayPeakDetector* detector = new DelayPeakDetector(&tick_timer);
  // A fresh detector is not in peak mode.
  const bool in_peak_mode = detector->peak_found();
  EXPECT_FALSE(in_peak_mode);
  delete detector;
}
TEST(DelayPeakDetector, EmptyHistory) {
  TickTimer tick_timer;
  DelayPeakDetector detector(&tick_timer);
  // Without any recorded peaks the height is -1 and the period is 0.
  const int height = detector.MaxPeakHeight();
  const uint64_t period = detector.MaxPeakPeriod();
  EXPECT_EQ(-1, height);
  EXPECT_EQ(0u, period);
}
// Inject a series of packet arrivals into the detector. Three of the packets
// have suffered delays. After the third delay peak, peak-mode is expected to
// start. This should then continue until it is disengaged due to lack of peaks.
TEST(DelayPeakDetector, TriggerPeakMode) {
  TickTimer tick_timer;
  DelayPeakDetector detector(&tick_timer);
  const int kPacketSizeMs = 30;
  detector.SetPacketAudioLength(kPacketSizeMs);

  // Start from regular arrival times; 0 ms, 30 ms, 60 ms, 90 ms, ...
  const int kNumPackets = 1000;
  int arrival_times_ms[kNumPackets];
  for (int i = 0; i < kNumPackets; ++i) {
    arrival_times_ms[i] = i * kPacketSizeMs;
  }

  // Then delay three of the packets. Peak-mode should be triggered after the
  // third one.
  const int kPeakDelayMs = 100;
  arrival_times_ms[100] += kPeakDelayMs;  // First delay peak.
  arrival_times_ms[200] += kPeakDelayMs;  // Second delay peak.
  arrival_times_ms[400] += kPeakDelayMs;  // Third delay peak.

  // The second peak period is the longest, 200 packets.
  const uint64_t kWorstPeakPeriod = 200 * kPacketSizeMs;
  int peak_mode_start_ms = arrival_times_ms[400];
  // Peak-mode should be disengaged once no peaks have been observed for two
  // period times.
  int peak_mode_end_ms = peak_mode_start_ms + 2 * kWorstPeakPeriod;

  const int kTargetBufferLevel = 1;  // Define peaks to be iat > 2.

  // Feed the packets to the detector while stepping the clock 10 ms at a
  // time. Begin with the second packet to get a proper IAT.
  int next = 1;
  for (int time = 0; next < kNumPackets; time += 10) {
    for (; next < kNumPackets && arrival_times_ms[next] <= time; ++next) {
      const int iat_packets =
          (arrival_times_ms[next] - arrival_times_ms[next - 1]) /
          kPacketSizeMs;
      const bool expect_peak_mode =
          time >= peak_mode_start_ms && time <= peak_mode_end_ms;
      const bool peak_mode = detector.Update(iat_packets, kTargetBufferLevel);
      if (expect_peak_mode) {
        EXPECT_TRUE(peak_mode);
        EXPECT_EQ(kWorstPeakPeriod, detector.MaxPeakPeriod());
        EXPECT_EQ(kPeakDelayMs / kPacketSizeMs + 1, detector.MaxPeakHeight());
      } else {
        EXPECT_FALSE(peak_mode);
      }
    }
    tick_timer.Increment();
  }
}
// Same arrival pattern as in TriggerPeakMode, but with the target buffer level
// raised to 2, so that only inter-arrival times > 4 count as peaks. The late
// packets in this pattern stay below that bar, so peak-mode must never engage.
TEST(DelayPeakDetector, DoNotTriggerPeakMode) {
  TickTimer tick_timer;
  DelayPeakDetector detector(&tick_timer);
  const int kPacketSizeMs = 30;
  detector.SetPacketAudioLength(kPacketSizeMs);

  // Nominal schedule: packet k arrives at k * kPacketSizeMs.
  const int kNumPackets = 1000;
  int arrival_times_ms[kNumPackets];
  for (int k = 0; k < kNumPackets; ++k) {
    arrival_times_ms[k] = k * kPacketSizeMs;
  }

  // Push three packets back in time.
  const int kPeakDelayMs = 100;
  const int kFirstPeakPacket = 100;
  const int kSecondPeakPacket = 200;
  const int kThirdPeakPacket = 400;
  arrival_times_ms[kFirstPeakPacket] += kPeakDelayMs;
  arrival_times_ms[kSecondPeakPacket] += kPeakDelayMs;
  arrival_times_ms[kThirdPeakPacket] += kPeakDelayMs;

  // Peaks are defined as inter-arrival times > 4.
  const int kTargetBufferLevel = 2;

  // Simulate the clock in 10 ms steps and hand over every packet that is due.
  // The first packet is skipped so that each update has a proper IAT.
  int now_ms = 0;
  for (int next = 1; next < kNumPackets;) {
    for (; next < kNumPackets && arrival_times_ms[next] <= now_ms; ++next) {
      const int iat_packets =
          (arrival_times_ms[next] - arrival_times_ms[next - 1]) /
          kPacketSizeMs;
      EXPECT_FALSE(detector.Update(iat_packets, kTargetBufferLevel));
    }
    tick_timer.Increment();
    now_ms += 10;  // Advance the simulated time by 10 ms.
  }
}
// With reordered packets, the DelayPeakDetector can be updated several times
// in a row without the tick timer advancing, while the reported inter-arrival
// time is still non-zero. This test makes sure that such back-to-back updates
// do not cause problems.
TEST(DelayPeakDetector, ZeroDistancePeaks) {
  TickTimer tick_timer;
  DelayPeakDetector detector(&tick_timer);
  const int kPacketSizeMs = 30;
  detector.SetPacketAudioLength(kPacketSizeMs);

  const int kTargetBufferLevel = 2;  // Peaks are inter-arrival times > 4.
  // Large enough to count as a peak.
  const int kInterArrivalTime = 3 * kTargetBufferLevel;

  // Three updates with no clock movement in between.
  const int kNumUpdates = 3;
  for (int n = 0; n < kNumUpdates; ++n) {
    EXPECT_FALSE(detector.Update(kInterArrivalTime, kTargetBufferLevel));
  }
}
} // namespace webrtc

View File

@ -0,0 +1,368 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
#include <assert.h>
#include <string.h> // Access to memset.
#include <algorithm> // Access to min, max.
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
// Table of constants used in method DspHelper::ParabolicFit().
// Each of the 17 rows holds three coefficients. ParabolicFit() uses column 0
// to derive its decision thresholds (|strt| and |stp|), and columns 1 and 2 as
// the weights applied to |den| and |num|, respectively, when it evaluates the
// interpolated peak value; that weighted sum is then divided by 256.
const int16_t DspHelper::kParabolaCoefficients[17][3] = {
{ 120, 32, 64 },
{ 140, 44, 75 },
{ 150, 50, 80 },
{ 160, 57, 85 },
{ 180, 72, 96 },
{ 200, 89, 107 },
{ 210, 98, 112 },
{ 220, 108, 117 },
{ 240, 128, 128 },
{ 260, 150, 139 },
{ 270, 162, 144 },
{ 280, 174, 149 },
{ 300, 200, 160 },
{ 320, 228, 171 },
{ 330, 242, 176 },
{ 340, 257, 181 },
{ 360, 288, 192 } };
// Filter coefficients used when downsampling from the indicated sample rates
// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. The corresponding Q0
// values are provided in the comments before each array.
// The tables are selected by DspHelper::DownsampleTo4kHz() based on the input
// sample rate. Every table is symmetric, and its coefficients sum to
// approximately 4096, i.e., close to 1.0 in Q12.
// Q0 values: {0.3, 0.4, 0.3}.
const int16_t DspHelper::kDownsample8kHzTbl[3] = { 1229, 1638, 1229 };
// Q0 values: {0.15, 0.2, 0.3, 0.2, 0.15}.
const int16_t DspHelper::kDownsample16kHzTbl[5] = { 614, 819, 1229, 819, 614 };
// Q0 values: {0.1425, 0.1251, 0.1525, 0.1628, 0.1525, 0.1251, 0.1425}.
const int16_t DspHelper::kDownsample32kHzTbl[7] = {
584, 512, 625, 667, 625, 512, 584 };
// Q0 values: {0.2487, 0.0952, 0.1042, 0.1074, 0.1042, 0.0952, 0.2487}.
const int16_t DspHelper::kDownsample48kHzTbl[7] = {
1019, 390, 427, 440, 427, 390, 1019 };
// Scales |length| samples from |input| with a gradually changing gain and
// writes the result to |output|. The gain starts at |factor| (Q14) and is
// changed by |increment| (Q20) after every sample. Internally the gain is
// tracked in Q20; it is never allowed to drop below zero, and the Q14 gain is
// capped at 16384. Returns the Q14 gain after the last update.
int DspHelper::RampSignal(const int16_t* input,
                          size_t length,
                          int factor,
                          int increment,
                          int16_t* output) {
  // Q20 version of the gain, with a rounding offset of half a Q14 step.
  int gain_q20 = (factor << 6) + 32;
  // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14?
  size_t n = 0;
  while (n < length) {
    // Apply the current Q14 gain with rounding.
    const int scaled = factor * input[n] + 8192;
    output[n] = scaled >> 14;
    // Step the Q20 gain, but never go negative.
    gain_q20 = std::max(gain_q20 + increment, 0);
    // Back to Q14, limited to unity gain.
    factor = std::min(gain_q20 >> 6, 16384);
    ++n;
  }
  return factor;
}
int DspHelper::RampSignal(int16_t* signal,
size_t length,
int factor,
int increment) {
return RampSignal(signal, length, factor, increment, signal);
}
// Ramps |length| samples of |signal| in place, beginning at |start_index|.
// The gain starts at |factor| (Q14) and is changed by |increment| (Q20) after
// every sample; it is kept non-negative and the Q14 gain is capped at 16384.
// Returns the Q14 gain after the last update.
int DspHelper::RampSignal(AudioVector* signal,
                          size_t start_index,
                          size_t length,
                          int factor,
                          int increment) {
  // Q20 version of the gain, with a rounding offset of half a Q14 step.
  int gain_q20 = (factor << 6) + 32;
  // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14?
  const size_t end_index = start_index + length;
  size_t pos = start_index;
  while (pos < end_index) {
    // Apply the current Q14 gain with rounding.
    (*signal)[pos] = (factor * (*signal)[pos] + 8192) >> 14;
    // Step the Q20 gain, but never go negative.
    gain_q20 = std::max(gain_q20 + increment, 0);
    // Back to Q14, limited to unity gain.
    factor = std::min(gain_q20 >> 6, 16384);
    ++pos;
  }
  return factor;
}
// Ramps |length| samples, beginning at |start_index|, in every channel of
// |signal|. Each channel is ramped from the same initial |factor|. If the
// requested range extends beyond the end of |signal|, nothing is modified and
// |factor| is returned unchanged. Otherwise the gain obtained after ramping
// the last channel is returned (0 if there are no channels).
int DspHelper::RampSignal(AudioMultiVector* signal,
                          size_t start_index,
                          size_t length,
                          int factor,
                          int increment) {
  const size_t end_index = start_index + length;
  assert(end_index <= signal->Size());
  if (end_index > signal->Size()) {
    // Wrong parameters. Leave the signal alone and hand back the input gain.
    return factor;
  }

  int last_channel_factor = 0;
  size_t ch = 0;
  while (ch < signal->Channels()) {
    // Every channel starts over from the same |factor|.
    last_channel_factor =
        RampSignal(&(*signal)[ch], start_index, length, factor, increment);
    ++ch;
  }
  return last_channel_factor;
}
// Locates |num_peaks| maxima in |data| and refines each of them.
//
// For every peak, the index of the largest value is found with
// WebRtcSpl_MaxIndexW16(). Interior peaks are refined with ParabolicFit();
// a peak at index 0 is reported as is, and a peak at the last searched
// position is either refined with ParabolicFit() or, if the following sample
// is at least as large, approximated linearly between the two samples.
// The resulting positions in |peak_index| are expressed with a resolution of
// 2 * |fs_mult| steps per sample of |data|; the values go to |peak_value|.
//
// Side effect: for all peaks but the last one, up to two samples on each side
// of the found maximum are zeroed in |data|, so that the next iteration finds
// a different peak.
//
// If |num_peaks| is 0, the function does nothing.
void DspHelper::PeakDetection(int16_t* data, size_t data_length,
                              size_t num_peaks, int fs_mult,
                              size_t* peak_index, int16_t* peak_value) {
  size_t min_index = 0;
  size_t max_index = 0;

  // Note: the bound is written as |i < num_peaks| rather than
  // |i <= num_peaks - 1|; the latter wraps around for num_peaks == 0 and would
  // make the loop run far outside the output arrays.
  for (size_t i = 0; i < num_peaks; i++) {
    if (num_peaks == 1) {
      // Single peak. The parabola fit assumes that an extra point is
      // available; worst case it gets a zero on the high end of the signal.
      // TODO(hlundin): This can potentially get much worse. It breaks the
      // API contract, that the length of |data| is |data_length|.
      data_length++;
    }

    peak_index[i] = WebRtcSpl_MaxIndexW16(data, data_length - 1);

    if (i != num_peaks - 1) {
      // Remember the neighborhood of this peak; it is cleared further down so
      // that the next search skips it.
      min_index = (peak_index[i] > 2) ? (peak_index[i] - 2) : 0;
      max_index = std::min(data_length - 1, peak_index[i] + 2);
    }

    if ((peak_index[i] != 0) && (peak_index[i] != (data_length - 2))) {
      // Interior peak: one sample is available on each side.
      ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i],
                   &peak_value[i]);
    } else {
      if (peak_index[i] == data_length - 2) {
        if (data[peak_index[i]] > data[peak_index[i] + 1]) {
          ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i],
                       &peak_value[i]);
        } else {
          // The following sample is at least as large as the peak.
          // Linear approximation.
          peak_value[i] = (data[peak_index[i]] + data[peak_index[i] + 1]) >> 1;
          peak_index[i] = (peak_index[i] * 2 + 1) * fs_mult;
        }
      } else {
        // Peak at index 0: no sample to the left, so no fit is possible.
        peak_value[i] = data[peak_index[i]];
        peak_index[i] = peak_index[i] * 2 * fs_mult;
      }
    }

    if (i != num_peaks - 1) {
      memset(&data[min_index], 0,
             sizeof(data[0]) * (max_index - min_index + 1));
    }
  }
}
// Refines a peak using the three samples in |signal_points|, with
// |signal_points[1]| being the center sample.
// On entry, |*peak_index| holds the index of the center sample. On exit,
// |*peak_index| is that index multiplied by 2 * |fs_mult|, plus or minus a
// step count found by the search below, and |*peak_value| is the estimated
// peak value.
// NOTE(review): the final branch below fills 13 entries of |fit_index|, which
// looks like it is meant for fs_mult == 6; |fs_mult| is not validated here, so
// confirm that callers only pass 1, 2, 4 or 6.
void DspHelper::ParabolicFit(int16_t* signal_points, int fs_mult,
size_t* peak_index, int16_t* peak_value) {
// Row indices into kParabolaCoefficients. Which rows are used depends on
// |fs_mult|; entry |fs_mult| always maps to row 8 and entry 2 * |fs_mult| to
// row 16.
uint16_t fit_index[13];
if (fs_mult == 1) {
fit_index[0] = 0;
fit_index[1] = 8;
fit_index[2] = 16;
} else if (fs_mult == 2) {
fit_index[0] = 0;
fit_index[1] = 4;
fit_index[2] = 8;
fit_index[3] = 12;
fit_index[4] = 16;
} else if (fs_mult == 4) {
fit_index[0] = 0;
fit_index[1] = 2;
fit_index[2] = 4;
fit_index[3] = 6;
fit_index[4] = 8;
fit_index[5] = 10;
fit_index[6] = 12;
fit_index[7] = 14;
fit_index[8] = 16;
} else {
fit_index[0] = 0;
fit_index[1] = 1;
fit_index[2] = 3;
fit_index[3] = 4;
fit_index[4] = 5;
fit_index[5] = 7;
fit_index[6] = 8;
fit_index[7] = 9;
fit_index[8] = 11;
fit_index[9] = 12;
fit_index[10] = 13;
fit_index[11] = 15;
fit_index[12] = 16;
}
// num = -3 * signal_points[0] + 4 * signal_points[1] - signal_points[2];
// den = signal_points[0] - 2 * signal_points[1] + signal_points[2];
int32_t num = (signal_points[0] * -3) + (signal_points[1] * 4)
- signal_points[2];
int32_t den = signal_points[0] + (signal_points[1] * -2) + signal_points[2];
int32_t temp = num * 120;
// |flag| serves both as the loop condition and as the step counter of the
// searches below; setting it to 0 terminates the search.
int flag = 1;
// |stp| is the distance in column 0 between the center row and the row one
// step below it; |strt| is the midpoint between those two rows.
int16_t stp = kParabolaCoefficients[fit_index[fs_mult]][0]
- kParabolaCoefficients[fit_index[fs_mult - 1]][0];
int16_t strt = (kParabolaCoefficients[fit_index[fs_mult]][0]
+ kParabolaCoefficients[fit_index[fs_mult - 1]][0]) / 2;
int16_t lmt;
if (temp < -den * strt) {
// Search towards lower rows, i.e., the peak is placed before the center
// sample. The search stops after at most |fs_mult| steps.
lmt = strt - stp;
while (flag) {
if ((flag == fs_mult) || (temp > -den * lmt)) {
*peak_value = (den * kParabolaCoefficients[fit_index[fs_mult - flag]][1]
+ num * kParabolaCoefficients[fit_index[fs_mult - flag]][2]
+ signal_points[0] * 256) / 256;
*peak_index = *peak_index * 2 * fs_mult - flag;
flag = 0;
} else {
flag++;
lmt -= stp;
}
}
} else if (temp > -den * (strt + stp)) {
// Search towards higher rows, i.e., the peak is placed after the center
// sample. The search stops after at most |fs_mult| steps.
lmt = strt + 2 * stp;
while (flag) {
if ((flag == fs_mult) || (temp < -den * lmt)) {
int32_t temp_term_1 =
den * kParabolaCoefficients[fit_index[fs_mult+flag]][1];
int32_t temp_term_2 =
num * kParabolaCoefficients[fit_index[fs_mult+flag]][2];
int32_t temp_term_3 = signal_points[0] * 256;
*peak_value = (temp_term_1 + temp_term_2 + temp_term_3) / 256;
*peak_index = *peak_index * 2 * fs_mult + flag;
flag = 0;
} else {
flag++;
lmt += stp;
}
}
} else {
// Neither threshold was crossed: keep the center sample as the peak.
*peak_value = signal_points[1];
*peak_index = *peak_index * 2 * fs_mult;
}
}
// Searches the lags |min_lag|..|max_lag| (inclusive) for the one at which
// |signal| is most similar to itself displaced backwards by that lag. The
// similarity measure is the sum of absolute differences over |length| samples.
// Note that samples at negative offsets, down to |signal| - |max_lag|, are
// read. The smallest sum found is written to |distortion_value| and the
// corresponding lag is returned. For equal sums, the smallest lag wins.
size_t DspHelper::MinDistortion(const int16_t* signal, size_t min_lag,
                                size_t max_lag, size_t length,
                                int32_t* distortion_value) {
  size_t best_lag = 0;
  int32_t lowest_sum = WEBRTC_SPL_WORD32_MAX;

  for (size_t lag = min_lag; lag <= max_lag; ++lag) {
    const int16_t* const reference = signal;
    const int16_t* const displaced = signal - lag;

    // Sum of absolute differences for this lag.
    int32_t abs_diff_sum = 0;
    for (size_t k = 0; k < length; ++k) {
      abs_diff_sum += WEBRTC_SPL_ABS_W32(reference[k] - displaced[k]);
    }

    // Keep the candidate only if it is strictly better than the best so far.
    if (abs_diff_sum < lowest_sum) {
      best_lag = lag;
      lowest_sum = abs_diff_sum;
    }
  }

  *distortion_value = lowest_sum;
  return best_lag;
}
// Mixes |length| samples of |input1| and |input2| into |output|. The gain of
// |input1| starts at |*mix_factor| (Q14) and drops by |factor_decrement| per
// sample, while the gain of |input2| starts at 16384 - |*mix_factor| and grows
// by the same amount. The gain of |input1| after the last sample is written
// back to |*mix_factor|.
void DspHelper::CrossFade(const int16_t* input1, const int16_t* input2,
                          size_t length, int16_t* mix_factor,
                          int16_t factor_decrement, int16_t* output) {
  int16_t gain1 = *mix_factor;
  int16_t gain2 = 16384 - gain1;

  size_t n = 0;
  while (n < length) {
    // Weighted sum in Q14, with rounding.
    const int mixed = gain1 * input1[n] + gain2 * input2[n] + 8192;
    output[n] = mixed >> 14;
    gain1 -= factor_decrement;
    gain2 += factor_decrement;
    ++n;
  }

  *mix_factor = gain1;
}
// Scales |length| samples of |input| with a changing gain and writes them to
// |output|. The gain starts at |*factor| (Q14) and is changed by |increment|
// (Q20) after every sample. The Q20 gain is kept non-negative and the Q14 gain
// is capped at 16384. The final Q14 gain is written back to |*factor|.
void DspHelper::UnmuteSignal(const int16_t* input, size_t length,
                             int16_t* factor, int increment,
                             int16_t* output) {
  uint16_t gain_q14 = *factor;
  // Q20 version of the gain, with a rounding offset of half a Q14 step.
  int32_t gain_q20 = (static_cast<int32_t>(gain_q14) << 6) + 32;

  size_t n = 0;
  while (n < length) {
    // Apply the current Q14 gain with rounding.
    output[n] = (gain_q14 * input[n] + 8192) >> 14;
    // Step the Q20 gain, never below zero, and convert back to Q14.
    gain_q20 = std::max(gain_q20 + increment, 0);
    gain_q14 = std::min(16384, gain_q20 >> 6);
    ++n;
  }

  *factor = gain_q14;
}
// Fades |length| samples of |signal| in place. The gain starts at unity
// (16384 in Q14, tracked internally in Q20) and |mute_slope| is subtracted
// from the Q20 gain after every sample.
void DspHelper::MuteSignal(int16_t* signal, int mute_slope, size_t length) {
  // Unity gain in Q20, with a rounding offset of half a Q14 step.
  int32_t gain_q20 = (16384 << 6) + 32;

  size_t n = 0;
  while (n < length) {
    const int32_t gain_q14 = gain_q20 >> 6;
    signal[n] = (gain_q14 * signal[n] + 8192) >> 14;
    gain_q20 -= mute_slope;
    ++n;
  }
}
// Downsamples |input| from |input_rate_hz| to 4 kHz, writing |output_length|
// samples to |output|.
//
// |input_rate_hz| must be 8000, 16000, 32000 or 48000; any other value
// triggers an assert and returns -1. If |compensate_delay| is false, the
// filter delay passed to the resampler is set to 0.
//
// Returns -1 if the input is shorter than the filter history that must precede
// the first filtered sample; otherwise returns the result of
// WebRtcSpl_DownsampleFast() (-1 if the input signal is too short, 0
// otherwise).
int DspHelper::DownsampleTo4kHz(const int16_t* input, size_t input_length,
                                size_t output_length, int input_rate_hz,
                                bool compensate_delay, int16_t* output) {
  // Set filter parameters depending on input frequency.
  // NOTE: The phase delay values are wrong compared to the true phase delay
  // of the filters. However, the error is preserved (through the +1 term) for
  // consistency.
  const int16_t* filter_coefficients;  // Filter coefficients.
  size_t filter_length;                // Number of coefficients.
  size_t filter_delay;                 // Phase delay in samples.
  int16_t factor;                      // Decimation factor (input rate / 4000).
  switch (input_rate_hz) {
    case 8000: {
      filter_length = 3;
      factor = 2;
      filter_coefficients = kDownsample8kHzTbl;
      filter_delay = 1 + 1;
      break;
    }
    case 16000: {
      filter_length = 5;
      factor = 4;
      filter_coefficients = kDownsample16kHzTbl;
      filter_delay = 2 + 1;
      break;
    }
    case 32000: {
      filter_length = 7;
      factor = 8;
      filter_coefficients = kDownsample32kHzTbl;
      filter_delay = 3 + 1;
      break;
    }
    case 48000: {
      filter_length = 7;
      factor = 12;
      filter_coefficients = kDownsample48kHzTbl;
      filter_delay = 3 + 1;
      break;
    }
    default: {
      assert(false);
      return -1;
    }
  }

  if (!compensate_delay) {
    // Disregard delay compensation.
    filter_delay = 0;
  }

  // The first |filter_length| - 1 input samples serve as filter history. If
  // the input is shorter than that, the length computed below would wrap
  // around (size_t is unsigned) and the filter would be told to read far past
  // the end of |input|. Report "input too short" instead.
  if (input_length < filter_length - 1) {
    return -1;
  }

  // Returns -1 if input signal is too short; 0 otherwise.
  return WebRtcSpl_DownsampleFast(
      &input[filter_length - 1], input_length - filter_length + 1, output,
      output_length, filter_coefficients, filter_length, factor, filter_delay);
}
} // namespace webrtc

View File

@ -0,0 +1,144 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
#include <string.h> // Access to size_t.
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class contains various signal processing functions, all implemented as
// static methods. The class holds no per-instance state; copying and
// assignment are disabled.
class DspHelper {
public:
// Filter coefficients used when downsampling from the indicated sample rates
// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12.
static const int16_t kDownsample8kHzTbl[3];
static const int16_t kDownsample16kHzTbl[5];
static const int16_t kDownsample32kHzTbl[7];
static const int16_t kDownsample48kHzTbl[7];
// Constants used to mute and unmute over 5 samples. The coefficients are
// in Q15.
static const int kMuteFactorStart8kHz = 27307;
static const int kMuteFactorIncrement8kHz = -5461;
static const int kUnmuteFactorStart8kHz = 5461;
static const int kUnmuteFactorIncrement8kHz = 5461;
static const int kMuteFactorStart16kHz = 29789;
static const int kMuteFactorIncrement16kHz = -2979;
static const int kUnmuteFactorStart16kHz = 2979;
static const int kUnmuteFactorIncrement16kHz = 2979;
static const int kMuteFactorStart32kHz = 31208;
static const int kMuteFactorIncrement32kHz = -1560;
static const int kUnmuteFactorStart32kHz = 1560;
static const int kUnmuteFactorIncrement32kHz = 1560;
static const int kMuteFactorStart48kHz = 31711;
static const int kMuteFactorIncrement48kHz = -1057;
static const int kUnmuteFactorStart48kHz = 1057;
static const int kUnmuteFactorIncrement48kHz = 1057;
// Multiplies the signal with a gradually changing factor.
// The first sample is multiplied with |factor| (in Q14). For each sample,
// |factor| is increased (additive) by the |increment| (in Q20), which can
// be negative. The factor is kept within [0, 16384]. Returns the scale factor
// after the last increment.
static int RampSignal(const int16_t* input,
size_t length,
int factor,
int increment,
int16_t* output);
// Same as above, but with the samples of |signal| being modified in-place.
static int RampSignal(int16_t* signal,
size_t length,
int factor,
int increment);
// Same as above, but processes |length| samples from |signal|, starting at
// |start_index|.
static int RampSignal(AudioVector* signal,
size_t start_index,
size_t length,
int factor,
int increment);
// Same as above, but for an AudioMultiVector. All channels are ramped
// starting from the same |factor|. If |start_index| + |length| exceeds the
// size of |signal|, nothing is modified and |factor| is returned unaltered.
static int RampSignal(AudioMultiVector* signal,
size_t start_index,
size_t length,
int factor,
int increment);
// Peak detection with parabolic fit. Looks for |num_peaks| maxima in |data|,
// having length |data_length| and sample rate multiplier |fs_mult|. The peak
// locations and values are written to the arrays |peak_index| and
// |peak_value|, respectively. Both arrays must hold at least |num_peaks|
// elements.
// Note that |data| is modified: samples around each found peak (except the
// last one) are set to zero. When |num_peaks| is 1, the implementation may
// read one element beyond |data_length|.
static void PeakDetection(int16_t* data, size_t data_length,
size_t num_peaks, int fs_mult,
size_t* peak_index, int16_t* peak_value);
// Estimates the height and location of a maximum. The three values in the
// array |signal_points| are used as basis for a parabolic fit, which is then
// used to find the maximum in an interpolated signal. The |signal_points| are
// assumed to be from a 4 kHz signal, while the maximum, written to
// |peak_index| and |peak_value| is given in the full sample rate, as
// indicated by the sample rate multiplier |fs_mult|.
// On input, |peak_index| must hold the index of the middle signal point.
static void ParabolicFit(int16_t* signal_points, int fs_mult,
size_t* peak_index, int16_t* peak_value);
// Calculates the sum-abs-diff for |signal| when compared to a displaced
// version of itself. Returns the displacement lag that results in the minimum
// distortion. The resulting distortion is written to |distortion_value|.
// The values of |min_lag| and |max_lag| are boundaries for the search.
// The displaced version is read from before |signal|, so at least |max_lag|
// samples must be available ahead of the |signal| pointer.
static size_t MinDistortion(const int16_t* signal, size_t min_lag,
size_t max_lag, size_t length,
int32_t* distortion_value);
// Mixes |length| samples from |input1| and |input2| together and writes the
// result to |output|. The gain for |input1| starts at |mix_factor| (Q14) and
// is decreased by |factor_decrement| (Q14) for each sample. The gain for
// |input2| is the complement 16384 - mix_factor. On return, |mix_factor|
// holds the gain for |input1| after the last sample.
static void CrossFade(const int16_t* input1, const int16_t* input2,
size_t length, int16_t* mix_factor,
int16_t factor_decrement, int16_t* output);
// Scales |input| with an increasing gain. Applies |factor| (Q14) to the first
// sample and increases the gain by |increment| (Q20) for each sample. The
// result is written to |output|. |length| samples are processed. On return,
// |factor| holds the gain after the last sample.
static void UnmuteSignal(const int16_t* input, size_t length, int16_t* factor,
int increment, int16_t* output);
// Starts at unity gain and gradually fades out |signal|. For each sample,
// the gain is reduced by |mute_slope|. The implementation subtracts
// |mute_slope| from a gain kept in Q20, so |mute_slope| is in Q20.
// |length| samples are processed.
static void MuteSignal(int16_t* signal, int mute_slope, size_t length);
// Downsamples |input| from |input_rate_hz| to 4 kHz sample rate. The input
// has |input_length| samples, and the method will write |output_length|
// samples to |output|. Compensates for the phase delay of the downsampling
// filters if |compensate_delay| is true. Returns -1 if the input is too short
// to produce |output_length| samples, otherwise 0. Supported values for
// |input_rate_hz| are 8000, 16000, 32000 and 48000; other values return -1.
static int DownsampleTo4kHz(const int16_t* input, size_t input_length,
size_t output_length, int input_rate_hz,
bool compensate_delay, int16_t* output);
private:
// Table of constants used in method DspHelper::ParabolicFit().
static const int16_t kParabolaCoefficients[17][3];
RTC_DISALLOW_COPY_AND_ASSIGN(DspHelper);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/test/gtest.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Ramps a constant signal from gain 0 towards gain 1 using both array-based
// RampSignal overloads, and checks the returned gain and every output sample.
TEST(DspHelper, RampSignalArray) {
  static const int kLen = 100;
  int16_t input[kLen];
  int16_t output[kLen];
  // Constant input signal with value 1000.
  for (int n = 0; n < kLen; ++n) {
    input[n] = 1000;
  }

  int start_factor = 0;
  // Go from 0 to 1 (in Q14) over the whole array. The factor is in Q14 while
  // |increment| is in Q20, which explains the shift by 6.
  int increment = (16384 << 6) / kLen;

  // Overload with separate input and output arrays.
  int stop_factor =
      DspHelper::RampSignal(input, kLen, start_factor, increment, output);
  EXPECT_EQ(16383, stop_factor);  // Just short of 1 in Q14.
  for (int n = 0; n < kLen; ++n) {
    EXPECT_EQ(1000 * n / kLen, output[n]);
  }

  // In-place overload. (This overwrites |input|.)
  stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment);
  EXPECT_EQ(16383, stop_factor);  // Just short of 1 in Q14.
  for (int n = 0; n < kLen; ++n) {
    EXPECT_EQ(1000 * n / kLen, input[n]);
  }
}
// Ramps the middle third of a constant multi-channel signal and checks that
// only that part is modified, identically in all channels.
TEST(DspHelper, RampSignalAudioMultiVector) {
  static const int kLen = 100;
  static const int kChannels = 5;
  AudioMultiVector input(kChannels, kLen * 3);
  // Constant input signal with value 1000 in all channels.
  for (int n = 0; n < kLen * 3; ++n) {
    for (int ch = 0; ch < kChannels; ++ch) {
      input[ch][n] = 1000;
    }
  }

  // The ramp begins at |start_index| and lasts for |kLen| samples.
  int start_index = kLen;
  int start_factor = 0;
  // Go from 0 to 1 (in Q14) in |kLen| samples. The factor is in Q14 while
  // |increment| is in Q20, which explains the shift by 6.
  int increment = (16384 << 6) / kLen;
  int stop_factor = DspHelper::RampSignal(&input, start_index, kLen,
                                          start_factor, increment);
  EXPECT_EQ(16383, stop_factor);  // Just short of 1 in Q14.

  // First third: must be untouched.
  for (int n = 0; n < kLen; ++n) {
    for (int ch = 0; ch < kChannels; ++ch) {
      EXPECT_EQ(1000, input[ch][n]);
    }
  }
  // Middle third: must be ramped.
  for (int n = kLen; n < 2 * kLen; ++n) {
    for (int ch = 0; ch < kChannels; ++ch) {
      EXPECT_EQ(1000 * (n - kLen) / kLen, input[ch][n]);
    }
  }
  // Last third: must be untouched.
  for (int n = 2 * kLen; n < 3 * kLen; ++n) {
    for (int ch = 0; ch < kChannels; ++ch) {
      EXPECT_EQ(1000, input[ch][n]);
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,248 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h"
#include <assert.h>
#include <algorithm> // max
#include "webrtc/rtc_base/checks.h"
#include "webrtc/rtc_base/logging.h"
// Modify the code to obtain backwards bit-exactness. Once bit-exactness is no
// longer required, this #define should be removed (and the code that it
// enables).
#define LEGACY_BITEXACT
namespace webrtc {
// Creates an empty buffer configured for the sample rate |fs_hz|.
// The rate-dependent members are zero-initialized first, because
// SetSampleRate() leaves them untouched when it rejects |fs_hz|; without the
// initializers they would hold indeterminate values that GetEvent() later
// reads. The return value of SetSampleRate() is not propagated, since a
// constructor cannot return an error code.
DtmfBuffer::DtmfBuffer(int fs_hz)
    : max_extrapolation_samples_(0),
      frame_len_samples_(0) {
  SetSampleRate(fs_hz);
}
// Destructor. Defaulted; the members clean up after themselves.
DtmfBuffer::~DtmfBuffer() = default;
// Removes all events from the buffer. The sample rate settings are kept.
void DtmfBuffer::Flush() {
buffer_.clear();
}
// The ParseEvent method parses 4 bytes from |payload| according to this format
// from RFC 4733:
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | event |E|R| volume | duration |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// Legend (adapted from RFC 4733)
// - event: The event field is a number between 0 and 255 identifying a
// specific telephony event. The buffer will not accept any event
// numbers larger than 15.
// - E: If set to a value of one, the "end" bit indicates that this
// packet contains the end of the event. For long-lasting events
// that have to be split into segments, only the final packet for
// the final segment will have the E bit set.
// - R: Reserved.
// - volume: For DTMF digits and other events representable as tones, this
// field describes the power level of the tone, expressed in dBm0
// after dropping the sign. Power levels range from 0 to -63 dBm0.
// Thus, larger values denote lower volume. ParseEvent keeps the
// six volume bits (0-63), and InsertEvent accepts that whole range.
// - duration: The duration field indicates the duration of the event or segment
// being reported, in timestamp units, expressed as an unsigned
// integer in network byte order. For a non-zero value, the event
// or segment began at the instant identified by the RTP timestamp
// and has so far lasted as long as indicated by this parameter.
// The event may or may not have ended. If the event duration
// exceeds the maximum representable by the duration field, the
// event is split into several contiguous segments. The buffer will
// discard zero-duration events.
//
// Decodes the first four bytes of |payload| into |event| and stores
// |rtp_timestamp| as the event's timestamp. Both pointers must be non-null
// (enforced with RTC_CHECK). Returns kPayloadTooShort if fewer than four
// bytes are available, in which case |event| is left untouched; kOK otherwise.
int DtmfBuffer::ParseEvent(uint32_t rtp_timestamp,
                           const uint8_t* payload,
                           size_t payload_length_bytes,
                           DtmfEvent* event) {
  RTC_CHECK(payload);
  RTC_CHECK(event);

  const size_t kEventPayloadBytes = 4;
  if (payload_length_bytes < kEventPayloadBytes) {
    LOG(LS_WARNING) << "ParseEvent payload too short";
    return kPayloadTooShort;
  }

  // Byte 0: event number.
  event->event_no = payload[0];
  // Byte 1: end bit (MSB), reserved bit, and six bits of volume.
  const uint8_t flags_and_volume = payload[1];
  event->end_bit = (flags_and_volume & 0x80) != 0;
  event->volume = flags_and_volume & 0x3F;
  // Bytes 2-3: duration, most significant byte first.
  event->duration = (payload[2] << 8) | payload[3];
  event->timestamp = rtp_timestamp;
  return kOK;
}
// Adds a DTMF event to the buffer. The event is expected to have been parsed
// from the bit stream with ParseEvent before it is handed to this method.
// A DTMF event can last long, and its duration is usually unknown when the
// first packet describing it is sent. RFC 4733 therefore specifies that
// several packets are sent for one and the same event while it is ongoing
// (typically while the user keeps the key pressed). All those packets carry
// the same start timestamp and event number, and a duration that is the
// cumulative duration from the start. For that reason, this method first
// looks for a matching event already in the buffer and, if one is found,
// merges the new event into it. Otherwise the event is appended and the
// buffer is re-sorted.
// Returns kInvalidEventParameters if the event number is outside 0-15, the
// volume is outside 0-63, or the duration is outside 1-65535; kOK otherwise.
int DtmfBuffer::InsertEvent(const DtmfEvent& event) {
  const bool event_no_ok = event.event_no >= 0 && event.event_no <= 15;
  const bool volume_ok = event.volume >= 0 && event.volume <= 63;
  const bool duration_ok = event.duration > 0 && event.duration <= 65535;
  if (!(event_no_ok && volume_ok && duration_ok)) {
    LOG(LS_WARNING) << "InsertEvent invalid parameters";
    return kInvalidEventParameters;
  }

  for (DtmfList::iterator it = buffer_.begin(); it != buffer_.end(); ++it) {
    if (MergeEvents(it, event)) {
      // The new event was merged into a matching, already buffered, event.
      return kOK;
    }
  }

  // No match: store as a new event, then restore the ordering defined by
  // CompareEvents.
  buffer_.push_back(event);
  buffer_.sort(CompareEvents);
  return kOK;
}
// Looks for an event that should be played at |current_timestamp|.
// The buffer is scanned from the front. If an event covering
// |current_timestamp| is found, it is copied to |event| (unless |event| is
// NULL) and true is returned. Events that ended before |current_timestamp|
// are erased along the way. Returns false if no event applies.
// With LEGACY_BITEXACT defined, two extra behaviors are enabled: a matching
// event with the end bit set is erased once the next frame would pass its end,
// and an expired event that is not followed by another event is returned one
// last time (returning true) before being erased.
// NOTE(review): the timestamp comparisons below are plain unsigned
// comparisons; wrap-around of the 32-bit timestamp does not appear to be
// handled in this method.
bool DtmfBuffer::GetEvent(uint32_t current_timestamp, DtmfEvent* event) {
DtmfList::iterator it = buffer_.begin();
while (it != buffer_.end()) {
// |event_end| is an estimate of where the current event ends. If the end
// bit is set, we know that the event ends at |timestamp| + |duration|.
uint32_t event_end = it->timestamp + it->duration;
#ifdef LEGACY_BITEXACT
// Set to true below if an unfinished event is followed by another event.
bool next_available = false;
#endif
if (!it->end_bit) {
// If the end bit is not set, we allow extrapolation of the event for
// some time.
event_end += max_extrapolation_samples_;
DtmfList::iterator next = it;
++next;
if (next != buffer_.end()) {
// If there is a next event in the buffer, we will not extrapolate over
// the start of that new event.
event_end = std::min(event_end, next->timestamp);
#ifdef LEGACY_BITEXACT
next_available = true;
#endif
}
}
if (current_timestamp >= it->timestamp
&& current_timestamp <= event_end) { // TODO(hlundin): Change to <.
// Found a matching event.
if (event) {
event->event_no = it->event_no;
event->end_bit = it->end_bit;
event->volume = it->volume;
event->duration = it->duration;
event->timestamp = it->timestamp;
}
#ifdef LEGACY_BITEXACT
if (it->end_bit &&
current_timestamp + frame_len_samples_ >= event_end) {
// We are done playing this. Erase the event.
buffer_.erase(it);
}
#endif
return true;
} else if (current_timestamp > event_end) { // TODO(hlundin): Change to >=.
// Erase old event. Operation returns a valid pointer to the next element
// in the list.
#ifdef LEGACY_BITEXACT
if (!next_available) {
// No event follows this one: report it one last time before erasing it.
if (event) {
event->event_no = it->event_no;
event->end_bit = it->end_bit;
event->volume = it->volume;
event->duration = it->duration;
event->timestamp = it->timestamp;
}
it = buffer_.erase(it);
return true;
} else {
it = buffer_.erase(it);
}
#else
it = buffer_.erase(it);
#endif
} else {
// The event starts after |current_timestamp|; move on to the next one.
++it;
}
}
return false;
}
// Returns the number of events currently held in the buffer.
size_t DtmfBuffer::Length() const {
return buffer_.size();
}
// Returns true if the buffer holds no events.
bool DtmfBuffer::Empty() const {
return buffer_.empty();
}
// Reconfigures the buffer for the sample rate |fs_hz|. Only 8000, 16000,
// 32000 and 48000 Hz are accepted; for any other rate, kInvalidSampleRate is
// returned and the current settings are kept. On success, the extrapolation
// limit is set to 70 ms and the frame length to 10 ms worth of samples, and
// kOK is returned.
int DtmfBuffer::SetSampleRate(int fs_hz) {
  switch (fs_hz) {
    case 8000:
    case 16000:
    case 32000:
    case 48000:
      break;
    default:
      return kInvalidSampleRate;
  }
  frame_len_samples_ = fs_hz / 100;
  max_extrapolation_samples_ = 7 * fs_hz / 100;
  return kOK;
}
// Returns true if |a| and |b| are considered to be the same event.
// They are defined as equal if they share the same timestamp and event number.
// The special case with long-lasting events that have to be split into
// segments is not handled in this method. Such segments will be treated as
// separate events in the buffer.
bool DtmfBuffer::SameEvent(const DtmfEvent& a, const DtmfEvent& b) {
  if (a.event_no != b.event_no) {
    return false;
  }
  return a.timestamp == b.timestamp;
}
// Merges |event| into the buffered event that |it| points to, provided that
// SameEvent() considers the two to be the same event. Returns true if the
// merge took place, and false if the events did not match (in which case
// nothing is modified).
bool DtmfBuffer::MergeEvents(DtmfList::iterator it, const DtmfEvent& event) {
  if (!SameEvent(*it, event)) {
    return false;
  }
  // The duration may only grow while the buffered event is still open. Once
  // the end bit has been received, the duration is left as it is.
  if (!it->end_bit) {
    it->duration = std::max(event.duration, it->duration);
  }
  // The end bit is sticky: it can be set by the new event, but never cleared.
  if (event.end_bit) {
    it->end_bit = true;
  }
  return true;
}
// Ordering predicate used when sorting the buffer; returns true if |a| is to
// be placed before |b|. Events are primarily ordered by their start timestamp,
// with wrap-around taken into account. Should two events have the same start
// timestamp, which is unlikely, the event number decides. Note that packets
// belonging to the same event, and thus having the same start timestamp, have
// already been merged by the time the buffer is sorted.
bool DtmfBuffer::CompareEvents(const DtmfEvent& a, const DtmfEvent& b) {
  if (a.timestamp != b.timestamp) {
    // Modulo-2^32 distance from |a| forward to |b|. |a| comes first if that
    // distance is less than half the timestamp range.
    const uint32_t forward_distance =
        static_cast<uint32_t>(b.timestamp - a.timestamp);
    return forward_distance < 0xFFFFFFFF / 2;
  }
  return a.event_no < b.event_no;
}
} // namespace webrtc

View File

@ -0,0 +1,114 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
#include <list>
#include <string> // size_t
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Plain data holder describing one DTMF event.
struct DtmfEvent {
  uint32_t timestamp;  // Start timestamp of the event.
  int event_no;        // Event number.
  int volume;          // Volume field of the event.
  int duration;        // Duration of the event so far.
  bool end_bit;        // True once the end of the event has been signaled.

  // Creates an event with all fields set to zero and the end bit cleared.
  DtmfEvent() : DtmfEvent(0, 0, 0, 0, false) {}

  // Creates an event from explicit field values.
  DtmfEvent(uint32_t ts, int ev, int vol, int dur, bool end)
      : timestamp(ts), event_no(ev), volume(vol), duration(dur), end_bit(end) {}
};
// This is the buffer holding DTMF events while waiting for them to be played.
// Events are kept in a list that is sorted by start timestamp whenever a new
// event is added.
class DtmfBuffer {
public:
// Return codes used by the methods of this class.
enum BufferReturnCodes {
kOK = 0,
kInvalidPointer,
kPayloadTooShort,
kInvalidEventParameters,
kInvalidSampleRate
};
// Set up the buffer for use at sample rate |fs_hz|.
explicit DtmfBuffer(int fs_hz);
virtual ~DtmfBuffer();
// Flushes the buffer.
virtual void Flush();
// Static method to parse 4 bytes from |payload| as a DTMF event (RFC 4733)
// and write the parsed information into the struct |event|. Input variable
// |rtp_timestamp| is simply copied into the struct.
// Returns kPayloadTooShort if |payload_length_bytes| is less than 4, and kOK
// otherwise. Neither |payload| nor |event| may be NULL.
static int ParseEvent(uint32_t rtp_timestamp,
const uint8_t* payload,
size_t payload_length_bytes,
DtmfEvent* event);
// Inserts |event| into the buffer. The method looks for a matching event and
// merges the two if a match is found.
// Returns kInvalidEventParameters if the event number, volume or duration of
// |event| is out of range, and kOK otherwise.
virtual int InsertEvent(const DtmfEvent& event);
// Checks if a DTMF event should be played at time |current_timestamp|. If so,
// the method returns true; otherwise false. The parameters of the event to
// play will be written to |event|. |event| may be NULL, in which case nothing
// is written. Note that the method also erases old events from the buffer.
virtual bool GetEvent(uint32_t current_timestamp, DtmfEvent* event);
// Number of events in the buffer.
virtual size_t Length() const;
// Returns true if there are no events in the buffer.
virtual bool Empty() const;
// Set a new sample rate. Returns kInvalidSampleRate if |fs_hz| is not one of
// 8000, 16000, 32000 or 48000; otherwise kOK.
virtual int SetSampleRate(int fs_hz);
private:
typedef std::list<DtmfEvent> DtmfList;
// Number of samples that an event without the end bit set may be extended
// beyond its reported duration. Set by SetSampleRate().
int max_extrapolation_samples_;
// Number of samples in 10 ms at the current sample rate.
int frame_len_samples_; // TODO(hlundin): Remove this later.
// Compares two events and returns true if they are the same.
static bool SameEvent(const DtmfEvent& a, const DtmfEvent& b);
// Merges |event| to the event pointed out by |it|. The method checks that
// the two events are the same (using the SameEvent method), and merges them
// if that was the case, returning true. If the events are not the same, false
// is returned.
bool MergeEvents(DtmfList::iterator it, const DtmfEvent& event);
// Method used by the sort algorithm to rank events in the buffer.
static bool CompareEvents(const DtmfEvent& a, const DtmfEvent& b);
// The buffered events, sorted using CompareEvents.
DtmfList buffer_;
RTC_DISALLOW_COPY_AND_ASSIGN(DtmfBuffer);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_

View File

@ -0,0 +1,301 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h"
#ifdef WIN32
#include <winsock2.h> // ntohl()
#else
#include <arpa/inet.h> // ntohl()
#endif
#include <iostream>
#include "webrtc/test/gtest.h"
// Modify the tests so that they pass with the modifications done to DtmfBuffer
// for backwards bit-exactness. Once bit-exactness is no longer required, this
// #define should be removed (and the code that it enables).
#define LEGACY_BITEXACT
namespace webrtc {
// Sample rate (Hz) handed to every DtmfBuffer constructed by the tests below.
static int sample_rate_hz = 8000;
// Builds a 4-byte DTMF event payload from its individual fields and returns it
// as a uint32_t whose in-memory byte order is the on-the-wire order, so the
// address of the result can be handed to the parser as a byte buffer.
//
// Field layout (most significant bit first):
//  0                   1                   2                   3
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |     event     |E|R| volume    |          duration             |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// |event| is truncated to 8 bits, |volume| to 6 bits and |duration| to 16
// bits. The reserved bit (R) is always left at zero.
static uint32_t MakeDtmfPayload(int event, bool end, int volume, int duration) {
  const uint32_t event_field = static_cast<uint32_t>(event & 0x00FF) << 24;
  const uint32_t end_field = end ? 0x00800000u : 0x00000000u;
  const uint32_t volume_field = static_cast<uint32_t>(volume & 0x003F) << 16;
  const uint32_t duration_field = static_cast<uint32_t>(duration & 0xFFFF);
  // Swap from host to network byte order (a no-op on big-endian hosts).
  return ntohl(event_field | end_field | volume_field | duration_field);
}
// Field-by-field comparison of two events; returns true only when duration,
// end bit, event number, timestamp and volume all match.
static bool EqualEvents(const DtmfEvent& a, const DtmfEvent& b) {
  if (a.duration != b.duration) {
    return false;
  }
  if (a.end_bit != b.end_bit) {
    return false;
  }
  if (a.event_no != b.event_no) {
    return false;
  }
  if (a.timestamp != b.timestamp) {
    return false;
  }
  return a.volume == b.volume;
}
// Smoke test: a DtmfBuffer can be heap-allocated and deleted again.
TEST(DtmfBuffer, CreateAndDestroy) {
DtmfBuffer* buffer = new DtmfBuffer(sample_rate_hz);
delete buffer;
}
// Test the event parser: a payload built by MakeDtmfPayload must be parsed
// back into the same field values, and a payload length of 3 bytes must be
// rejected as too short.
TEST(DtmfBuffer, ParseEvent) {
int event_no = 7;
bool end_bit = true;
int volume = 17;
int duration = 4711;
uint32_t timestamp = 0x12345678;
uint32_t payload = MakeDtmfPayload(event_no, end_bit, volume, duration);
// View the 32-bit payload as the 4-byte buffer the parser expects.
uint8_t* payload_ptr = reinterpret_cast<uint8_t*>(&payload);
DtmfEvent event;
EXPECT_EQ(DtmfBuffer::kOK,
DtmfBuffer::ParseEvent(timestamp, payload_ptr, sizeof(payload),
&event));
EXPECT_EQ(duration, event.duration);
EXPECT_EQ(end_bit, event.end_bit);
EXPECT_EQ(event_no, event.event_no);
EXPECT_EQ(timestamp, event.timestamp);
EXPECT_EQ(volume, event.volume);
// One byte less than a full event payload.
EXPECT_EQ(DtmfBuffer::kPayloadTooShort,
DtmfBuffer::ParseEvent(timestamp, payload_ptr, 3, &event));
}
// Inserts a single, already ended event and polls the buffer before, at,
// during and after the event. The event must stay in the buffer while it is
// being played and be gone once the current time has passed its end.
TEST(DtmfBuffer, SimpleInsertAndGet) {
int event_no = 7;
bool end_bit = true;
int volume = 17;
int duration = 4711;
uint32_t timestamp = 0x12345678;
DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
DtmfEvent out_event;
// Too early to get event.
EXPECT_FALSE(buffer.GetEvent(timestamp - 10, &out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Get the event at its starting timestamp.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(event, out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Get the event some time into the event.
EXPECT_TRUE(buffer.GetEvent(timestamp + duration / 2, &out_event));
EXPECT_TRUE(EqualEvents(event, out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Give a "current" timestamp after the event has ended.
// In the legacy bit-exact mode the first call after the end is still expected
// to succeed; only the following call reports that nothing is left.
#ifdef LEGACY_BITEXACT
EXPECT_TRUE(buffer.GetEvent(timestamp + duration + 10, &out_event));
#endif
EXPECT_FALSE(buffer.GetEvent(timestamp + duration + 10, &out_event));
EXPECT_EQ(0u, buffer.Length());
EXPECT_TRUE(buffer.Empty());
}
// Inserts three updates of the same event (same timestamp and event number,
// growing duration, end bit set on the last one). The buffer must hold a
// single entry that reflects the last update.
TEST(DtmfBuffer, MergingPackets) {
int event_no = 0;
bool end_bit = false;
int volume = 17;
int duration = 80;
uint32_t timestamp = 0x12345678;
DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
// Second update: longer duration, still no end bit.
event.duration += 80;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
// Third update: longer again, and now marked as ended.
event.duration += 80;
event.end_bit = true;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
// All three inserts must have collapsed into one buffered event.
EXPECT_EQ(1u, buffer.Length());
DtmfEvent out_event;
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(event, out_event));
}
// This test case inserts one shorter event completely overlapped by one longer
// event. The expected outcome is that only the longer event is played.
// Note that with LEGACY_BITEXACT defined, the assertions below instead expect
// the long event once more and then the short event to be returned after the
// long event has ended.
TEST(DtmfBuffer, OverlappingEvents) {
int event_no = 0;
bool end_bit = true;
int volume = 1;
int duration = 80;
// The short event starts 80 samples into the long one.
uint32_t timestamp = 0x12345678 + 80;
DtmfEvent short_event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(short_event));
event_no = 10;
end_bit = false;
timestamp = 0x12345678;
DtmfEvent long_event(timestamp, event_no, volume, duration, end_bit);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
// Extend the long event in two further updates to a total of 240 samples,
// setting the end bit on the last one.
long_event.duration += 80;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
long_event.duration += 80;
long_event.end_bit = true;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
// Two distinct events are stored: the short one and the merged long one.
EXPECT_EQ(2u, buffer.Length());
DtmfEvent out_event;
// Expect to get the long event.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(long_event, out_event));
// Expect no more events.
#ifdef LEGACY_BITEXACT
EXPECT_TRUE(buffer.GetEvent(timestamp + long_event.duration + 10,
&out_event));
EXPECT_TRUE(EqualEvents(long_event, out_event));
EXPECT_TRUE(buffer.GetEvent(timestamp + long_event.duration + 10,
&out_event));
EXPECT_TRUE(EqualEvents(short_event, out_event));
#else
EXPECT_FALSE(buffer.GetEvent(timestamp + long_event.duration + 10,
&out_event));
#endif
EXPECT_TRUE(buffer.Empty());
}
// Verifies how long an event without end bit keeps being returned after its
// reported duration has passed, and that it gives way to a newer event.
TEST(DtmfBuffer, ExtrapolationTime) {
int event_no = 0;
bool end_bit = false;
int volume = 1;
int duration = 80;
uint32_t timestamp = 0x12345678;
DtmfEvent event1(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(1u, buffer.Length());
DtmfEvent out_event;
// Get the event at the start.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
// Also get the event 100 samples after the end of the event (since we're
// missing the end bit).
uint32_t timestamp_now = timestamp + duration + 100;
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
// Insert another event starting back-to-back with the previous event.
timestamp += duration;
event_no = 1;
DtmfEvent event2(timestamp, event_no, volume, duration, end_bit);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(2u, buffer.Length());
// Now we expect to get the new event when supplying |timestamp_now|.
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(EqualEvents(event2, out_event));
// Expect the first event to be erased now.
EXPECT_EQ(1u, buffer.Length());
// Move |timestamp_now| to more than 560 samples after the end of the second
// event. Expect that event to be erased.
timestamp_now = timestamp + duration + 600;
// In the legacy bit-exact mode one more successful call is expected before
// the buffer reports that nothing is left.
#ifdef LEGACY_BITEXACT
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
#endif
EXPECT_FALSE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(buffer.Empty());
}
// Inserts one event just before the 32-bit timestamp wraps and one at
// timestamp 0, in both insertion orders. In either case the pre-wrap event
// must be the one returned when asking for its start timestamp.
TEST(DtmfBuffer, TimestampWraparound) {
int event_no = 0;
bool end_bit = true;
int volume = 1;
int duration = 80;
// |event1| starts |duration| samples before the largest timestamp value.
uint32_t timestamp1 = 0xFFFFFFFF - duration;
DtmfEvent event1(timestamp1, event_no, volume, duration, end_bit);
// |event2| starts at timestamp 0, i.e. after the wrap.
uint32_t timestamp2 = 0;
DtmfEvent event2(timestamp2, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(2u, buffer.Length());
DtmfEvent out_event;
EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
// The expected number of remaining events differs between the legacy
// bit-exact behavior and the non-legacy behavior.
#ifdef LEGACY_BITEXACT
EXPECT_EQ(1u, buffer.Length());
#else
EXPECT_EQ(2u, buffer.Length());
#endif
buffer.Flush();
// Reverse the insert order. Expect same results.
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(2u, buffer.Length());
EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
#ifdef LEGACY_BITEXACT
EXPECT_EQ(1u, buffer.Length());
#else
EXPECT_EQ(2u, buffer.Length());
#endif
}
// Checks that InsertEvent rejects out-of-range field values with
// kInvalidEventParameters. One field at a time is set to a value just outside
// the accepted range and then restored to a valid value before moving on.
TEST(DtmfBuffer, InvalidEvents) {
int event_no = 0;
bool end_bit = true;
int volume = 1;
int duration = 80;
uint32_t timestamp = 0x12345678;
DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
// Invalid event number.
event.event_no = -1;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.event_no = 16;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.event_no = 0; // Valid value;
// Invalid volume.
event.volume = -1;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.volume = 64;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.volume = 0; // Valid value;
// Invalid duration.
event.duration = -1;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.duration = 0;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
// One more than the largest value that fits in 16 bits.
event.duration = 0xFFFF + 1;
EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
event.duration = 1; // Valid value;
// Finish with a valid event, just to verify that all is ok.
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
}
} // namespace webrtc

View File

@ -0,0 +1,218 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This class provides a generator for DTMF tones. The tone generation is based
// on a sinusoid recursion. Each sinusoid is generated using a recursion
// formula; x[n] = a * x[n-1] - x[n-2], where the coefficient
// a = 2*cos(2*pi*f/fs). The recursion is started with x[-1] = 0 and
// x[-2] = sin(2*pi*f/fs). (Note that with this initialization, the resulting
// sinusoid gets a "negative" rotation; x[n] = sin(-2*pi*f/fs * n + phi), but
// kept this way due to historical reasons.)
// TODO(hlundin): Change to positive rotation?
//
// Each key on the telephone keypad corresponds to an "event", 0-15. Each event
// is mapped to a tone pair, with a low and a high frequency. There are four
// low and four high frequencies, each corresponding to a row and column,
// respectively, on the keypad as illustrated below.
//
// 1209 Hz 1336 Hz 1477 Hz 1633 Hz
// 697 Hz 1 2 3 12
// 770 Hz 4 5 6 13
// 852 Hz 7 8 9 14
// 941 Hz 10 0 11 15
#include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h"
#include "webrtc/rtc_base/arraysize.h"
#include "webrtc/rtc_base/checks.h"
namespace webrtc {
// The filter coefficient a = 2*cos(2*pi*f/fs) for the low frequency tone, for
// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
// Values are in Q14.
// Indexed as [sample-rate index][event]; one row per sample rate, in the order
// listed above.
const int DtmfToneGenerator::kCoeff1[4][16] = {
{ 24219, 27980, 27980, 27980, 26956, 26956, 26956, 25701, 25701, 25701,
24219, 24219, 27980, 26956, 25701, 24219 },
{ 30556, 31548, 31548, 31548, 31281, 31281, 31281, 30951, 30951, 30951,
30556, 30556, 31548, 31281, 30951, 30556 },
{ 32210, 32462, 32462, 32462, 32394, 32394, 32394, 32311, 32311, 32311,
32210, 32210, 32462, 32394, 32311, 32210 },
{ 32520, 32632, 32632, 32632, 32602, 32602, 32602, 32564, 32564, 32564,
32520, 32520, 32632, 32602, 32564, 32520 } };
// The filter coefficient a = 2*cos(2*pi*f/fs) for the high frequency tone, for
// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
// Values are in Q14.
// Indexed as [sample-rate index][event]; one row per sample rate, in the order
// listed above.
const int DtmfToneGenerator::kCoeff2[4][16] = {
{ 16325, 19073, 16325, 13085, 19073, 16325, 13085, 19073, 16325, 13085,
19073, 13085, 9315, 9315, 9315, 9315},
{ 28361, 29144, 28361, 27409, 29144, 28361, 27409, 29144, 28361, 27409,
29144, 27409, 26258, 26258, 26258, 26258},
{ 31647, 31849, 31647, 31400, 31849, 31647, 31400, 31849, 31647, 31400,
31849, 31400, 31098, 31098, 31098, 31098},
{ 32268, 32359, 32268, 32157, 32359, 32268, 32157, 32359, 32268, 32157,
32359, 32157, 32022, 32022, 32022, 32022} };
// The initialization value x[-2] = sin(2*pi*f/fs) for the low frequency tone,
// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
// Values are in Q14.
// Indexed as [sample-rate index][event]; one row per sample rate, in the order
// listed above.
const int DtmfToneGenerator::kInitValue1[4][16] = {
{ 11036, 8528, 8528, 8528, 9315, 9315, 9315, 10163, 10163, 10163, 11036,
11036, 8528, 9315, 10163, 11036},
{ 5918, 4429, 4429, 4429, 4879, 4879, 4879, 5380, 5380, 5380, 5918, 5918,
4429, 4879, 5380, 5918},
{ 3010, 2235, 2235, 2235, 2468, 2468, 2468, 2728, 2728, 2728, 3010, 3010,
2235, 2468, 2728, 3010},
{ 2013, 1493, 1493, 1493, 1649, 1649, 1649, 1823, 1823, 1823, 2013, 2013,
1493, 1649, 1823, 2013 } };
// The initialization value x[-2] = sin(2*pi*f/fs) for the high frequency tone,
// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
// Values are in Q14.
// Indexed as [sample-rate index][event]; one row per sample rate, in the order
// listed above.
const int DtmfToneGenerator::kInitValue2[4][16] = {
{ 14206, 13323, 14206, 15021, 13323, 14206, 15021, 13323, 14206, 15021,
13323, 15021, 15708, 15708, 15708, 15708},
{ 8207, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8979,
9801, 9801, 9801, 9801},
{ 4249, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4685,
5164, 5164, 5164, 5164},
{ 2851, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 3148,
3476, 3476, 3476, 3476} };
// Amplitude multipliers for volume values 0 through 63, corresponding to
// 0 dBm0 through -63 dBm0. Values are in Q14.
// The table is indexed directly by the attenuation value and was generated
// with the following Python snippet:
// for a in range(0, 64):
// print round(16141.0 * 10**(-float(a)/20))
const int DtmfToneGenerator::kAmplitude[64] = {
16141, 14386, 12821, 11427, 10184, 9077, 8090, 7210, 6426, 5727, 5104, 4549,
4054, 3614, 3221, 2870, 2558, 2280, 2032, 1811, 1614, 1439, 1282, 1143,
1018, 908, 809, 721, 643, 573, 510, 455, 405, 361, 322, 287, 256, 228, 203,
181, 161, 144, 128, 114, 102, 91, 81, 72, 64, 57, 51, 45, 41, 36, 32, 29,
26, 23, 20, 18, 16, 14, 13, 11 };
// Constructor. Creates a generator in the uninitialized state with zeroed
// coefficients and amplitude. The sample history arrays are not set here; they
// are filled in by Init().
DtmfToneGenerator::DtmfToneGenerator()
: initialized_(false),
coeff1_(0),
coeff2_(0),
amplitude_(0) {
}
// Initialize the DTMF generator with sample rate |fs| Hz (8000, 16000, 32000
// or 48000), |event| (0-15) and |attenuation| (0-63 dB, used as index into
// kAmplitude).
// Returns 0 on success. Returns kParameterError, leaving the generator
// uninitialized, if |event| or |attenuation| is outside its valid range.
int DtmfToneGenerator::Init(int fs, int event, int attenuation) {
  // Stay uninitialized until every lookup below has succeeded.
  initialized_ = false;

  // Map the sample rate to the row used in the lookup tables.
  size_t fs_index;
  if (fs == 8000) {
    fs_index = 0;
  } else if (fs == 16000) {
    fs_index = 1;
  } else if (fs == 32000) {
    fs_index = 2;
  } else if (fs == 48000) {
    fs_index = 3;
  } else {
    RTC_NOTREACHED();
    // Unsupported rate in a build where the check above is compiled out: fall
    // back to table row 1, which holds the 16000 Hz values.
    // NOTE(review): the comment here used to read "Default to 8000 Hz", which
    // would be row 0. The index is kept as it was to preserve behavior;
    // confirm which fallback is intended.
    fs_index = 1;
  }

  if (event < 0 || event > 15) {
    return kParameterError;  // Invalid event number.
  }
  if (attenuation < 0 || attenuation > 63) {
    return kParameterError;  // Invalid attenuation.
  }

  // Both values are known to be non-negative from here on, so they can be
  // compared against the (unsigned) table sizes without sign mismatch.
  const size_t event_index = static_cast<size_t>(event);
  const size_t attenuation_index = static_cast<size_t>(attenuation);

  // Look up oscillator coefficient for low and high frequencies.
  RTC_DCHECK_GT(arraysize(kCoeff1), fs_index);
  RTC_DCHECK_GT(arraysize(kCoeff2), fs_index);
  RTC_DCHECK_GT(arraysize(kCoeff1[fs_index]), event_index);
  RTC_DCHECK_GT(arraysize(kCoeff2[fs_index]), event_index);
  coeff1_ = kCoeff1[fs_index][event_index];
  coeff2_ = kCoeff2[fs_index][event_index];

  // Look up amplitude multiplier.
  RTC_DCHECK_GT(arraysize(kAmplitude), attenuation_index);
  amplitude_ = kAmplitude[attenuation_index];

  // Initialize sample history: element 0 holds x[-2] and element 1 holds
  // x[-1] of the recursion.
  RTC_DCHECK_GT(arraysize(kInitValue1), fs_index);
  RTC_DCHECK_GT(arraysize(kInitValue2), fs_index);
  RTC_DCHECK_GT(arraysize(kInitValue1[fs_index]), event_index);
  RTC_DCHECK_GT(arraysize(kInitValue2[fs_index]), event_index);
  sample_history1_[0] = kInitValue1[fs_index][event_index];
  sample_history1_[1] = 0;
  sample_history2_[0] = kInitValue2[fs_index][event_index];
  sample_history2_[1] = 0;

  initialized_ = true;
  return 0;
}
// Reset tone generator to uninitialized state. Only the |initialized_| flag is
// cleared; Generate() returns kNotInitialized until Init() succeeds again.
void DtmfToneGenerator::Reset() {
initialized_ = false;
}
// Generate |num_samples| samples of the DTMF tone pair selected by Init() and
// write them to |output|. The signal is computed into channel 0 and then
// copied to every other channel of |output|.
// Returns the number of generated samples, kNotInitialized if the generator
// is not initialized, or kParameterError if |output| is null.
int DtmfToneGenerator::Generate(size_t num_samples,
                                AudioMultiVector* output) {
  if (!initialized_) {
    return kNotInitialized;
  }
  if (output == nullptr) {
    return kParameterError;
  }

  output->AssertSize(num_samples);
  for (size_t n = 0; n != num_samples; ++n) {
    // Advance both oscillators one step with y[n] = a * y[n - 1] - y[n - 2].
    // The coefficients are in Q14, so the product is rounded and shifted back
    // by 14 bits.
    const int16_t low_now = static_cast<int16_t>(
        ((coeff1_ * sample_history1_[1] + 8192) >> 14) - sample_history1_[0]);
    const int16_t high_now = static_cast<int16_t>(
        ((coeff2_ * sample_history2_[1] + 8192) >> 14) - sample_history2_[0]);

    // Shift the recursion memory: y[n - 1] becomes y[n - 2], and the new
    // sample becomes y[n - 1].
    sample_history1_[0] = sample_history1_[1];
    sample_history1_[1] = low_now;
    sample_history2_[0] = sample_history2_[1];
    sample_history2_[1] = high_now;

    // Mix the tones in Q15; the low frequency tone is attenuated 3 dB through
    // kAmpMultiplier while the high frequency tone gets unity gain.
    int32_t mixed = kAmpMultiplier * low_now + high_now * (1 << 15);
    // Normalize the signal to Q14 with proper rounding.
    mixed = (mixed + 16384) >> 15;
    // Scale the signal to correct volume.
    (*output)[0][n] = static_cast<int16_t>((mixed * amplitude_ + 8192) >> 14);
  }

  // Copy first channel to all other channels.
  for (size_t target = 1; target < output->Channels(); ++target) {
    output->CopyChannel(0, target);
  }
  return static_cast<int>(num_samples);
}
// Returns true if the last Init() call succeeded and Reset() has not been
// called since.
bool DtmfToneGenerator::initialized() const {
return initialized_;
}
} // namespace webrtc

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class provides a generator for DTMF tones.
// Usage: call Init() to select sample rate, event and attenuation, then call
// Generate() to produce samples. Reset() returns the generator to the
// uninitialized state.
class DtmfToneGenerator {
public:
// Negative error codes returned by Init() and Generate().
enum ReturnCodes {
kNotInitialized = -1,
kParameterError = -2,
};
DtmfToneGenerator();
virtual ~DtmfToneGenerator() {}
// Selects the tone pair for |event| at sample rate |fs| Hz with the given
// |attenuation|. Returns 0 on success, otherwise an error code.
virtual int Init(int fs, int event, int attenuation);
// Puts the generator back in the uninitialized state.
virtual void Reset();
// Writes |num_samples| samples of the tone to |output|. Returns the number of
// samples generated, or a negative ReturnCodes value on failure.
virtual int Generate(size_t num_samples, AudioMultiVector* output);
// True if the generator has been successfully initialized.
virtual bool initialized() const;
private:
static const int kCoeff1[4][16]; // 1st oscillator model coefficient table.
static const int kCoeff2[4][16]; // 2nd oscillator model coefficient table.
static const int kInitValue1[4][16]; // Initialization for 1st oscillator.
static const int kInitValue2[4][16]; // Initialization for 2nd oscillator.
static const int kAmplitude[64]; // Amplitude for 0 through -63 dBm0.
static const int16_t kAmpMultiplier = 23171; // 3 dB attenuation (in Q15).
bool initialized_; // True if generator is initialized properly.
int coeff1_; // 1st oscillator coefficient for this event.
int coeff2_; // 2nd oscillator coefficient for this event.
int amplitude_; // Amplitude for this event.
int16_t sample_history1_[2]; // Last 2 samples for the 1st oscillator.
int16_t sample_history2_[2]; // Last 2 samples for the 2nd oscillator.
RTC_DISALLOW_COPY_AND_ASSIGN(DtmfToneGenerator);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_

View File

@ -0,0 +1,179 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DtmfToneGenerator class.
#include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h"
#include <math.h>
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
// Test fixture for DtmfToneGenerator. Provides the reference frequencies for
// each event and two helpers that compare generated samples against a
// floating-point reference.
class DtmfToneGeneratorTest : public ::testing::Test {
 protected:
  // Reference low/high tone frequency for each event number.
  static const double kLowFreqHz[16];
  static const double kHighFreqHz[16];
  // Overall gain of the generator at attenuation 0.
  const double kBaseAttenuation = 16141.0 / 16384.0;
  // Extra gain applied to the low frequency tone only.
  const double k3dbAttenuation = 23171.0 / 32768;
  // Number of samples generated and compared per check.
  const int kNumSamples = 10;

  // For every event 0-15 at sample rate |fs_hz|, generates kNumSamples samples
  // with no attenuation and compares each channel against the sum of two
  // reference sinusoids.
  void TestAllTones(int fs_hz, int channels) {
    const double pi = 3.14159265358979323846;
    const int kAttenuation = 0;
    AudioMultiVector signal(channels);
    for (int event = 0; event < 16; ++event) {
      std::ostringstream trace;
      trace << "Checking event " << event << " at sample rate " << fs_hz;
      SCOPED_TRACE(trace.str());

      ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, kAttenuation));
      EXPECT_TRUE(tone_gen_.initialized());
      EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));

      const double low_hz = kLowFreqHz[event];
      const double high_hz = kHighFreqHz[event];
      for (int n = 0; n < kNumSamples; ++n) {
        // The reference uses the phase index -(n + 1), i.e. the sinusoids run
        // with a negative rotation starting one sample in.
        const double low_tone = sin(2.0 * pi * low_hz / fs_hz * (-n - 1));
        const double high_tone = sin(2.0 * pi * high_hz / fs_hz * (-n - 1));
        double expected = k3dbAttenuation * low_tone + high_tone;
        expected *= kBaseAttenuation;
        expected = ldexp(expected, 14);  // Scale to Q14.
        for (int channel = 0; channel < channels; ++channel) {
          EXPECT_NEAR(expected, static_cast<double>(signal[channel][n]), 25);
        }
      }

      tone_gen_.Reset();
      EXPECT_FALSE(tone_gen_.initialized());
    }
  }

  // For a few events, generates a reference signal at attenuation 0 and checks
  // that signals generated at higher attenuations equal the reference scaled
  // by 10^(-attenuation / 20).
  void TestAmplitudes(int fs_hz, int channels) {
    AudioMultiVector signal(channels);
    AudioMultiVector ref_signal(channels);
    const int event_vec[] = {0, 4, 9, 13};  // Test a few events.
    for (const int event : event_vec) {
      // Create full-scale reference.
      ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, 0));  // 0 attenuation.
      EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &ref_signal));

      // Test every 5 steps (to save time).
      for (int attenuation = 1; attenuation <= 63; attenuation += 5) {
        std::ostringstream trace;
        trace << "Checking event " << event << " at sample rate " << fs_hz;
        trace << "; attenuation " << attenuation;
        SCOPED_TRACE(trace.str());

        ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, attenuation));
        EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));

        // Linear gain corresponding to |attenuation| dB.
        const double attenuation_factor =
            pow(10, -static_cast<double>(attenuation) / 20);
        for (int n = 0; n < kNumSamples; ++n) {
          // Verify that the attenuation is correct.
          for (int channel = 0; channel < channels; ++channel) {
            EXPECT_NEAR(attenuation_factor * ref_signal[channel][n],
                        signal[channel][n],
                        2);
          }
        }
        tone_gen_.Reset();
      }
    }
  }

  DtmfToneGenerator tone_gen_;
};
// Low and high frequencies for events 0 through 15.
// Low (row) frequency in Hz, indexed by event number.
const double DtmfToneGeneratorTest::kLowFreqHz[16] = {
941.0, 697.0, 697.0, 697.0, 770.0, 770.0, 770.0, 852.0,
852.0, 852.0, 941.0, 941.0, 697.0, 770.0, 852.0, 941.0};
// High (column) frequency in Hz, indexed by event number.
const double DtmfToneGeneratorTest::kHighFreqHz[16] = {
1336.0, 1209.0, 1336.0, 1477.0, 1209.0, 1336.0, 1477.0, 1209.0,
1336.0, 1477.0, 1209.0, 1477.0, 1633.0, 1633.0, 1633.0, 1633.0};
// Tone and amplitude checks at 8000 Hz with one channel.
TEST_F(DtmfToneGeneratorTest, Test8000Mono) {
TestAllTones(8000, 1);
TestAmplitudes(8000, 1);
}
// Tone and amplitude checks at 16000 Hz with one channel.
TEST_F(DtmfToneGeneratorTest, Test16000Mono) {
TestAllTones(16000, 1);
TestAmplitudes(16000, 1);
}
// Tone and amplitude checks at 32000 Hz with one channel.
TEST_F(DtmfToneGeneratorTest, Test32000Mono) {
TestAllTones(32000, 1);
TestAmplitudes(32000, 1);
}
// Tone and amplitude checks at 48000 Hz with one channel.
TEST_F(DtmfToneGeneratorTest, Test48000Mono) {
TestAllTones(48000, 1);
TestAmplitudes(48000, 1);
}
// Tone and amplitude checks at 8000 Hz with two channels.
TEST_F(DtmfToneGeneratorTest, Test8000Stereo) {
TestAllTones(8000, 2);
TestAmplitudes(8000, 2);
}
// Tone and amplitude checks at 16000 Hz with two channels.
TEST_F(DtmfToneGeneratorTest, Test16000Stereo) {
TestAllTones(16000, 2);
TestAmplitudes(16000, 2);
}
// Tone and amplitude checks at 32000 Hz with two channels.
TEST_F(DtmfToneGeneratorTest, Test32000Stereo) {
TestAllTones(32000, 2);
TestAmplitudes(32000, 2);
}
// Tone and amplitude checks at 48000 Hz with two channels.
TEST_F(DtmfToneGeneratorTest, Test48000Stereo) {
TestAllTones(48000, 2);
TestAmplitudes(48000, 2);
}
// Exercises the error paths: generating before Init(), Init() with event or
// attenuation just outside the valid range, and Generate() with a null output
// pointer.
TEST(DtmfToneGenerator, TestErrors) {
DtmfToneGenerator tone_gen;
const int kNumSamples = 10;
AudioMultiVector signal(1); // One channel.
// Try to generate tones without initializing.
EXPECT_EQ(DtmfToneGenerator::kNotInitialized,
tone_gen.Generate(kNumSamples, &signal));
const int fs = 16000; // Valid sample rate.
const int event = 7; // Valid event.
const int attenuation = 0; // Valid attenuation.
// Initialize with invalid event -1.
EXPECT_EQ(DtmfToneGenerator::kParameterError,
tone_gen.Init(fs, -1, attenuation));
// Initialize with invalid event 16.
EXPECT_EQ(DtmfToneGenerator::kParameterError,
tone_gen.Init(fs, 16, attenuation));
// Initialize with invalid attenuation -1.
EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, -1));
// Initialize with invalid attenuation 64.
EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, 64));
EXPECT_FALSE(tone_gen.initialized()); // Should still be uninitialized.
// Initialize with valid parameters.
ASSERT_EQ(0, tone_gen.Init(fs, event, attenuation));
EXPECT_TRUE(tone_gen.initialized());
// NULL pointer to destination.
EXPECT_EQ(DtmfToneGenerator::kParameterError,
tone_gen.Generate(kNumSamples, NULL));
}
} // namespace webrtc

View File

@ -0,0 +1,978 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include <assert.h>
#include <string.h> // memset
#include <algorithm> // min, max
#include <limits> // numeric_limits<T>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/rtc_base/safe_conversions.h"
namespace webrtc {
// Constructs an Expand object for sample rate |fs| Hz and |num_channels|
// channels. The pointers to the background noise, sync buffer, random vector
// and statistics objects are stored as given. One ChannelParameters instance
// is allocated per channel, and Reset() is called to put the object in its
// initial state.
Expand::Expand(BackgroundNoise* background_noise,
SyncBuffer* sync_buffer,
RandomVector* random_vector,
StatisticsCalculator* statistics,
int fs,
size_t num_channels)
: random_vector_(random_vector),
sync_buffer_(sync_buffer),
first_expand_(true),
fs_hz_(fs),
num_channels_(num_channels),
consecutive_expands_(0),
background_noise_(background_noise),
statistics_(statistics),
// 5 samples at 8 kHz, scaled with the sample rate.
overlap_length_(5 * fs / 8000),
lag_index_direction_(0),
current_lag_index_(0),
stop_muting_(false),
expand_duration_samples_(0),
channel_parameters_(new ChannelParameters[num_channels_]) {
// Note that these checks run after the member initializers above, and are
// only active in builds where assert() is enabled.
assert(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000);
assert(fs <= static_cast<int>(kMaxSampleRate)); // Should not be possible.
assert(num_channels_ > 0);
memset(expand_lags_, 0, sizeof(expand_lags_));
Reset();
}
// Defaulted destructor, defined out of line in this source file.
Expand::~Expand() = default;
// Returns the object to its initial state: the next expansion is treated as
// the first one, the consecutive-expand counter and the maximum lag are
// zeroed, and both expand vectors of every channel are cleared.
void Expand::Reset() {
  max_lag_ = 0;
  consecutive_expands_ = 0;
  first_expand_ = true;
  for (size_t channel = 0; channel != num_channels_; ++channel) {
    auto& parameters = channel_parameters_[channel];
    parameters.expand_vector0.Clear();
    parameters.expand_vector1.Clear();
  }
}
int Expand::Process(AudioMultiVector* output) {
int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30];
int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
static const int kTempDataSize = 3600;
int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this.
int16_t* voiced_vector_storage = temp_data;
int16_t* voiced_vector = &voiced_vector_storage[overlap_length_];
static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder;
int fs_mult = fs_hz_ / 8000;
if (first_expand_) {
// Perform initial setup if this is the first expansion since last reset.
AnalyzeSignal(random_vector);
first_expand_ = false;
expand_duration_samples_ = 0;
} else {
// This is not the first expansion, parameters are already estimated.
// Extract a noise segment.
size_t rand_length = max_lag_;
// This only applies to SWB where length could be larger than 256.
assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30);
GenerateRandomVector(2, rand_length, random_vector);
}
// Generate signal.
UpdateLagIndex();
// Voiced part.
// Generate a weighted vector with the current lag.
size_t expansion_vector_length = max_lag_ + overlap_length_;
size_t current_lag = expand_lags_[current_lag_index_];
// Copy lag+overlap data.
size_t expansion_vector_position = expansion_vector_length - current_lag -
overlap_length_;
size_t temp_length = current_lag + overlap_length_;
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
if (current_lag_index_ == 0) {
// Use only expand_vector0.
assert(expansion_vector_position + temp_length <=
parameters.expand_vector0.Size());
parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
voiced_vector_storage);
} else if (current_lag_index_ == 1) {
std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]);
parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
temp_0.get());
std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]);
parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position,
temp_1.get());
// Mix 3/4 of expand_vector0 with 1/4 of expand_vector1.
WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 3, temp_1.get(), 1, 2,
voiced_vector_storage, temp_length);
} else if (current_lag_index_ == 2) {
// Mix 1/2 of expand_vector0 with 1/2 of expand_vector1.
assert(expansion_vector_position + temp_length <=
parameters.expand_vector0.Size());
assert(expansion_vector_position + temp_length <=
parameters.expand_vector1.Size());
std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]);
parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
temp_0.get());
std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]);
parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position,
temp_1.get());
WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 1, temp_1.get(), 1, 1,
voiced_vector_storage, temp_length);
}
// Get tapering window parameters. Values are in Q15.
int16_t muting_window, muting_window_increment;
int16_t unmuting_window, unmuting_window_increment;
if (fs_hz_ == 8000) {
muting_window = DspHelper::kMuteFactorStart8kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
} else if (fs_hz_ == 16000) {
muting_window = DspHelper::kMuteFactorStart16kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
} else if (fs_hz_ == 32000) {
muting_window = DspHelper::kMuteFactorStart32kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
} else { // fs_ == 48000
muting_window = DspHelper::kMuteFactorStart48kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
}
// Smooth the expanded if it has not been muted to a low amplitude and
// |current_voice_mix_factor| is larger than 0.5.
if ((parameters.mute_factor > 819) &&
(parameters.current_voice_mix_factor > 8192)) {
size_t start_ix = sync_buffer_->Size() - overlap_length_;
for (size_t i = 0; i < overlap_length_; i++) {
// Do overlap add between new vector and overlap.
(*sync_buffer_)[channel_ix][start_ix + i] =
(((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) +
(((parameters.mute_factor * voiced_vector_storage[i]) >> 14) *
unmuting_window) + 16384) >> 15;
muting_window += muting_window_increment;
unmuting_window += unmuting_window_increment;
}
} else if (parameters.mute_factor == 0) {
// The expanded signal will consist of only comfort noise if
// mute_factor = 0. Set the output length to 15 ms for best noise
// production.
// TODO(hlundin): This has been disabled since the length of
// parameters.expand_vector0 and parameters.expand_vector1 no longer
// match with expand_lags_, causing invalid reads and writes. Is it a good
// idea to enable this again, and solve the vector size problem?
// max_lag_ = fs_mult * 120;
// expand_lags_[0] = fs_mult * 120;
// expand_lags_[1] = fs_mult * 120;
// expand_lags_[2] = fs_mult * 120;
}
// Unvoiced part.
// Filter |scaled_random_vector| through |ar_filter_|.
memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state,
sizeof(int16_t) * kUnvoicedLpcOrder);
int32_t add_constant = 0;
if (parameters.ar_gain_scale > 0) {
add_constant = 1 << (parameters.ar_gain_scale - 1);
}
WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector,
parameters.ar_gain, add_constant,
parameters.ar_gain_scale,
current_lag);
WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector,
parameters.ar_filter, kUnvoicedLpcOrder + 1,
current_lag);
memcpy(parameters.ar_filter_state,
&(unvoiced_vector[current_lag - kUnvoicedLpcOrder]),
sizeof(int16_t) * kUnvoicedLpcOrder);
// Combine voiced and unvoiced contributions.
// Set a suitable cross-fading slope.
// For lag =
// <= 31 * fs_mult => go from 1 to 0 in about 8 ms;
// (>= 31 .. <= 63) * fs_mult => go from 1 to 0 in about 16 ms;
// >= 64 * fs_mult => go from 1 to 0 in about 32 ms.
// temp_shift = getbits(max_lag_) - 5.
int temp_shift =
(31 - WebRtcSpl_NormW32(rtc::dchecked_cast<int32_t>(max_lag_))) - 5;
int16_t mix_factor_increment = 256 >> temp_shift;
if (stop_muting_) {
mix_factor_increment = 0;
}
// Create combined signal by shifting in more and more of unvoiced part.
temp_shift = 8 - temp_shift; // = getbits(mix_factor_increment).
size_t temp_length = (parameters.current_voice_mix_factor -
parameters.voice_mix_factor) >> temp_shift;
temp_length = std::min(temp_length, current_lag);
DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_length,
&parameters.current_voice_mix_factor,
mix_factor_increment, temp_data);
// End of cross-fading period was reached before end of expanded signal
// path. Mix the rest with a fixed mixing factor.
if (temp_length < current_lag) {
if (mix_factor_increment != 0) {
parameters.current_voice_mix_factor = parameters.voice_mix_factor;
}
int16_t temp_scale = 16384 - parameters.current_voice_mix_factor;
WebRtcSpl_ScaleAndAddVectorsWithRound(
voiced_vector + temp_length, parameters.current_voice_mix_factor,
unvoiced_vector + temp_length, temp_scale, 14,
temp_data + temp_length, current_lag - temp_length);
}
// Select muting slope depending on how many consecutive expands we have
// done.
if (consecutive_expands_ == 3) {
// Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms.
// mute_slope = 0.0010 / fs_mult in Q20.
parameters.mute_slope = std::max(parameters.mute_slope, 1049 / fs_mult);
}
if (consecutive_expands_ == 7) {
// Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms.
// mute_slope = 0.0020 / fs_mult in Q20.
parameters.mute_slope = std::max(parameters.mute_slope, 2097 / fs_mult);
}
// Mute segment according to slope value.
if ((consecutive_expands_ != 0) || !parameters.onset) {
// Mute to the previous level, then continue with the muting.
WebRtcSpl_AffineTransformVector(temp_data, temp_data,
parameters.mute_factor, 8192,
14, current_lag);
if (!stop_muting_) {
DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag);
// Shift by 6 to go from Q20 to Q14.
// TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong.
// Legacy.
int16_t gain = static_cast<int16_t>(16384 -
(((current_lag * parameters.mute_slope) + 8192) >> 6));
gain = ((gain * parameters.mute_factor) + 8192) >> 14;
// Guard against getting stuck with very small (but sometimes audible)
// gain.
if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) {
parameters.mute_factor = 0;
} else {
parameters.mute_factor = gain;
}
}
}
// Background noise part.
GenerateBackgroundNoise(random_vector,
channel_ix,
channel_parameters_[channel_ix].mute_slope,
TooManyExpands(),
current_lag,
unvoiced_array_memory);
// Add background noise to the combined voiced-unvoiced signal.
for (size_t i = 0; i < current_lag; i++) {
temp_data[i] = temp_data[i] + noise_vector[i];
}
if (channel_ix == 0) {
output->AssertSize(current_lag);
} else {
assert(output->Size() == current_lag);
}
(*output)[channel_ix].OverwriteAt(temp_data, current_lag, 0);
}
// Increase call number and cap it.
consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands ?
kMaxConsecutiveExpands : consecutive_expands_ + 1;
expand_duration_samples_ += output->Size();
// Clamp the duration counter at 2 seconds.
expand_duration_samples_ = std::min(expand_duration_samples_,
rtc::dchecked_cast<size_t>(fs_hz_ * 2));
return 0;
}
void Expand::SetParametersForNormalAfterExpand() {
current_lag_index_ = 0;
lag_index_direction_ = 0;
stop_muting_ = true; // Do not mute signal any more.
statistics_->LogDelayedPacketOutageEvent(
rtc::dchecked_cast<int>(expand_duration_samples_) / (fs_hz_ / 1000));
}
// Prepares the object for extra expansion during a merge operation after an
// expand period.
void Expand::SetParametersForMergeAfterExpand() {
  // Muting is switched off for the samples produced from here on.
  stop_muting_ = true;
  // Start just outside the valid lag indices and step upwards, so that the
  // next UpdateLagIndex() lands on index 0 (the "optimal" lag).
  lag_index_direction_ = 1;
  current_lag_index_ = -1;
}
// Returns true when every channel's mute factor has reached zero. Always
// returns false before the first expansion has been analyzed, or when muting
// has been stopped.
bool Expand::Muted() const {
  const bool muting_in_progress = !first_expand_ && !stop_muting_;
  if (!muting_in_progress) {
    return false;
  }
  RTC_DCHECK(channel_parameters_);
  // Advance past all fully muted channels; the expansion is muted only if
  // the scan reaches the end without finding a non-zero mute factor.
  size_t ch = 0;
  while (ch < num_channels_ && channel_parameters_[ch].mute_factor == 0) {
    ++ch;
  }
  return ch == num_channels_;
}
// Accessor: returns the current value of |overlap_length_|, the number of
// samples that Process() overlap-adds at the end of the sync buffer.
size_t Expand::overlap_length() const {
return overlap_length_;
}
void Expand::InitializeForAnExpandPeriod() {
lag_index_direction_ = 1;
current_lag_index_ = -1;
stop_muting_ = false;
random_vector_->set_seed_increment(1);
consecutive_expands_ = 0;
for (size_t ix = 0; ix < num_channels_; ++ix) {
channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14.
channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14.
// Start with 0 gain for background noise.
background_noise_->SetMuteFactor(ix, 0);
}
}
// Returns true once |consecutive_expands_| has reached
// |kMaxConsecutiveExpands| (Process() caps the counter at that value).
bool Expand::TooManyExpands() {
return consecutive_expands_ >= kMaxConsecutiveExpands;
}
// Estimates all parameters needed for an expand period from the latest
// 256 * fs_mult samples in |sync_buffer_|.
//
// The lag search (correlation peaks and minimum distortion) is done once, on
// channel 0, and yields |max_lag_| and the three |expand_lags_|. After that,
// the following is computed for every channel from that channel's own audio
// history: the two expansion vectors, the unvoiced AR filter with its state
// and gain, the voiced/unvoiced mix factor and the muting slope.
//
// |random_vector| receives the initial noise segment (written once, while
// processing channel 0). The caller must provide room for the generated
// length; the code asserts it is at most kMaxSampleRate / 8000 * 120 + 30.
//
// Also calls InitializeForAnExpandPeriod().
void Expand::AnalyzeSignal(int16_t* random_vector) {
  int32_t auto_correlation[kUnvoicedLpcOrder + 1];
  int16_t reflection_coeff[kUnvoicedLpcOrder];
  int16_t correlation_vector[kMaxSampleRate / 8000 * 102];
  size_t best_correlation_index[kNumCorrelationCandidates];
  int16_t best_correlation[kNumCorrelationCandidates];
  size_t best_distortion_index[kNumCorrelationCandidates];
  int16_t best_distortion[kNumCorrelationCandidates];
  int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1];
  int32_t best_distortion_w32[kNumCorrelationCandidates];
  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
  int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
  // Leave room for |kUnvoicedLpcOrder| filter-state samples in front of the
  // vector.
  int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
  int fs_mult = fs_hz_ / 8000;

  // Pre-calculate common multiplications with fs_mult.
  size_t fs_mult_4 = static_cast<size_t>(fs_mult * 4);
  size_t fs_mult_20 = static_cast<size_t>(fs_mult * 20);
  size_t fs_mult_120 = static_cast<size_t>(fs_mult * 120);
  size_t fs_mult_dist_len = fs_mult * kDistortionLength;
  size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;

  // Copy the most recent |signal_length| samples of channel 0; the lag search
  // below is performed on this channel only.
  const size_t signal_length = static_cast<size_t>(256 * fs_mult);
  const size_t audio_history_position = sync_buffer_->Size() - signal_length;
  std::unique_ptr<int16_t[]> audio_history(new int16_t[signal_length]);
  (*sync_buffer_)[0].CopyTo(signal_length, audio_history_position,
                            audio_history.get());

  // Initialize.
  InitializeForAnExpandPeriod();

  // Calculate correlation in downsampled domain (4 kHz sample rate).
  size_t correlation_length = 51;  // TODO(hlundin): Legacy bit-exactness.
  // If it is decided to break bit-exactness |correlation_length| should be
  // initialized to the return value of Correlation().
  Correlation(audio_history.get(), signal_length, correlation_vector);

  // Find peaks in correlation vector.
  DspHelper::PeakDetection(correlation_vector, correlation_length,
                           kNumCorrelationCandidates, fs_mult,
                           best_correlation_index, best_correlation);

  // Adjust peak locations; cross-correlation lags start at 2.5 ms
  // (20 * fs_mult samples).
  best_correlation_index[0] += fs_mult_20;
  best_correlation_index[1] += fs_mult_20;
  best_correlation_index[2] += fs_mult_20;

  // Calculate distortion around the |kNumCorrelationCandidates| best lags.
  int distortion_scale = 0;
  for (size_t i = 0; i < kNumCorrelationCandidates; i++) {
    size_t min_index = std::max(fs_mult_20,
                                best_correlation_index[i] - fs_mult_4);
    size_t max_index = std::min(fs_mult_120 - 1,
                                best_correlation_index[i] + fs_mult_4);
    best_distortion_index[i] = DspHelper::MinDistortion(
        &(audio_history[signal_length - fs_mult_dist_len]), min_index,
        max_index, fs_mult_dist_len, &best_distortion_w32[i]);
    distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]),
                                distortion_scale);
  }
  // Shift the distortion values to fit in 16 bits.
  WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates,
                                   best_distortion_w32, distortion_scale);

  // Find the maximizing index |i| of the cost function
  // f[i] = best_correlation[i] / best_distortion[i].
  int32_t best_ratio = std::numeric_limits<int32_t>::min();
  size_t best_index = std::numeric_limits<size_t>::max();
  for (size_t i = 0; i < kNumCorrelationCandidates; ++i) {
    int32_t ratio;
    if (best_distortion[i] > 0) {
      ratio = (best_correlation[i] * (1 << 16)) / best_distortion[i];
    } else if (best_correlation[i] == 0) {
      ratio = 0;  // No correlation set result to zero.
    } else {
      ratio = std::numeric_limits<int32_t>::max();  // Denominator is zero.
    }
    if (ratio > best_ratio) {
      best_index = i;
      best_ratio = ratio;
    }
  }

  size_t distortion_lag = best_distortion_index[best_index];
  size_t correlation_lag = best_correlation_index[best_index];
  max_lag_ = std::max(distortion_lag, correlation_lag);

  // Calculate the exact best correlation in the range between
  // |correlation_lag| and |distortion_lag|.
  correlation_length =
      std::max(std::min(distortion_lag + 10, fs_mult_120),
               static_cast<size_t>(60 * fs_mult));

  size_t start_index = std::min(distortion_lag, correlation_lag);
  size_t correlation_lags = static_cast<size_t>(
      WEBRTC_SPL_ABS_W16((distortion_lag - correlation_lag)) + 1);
  assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));

  for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
    ChannelParameters& parameters = channel_parameters_[channel_ix];
    if (channel_ix > 0) {
      // |audio_history| was filled from channel 0 for the lag search above.
      // Reload it with this channel's own history so that the per-channel
      // parameters below are not derived from channel 0's audio.
      (*sync_buffer_)[channel_ix].CopyTo(signal_length, audio_history_position,
                                         audio_history.get());
    }

    // Calculate suitable scaling.
    int16_t signal_max = WebRtcSpl_MaxAbsValueW16(
        &audio_history[signal_length - correlation_length - start_index
                       - correlation_lags],
        correlation_length + start_index + correlation_lags - 1);
    int correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +
        (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;
    correlation_scale = std::max(0, correlation_scale);

    // Calculate the correlation, store in |correlation_vector2|.
    WebRtcSpl_CrossCorrelation(
        correlation_vector2,
        &(audio_history[signal_length - correlation_length]),
        &(audio_history[signal_length - correlation_length - start_index]),
        correlation_length, correlation_lags, correlation_scale, -1);

    // Find maximizing index.
    best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
    int32_t max_correlation = correlation_vector2[best_index];
    // Compensate index with start offset.
    best_index = best_index + start_index;

    // Calculate energies.
    int32_t energy1 = WebRtcSpl_DotProductWithScale(
        &(audio_history[signal_length - correlation_length]),
        &(audio_history[signal_length - correlation_length]),
        correlation_length, correlation_scale);
    int32_t energy2 = WebRtcSpl_DotProductWithScale(
        &(audio_history[signal_length - correlation_length - best_index]),
        &(audio_history[signal_length - correlation_length - best_index]),
        correlation_length, correlation_scale);

    // Calculate the correlation coefficient between the two portions of the
    // signal.
    int32_t corr_coefficient;
    if ((energy1 > 0) && (energy2 > 0)) {
      int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0);
      int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
      // Make sure total scaling is even (to simplify scale factor after sqrt).
      if ((energy1_scale + energy2_scale) & 1) {
        // If sum is odd, add 1 to make it even.
        energy1_scale += 1;
      }
      int32_t scaled_energy1 = energy1 >> energy1_scale;
      int32_t scaled_energy2 = energy2 >> energy2_scale;
      int16_t sqrt_energy_product = static_cast<int16_t>(
          WebRtcSpl_SqrtFloor(scaled_energy1 * scaled_energy2));
      // Calculate max_correlation / sqrt(energy1 * energy2) in Q14.
      int cc_shift = 14 - (energy1_scale + energy2_scale) / 2;
      max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift);
      corr_coefficient = WebRtcSpl_DivW32W16(max_correlation,
                                             sqrt_energy_product);
      // Cap at 1.0 in Q14.
      corr_coefficient = std::min(16384, corr_coefficient);
    } else {
      corr_coefficient = 0;
    }

    // Extract the two vectors expand_vector0 and expand_vector1 from
    // |audio_history|.
    size_t expansion_length = max_lag_ + overlap_length_;
    const int16_t* vector1 = &(audio_history[signal_length - expansion_length]);
    const int16_t* vector2 = vector1 - distortion_lag;
    // Normalize the second vector to the same energy as the first.
    energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length,
                                            correlation_scale);
    energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length,
                                            correlation_scale);
    // Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 - 2.0,
    // i.e., energy1 / energy2 is within 0.25 - 4.
    int16_t amplitude_ratio;
    if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) {
      // Energy constraint fulfilled. Use both vectors and scale them
      // accordingly.
      // Note: despite their names, |scaled_energy1| and |scaled_energy2| hold
      // shift counts, not energies.
      int32_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
      int32_t scaled_energy1 = scaled_energy2 - 13;
      // Calculate scaled_energy1 / scaled_energy2 in Q13.
      int32_t energy_ratio = WebRtcSpl_DivW32W16(
          WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1),
          static_cast<int16_t>(energy2 >> scaled_energy2));
      // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26).
      amplitude_ratio =
          static_cast<int16_t>(WebRtcSpl_SqrtFloor(energy_ratio << 13));
      // Copy the two vectors and give them the same energy.
      parameters.expand_vector0.Clear();
      parameters.expand_vector0.PushBack(vector1, expansion_length);
      parameters.expand_vector1.Clear();
      if (parameters.expand_vector1.Size() < expansion_length) {
        parameters.expand_vector1.Extend(
            expansion_length - parameters.expand_vector1.Size());
      }
      std::unique_ptr<int16_t[]> temp_1(new int16_t[expansion_length]);
      WebRtcSpl_AffineTransformVector(temp_1.get(),
                                      const_cast<int16_t*>(vector2),
                                      amplitude_ratio,
                                      4096,
                                      13,
                                      expansion_length);
      parameters.expand_vector1.OverwriteAt(temp_1.get(), expansion_length, 0);
    } else {
      // Energy change constraint not fulfilled. Only use last vector.
      parameters.expand_vector0.Clear();
      parameters.expand_vector0.PushBack(vector1, expansion_length);
      // Copy from expand_vector0 to expand_vector1.
      parameters.expand_vector0.CopyTo(&parameters.expand_vector1);
      // Set the energy_ratio since it is used by muting slope.
      if ((energy1 / 4 < energy2) || (energy2 == 0)) {
        amplitude_ratio = 4096;  // 0.5 in Q13.
      } else {
        amplitude_ratio = 16384;  // 2.0 in Q13.
      }
    }

    // Set the 3 lag values.
    if (distortion_lag == correlation_lag) {
      expand_lags_[0] = distortion_lag;
      expand_lags_[1] = distortion_lag;
      expand_lags_[2] = distortion_lag;
    } else {
      // |distortion_lag| and |correlation_lag| are not equal; use different
      // combinations of the two.
      // First lag is |distortion_lag| only.
      expand_lags_[0] = distortion_lag;
      // Second lag is the average of the two.
      expand_lags_[1] = (distortion_lag + correlation_lag) / 2;
      // Third lag is the average again, but rounding towards |correlation_lag|.
      if (distortion_lag > correlation_lag) {
        expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;
      } else {
        expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;
      }
    }

    // Calculate the LPC and the gain of the filters.

    // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.
    size_t temp_index = signal_length - fs_mult_lpc_analysis_len -
        kUnvoicedLpcOrder;
    {
      // Copy signal to temporary vector to be able to pad with leading zeros.
      // The buffer is owned by a unique_ptr and released at the end of this
      // scope.
      std::unique_ptr<int16_t[]> temp_signal(
          new int16_t[fs_mult_lpc_analysis_len + kUnvoicedLpcOrder]);
      memset(temp_signal.get(), 0,
             sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));
      memcpy(&temp_signal[kUnvoicedLpcOrder],
             &audio_history[temp_index + kUnvoicedLpcOrder],
             sizeof(int16_t) * fs_mult_lpc_analysis_len);
      CrossCorrelationWithAutoShift(
          &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder],
          fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1,
          auto_correlation);
    }

    // Verify that variance is positive.
    if (auto_correlation[0] > 0) {
      // Estimate AR filter parameters using Levinson-Durbin algorithm;
      // kUnvoicedLpcOrder + 1 filter coefficients.
      int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,
                                                   parameters.ar_filter,
                                                   reflection_coeff,
                                                   kUnvoicedLpcOrder);

      // Keep filter parameters only if filter is stable.
      if (stability != 1) {
        // Set first coefficient to 4096 (1.0 in Q12).
        parameters.ar_filter[0] = 4096;
        // Set remaining |kUnvoicedLpcOrder| coefficients to zero.
        WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder);
      }
    }

    if (channel_ix == 0) {
      // Extract a noise segment.
      size_t noise_length;
      if (distortion_lag < 40) {
        noise_length = 2 * distortion_lag + 30;
      } else {
        noise_length = distortion_lag + 30;
      }
      if (noise_length <= RandomVector::kRandomTableSize) {
        memcpy(random_vector, RandomVector::kRandomTable,
               sizeof(int16_t) * noise_length);
      } else {
        // Only applies to SWB where length could be larger than
        // |kRandomTableSize|.
        memcpy(random_vector, RandomVector::kRandomTable,
               sizeof(int16_t) * RandomVector::kRandomTableSize);
        assert(noise_length <= kMaxSampleRate / 8000 * 120 + 30);
        random_vector_->IncreaseSeedIncrement(2);
        random_vector_->Generate(
            noise_length - RandomVector::kRandomTableSize,
            &random_vector[RandomVector::kRandomTableSize]);
      }
    }

    // Set up state vector and calculate scale factor for unvoiced filtering.
    memcpy(parameters.ar_filter_state,
           &(audio_history[signal_length - kUnvoicedLpcOrder]),
           sizeof(int16_t) * kUnvoicedLpcOrder);
    memcpy(unvoiced_vector - kUnvoicedLpcOrder,
           &(audio_history[signal_length - 128 - kUnvoicedLpcOrder]),
           sizeof(int16_t) * kUnvoicedLpcOrder);
    WebRtcSpl_FilterMAFastQ12(&audio_history[signal_length - 128],
                              unvoiced_vector,
                              parameters.ar_filter,
                              kUnvoicedLpcOrder + 1,
                              128);
    const int unvoiced_max_abs = [&] {
      const int16_t max_abs = WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128);
      // Since WebRtcSpl_MaxAbsValueW16 returns 2^15 - 1 when the input contains
      // -2^15, we have to conservatively bump the return value by 1
      // if it is 2^15 - 1.
      return max_abs == WEBRTC_SPL_WORD16_MAX ? max_abs + 1 : max_abs;
    }();
    // Pick the smallest n such that 2^n > unvoiced_max_abs; then the maximum
    // value of the dot product is less than 2^7 * 2^(2*n) = 2^(2*n + 7), so to
    // prevent overflows we want 2n + 7 <= 31, which means we should shift by
    // 2n + 7 - 31 bits, if this value is greater than zero.
    int unvoiced_prescale =
        std::max(0, 2 * WebRtcSpl_GetSizeInBits(unvoiced_max_abs) - 24);

    int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale(unvoiced_vector,
                                                            unvoiced_vector,
                                                            128,
                                                            unvoiced_prescale);

    // Normalize |unvoiced_energy| to 28 or 29 bits to preserve sqrt() accuracy.
    int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3;
    // Make sure we do an odd number of shifts since we already have 7 shifts
    // from dividing with 128 earlier. This will make the total scale factor
    // even, which is suitable for the sqrt.
    unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1);
    unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale);
    int16_t unvoiced_gain =
        static_cast<int16_t>(WebRtcSpl_SqrtFloor(unvoiced_energy));
    parameters.ar_gain_scale = 13
        + (unvoiced_scale + 7 - unvoiced_prescale) / 2;
    parameters.ar_gain = unvoiced_gain;

    // Calculate voice_mix_factor from corr_coefficient.
    // Let x = corr_coefficient. Then, we compute:
    // if (x > 0.48)
    //   voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096;
    // else
    //   voice_mix_factor = 0;
    if (corr_coefficient > 7875) {
      int16_t x1, x2, x3;
      // |corr_coefficient| is in Q14.
      x1 = static_cast<int16_t>(corr_coefficient);
      x2 = (x1 * x1) >> 14;   // Shift 14 to keep result in Q14.
      x3 = (x1 * x2) >> 14;
      static const int kCoefficients[4] = { -5179, 19931, -16422, 5776 };
      int32_t temp_sum = kCoefficients[0] * 16384;
      temp_sum += kCoefficients[1] * x1;
      temp_sum += kCoefficients[2] * x2;
      temp_sum += kCoefficients[3] * x3;
      parameters.voice_mix_factor =
          static_cast<int16_t>(std::min(temp_sum / 4096, 16384));
      parameters.voice_mix_factor = std::max(parameters.voice_mix_factor,
                                             static_cast<int16_t>(0));
    } else {
      parameters.voice_mix_factor = 0;
    }

    // Calculate muting slope. Reuse value from earlier scaling of
    // |expand_vector0| and |expand_vector1|.
    int16_t slope = amplitude_ratio;
    if (slope > 12288) {
      // slope > 1.5.
      // Calculate (1 - (1 / slope)) / distortion_lag =
      // (slope - 1) / (distortion_lag * slope).
      // |slope| is in Q13, so 1 corresponds to 8192. Shift up to Q25 before
      // the division.
      // Shift the denominator from Q13 to Q5 before the division. The result of
      // the division will then be in Q20.
      // The Q5 denominator can exceed the int16_t range for long lags (for
      // instance a lag of 512 samples or more with a slope of 2.0), so
      // saturate it instead of letting the narrowing conversion wrap to a
      // negative value, which would produce a negative muting slope.
      const size_t denominator_q5 = std::min(
          (distortion_lag * slope) >> 8,
          static_cast<size_t>(std::numeric_limits<int16_t>::max()));
      int temp_ratio = WebRtcSpl_DivW32W16(
          (slope - 8192) << 12, static_cast<int16_t>(denominator_q5));
      if (slope > 14746) {
        // slope > 1.8.
        // Divide by 2, with proper rounding.
        parameters.mute_slope = (temp_ratio + 1) / 2;
      } else {
        // Divide by 8, with proper rounding.
        parameters.mute_slope = (temp_ratio + 4) / 8;
      }
      parameters.onset = true;
    } else {
      // Calculate (1 - slope) / distortion_lag.
      // Shift |slope| by 7 to Q20 before the division. The result is in Q20.
      parameters.mute_slope = WebRtcSpl_DivW32W16(
          (8192 - slope) * 128, static_cast<int16_t>(distortion_lag));
      if (parameters.voice_mix_factor <= 13107) {
        // Make sure the mute factor decreases from 1.0 to 0.9 in no more than
        // 6.25 ms.
        // mute_slope >= 0.005 / fs_mult in Q20.
        parameters.mute_slope = std::max(5243 / fs_mult, parameters.mute_slope);
      } else if (slope > 8028) {
        parameters.mute_slope = 0;
      }
      parameters.onset = false;
    }
  }
}
// Constructs the per-channel expand state. |mute_factor| starts at 16384
// (1.0 in Q14, i.e. no muting); all other scalar members start at zero/false,
// and the AR filter coefficients and filter state are zero-filled.
Expand::ChannelParameters::ChannelParameters()
: mute_factor(16384),
ar_gain(0),
ar_gain_scale(0),
voice_mix_factor(0),
current_voice_mix_factor(0),
onset(false),
mute_slope(0) {
memset(ar_filter, 0, sizeof(ar_filter));
memset(ar_filter_state, 0, sizeof(ar_filter_state));
}
// Downsamples the tail of |input| to 4 kHz, normalizes it, and computes the
// cross-correlation between the last |kCorrelationLength| downsampled samples
// and earlier parts of the signal, starting at lag |kCorrelationStartLag|.
// The result is scaled down to 16 bits and |kNumCorrelationLags| (54) values
// are written to |output|, which must have room for them.
void Expand::Correlation(const int16_t* input,
                         size_t input_length,
                         int16_t* output) const {
  // Select the downsampling filter and decimation factor for the sample rate.
  const int16_t* filter_coefficients;
  size_t num_coefficients;
  int16_t downsampling_factor;
  switch (fs_hz_) {
    case 8000:
      filter_coefficients = DspHelper::kDownsample8kHzTbl;
      num_coefficients = 3;
      downsampling_factor = 2;
      break;
    case 16000:
      filter_coefficients = DspHelper::kDownsample16kHzTbl;
      num_coefficients = 5;
      downsampling_factor = 4;
      break;
    case 32000:
      filter_coefficients = DspHelper::kDownsample32kHzTbl;
      num_coefficients = 7;
      downsampling_factor = 8;
      break;
    default:  // fs_hz_ == 48000.
      filter_coefficients = DspHelper::kDownsample48kHzTbl;
      num_coefficients = 7;
      downsampling_factor = 12;
      break;
  }

  // Correlate from lag 10 to lag 60 in downsampled domain.
  // (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.)
  static const size_t kCorrelationStartLag = 10;
  static const size_t kNumCorrelationLags = 54;
  static const size_t kCorrelationLength = 60;
  // Number of 4 kHz samples needed to evaluate every lag.
  static const size_t kDownsampledLength = kCorrelationStartLag
      + kNumCorrelationLags + kCorrelationLength;
  static const size_t kFilterDelay = 0;

  // Downsample the most recent part of the input to 4 kHz sample rate.
  int16_t downsampled_input[kDownsampledLength];
  const size_t full_rate_length = kDownsampledLength * downsampling_factor;
  const int16_t* full_rate_start = input + input_length - full_rate_length;
  WebRtcSpl_DownsampleFast(full_rate_start, full_rate_length,
                           downsampled_input, kDownsampledLength,
                           filter_coefficients, num_coefficients,
                           downsampling_factor, kFilterDelay);

  // Normalize |downsampled_input| to using all 16 bits.
  int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,
                                               kDownsampledLength);
  int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);
  WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,
                              downsampled_input, norm_shift);

  // Correlate the newest segment against progressively older segments.
  int32_t correlation[kNumCorrelationLags];
  const int16_t* newest_segment =
      &downsampled_input[kDownsampledLength - kCorrelationLength];
  const int16_t* lagged_segment =
      &downsampled_input[kDownsampledLength - kCorrelationLength
                         - kCorrelationStartLag];
  CrossCorrelationWithAutoShift(newest_segment, lagged_segment,
                                kCorrelationLength, kNumCorrelationLags, -1,
                                correlation);

  // Normalize and move data from 32-bit to 16-bit vector.
  int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
                                                     kNumCorrelationLags);
  int16_t norm_shift2 = static_cast<int16_t>(
      std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));
  WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,
                                   norm_shift2);
}
// Steps |current_lag_index_| one position in the current direction and
// reverses the direction when either end of the lag index range is reached,
// so that successive calls sweep back and forth over the lags.
void Expand::UpdateLagIndex() {
  current_lag_index_ += lag_index_direction_;

  const bool at_lower_end = current_lag_index_ <= 0;
  const bool at_upper_end = current_lag_index_ >= kNumLags - 1;
  // The upper-end check takes precedence if both conditions hold.
  if (at_upper_end) {
    lag_index_direction_ = -1;
  } else if (at_lower_end) {
    lag_index_direction_ = 1;
  }
}
// Creates a new Expand object on the heap, forwarding all arguments to the
// Expand constructor. The object is allocated with a plain |new| and returned
// as a raw pointer; presumably the caller takes ownership and is responsible
// for deleting it -- confirm against the call sites.
Expand* ExpandFactory::Create(BackgroundNoise* background_noise,
SyncBuffer* sync_buffer,
RandomVector* random_vector,
StatisticsCalculator* statistics,
int fs,
size_t num_channels) const {
return new Expand(background_noise, sync_buffer, random_vector, statistics,
fs, num_channels);
}
// TODO(turajs): This can be moved to BackgroundNoise class.
// Writes |num_noise_samples| samples of background noise for |channel| to
// |buffer|, starting at offset |kNoiseLpcOrder|. The samples in front of that
// offset are used for the filter state. When the background noise parameters
// are initialized, |random_vector| is scaled and run through the background
// noise AR filter, and the result is faded according to the background noise
// mode, |mute_slope| and |too_many_expands|. Otherwise the output is all
// zeros.
void Expand::GenerateBackgroundNoise(int16_t* random_vector,
                                     size_t channel,
                                     int mute_slope,
                                     bool too_many_expands,
                                     size_t num_noise_samples,
                                     int16_t* buffer) {
  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
  int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
  assert(num_noise_samples <= (kMaxSampleRate / 8000 * 125));
  int16_t* noise_samples = &buffer[kNoiseLpcOrder];

  if (!background_noise_->initialized()) {
    // BGN parameters have not been initialized; use zero noise.
    memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples);
    return;
  }

  // Use background noise parameters.
  // Place the saved filter state directly in front of the output samples.
  memcpy(noise_samples - kNoiseLpcOrder,
         background_noise_->FilterState(channel),
         sizeof(int16_t) * kNoiseLpcOrder);

  const auto scale_shift = background_noise_->ScaleShift(channel);
  int dc_offset = 0;
  if (scale_shift > 1) {
    dc_offset = 1 << (scale_shift - 1);
  }

  // Scale random vector to correct energy level.
  WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector,
                                  background_noise_->Scale(channel), dc_offset,
                                  scale_shift, num_noise_samples);

  WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples,
                            background_noise_->Filter(channel),
                            kNoiseLpcOrder + 1,
                            num_noise_samples);

  // Save the tail of the output as filter state for the next call.
  background_noise_->SetFilterState(
      channel,
      &(noise_samples[num_noise_samples - kNoiseLpcOrder]),
      kNoiseLpcOrder);

  // Unmute the background noise.
  int16_t bgn_mute_factor = background_noise_->MuteFactor(channel);
  NetEq::BackgroundNoiseMode bgn_mode = background_noise_->mode();
  const bool fade_mode_expired =
      (bgn_mode == NetEq::kBgnFade) && too_many_expands;

  if (fade_mode_expired && bgn_mute_factor > 0) {
    // Fade BGN to zero.
    // Calculate muting slope, approximately -2^18 / fs_hz.
    int fade_slope;
    switch (fs_hz_) {
      case 8000:
        fade_slope = -32;
        break;
      case 16000:
        fade_slope = -16;
        break;
      case 32000:
        fade_slope = -8;
        break;
      default:
        fade_slope = -5;
        break;
    }
    // Use UnmuteSignal function with negative slope.
    // |bgn_mute_factor| is in Q14. |fade_slope| is in Q20.
    DspHelper::UnmuteSignal(noise_samples,
                            num_noise_samples,
                            &bgn_mute_factor,
                            fade_slope,
                            noise_samples);
  } else if (bgn_mute_factor < 16384) {
    // If mode is kBgnOn, or if kBgnFade has started fading,
    // use regular |mute_slope|.
    const bool keep_unmuting =
        !stop_muting_ && bgn_mode != NetEq::kBgnOff && !fade_mode_expired;
    if (keep_unmuting) {
      DspHelper::UnmuteSignal(noise_samples,
                              static_cast<int>(num_noise_samples),
                              &bgn_mute_factor,
                              mute_slope,
                              noise_samples);
    } else {
      // kBgnOn and stop muting, or
      // kBgnOff (mute factor is always 0), or
      // kBgnFade has reached 0.
      WebRtcSpl_AffineTransformVector(noise_samples, noise_samples,
                                      bgn_mute_factor, 8192, 14,
                                      num_noise_samples);
    }
  }

  // Update mute_factor in BackgroundNoise class.
  background_noise_->SetMuteFactor(channel, bgn_mute_factor);
}
// Fills the first |length| elements of |random_vector| with values from
// |random_vector_|. The vector is produced in chunks of at most
// RandomVector::kRandomTableSize samples, and the generator's seed increment
// is raised by |seed_increment| before each chunk.
void Expand::GenerateRandomVector(int16_t seed_increment,
                                  size_t length,
                                  int16_t* random_vector) {
  // TODO(turajs): According to hlundin The loop should not be needed. Should be
  // just as good to generate all of the vector in one call.
  const size_t kMaxRandSamples = RandomVector::kRandomTableSize;
  for (size_t offset = 0; offset < length;) {
    const size_t chunk_length = std::min(length - offset, kMaxRandSamples);
    random_vector_->IncreaseSeedIncrement(seed_increment);
    random_vector_->Generate(chunk_length, &random_vector[offset]);
    offset += chunk_length;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,161 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
#include <assert.h>
#include <memory>
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
class RandomVector;
class StatisticsCalculator;
class SyncBuffer;
// This class handles extrapolation of audio data from the sync_buffer to
// produce packet-loss concealment.
// TODO(hlundin): Refactor this class to divide the long methods into shorter
// ones.
class Expand {
 public:
  // |fs| and |num_channels| end up in the const members |fs_hz_| and
  // |num_channels_| declared below.
  // NOTE(review): the four collaborators are held as raw `T* const` members,
  // so they are presumably borrowed and must outlive this object -- confirm
  // against the constructor definition.
  Expand(BackgroundNoise* background_noise,
         SyncBuffer* sync_buffer,
         RandomVector* random_vector,
         StatisticsCalculator* statistics,
         int fs,
         size_t num_channels);

  virtual ~Expand();

  // Resets the object.
  virtual void Reset();

  // The main method to produce concealment data. The data is appended to the
  // end of |output|.
  virtual int Process(AudioMultiVector* output);

  // Prepare the object to do extra expansion during normal operation following
  // a period of expands.
  virtual void SetParametersForNormalAfterExpand();

  // Prepare the object to do extra expansion during merge operation following
  // a period of expands.
  virtual void SetParametersForMergeAfterExpand();

  // Returns the mute factor for |channel|. |channel| must be smaller than the
  // number of channels given at construction (checked with assert only).
  // Declared const, like the other read-only accessors of this class, so that
  // it can also be called through a const Expand reference or pointer.
  int16_t MuteFactor(size_t channel) const {
    assert(channel < num_channels_);
    return channel_parameters_[channel].mute_factor;
  }

  // Returns true if expansion has been faded down to zero amplitude (for all
  // channels); false otherwise.
  bool Muted() const;

  // Accessors and mutators.
  virtual size_t overlap_length() const;
  size_t max_lag() const { return max_lag_; }

 protected:
  // NOTE(review): presumably the threshold that TooManyExpands() compares
  // |consecutive_expands_| against -- confirm in the implementation.
  static const int kMaxConsecutiveExpands = 200;

  // Fills |random_vector| with |length| random samples, passing
  // |seed_increment| on to |random_vector_|.
  void GenerateRandomVector(int16_t seed_increment,
                            size_t length,
                            int16_t* random_vector);

  // Produces |num_noise_samples| samples of background noise for |channel|.
  // NOTE(review): the exact roles of |random_vector| (input) and |buffer|
  // (output/scratch) are defined by the implementation -- confirm there.
  void GenerateBackgroundNoise(int16_t* random_vector,
                               size_t channel,
                               int mute_slope,
                               bool too_many_expands,
                               size_t num_noise_samples,
                               int16_t* buffer);

  // Initializes member variables at the beginning of an expand period.
  void InitializeForAnExpandPeriod();

  bool TooManyExpands();

  // Analyzes the signal history in |sync_buffer_|, and set up all parameters
  // necessary to produce concealment data.
  void AnalyzeSignal(int16_t* random_vector);

  // State shared with derived classes.
  RandomVector* const random_vector_;
  SyncBuffer* const sync_buffer_;
  bool first_expand_;
  const int fs_hz_;
  const size_t num_channels_;
  int consecutive_expands_;

 private:
  static const size_t kUnvoicedLpcOrder = 6;
  static const size_t kNumCorrelationCandidates = 3;
  static const size_t kDistortionLength = 20;
  static const size_t kLpcAnalysisLength = 160;
  static const size_t kMaxSampleRate = 48000;
  static const int kNumLags = 3;

  // Per-channel expansion state; one instance per channel is held in
  // |channel_parameters_|.
  struct ChannelParameters {
    ChannelParameters();
    int16_t mute_factor;
    // AR filter with kUnvoicedLpcOrder + 1 coefficients, and its state.
    int16_t ar_filter[kUnvoicedLpcOrder + 1];
    int16_t ar_filter_state[kUnvoicedLpcOrder];
    int16_t ar_gain;
    int16_t ar_gain_scale;
    int16_t voice_mix_factor;         /* Q14 */
    int16_t current_voice_mix_factor; /* Q14 */
    AudioVector expand_vector0;
    AudioVector expand_vector1;
    bool onset;
    int mute_slope; /* Q20 */
  };

  // Calculate the auto-correlation of |input|, with length |input_length|
  // samples. The correlation is calculated from a downsampled version of
  // |input|, and is written to |output|.
  void Correlation(const int16_t* input,
                   size_t input_length,
                   int16_t* output) const;

  void UpdateLagIndex();

  BackgroundNoise* const background_noise_;
  StatisticsCalculator* const statistics_;
  const size_t overlap_length_;
  size_t max_lag_;
  size_t expand_lags_[kNumLags];
  int lag_index_direction_;
  int current_lag_index_;
  bool stop_muting_;
  size_t expand_duration_samples_;
  // Array of per-channel parameters, indexed by channel (see MuteFactor()).
  std::unique_ptr<ChannelParameters[]> channel_parameters_;

  RTC_DISALLOW_COPY_AND_ASSIGN(Expand);
};
// Factory for Expand objects. Both the destructor and Create() are virtual,
// so the factory can be subclassed.
struct ExpandFactory {
  ExpandFactory() = default;
  virtual ~ExpandFactory() = default;

  // Returns an Expand built from the given arguments, which mirror the
  // parameters of the Expand constructor.
  // NOTE(review): callers in the unit test `delete` the returned pointer, so
  // ownership presumably passes to the caller -- confirm in the definition.
  virtual Expand* Create(BackgroundNoise* background_noise,
                         SyncBuffer* sync_buffer,
                         RandomVector* random_vector,
                         StatisticsCalculator* statistics,
                         int fs,
                         size_t num_channels) const;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_EXPAND_H_

View File

@ -0,0 +1,206 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Expand class.
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h"
#include "webrtc/rtc_base/safe_conversions.h"
#include "webrtc/test/gtest.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
TEST(Expand, CreateAndDestroy) {
  // Builds an Expand on the stack for one channel at 8000 Hz and lets it go
  // out of scope again; there are no explicit expectations.
  const int sample_rate_hz = 8000;
  const size_t num_channels = 1;
  BackgroundNoise background_noise(num_channels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  Expand expand(&background_noise, &sync_buffer, &random_vector, &statistics,
                sample_rate_hz, num_channels);
}
TEST(Expand, CreateUsingFactory) {
  // Verifies that ExpandFactory::Create() hands back a non-null Expand.
  int fs = 8000;
  size_t channels = 1;
  BackgroundNoise bgn(channels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  ExpandFactory expand_factory;
  // Take ownership right away so the object is released on every exit path;
  // being the last local declared, it is destroyed before its collaborators,
  // just like the explicit delete it replaces.
  std::unique_ptr<Expand> expand(expand_factory.Create(
      &bgn, &sync_buffer, &random_vector, &statistics, fs, channels));
  EXPECT_TRUE(expand != nullptr);
}
namespace {

// StatisticsCalculator stand-in that remembers the value most recently passed
// to LogDelayedPacketOutageEvent(), so that tests can read it back.
class FakeStatisticsCalculator : public StatisticsCalculator {
 public:
  // Latest logged outage duration; 0 until an event has been logged.
  int last_outage_duration_ms() const { return last_outage_duration_ms_; }

  void LogDelayedPacketOutageEvent(int outage_duration_ms) override {
    last_outage_duration_ms_ = outage_duration_ms;
  }

 private:
  int last_outage_duration_ms_{0};
};

// This is the same size that is given to the SyncBuffer object in NetEq.
const size_t kNetEqSyncBufferLengthMs = 720;

}  // namespace
// Fixture that owns an Expand instance together with all of its collaborators,
// running single-channel audio at 32000 Hz read from a resource file.
class ExpandTest : public ::testing::Test {
 protected:
  ExpandTest()
      : input_file_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
                    32000),
        test_sample_rate_hz_(32000),
        num_channels_(1),
        background_noise_(num_channels_),
        sync_buffer_(num_channels_,
                     kNetEqSyncBufferLengthMs * test_sample_rate_hz_ / 1000),
        expand_(&background_noise_,
                &sync_buffer_,
                &random_vector_,
                &statistics_,
                test_sample_rate_hz_,
                num_channels_) {
    WebRtcSpl_Init();
    input_file_.set_output_rate_hz(test_sample_rate_hz_);
  }

  void SetUp() override {
    // Skip ahead in the input file to where the speech begins (about 1.1
    // seconds into the file).
    const size_t speech_start_samples =
        static_cast<size_t>(test_sample_rate_hz_ * 1.1f);
    ASSERT_TRUE(input_file_.Seek(speech_start_samples));

    // Fill the whole sync buffer (channel 0) with speech from the file.
    const size_t buffer_length = sync_buffer_.Size();
    std::unique_ptr<int16_t[]> speech(new int16_t[buffer_length]);
    ASSERT_TRUE(input_file_.Read(buffer_length, speech.get()));
    sync_buffer_.Channel(0).OverwriteAt(speech.get(), buffer_length, 0);
    // Only channel 0 was written above.
    ASSERT_EQ(1u, num_channels_) << "Fix: Must populate all channels.";
  }

  // Declaration order matters: the constructor initializes the members in
  // this order, and later members are built from earlier ones.
  test::ResampleInputAudioFile input_file_;
  int test_sample_rate_hz_;
  size_t num_channels_;
  BackgroundNoise background_noise_;
  SyncBuffer sync_buffer_;
  RandomVector random_vector_;
  FakeStatisticsCalculator statistics_;
  Expand expand_;
};
// This test calls the expand object to produce concealment data a few times,
// and then ends by calling SetParametersForNormalAfterExpand. This simulates
// the situation where the packet next up for decoding was just delayed, not
// lost.
TEST_F(ExpandTest, DelayedPacketOutage) {
  AudioMultiVector output(num_channels_);
  size_t total_output_samples = 0;
  for (int call = 0; call != 10; ++call) {
    EXPECT_EQ(0, expand_.Process(&output));
    EXPECT_GT(output.Size(), 0u);
    total_output_samples += output.Size();
    // No outage may be logged while the expand period is still going on.
    EXPECT_EQ(0, statistics_.last_outage_duration_ms());
  }
  expand_.SetParametersForNormalAfterExpand();
  // The logged outage must equal the produced samples expressed in
  // milliseconds.
  const int samples_per_ms = test_sample_rate_hz_ / 1000;
  const int expected_outage_ms =
      rtc::checked_cast<int>(total_output_samples / samples_per_ms);
  EXPECT_EQ(expected_outage_ms, statistics_.last_outage_duration_ms());
}
// This test is similar to DelayedPacketOutage, but ends by calling
// SetParametersForMergeAfterExpand. This simulates the situation where the
// packet next up for decoding was actually lost (or at least a later packet
// arrived before it).
TEST_F(ExpandTest, LostPacketOutage) {
  AudioMultiVector output(num_channels_);
  size_t total_output_samples = 0;
  for (int call = 0; call != 10; ++call) {
    EXPECT_EQ(0, expand_.Process(&output));
    EXPECT_GT(output.Size(), 0u);
    total_output_samples += output.Size();
    EXPECT_EQ(0, statistics_.last_outage_duration_ms());
  }
  expand_.SetParametersForMergeAfterExpand();
  // Ending the expand period through the merge path must not log an outage.
  EXPECT_EQ(0, statistics_.last_outage_duration_ms());
}
// This test is similar to the DelayedPacketOutage test above, but with the
// difference that Expand::Reset() is called after the 6th call to
// Expand::Process() (i.e., when i == 5). This should reset the statistics, and
// will in the end lead to an outage of 4 periods instead of 10.
TEST_F(ExpandTest, CheckOutageStatsAfterReset) {
  AudioMultiVector output(num_channels_);
  size_t samples_since_reset = 0;
  for (int call = 0; call != 10; ++call) {
    EXPECT_EQ(0, expand_.Process(&output));
    EXPECT_GT(output.Size(), 0u);
    samples_since_reset += output.Size();
    if (call == 5) {
      // Restart both the Expand object and the local sample count, so that
      // only the calls made after this point count towards the outage.
      expand_.Reset();
      samples_since_reset = 0;
    }
    EXPECT_EQ(0, statistics_.last_outage_duration_ms());
  }
  expand_.SetParametersForNormalAfterExpand();
  // The logged outage must equal the samples produced since the reset,
  // expressed in milliseconds.
  const int samples_per_ms = test_sample_rate_hz_ / 1000;
  const int expected_outage_ms =
      rtc::checked_cast<int>(samples_since_reset / samples_per_ms);
  EXPECT_EQ(expected_outage_ms, statistics_.last_outage_duration_ms());
}
namespace {

// Calls Process() on |expand| until Muted() reports true. Expects the instance
// to be unmuted on entry, and gives up with a fatal failure instead of making
// a call once 1000 calls have been made.
void ExpandUntilMuted(size_t num_channels, Expand* expand) {
  EXPECT_FALSE(expand->Muted()) << "Instance is muted from the start";
  AudioMultiVector output(num_channels);
  for (int num_calls = 0; !expand->Muted(); ++num_calls) {
    ASSERT_LT(num_calls, 1000) << "Test timed out";
    EXPECT_EQ(0, expand->Process(&output));
  }
}

}  // namespace
// Verifies that Muted() returns true after a long expand period. Also verifies
// that Muted() is reset to false after calling Reset(),
// SetParametersForMergeAfterExpand() and SetParametersForNormalAfterExpand().
TEST_F(ExpandTest, Muted) {
// Each stanza below drives the instance into the muted state and then checks
// that one specific call brings it back to unmuted.
// 1) Reset().
ExpandUntilMuted(num_channels_, &expand_);
expand_.Reset();
EXPECT_FALSE(expand_.Muted()); // Should be back to unmuted.
// 2) SetParametersForMergeAfterExpand().
ExpandUntilMuted(num_channels_, &expand_);
expand_.SetParametersForMergeAfterExpand();
EXPECT_FALSE(expand_.Muted()); // Should be back to unmuted.
// 3) SetParametersForNormalAfterExpand().
expand_.Reset(); // Must reset in order to start a new expand period.
ExpandUntilMuted(num_channels_, &expand_);
expand_.SetParametersForNormalAfterExpand();
EXPECT_FALSE(expand_.Muted()); // Should be back to unmuted.
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,314 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
#include <string.h> // Provide access to size_t.
#include <string>
#include <vector>
#include "webrtc/api/optional.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/rtc_base/scoped_ref_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class AudioFrame;
class AudioDecoderFactory;
// Snapshot of NetEq network statistics. All rates and fractions are in Q14.
struct NetEqNetworkStatistics {
  // Current jitter buffer size in ms.
  uint16_t current_buffer_size_ms;
  // Target buffer size in ms.
  uint16_t preferred_buffer_size_ms;
  // 1 if adding extra delay due to peaky jitter; 0 otherwise.
  uint16_t jitter_peaks_found;
  // Loss rate (network + late) in Q14.
  uint16_t packet_loss_rate;
  // Fraction (of original stream) of synthesized audio inserted through
  // expansion (in Q14).
  uint16_t expand_rate;
  // Fraction (of original stream) of synthesized speech inserted through
  // expansion (in Q14).
  uint16_t speech_expand_rate;
  // Fraction of data inserted through pre-emptive expansion (in Q14).
  uint16_t preemptive_rate;
  // Fraction of data removed through acceleration (in Q14).
  uint16_t accelerate_rate;
  // Fraction of data coming from FEC/RED decoding (in Q14).
  uint16_t secondary_decoded_rate;
  // Fraction of discarded FEC/RED data (in Q14).
  uint16_t secondary_discarded_rate;
  // Average clock-drift in parts-per-million (positive or negative).
  int32_t clockdrift_ppm;
  // Number of zero samples added in "off" mode.
  size_t added_zero_samples;

  // Statistics for packet waiting times, i.e., the time between a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms;
  int median_waiting_time_ms;
  int min_waiting_time_ms;
  int max_waiting_time_ms;
};
// NetEq statistics that persist over the lifetime of the class.
// These metrics are never reset.
struct NetEqLifetimeStatistics {
  // Both counters start at zero.

  // Total number of audio samples received, including synthesized samples.
  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalsamplesreceived
  uint64_t total_samples_received{0};

  // Total number of inbound audio samples that are based on synthesized data to
  // conceal packet loss.
  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-concealedsamples
  uint64_t concealed_samples{0};
};
// Playout modes selectable through NetEq::Config::playout_mode. The numeric
// values are spelled out; they are the ones the compiler assigned implicitly.
enum NetEqPlayoutMode {
  kPlayoutOn = 0,
  kPlayoutOff = 1,
  kPlayoutFax = 2,
  kPlayoutStreaming = 3
};
// This is the interface class for NetEq.
class NetEq {
public:
enum BackgroundNoiseMode {
kBgnOn, // Default behavior with eternal noise.
kBgnFade, // Noise fades to zero after some time.
kBgnOff // Background noise is always zero.
};
// Construction-time settings for NetEq. The constructor below establishes the
// defaults: 16000 Hz, post-decode VAD off, 50 packets, 2000 ms max delay,
// kBgnOff, kPlayoutOn, fast accelerate off. |enable_muted_state| defaults to
// false through its in-class initializer.
struct Config {
Config()
: sample_rate_hz(16000),
enable_post_decode_vad(false),
max_packets_in_buffer(50),
// |max_delay_ms| has the same effect as calling SetMaximumDelay().
max_delay_ms(2000),
background_noise_mode(kBgnOff),
playout_mode(kPlayoutOn),
enable_fast_accelerate(false) {}
std::string ToString() const;
int sample_rate_hz; // Initial value. Will change with input data.
bool enable_post_decode_vad;
size_t max_packets_in_buffer;
int max_delay_ms;
BackgroundNoiseMode background_noise_mode;
NetEqPlayoutMode playout_mode;
bool enable_fast_accelerate;
bool enable_muted_state = false;
};
// Status codes returned by the int-returning methods that are documented as
// returning kOK/kFail.
enum ReturnCodes {
kOK = 0,
kFail = -1,
kNotImplemented = -2
};
// Creates a new NetEq object, with parameters set in |config|. The |config|
// object will only have to be valid for the duration of the call to this
// method.
static NetEq* Create(
const NetEq::Config& config,
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
virtual ~NetEq() {}
// Inserts a new packet into NetEq. The |receive_timestamp| is an indication
// of the time when the packet was received, and should be measured with
// the same tick rate as the RTP timestamp of the current payload.
// Returns 0 on success, -1 on failure.
virtual int InsertPacket(const RTPHeader& rtp_header,
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp) = 0;
// Lets NetEq know that a packet arrived with an empty payload. This typically
// happens when empty packets are used for probing the network channel, and
// these packets use RTP sequence numbers from the same series as the actual
// audio packets.
virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;
// Instructs NetEq to deliver 10 ms of audio data. The data is written to
// |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
// |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
// |vad_activity_| are updated upon success. If an error is returned, some
// fields may not have been updated, or may contain inconsistent values.
// If muted state is enabled (through Config::enable_muted_state), |muted|
// may be set to true after a prolonged expand period. When this happens, the
// |data_| in |audio_frame| is not written, but should be interpreted as being
// all zeros.
// Returns kOK on success, or kFail in case of an error.
virtual int GetAudio(AudioFrame* audio_frame, bool* muted) = 0;
// Replaces the current set of decoders with the given one.
// NOTE(review): std::map is used here, but this header does not include <map>
// itself; it presumably arrives through another include -- confirm, or add
// the include.
virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0;
// Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the
// information in the codec database. Returns 0 on success, -1 on failure.
// The name is only used to provide information back to the caller about the
// decoders. Hence, the name is arbitrary, and may be empty.
virtual int RegisterPayloadType(NetEqDecoder codec,
const std::string& codec_name,
uint8_t rtp_payload_type) = 0;
// Provides an externally created decoder object |decoder| to insert in the
// decoder database. The decoder implements a decoder of type |codec| and
// associates it with |rtp_payload_type| and |codec_name|. Returns kOK on
// success, kFail on failure. The name is only used to provide information
// back to the caller about the decoders. Hence, the name is arbitrary, and
// may be empty.
virtual int RegisterExternalDecoder(AudioDecoder* decoder,
NetEqDecoder codec,
const std::string& codec_name,
uint8_t rtp_payload_type) = 0;
// Associates |rtp_payload_type| with the given codec, which NetEq will
// instantiate when it needs it. Returns true iff successful.
virtual bool RegisterPayloadType(int rtp_payload_type,
const SdpAudioFormat& audio_format) = 0;
// Removes |rtp_payload_type| from the codec database. Returns 0 on success,
// -1 on failure. Removing a payload type that is not registered is ok and
// will not result in an error.
virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
// Removes all payload types from the codec database.
virtual void RemoveAllPayloadTypes() = 0;
// Sets a minimum delay in milliseconds for the packet buffer. The minimum is
// maintained unless a higher latency is dictated by channel conditions.
// Returns true if the minimum is successfully applied, otherwise false is
// returned.
virtual bool SetMinimumDelay(int delay_ms) = 0;
// Sets a maximum delay in milliseconds for the packet buffer. The latency
// will not exceed the given value, even if the required delay (given the
// channel conditions) is higher. Calling this method has the same effect as
// setting the |max_delay_ms| value in the NetEq::Config struct.
virtual bool SetMaximumDelay(int delay_ms) = 0;
// The smallest latency required. This is computed based on inter-arrival
// time and internal NetEq logic. Note that in computing this latency none of
// the user defined limits (applied by calling SetMinimumDelay() and/or
// SetMaximumDelay()) are applied.
virtual int LeastRequiredDelayMs() const = 0;
// Not implemented.
virtual int SetTargetDelay() = 0;
// Returns the current target delay in ms. This includes any extra delay
// requested through SetMinimumDelay.
virtual int TargetDelayMs() = 0;
// Returns the current total delay (packet buffer and sync buffer) in ms.
virtual int CurrentDelayMs() const = 0;
// Returns the current total delay (packet buffer and sync buffer) in ms,
// with smoothing applied to even out short-time fluctuations due to jitter.
// The packet buffer part of the delay is not updated during DTX/CNG periods.
virtual int FilteredCurrentDelayMs() const = 0;
// Sets the playout mode to |mode|.
// Deprecated. Set the mode in the Config struct passed to the constructor.
// TODO(henrik.lundin) Delete.
virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
// Returns the current playout mode.
// Deprecated.
// TODO(henrik.lundin) Delete.
virtual NetEqPlayoutMode PlayoutMode() const = 0;
// Writes the current network statistics to |stats|. The statistics are reset
// after the call.
virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
// Returns a copy of this class's lifetime statistics. These statistics are
// never reset.
virtual NetEqLifetimeStatistics GetLifetimeStatistics() const = 0;
// Writes the current RTCP statistics to |stats|. The statistics are reset
// and a new report period is started with the call.
virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
// Same as GetRtcpStatistics(), but does not reset anything.
virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
virtual void EnableVad() = 0;
// Disables post-decode VAD.
virtual void DisableVad() = 0;
// Returns the RTP timestamp for the last sample delivered by GetAudio().
// The return value will be empty if no valid timestamp is available.
virtual rtc::Optional<uint32_t> GetPlayoutTimestamp() const = 0;
// Returns the sample rate in Hz of the audio produced in the last GetAudio
// call. If GetAudio has not been called yet, the configured sample rate
// (Config::sample_rate_hz) is returned.
virtual int last_output_sample_rate_hz() const = 0;
// Returns info about the decoder for the given payload type, or an empty
// value if we have no decoder for that payload type.
virtual rtc::Optional<CodecInst> GetDecoder(int payload_type) const = 0;
// Returns the decoder format for the given payload type. Returns empty if no
// such payload type was registered.
virtual rtc::Optional<SdpAudioFormat> GetDecoderFormat(
int payload_type) const = 0;
// Not implemented.
virtual int SetTargetNumberOfChannels() = 0;
// Not implemented.
virtual int SetTargetSampleRate() = 0;
// Flushes both the packet buffer and the sync buffer.
virtual void FlushBuffers() = 0;
// Current usage of the packet buffer and its limits.
virtual void PacketBufferStatistics(int* current_num_packets,
int* max_num_packets) const = 0;
// Enables NACK and sets the maximum size of the NACK list, which should be
// positive and no larger than Nack::kNackListSizeLimit. If NACK is already
// enabled then the maximum NACK list size is modified accordingly.
virtual void EnableNack(size_t max_nack_list_size) = 0;
// Counterpart of EnableNack(); presumably turns NACK off again -- confirm
// against the implementation.
virtual void DisableNack() = 0;
// Returns a list of RTP sequence numbers corresponding to packets to be
// retransmitted, given an estimate of the round-trip time in milliseconds.
virtual std::vector<uint16_t> GetNackList(
int64_t round_trip_time_ms) const = 0;
// Returns a vector containing the timestamps of the packets that were decoded
// in the last GetAudio call. If no packets were decoded in the last call, the
// vector is empty.
// Mainly intended for testing.
virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0;
// Returns the length of the audio yet to play in the sync buffer.
// Mainly intended for testing.
virtual int SyncBufferSizeMs() const = 0;
protected:
// Protected: the class is abstract and instances are obtained via Create().
NetEq() {}
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_

View File

@ -0,0 +1,381 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/merge.h"
#include <assert.h>
#include <string.h> // memmove, memcpy, memset, size_t
#include <algorithm> // min, max
#include <memory>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/rtc_base/safe_conversions.h"
#include "webrtc/rtc_base/safe_minmax.h"
namespace webrtc {
// Sets up a Merge object for |num_channels| channels at sample rate |fs_hz|.
// |expand| and |sync_buffer| are stored as given; NOTE(review): presumably they
// are borrowed and must outlive this object -- confirm with the class
// declaration. |num_channels| must be greater than zero (assert only).
Merge::Merge(int fs_hz,
size_t num_channels,
Expand* expand,
SyncBuffer* sync_buffer)
: fs_hz_(fs_hz),
num_channels_(num_channels),
// Sample rate expressed as a multiple of 8 kHz.
fs_mult_(fs_hz_ / 8000),
// Number of samples in 10 ms at |fs_hz_|.
timestamps_per_call_(static_cast<size_t>(fs_hz_ / 100)),
expand_(expand),
sync_buffer_(sync_buffer),
expanded_(num_channels_) {
assert(num_channels_ > 0);
}
// Defaulted out of line, in this translation unit.
Merge::~Merge() = default;
// Merges newly decoded data with expansion data, writing the result to
// |output|.
//   input                      - interleaved samples for all channels.
//   input_length               - total number of samples in |input| (all
//                                channels).
//   external_mute_factor_array - one Q14 mute factor per channel (16384 is
//                                1.0); read and updated in place.
//   output                     - must be empty on entry (assert only); on
//                                return, the first |old_length| samples have
//                                been moved to |sync_buffer_| and removed.
// Returns the number of samples per channel added, i.e. the merged length
// minus the samples that were borrowed from |sync_buffer_|.
size_t Merge::Process(int16_t* input, size_t input_length,
int16_t* external_mute_factor_array,
AudioMultiVector* output) {
// TODO(hlundin): Change to an enumerator and skip assert.
assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
fs_hz_ == 48000);
assert(fs_hz_ <= kMaxSampleRate); // Should not be possible.
size_t old_length;
size_t expand_period;
// Get expansion data to overlap and mix with.
size_t expanded_length = GetExpandedSignal(&old_length, &expand_period);
// Transfer input signal to an AudioMultiVector.
AudioMultiVector input_vector(num_channels_);
input_vector.PushBackInterleaved(input, input_length);
size_t input_length_per_channel = input_vector.Size();
assert(input_length_per_channel == input_length / num_channels_);
size_t best_correlation_index = 0;
size_t output_length = 0;
// Scratch buffers holding one channel at a time; reused for every channel.
std::unique_ptr<int16_t[]> input_channel(
new int16_t[input_length_per_channel]);
std::unique_ptr<int16_t[]> expanded_channel(new int16_t[expanded_length]);
for (size_t channel = 0; channel < num_channels_; ++channel) {
input_vector[channel].CopyTo(
input_length_per_channel, 0, input_channel.get());
expanded_[channel].CopyTo(expanded_length, 0, expanded_channel.get());
int16_t new_mute_factor = SignalScaling(
input_channel.get(), input_length_per_channel, expanded_channel.get());
// Adjust muting factor (product of "main" muting factor and expand muting
// factor).
int16_t* external_mute_factor = &external_mute_factor_array[channel];
*external_mute_factor =
(*external_mute_factor * expand_->MuteFactor(channel)) >> 14;
// Update |external_mute_factor| if it is lower than |new_mute_factor|.
if (new_mute_factor > *external_mute_factor) {
*external_mute_factor = std::min(new_mute_factor,
static_cast<int16_t>(16384));
}
if (channel == 0) {
// Downsample, correlate, and find strongest correlation period for the
// master (i.e., first) channel only.
// Downsample to 4kHz sample rate.
Downsample(input_channel.get(), input_length_per_channel,
expanded_channel.get(), expanded_length);
// Calculate the lag of the strongest correlation period.
best_correlation_index = CorrelateAndPeakSearch(
old_length, input_length_per_channel, expand_period);
}
// |best_correlation_index| found for channel 0 is reused for all channels.
temp_data_.resize(input_length_per_channel + best_correlation_index);
int16_t* decoded_output = temp_data_.data() + best_correlation_index;
// Mute the new decoded data if needed (and unmute it linearly).
// This is the overlapping part of expanded_signal.
size_t interpolation_length = std::min(
kMaxCorrelationLength * fs_mult_,
expanded_length - best_correlation_index);
interpolation_length = std::min(interpolation_length,
input_length_per_channel);
if (*external_mute_factor < 16384) {
// Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB,
// and so on.
int increment = 4194 / fs_mult_;
*external_mute_factor =
static_cast<int16_t>(DspHelper::RampSignal(input_channel.get(),
interpolation_length,
*external_mute_factor,
increment));
DspHelper::UnmuteSignal(&input_channel[interpolation_length],
input_length_per_channel - interpolation_length,
external_mute_factor, increment,
&decoded_output[interpolation_length]);
} else {
// No muting needed.
memmove(
&decoded_output[interpolation_length],
&input_channel[interpolation_length],
sizeof(int16_t) * (input_length_per_channel - interpolation_length));
}
// Do overlap and mix linearly.
int16_t increment =
static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14.
int16_t mute_factor = 16384 - increment;
// The part of the expanded signal before the best lag is copied unchanged.
memmove(temp_data_.data(), expanded_channel.get(),
sizeof(int16_t) * best_correlation_index);
DspHelper::CrossFade(&expanded_channel[best_correlation_index],
input_channel.get(), interpolation_length,
&mute_factor, increment, decoded_output);
output_length = best_correlation_index + input_length_per_channel;
if (channel == 0) {
assert(output->Empty()); // Output should be empty at this point.
output->AssertSize(output_length);
} else {
assert(output->Size() == output_length);
}
(*output)[channel].OverwriteAt(temp_data_.data(), output_length, 0);
}
// Copy back the first part of the data to |sync_buffer_| and remove it from
// |output|.
sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index());
output->PopFront(old_length);
// Return new added length. |old_length| samples were borrowed from
// |sync_buffer_|.
RTC_DCHECK_GE(output_length, old_length);
return output_length - old_length;
}
size_t Merge::GetExpandedSignal(size_t* old_length, size_t* expand_period) {
  // Fills |expanded_| with the samples left in |sync_buffer_| followed by
  // repeated expand output. On return, |*old_length| holds the number of
  // samples taken from the sync buffer and |*expand_period| the per-channel
  // length of one expand output. The return value is the required length,
  // (120 + 80 + 2) * fs_mult_.

  // Largest number of leftover samples that is kept from the sync buffer.
  const size_t max_old_length = 210 * kMaxSampleRate / 8000;

  // Check how much data that is left since earlier.
  *old_length = sync_buffer_->FutureLength();
  // Should never be less than overlap_length.
  assert(*old_length >= expand_->overlap_length());
  // Generate data to merge the overlap with using expand.
  expand_->SetParametersForMergeAfterExpand();

  if (*old_length >= max_old_length) {
    // TODO(hlundin): Write test case for this.
    // The number of samples available in the sync buffer is more than what
    // fits in expanded_signal. Keep the first |max_old_length| samples, but
    // shift them towards the end of the buffer. This is ok, since all of the
    // buffer will be expand data anyway, so as long as the beginning is left
    // untouched, we're fine.
    const size_t excess_length = *old_length - max_old_length;
    sync_buffer_->InsertZerosAtIndex(excess_length,
                                     sync_buffer_->next_index());
    // This is the truncated length.
    *old_length = max_old_length;
  }
  // This assert should always be true thanks to the if statement above.
  assert(*old_length <= max_old_length);

  AudioMultiVector expanded_temp(num_channels_);
  expand_->Process(&expanded_temp);
  *expand_period = expanded_temp.Size();  // Samples per channel.

  // Start over with what is left since earlier in the sync buffer.
  expanded_.Clear();
  expanded_.PushBackFromIndex(*sync_buffer_, sync_buffer_->next_index());
  assert(expanded_.Size() == *old_length);
  assert(expanded_temp.Size() > 0);

  // Do "ugly" copy and paste from the expanded in order to generate more data
  // to correlate (but not interpolate) with.
  const size_t required_length =
      static_cast<size_t>((120 + 80 + 2) * fs_mult_);
  if (expanded_.Size() < required_length) {
    // Append one more pitch period each time until there is enough data.
    do {
      expanded_.PushBack(expanded_temp);
    } while (expanded_.Size() < required_length);
    // Trim the length to exactly |required_length|.
    expanded_.PopBack(expanded_.Size() - required_length);
  }
  assert(expanded_.Size() >= required_length);
  return required_length;
}
// Compares the energy of |input| with that of |expanded_signal| over the first
// min(64 * fs_mult_, input_length) samples and returns a mute factor in Q14:
// 16384 (i.e., 1.0) when the input energy does not exceed the expanded energy,
// otherwise sqrt(energy_expanded / energy_input).
// NOTE(review): std::numeric_limits is used below, but this file does not
// include <limits> directly; it presumably arrives through another header --
// confirm, or add the include.
// NOTE(review): if |input_length| is 0, |mod_input_length| is 0 and the two
// `max() / mod_input_length` expressions divide by zero; callers presumably
// never pass an empty input -- confirm.
int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,
const int16_t* expanded_signal) const {
// Adjust muting factor if new vector is more or less of the BGN energy.
const auto mod_input_length = rtc::SafeMin<size_t>(
64 * rtc::dchecked_cast<size_t>(fs_mult_), input_length);
const int16_t expanded_max =
WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);
// |factor| is derived from the largest possible energy (max^2 * length) and
// selects the scaling shift that is handed to the dot product below.
int32_t factor = (expanded_max * expanded_max) /
(std::numeric_limits<int32_t>::max() /
static_cast<int32_t>(mod_input_length));
const int expanded_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,
expanded_signal,
mod_input_length,
expanded_shift);
// Calculate energy of input signal.
const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);
factor = (input_max * input_max) / (std::numeric_limits<int32_t>::max() /
static_cast<int32_t>(mod_input_length));
const int input_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,
mod_input_length,
input_shift);
// Align to the same Q-domain.
if (input_shift > expanded_shift) {
energy_expanded = energy_expanded >> (input_shift - expanded_shift);
} else {
energy_input = energy_input >> (expanded_shift - input_shift);
}
// Calculate muting factor to use for new frame.
int16_t mute_factor;
if (energy_input > energy_expanded) {
// Normalize |energy_input| to 14 bits.
int16_t temp_shift = WebRtcSpl_NormW32(energy_input) - 17;
energy_input = WEBRTC_SPL_SHIFT_W32(energy_input, temp_shift);
// Put |energy_expanded| in a domain 14 higher, so that
// energy_expanded / energy_input is in Q14.
energy_expanded = WEBRTC_SPL_SHIFT_W32(energy_expanded, temp_shift + 14);
// Calculate sqrt(energy_expanded / energy_input) in Q14.
mute_factor = static_cast<int16_t>(
WebRtcSpl_SqrtFloor((energy_expanded / energy_input) << 14));
} else {
// Set to 1 (in Q14) when |expanded| has higher energy than |input|.
mute_factor = 16384;
}
return mute_factor;
}
// TODO(hlundin): There are some parameter values in this method that seem
// strange. Compare with Expand::Correlation.
// Downsamples |expanded_signal| (|expanded_length| samples) and |input|
// (|input_length| samples) to 4 kHz, writing the results to
// |expanded_downsampled_| and |input_downsampled_| respectively.
//
// The anti-aliasing filter and its length are selected from |fs_hz_|; any
// rate other than 8000, 16000 or 32000 Hz is treated as 48000 Hz. Both
// signals are read starting |num_coefficients - 1| samples in, so that the
// filter has history to work with.
//
// When |input| is at most 10 ms long, only as many output samples as the
// input can support are produced and the remainder of |input_downsampled_|
// is zero-filled. An |input| shorter than the filter offset is treated as
// empty: nothing is downsampled and |input_downsampled_| becomes all zeros.
void Merge::Downsample(const int16_t* input, size_t input_length,
                       const int16_t* expanded_signal, size_t expanded_length) {
  const int16_t* filter_coefficients;
  size_t num_coefficients;
  int decimation_factor = fs_hz_ / 4000;
  static const size_t kCompensateDelay = 0;
  size_t length_limit = static_cast<size_t>(fs_hz_ / 100);  // 10 ms in samples.
  if (fs_hz_ == 8000) {
    filter_coefficients = DspHelper::kDownsample8kHzTbl;
    num_coefficients = 3;
  } else if (fs_hz_ == 16000) {
    filter_coefficients = DspHelper::kDownsample16kHzTbl;
    num_coefficients = 5;
  } else if (fs_hz_ == 32000) {
    filter_coefficients = DspHelper::kDownsample32kHzTbl;
    num_coefficients = 7;
  } else {  // fs_hz_ == 48000
    filter_coefficients = DspHelper::kDownsample48kHzTbl;
    num_coefficients = 7;
  }
  size_t signal_offset = num_coefficients - 1;
  WebRtcSpl_DownsampleFast(&expanded_signal[signal_offset],
                           expanded_length - signal_offset,
                           expanded_downsampled_, kExpandDownsampLength,
                           filter_coefficients, num_coefficients,
                           decimation_factor, kCompensateDelay);
  if (input_length <= length_limit) {
    // Not quite long enough, so we have to cheat a bit.
    // If the input is shorter than the filter offset, treat it as having zero
    // length. The unsigned subtraction would otherwise wrap around to a huge
    // value and make the downsampler read far outside |input|. This is a
    // pathological case; the output is simply all zeros.
    const size_t temp_len =
        input_length > signal_offset ? input_length - signal_offset : 0;
    // TODO(hlundin): Should |downsamp_temp_len| be corrected for round-off
    // errors? I.e., (temp_len + decimation_factor - 1) / decimation_factor?
    const size_t downsamp_temp_len = temp_len / decimation_factor;
    if (downsamp_temp_len > 0) {
      WebRtcSpl_DownsampleFast(&input[signal_offset], temp_len,
                               input_downsampled_, downsamp_temp_len,
                               filter_coefficients, num_coefficients,
                               decimation_factor, kCompensateDelay);
    }
    // Zero-fill whatever part of the output buffer was not produced above.
    memset(&input_downsampled_[downsamp_temp_len], 0,
           sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len));
  } else {
    WebRtcSpl_DownsampleFast(&input[signal_offset],
                             input_length - signal_offset, input_downsampled_,
                             kInputDownsampLength, filter_coefficients,
                             num_coefficients, decimation_factor,
                             kCompensateDelay);
  }
}
// Cross-correlates |input_downsampled_| with |expanded_downsampled_|, searches
// the normalized correlation for its best peak, and returns the resulting
// index after adding the start offset that the search was restricted to.
// |start_position| and |input_length| constrain where the search may begin;
// |expand_period| is only used by the fallback loop at the end.
size_t Merge::CorrelateAndPeakSearch(size_t start_position, size_t input_length,
size_t expand_period) const {
// Calculate correlation without any normalization.
const size_t max_corr_length = kMaxCorrelationLength;
// Number of correlation values to compute, capped at the size of
// |correlation|.
size_t stop_position_downsamp =
std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1);
int32_t correlation[kMaxCorrelationLength];
CrossCorrelationWithAutoShift(input_downsampled_, expanded_downsampled_,
kInputDownsampLength, stop_position_downsamp, 1,
correlation);
// Normalize correlation to 14 bits and copy to a 16-bit array.
// The 16-bit buffer is zero-initialized and has |pad_length| extra elements
// on each side of the kMaxCorrelationLength-element data region;
// |correlation_ptr| points at the start of the data region.
const size_t pad_length = expand_->overlap_length() - 1;
const size_t correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;
std::unique_ptr<int16_t[]> correlation16(
new int16_t[correlation_buffer_size]);
memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t));
int16_t* correlation_ptr = &correlation16[pad_length];
int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
stop_position_downsamp);
// Never shift left: the shift is clamped at zero.
int norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation));
WebRtcSpl_VectorBitShiftW32ToW16(correlation_ptr, stop_position_downsamp,
correlation, norm_shift);
// Calculate allowed starting point for peak finding.
// The peak location bestIndex must fulfill two criteria:
// (1) w16_bestIndex + input_length <
// timestamps_per_call_ + expand_->overlap_length();
// (2) w16_bestIndex + input_length < start_position.
// NOTE(review): the loop at the end of this function keeps advancing the
// index while either "<" relation above holds, i.e. it enforces ">=" for both.
// The "<" in the two criteria looks inverted -- confirm.
size_t start_index = timestamps_per_call_ + expand_->overlap_length();
start_index = std::max(start_position, start_index);
// Clamp at zero instead of letting the unsigned subtraction wrap.
start_index = (input_length > start_index) ? 0 : (start_index - input_length);
// Downscale starting index to 4kHz domain. (fs_mult_ * 2 = fs_hz_ / 4000.)
size_t start_index_downsamp = start_index / (fs_mult_ * 2);
// Calculate a modified |stop_position_downsamp| to account for the increased
// start index |start_index_downsamp| and the effective array length.
size_t modified_stop_pos =
std::min(stop_position_downsamp,
kMaxCorrelationLength + pad_length - start_index_downsamp);
size_t best_correlation_index;
int16_t best_correlation;
static const size_t kNumCorrelationCandidates = 1;
// Search for a single peak, starting |start_index_downsamp| elements into the
// data region.
DspHelper::PeakDetection(&correlation_ptr[start_index_downsamp],
modified_stop_pos, kNumCorrelationCandidates,
fs_mult_, &best_correlation_index,
&best_correlation);
// Compensate for modified start index.
best_correlation_index += start_index;
// Ensure that underrun does not occur for 10ms case => we have to get at
// least 10ms + overlap . (This should never happen thanks to the above
// modification of peak-finding starting point.)
// When assert() is compiled out, the loop instead advances the index by
// |expand_period| until both conditions are satisfied.
while (((best_correlation_index + input_length) <
(timestamps_per_call_ + expand_->overlap_length())) ||
((best_correlation_index + input_length) < start_position)) {
assert(false); // Should never happen.
best_correlation_index += expand_period; // Jump one lag ahead.
}
return best_correlation_index;
}
// Returns the number of samples in 10 ms of audio at |fs_hz_|, multiplied by
// the number of channels.
size_t Merge::RequiredFutureSamples() {
  const int samples_per_channel = fs_hz_ / 100;  // 10 ms.
  return samples_per_channel * num_channels_;
}
} // namespace webrtc

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MERGE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MERGE_H_
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class Expand;
class SyncBuffer;
// This class handles the transition from expansion to normal operation.
// When a packet is not available for decoding when needed, the expand operation
// is called to generate extrapolation data. If the missing packet arrives,
// i.e., it was just delayed, it can be decoded and appended directly to the
// end of the expanded data (thanks to how the Expand class operates). However,
// if a later packet arrives instead, the loss is a fact, and the new data must
// be stitched together with the end of the expanded data. This stitching is
// what the Merge class does.
class Merge {
public:
// |fs_hz| is the sample rate in Hz and |num_channels| the number of audio
// channels. |expand| and |sync_buffer| are non-owning pointers to the objects
// this class works with.
Merge(int fs_hz,
size_t num_channels,
Expand* expand,
SyncBuffer* sync_buffer);
virtual ~Merge();
// The main method to produce the audio data. The decoded data is supplied in
// |input|, having |input_length| samples in total for all channels
// (interleaved). The result is written to |output|. The number of channels
// allocated in |output| defines the number of channels that will be used when
// de-interleaving |input|. The values in |external_mute_factor_array| (Q14)
// will be used to scale the audio, and is updated in the process. The array
// must have |num_channels_| elements.
virtual size_t Process(int16_t* input, size_t input_length,
int16_t* external_mute_factor_array,
AudioMultiVector* output);
// Returns the number of samples in 10 ms of audio at |fs_hz_|, summed over
// all |num_channels_| channels.
virtual size_t RequiredFutureSamples();
protected:
const int fs_hz_;  // Sample rate in Hz.
const size_t num_channels_;  // Number of audio channels.
private:
// Sample rate (Hz) used to bound the amount of old data that
// GetExpandedSignal() accepts from the sync buffer.
static const int kMaxSampleRate = 48000;
// Number of elements in |expanded_downsampled_|.
static const size_t kExpandDownsampLength = 100;
// Number of elements in |input_downsampled_|.
static const size_t kInputDownsampLength = 40;
// Upper bound on the number of correlation values computed in
// CorrelateAndPeakSearch().
static const size_t kMaxCorrelationLength = 60;
// Calls |expand_| to get more expansion data to merge with. The data is
// written to |expanded_|. Returns the length of the expanded data,
// while |expand_period| will be the number of samples in one expansion period
// (typically one pitch period). The value of |old_length| will be the number
// of samples that were taken from the |sync_buffer_|.
size_t GetExpandedSignal(size_t* old_length, size_t* expand_period);
// Analyzes |input| and |expanded_signal| and returns muting factor (Q14) to
// be used on the new data.
int16_t SignalScaling(const int16_t* input, size_t input_length,
const int16_t* expanded_signal) const;
// Downsamples |input| (|input_length| samples) and |expanded_signal| to
// 4 kHz sample rate. The downsampled signals are written to
// |input_downsampled_| and |expanded_downsampled_|, respectively.
void Downsample(const int16_t* input, size_t input_length,
const int16_t* expanded_signal, size_t expanded_length);
// Calculates cross-correlation between |input_downsampled_| and
// |expanded_downsampled_|, and finds the correlation maximum. The maximizing
// lag is returned.
size_t CorrelateAndPeakSearch(size_t start_position, size_t input_length,
size_t expand_period) const;
const int fs_mult_; // fs_hz_ / 8000.
const size_t timestamps_per_call_;
Expand* expand_;  // Source of expansion data and of overlap/lag parameters.
SyncBuffer* sync_buffer_;  // Source of the not-yet-played old samples.
// Outputs of Downsample(), inputs to CorrelateAndPeakSearch().
int16_t expanded_downsampled_[kExpandDownsampLength];
int16_t input_downsampled_[kInputDownsampLength];
// Expanded signal assembled by GetExpandedSignal().
AudioMultiVector expanded_;
std::vector<int16_t> temp_data_;
RTC_DISALLOW_COPY_AND_ASSIGN(Merge);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MERGE_H_

View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Merge class.
#include "webrtc/modules/audio_coding/neteq/merge.h"
#include <vector>
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
// Smoke test: a Merge object and all of its collaborators can be constructed
// and destroyed for a mono 8 kHz configuration.
TEST(Merge, CreateAndDestroy) {
  const int kSampleRateHz = 8000;
  const size_t kNumChannels = 1;

  // Collaborators required by Expand and Merge.
  BackgroundNoise background_noise(kNumChannels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  Expand expand(&background_noise, &sync_buffer, &random_vector, &statistics,
                kSampleRateHz, kNumChannels);

  Merge merge(kSampleRateHz, kNumChannels, &expand, &sync_buffer);
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for BufferLevelFilter. The destructor calls the mocked Die()
// so that tests can set expectations on destruction.
class MockBufferLevelFilter : public BufferLevelFilter {
 public:
  virtual ~MockBufferLevelFilter() { Die(); }

  MOCK_METHOD0(Die, void());
  MOCK_METHOD0(Reset, void());
  MOCK_METHOD3(Update,
               void(size_t buffer_size_packets,
                    int time_stretched_samples,
                    size_t packet_len_samples));
  MOCK_METHOD1(SetTargetBufferLevel, void(int target_buffer_level));
  MOCK_CONST_METHOD0(filtered_current_level, int());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
#include <string>
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for DecoderDatabase. The optional |factory| is forwarded to
// the real base-class constructor. The destructor calls the mocked Die() so
// that tests can set expectations on destruction.
class MockDecoderDatabase : public DecoderDatabase {
 public:
  explicit MockDecoderDatabase(
      rtc::scoped_refptr<AudioDecoderFactory> factory = nullptr)
      : DecoderDatabase(factory) {}
  virtual ~MockDecoderDatabase() { Die(); }

  MOCK_METHOD0(Die, void());

  // Container state.
  MOCK_CONST_METHOD0(Empty, bool());
  MOCK_CONST_METHOD0(Size, int());
  MOCK_METHOD0(Reset, void());

  // Registration and removal of payload types.
  MOCK_METHOD3(RegisterPayload,
               int(uint8_t rtp_payload_type,
                   NetEqDecoder codec_type,
                   const std::string& name));
  MOCK_METHOD2(RegisterPayload,
               int(int rtp_payload_type, const SdpAudioFormat& audio_format));
  MOCK_METHOD4(InsertExternal,
               int(uint8_t rtp_payload_type,
                   NetEqDecoder codec_type,
                   const std::string& codec_name,
                   AudioDecoder* decoder));
  MOCK_METHOD1(Remove, int(uint8_t rtp_payload_type));
  MOCK_METHOD0(RemoveAll, void());

  // Lookup and active-decoder handling.
  MOCK_CONST_METHOD1(GetDecoderInfo,
                     const DecoderInfo*(uint8_t rtp_payload_type));
  MOCK_METHOD2(SetActiveDecoder,
               int(uint8_t rtp_payload_type, bool* new_decoder));
  MOCK_CONST_METHOD0(GetActiveDecoder, AudioDecoder*());
  MOCK_METHOD1(SetActiveCngDecoder, int(uint8_t rtp_payload_type));
  MOCK_CONST_METHOD0(GetActiveCngDecoder, ComfortNoiseDecoder*());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for DelayManager. Constructor arguments are forwarded to the
// real base class. The destructor calls the mocked Die() so that tests can
// set expectations on destruction.
class MockDelayManager : public DelayManager {
 public:
  MockDelayManager(size_t max_packets_in_buffer,
                   DelayPeakDetector* peak_detector,
                   const TickTimer* tick_timer)
      : DelayManager(max_packets_in_buffer, peak_detector, tick_timer) {}
  virtual ~MockDelayManager() { Die(); }

  MOCK_METHOD0(Die, void());
  MOCK_CONST_METHOD0(iat_vector, const IATVector&());
  MOCK_METHOD3(Update,
               int(uint16_t sequence_number,
                   uint32_t timestamp,
                   int sample_rate_hz));
  MOCK_METHOD1(CalculateTargetLevel, int(int iat_packets));
  MOCK_METHOD1(SetPacketAudioLength, int(int length_ms));
  MOCK_METHOD0(Reset, void());
  MOCK_CONST_METHOD0(PeakFound, bool());
  MOCK_METHOD1(UpdateCounters, void(int elapsed_time_ms));
  MOCK_METHOD0(ResetPacketIatCount, void());
  MOCK_CONST_METHOD2(BufferLimits, void(int* lower_limit, int* higher_limit));
  MOCK_CONST_METHOD0(TargetLevel, int());
  MOCK_METHOD0(RegisterEmptyPacket, void());

  // Accessors and mutators.
  MOCK_METHOD1(set_extra_delay_ms, void(int16_t delay));
  MOCK_CONST_METHOD0(base_target_level, int());
  MOCK_METHOD1(set_streaming_mode, void(bool value));
  MOCK_CONST_METHOD0(last_pack_cng_or_dtmf, int());
  MOCK_METHOD1(set_last_pack_cng_or_dtmf, void(int value));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for DelayPeakDetector. |tick_timer| is forwarded to the real
// base class. The destructor calls the mocked Die() so that tests can set
// expectations on destruction.
class MockDelayPeakDetector : public DelayPeakDetector {
 public:
  MockDelayPeakDetector(const TickTimer* tick_timer)
      : DelayPeakDetector(tick_timer) {}
  virtual ~MockDelayPeakDetector() { Die(); }

  MOCK_METHOD0(Die, void());
  MOCK_METHOD0(Reset, void());
  MOCK_METHOD1(SetPacketAudioLength, void(int length_ms));
  MOCK_METHOD0(peak_found, bool());
  MOCK_CONST_METHOD0(MaxPeakHeight, int());
  MOCK_CONST_METHOD0(MaxPeakPeriod, uint64_t());
  MOCK_METHOD2(Update, bool(int inter_arrival_time, int target_level));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_

View File

@ -0,0 +1,38 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
#include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for DtmfBuffer. |fs| is forwarded to the real base class.
// The destructor calls the mocked Die() so that tests can set expectations on
// destruction.
class MockDtmfBuffer : public DtmfBuffer {
 public:
  MockDtmfBuffer(int fs) : DtmfBuffer(fs) {}
  virtual ~MockDtmfBuffer() { Die(); }

  MOCK_METHOD0(Die, void());
  MOCK_METHOD0(Flush, void());
  MOCK_METHOD1(InsertEvent, int(const DtmfEvent& event));
  MOCK_METHOD2(GetEvent, bool(uint32_t current_timestamp, DtmfEvent* event));
  MOCK_CONST_METHOD0(Length, size_t());
  MOCK_CONST_METHOD0(Empty, bool());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
#include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for DtmfToneGenerator. The destructor calls the mocked Die()
// so that tests can set expectations on destruction.
class MockDtmfToneGenerator : public DtmfToneGenerator {
 public:
  virtual ~MockDtmfToneGenerator() { Die(); }

  MOCK_METHOD0(Die, void());
  MOCK_METHOD3(Init, int(int fs, int event, int attenuation));
  MOCK_METHOD0(Reset, void());
  MOCK_METHOD2(Generate, int(size_t num_samples, AudioMultiVector* output));
  MOCK_CONST_METHOD0(initialized, bool());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for Expand. All constructor arguments are forwarded to the
// real base class. The destructor calls the mocked Die() so that tests can
// set expectations on destruction.
class MockExpand : public Expand {
 public:
  MockExpand(BackgroundNoise* background_noise,
             SyncBuffer* sync_buffer,
             RandomVector* random_vector,
             StatisticsCalculator* statistics,
             int fs,
             size_t num_channels)
      : Expand(background_noise, sync_buffer, random_vector, statistics, fs,
               num_channels) {}
  virtual ~MockExpand() { Die(); }

  MOCK_METHOD0(Die, void());
  MOCK_METHOD0(Reset, void());
  MOCK_METHOD1(Process, int(AudioMultiVector* output));
  MOCK_METHOD0(SetParametersForNormalAfterExpand, void());
  MOCK_METHOD0(SetParametersForMergeAfterExpand, void());
  MOCK_CONST_METHOD0(overlap_length, size_t());
};
} // namespace webrtc
namespace webrtc {
// gMock stand-in for ExpandFactory; the only mocked method is Create().
class MockExpandFactory : public ExpandFactory {
 public:
  MOCK_CONST_METHOD6(Create,
                     Expand*(BackgroundNoise* background_noise,
                             SyncBuffer* sync_buffer,
                             RandomVector* random_vector,
                             StatisticsCalculator* statistics,
                             int fs,
                             size_t num_channels));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_
#include "webrtc/api/audio_codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/test/gmock.h"
#include "webrtc/typedefs.h"
namespace webrtc {
using ::testing::_;
using ::testing::Invoke;
// An "external" PCM16b decoder: a single-channel AudioDecoder that decodes
// with WebRtcPcm16b_Decode() and reports the sample rate it was created with.
class ExternalPcm16B : public AudioDecoder {
 public:
  explicit ExternalPcm16B(int sample_rate_hz)
      : sample_rate_hz_(sample_rate_hz) {}

  void Reset() override {}

  // Decodes |encoded_len| bytes from |encoded| into |decoded| and returns the
  // value reported by WebRtcPcm16b_Decode(). Raises a gtest expectation
  // failure if |sample_rate_hz| differs from the rate given at construction.
  int DecodeInternal(const uint8_t* encoded,
                     size_t encoded_len,
                     int sample_rate_hz,
                     int16_t* decoded,
                     SpeechType* speech_type) override {
    EXPECT_EQ(sample_rate_hz_, sample_rate_hz);
    const size_t num_decoded =
        WebRtcPcm16b_Decode(encoded, encoded_len, decoded);
    *speech_type = ConvertSpeechType(1);
    return static_cast<int>(num_decoded);
  }

  int SampleRateHz() const override { return sample_rate_hz_; }
  size_t Channels() const override { return 1; }

 private:
  const int sample_rate_hz_;
  RTC_DISALLOW_COPY_AND_ASSIGN(ExternalPcm16B);
};
// Mock of ExternalPcm16B whose mocked methods delegate, by default, to a real
// ExternalPcm16B instance. This lets tests verify that the expected calls are
// made while still getting real decoder behavior.
class MockExternalPcm16B : public AudioDecoder {
 public:
  explicit MockExternalPcm16B(int sample_rate_hz) : real_(sample_rate_hz) {
    // Default actions: forward every mocked call to |real_|.
    ON_CALL(*this, DecodeInternal(_, _, _, _, _))
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::DecodeInternal));
    ON_CALL(*this, HasDecodePlc())
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::HasDecodePlc));
    ON_CALL(*this, DecodePlc(_, _))
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::DecodePlc));
    ON_CALL(*this, Reset())
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::Reset));
    ON_CALL(*this, IncomingPacket(_, _, _, _, _))
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::IncomingPacket));
    ON_CALL(*this, ErrorCode())
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::ErrorCode));
  }
  // Calls the mocked Die() so that tests can set expectations on destruction.
  virtual ~MockExternalPcm16B() { Die(); }

  MOCK_METHOD0(Die, void());
  MOCK_METHOD5(DecodeInternal,
               int(const uint8_t* encoded,
                   size_t encoded_len,
                   int sample_rate_hz,
                   int16_t* decoded,
                   SpeechType* speech_type));
  MOCK_CONST_METHOD0(HasDecodePlc, bool());
  MOCK_METHOD2(DecodePlc, size_t(size_t num_frames, int16_t* decoded));
  MOCK_METHOD0(Reset, void());
  MOCK_METHOD5(IncomingPacket,
               int(const uint8_t* payload,
                   size_t payload_len,
                   uint16_t rtp_sequence_number,
                   uint32_t rtp_timestamp,
                   uint32_t arrival_timestamp));
  MOCK_METHOD0(ErrorCode, int());

  // Not mocked: answered directly by the wrapped real decoder.
  int SampleRateHz() const /* override */ { return real_.SampleRateHz(); }
  size_t Channels() const /* override */ { return real_.Channels(); }

 private:
  ExternalPcm16B real_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
class MockPacketBuffer : public PacketBuffer {
public:
MockPacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer)
: PacketBuffer(max_number_of_packets, tick_timer) {}
virtual ~MockPacketBuffer() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_METHOD0(Flush,
void());
MOCK_CONST_METHOD0(Empty,
bool());
int InsertPacket(Packet&& packet, StatisticsCalculator* stats) {
return InsertPacketWrapped(&packet, stats);
}
// Since gtest does not properly support move-only types, InsertPacket is
// implemented as a wrapper. You'll have to implement InsertPacketWrapped
// instead and move from |*packet|.
MOCK_METHOD2(InsertPacketWrapped,
int(Packet* packet, StatisticsCalculator* stats));
MOCK_METHOD5(InsertPacketList,
int(PacketList* packet_list,
const DecoderDatabase& decoder_database,
rtc::Optional<uint8_t>* current_rtp_payload_type,
rtc::Optional<uint8_t>* current_cng_rtp_payload_type,
StatisticsCalculator* stats));
MOCK_CONST_METHOD1(NextTimestamp,
int(uint32_t* next_timestamp));
MOCK_CONST_METHOD2(NextHigherTimestamp,
int(uint32_t timestamp, uint32_t* next_timestamp));
MOCK_CONST_METHOD0(PeekNextPacket,
const Packet*());
MOCK_METHOD0(GetNextPacket,
rtc::Optional<Packet>());
MOCK_METHOD1(DiscardNextPacket, int(StatisticsCalculator* stats));
MOCK_METHOD3(DiscardOldPackets,
void(uint32_t timestamp_limit,
uint32_t horizon_samples,
StatisticsCalculator* stats));
MOCK_METHOD2(DiscardAllOldPackets,
void(uint32_t timestamp_limit, StatisticsCalculator* stats));
MOCK_CONST_METHOD0(NumPacketsInBuffer,
size_t());
MOCK_METHOD1(IncrementWaitingTimes,
void(int));
MOCK_CONST_METHOD0(current_memory_bytes,
int());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_

View File

@ -0,0 +1,29 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
#include "webrtc/modules/audio_coding/neteq/red_payload_splitter.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for RedPayloadSplitter; mocks SplitRed() and
// CheckRedPayloads().
class MockRedPayloadSplitter : public RedPayloadSplitter {
 public:
  MOCK_METHOD1(SplitRed, bool(PacketList* packet_list));
  MOCK_METHOD2(CheckRedPayloads,
               int(PacketList* packet_list,
                   const DecoderDatabase& decoder_database));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_

View File

@ -0,0 +1,27 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
// gMock stand-in for StatisticsCalculator; only the two packet-discard
// notifications are mocked.
class MockStatisticsCalculator : public StatisticsCalculator {
 public:
  MOCK_METHOD1(PacketsDiscarded, void(size_t num_packets));
  MOCK_METHOD1(SecondaryPacketsDiscarded, void(size_t num_packets));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_

View File

@ -0,0 +1,232 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/nack_tracker.h"
#include <assert.h> // For assert.
#include <algorithm> // For std::max.
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/rtc_base/checks.h"
namespace webrtc {
namespace {

// Initial values used by the NackTracker constructor, before any sample rate
// or packet statistics have been supplied.
constexpr int kDefaultSampleRateKhz = 48;
constexpr int kDefaultPacketSizeMs = 20;

}  // namespace
// Creates a tracker with no packets received or decoded yet.
// |nack_threshold_packets| is stored as the NACK threshold. The sample rate
// and samples-per-packet start from the file-level defaults, and the list
// size limit starts at kNackListSizeLimit.
NackTracker::NackTracker(int nack_threshold_packets)
    : nack_threshold_packets_(nack_threshold_packets),
      // Last received packet: nothing seen yet.
      sequence_num_last_received_rtp_(0),
      timestamp_last_received_rtp_(0),
      any_rtp_received_(false),
      // Last decoded packet: nothing decoded yet.
      sequence_num_last_decoded_rtp_(0),
      timestamp_last_decoded_rtp_(0),
      any_rtp_decoded_(false),
      // Timing defaults; |samples_per_packet_| is derived from the
      // just-initialized |sample_rate_khz_|.
      sample_rate_khz_(kDefaultSampleRateKhz),
      samples_per_packet_(sample_rate_khz_ * kDefaultPacketSizeMs),
      max_nack_list_size_(kNackListSizeLimit) {}
// Compiler-generated destructor, defined out of line in this file.
NackTracker::~NackTracker() = default;
// Factory: heap-allocates a NackTracker configured with
// |nack_threshold_packets| and returns the raw pointer. The object is created
// with `new`, so the caller is responsible for deleting it.
NackTracker* NackTracker::Create(int nack_threshold_packets) {
  NackTracker* const tracker = new NackTracker(nack_threshold_packets);
  return tracker;
}
// Stores the sample rate, converted from Hz to whole kHz (integer division).
// |sample_rate_hz| must be positive.
void NackTracker::UpdateSampleRate(int sample_rate_hz) {
  assert(sample_rate_hz > 0);
  const int kHzPerKhz = 1000;
  sample_rate_khz_ = sample_rate_hz / kHzPerKhz;
}
// Records the arrival of the RTP packet identified by |sequence_number| and
// |timestamp|, and updates the NACK list accordingly.
void NackTracker::UpdateLastReceivedPacket(uint16_t sequence_number,
                                           uint32_t timestamp) {
  if (any_rtp_received_) {
    // A repeat of the most recently received packet changes nothing.
    if (sequence_number == sequence_num_last_received_rtp_) {
      return;
    }

    // A packet that has arrived must not remain in the NACK list.
    nack_list_.erase(sequence_number);

    // An old (out-of-order) sequence number needs no further action.
    const bool is_old_packet = IsNewerSequenceNumber(
        sequence_num_last_received_rtp_, sequence_number);
    if (is_old_packet) {
      return;
    }

    UpdateSamplesPerPacket(sequence_number, timestamp);
    UpdateList(sequence_number);

    sequence_num_last_received_rtp_ = sequence_number;
    timestamp_last_received_rtp_ = timestamp;
    LimitNackListSize();
    return;
  }

  // First packet ever: only record its sequence number and timestamp.
  sequence_num_last_received_rtp_ = sequence_number;
  timestamp_last_received_rtp_ = timestamp;
  any_rtp_received_ = true;
  // With nothing decoded yet, seed the last-decoded values with this packet
  // as well, to have a reasonable estimate of time-to-play.
  if (!any_rtp_decoded_) {
    sequence_num_last_decoded_rtp_ = sequence_number;
    timestamp_last_decoded_rtp_ = timestamp;
  }
}
// Re-estimates the number of samples per packet as the timestamp advance
// divided by the sequence-number advance since the last received packet.
// Both differences are taken in unsigned arithmetic, so they wrap together
// with the RTP counters. The only caller in this file invokes this with a
// sequence number different from the last received one, so the divisor is
// non-zero.
void NackTracker::UpdateSamplesPerPacket(
    uint16_t sequence_number_current_received_rtp,
    uint32_t timestamp_current_received_rtp) {
  const uint16_t packets_elapsed =
      sequence_number_current_received_rtp - sequence_num_last_received_rtp_;
  const uint32_t samples_elapsed =
      timestamp_current_received_rtp - timestamp_last_received_rtp_;
  samples_per_packet_ = samples_elapsed / packets_elapsed;
}
// Updates the NACK list for a newly received packet: entries that have been
// late for long enough are promoted to missing, and any packets skipped
// between the previous and the current packet are added.
void NackTracker::UpdateList(uint16_t sequence_number_current_received_rtp) {
  ChangeFromLateToMissing(sequence_number_current_received_rtp);

  // There is a gap only when the new packet is newer than the direct
  // successor of the last received packet.
  const bool packets_were_skipped =
      IsNewerSequenceNumber(sequence_number_current_received_rtp,
                            sequence_num_last_received_rtp_ + 1);
  if (packets_were_skipped)
    AddToList(sequence_number_current_received_rtp);
}
void NackTracker::ChangeFromLateToMissing(
uint16_t sequence_number_current_received_rtp) {
NackList::const_iterator lower_bound =
nack_list_.lower_bound(static_cast<uint16_t>(
sequence_number_current_received_rtp - nack_threshold_packets_));
for (NackList::iterator it = nack_list_.begin(); it != lower_bound; ++it)
it->second.is_missing = true;
}
// Estimates the RTP timestamp of the packet with sequence number
// |sequence_num| by extrapolating from the last received packet with the
// current samples-per-packet estimate.
//
// Returns the estimated timestamp, wrapping modulo 2^32 like RTP timestamps.
uint32_t NackTracker::EstimateTimestamp(uint16_t sequence_num) {
  const uint16_t sequence_num_diff =
      sequence_num - sequence_num_last_received_rtp_;
  // Multiply in uint32_t. A plain |sequence_num_diff * samples_per_packet_|
  // is evaluated as signed int, and |samples_per_packet_| is derived from
  // timestamps received off the network, so the product could overflow int
  // (undefined behaviour). Unsigned arithmetic wraps instead and yields the
  // same value whenever the signed product would have been representable.
  const uint32_t timestamp_diff =
      static_cast<uint32_t>(sequence_num_diff) *
      static_cast<uint32_t>(samples_per_packet_);
  return timestamp_last_received_rtp_ + timestamp_diff;
}
void NackTracker::AddToList(uint16_t sequence_number_current_received_rtp) {
assert(!any_rtp_decoded_ ||
IsNewerSequenceNumber(sequence_number_current_received_rtp,
sequence_num_last_decoded_rtp_));
// Packets with sequence numbers older than |upper_bound_missing| are
// considered missing, and the rest are considered late.
uint16_t upper_bound_missing =
sequence_number_current_received_rtp - nack_threshold_packets_;
for (uint16_t n = sequence_num_last_received_rtp_ + 1;
IsNewerSequenceNumber(sequence_number_current_received_rtp, n); ++n) {
bool is_missing = IsNewerSequenceNumber(upper_bound_missing, n);
uint32_t timestamp = EstimateTimestamp(n);
NackElement nack_element(TimeToPlay(timestamp), timestamp, is_missing);
nack_list_.insert(nack_list_.end(), std::make_pair(n, nack_element));
}
}
void NackTracker::UpdateEstimatedPlayoutTimeBy10ms() {
while (!nack_list_.empty() &&
nack_list_.begin()->second.time_to_play_ms <= 10)
nack_list_.erase(nack_list_.begin());
for (NackList::iterator it = nack_list_.begin(); it != nack_list_.end(); ++it)
it->second.time_to_play_ms -= 10;
}
void NackTracker::UpdateLastDecodedPacket(uint16_t sequence_number,
uint32_t timestamp) {
if (IsNewerSequenceNumber(sequence_number, sequence_num_last_decoded_rtp_) ||
!any_rtp_decoded_) {
sequence_num_last_decoded_rtp_ = sequence_number;
timestamp_last_decoded_rtp_ = timestamp;
// Packets in the list with sequence numbers less than the
// sequence number of the decoded RTP should be removed from the lists.
// They will be discarded by the jitter buffer if they arrive.
nack_list_.erase(nack_list_.begin(),
nack_list_.upper_bound(sequence_num_last_decoded_rtp_));
// Update estimated time-to-play.
for (NackList::iterator it = nack_list_.begin(); it != nack_list_.end();
++it)
it->second.time_to_play_ms = TimeToPlay(it->second.estimated_timestamp);
} else {
assert(sequence_number == sequence_num_last_decoded_rtp_);
// Same sequence number as before. 10 ms is elapsed, update estimations for
// time-to-play.
UpdateEstimatedPlayoutTimeBy10ms();
// Update timestamp for better estimate of time-to-play, for packets which
// are added to NACK list later on.
timestamp_last_decoded_rtp_ += sample_rate_khz_ * 10;
}
any_rtp_decoded_ = true;
}
// Returns a copy of the complete internal list, late and missing entries
// alike.
NackTracker::NackList NackTracker::GetNackList() const {
  const NackList& current_list = nack_list_;
  return current_list;
}
// Clears the NACK list and restores the reception, decode and rate state to
// the values set by the constructor. |nack_threshold_packets_| and
// |max_nack_list_size_| are left untouched.
void NackTracker::Reset() {
  nack_list_.clear();

  // Reception state.
  any_rtp_received_ = false;
  sequence_num_last_received_rtp_ = 0;
  timestamp_last_received_rtp_ = 0;

  // Decode state.
  any_rtp_decoded_ = false;
  sequence_num_last_decoded_rtp_ = 0;
  timestamp_last_decoded_rtp_ = 0;

  // Rate and packet-size estimates.
  sample_rate_khz_ = kDefaultSampleRateKhz;
  samples_per_packet_ = kDefaultSampleRateKhz * kDefaultPacketSizeMs;
}
// Sets the maximum NACK list size and immediately trims the current list to
// it. |max_nack_list_size| must be greater than zero and no larger than
// |kNackListSizeLimit|; both conditions are enforced with RTC_CHECK.
void NackTracker::SetMaxNackListSize(size_t max_nack_list_size) {
RTC_CHECK_GT(max_nack_list_size, 0);
// Ugly hack to get around the problem of passing static consts by reference:
// the limit is copied into a local so the check below does not refer to the
// static class member directly.
const size_t kNackListSizeLimitLocal = NackTracker::kNackListSizeLimit;
RTC_CHECK_LE(max_nack_list_size, kNackListSizeLimitLocal);
max_nack_list_size_ = max_nack_list_size;
// Apply the new limit to entries that are already in the list.
LimitNackListSize();
}
void NackTracker::LimitNackListSize() {
uint16_t limit = sequence_num_last_received_rtp_ -
static_cast<uint16_t>(max_nack_list_size_) - 1;
nack_list_.erase(nack_list_.begin(), nack_list_.upper_bound(limit));
}
// Converts an RTP timestamp into milliseconds remaining until playout,
// measured from the timestamp of the last decoded packet. The difference is
// taken in uint32_t, so it wraps with the RTP timestamp.
int64_t NackTracker::TimeToPlay(uint32_t timestamp) const {
  const uint32_t samples_until_play = timestamp - timestamp_last_decoded_rtp_;
  return samples_until_play / sample_rate_khz_;
}
// We don't erase elements with time-to-play shorter than round-trip-time.
std::vector<uint16_t> NackTracker::GetNackList(
int64_t round_trip_time_ms) const {
RTC_DCHECK_GE(round_trip_time_ms, 0);
std::vector<uint16_t> sequence_numbers;
for (NackList::const_iterator it = nack_list_.begin(); it != nack_list_.end();
++it) {
if (it->second.is_missing &&
it->second.time_to_play_ms > round_trip_time_ms)
sequence_numbers.push_back(it->first);
}
return sequence_numbers;
}
} // namespace webrtc

View File

@ -0,0 +1,208 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_
#include <vector>
#include <map>
#include "webrtc/modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "webrtc/rtc_base/gtest_prod_util.h"
//
// The NackTracker class keeps track of the lost packets, an estimate of
// time-to-play for each packet is also given.
//
// Every time a packet is pushed into NetEq, UpdateLastReceivedPacket() has to
// be called to update the NACK list.
//
// Every time 10 ms of audio is pulled from NetEq, UpdateLastDecodedPacket()
// should be called; time-to-play is updated at that moment.
//
// If packet N is received, any packet prior to |N - NackThreshold| which has
// not arrived is considered lost, and should be labeled as "missing" (the size
// of the list might be limited and older packets eliminated from the list).
// Packets |N - NackThreshold|, |N - NackThreshold + 1|, ..., |N - 1| are
// considered "late." A "late" packet with sequence number K is changed to
// "missing" any time a packet with sequence number newer than
// |K + NackThreshold| arrives.
//
// The NackTracker class has to know about the sample rate of the packets to
// compute time-to-play. So sample rate should be set as soon as the first
// packet is received. If there is a change in the receive codec (sender changes
// codec) then NackTracker should be reset. This is because NetEq would flush
// its buffer and re-transmission is meaningless for old packets. Therefore, in
// that case, the sampling rate has to be updated after the reset.
//
// Thread Safety
// =============
// Please note that this class is not thread safe. The class must be protected
// if different APIs are called from different threads.
//
namespace webrtc {
// Keeps an ordered list of RTP packets that have not arrived, each with an
// estimated timestamp, an estimated time-to-play and a late/missing flag.
class NackTracker {
public:
// A limit for the size of the NACK list.
static const size_t kNackListSizeLimit = 500; // 10 seconds for 20 ms frame
// packets.
// Factory method. Returns a tracker allocated with new.
static NackTracker* Create(int nack_threshold_packets);
~NackTracker();
// Set a maximum for the size of the NACK list. If the last received packet
// has sequence number of N, then NACK list will not contain any element
// with sequence number earlier than N - |max_nack_list_size|.
//
// The largest maximum size is defined by |kNackListSizeLimit|.
void SetMaxNackListSize(size_t max_nack_list_size);
// Set the sampling rate.
//
// If the sampling rate associated with the received packets changes, call
// this function to update it. Note that if there is any change in the
// received codec then NetEq will flush its buffer and NACK has to be reset.
// After Reset() is called the sampling rate has to be set again.
void UpdateSampleRate(int sample_rate_hz);
// Update the sequence number and the timestamp of the last decoded RTP. This
// API should be called every time 10 ms audio is pulled from NetEq.
void UpdateLastDecodedPacket(uint16_t sequence_number, uint32_t timestamp);
// Update the sequence number and the timestamp of the last received RTP. This
// API should be called every time a packet is pushed into ACM.
void UpdateLastReceivedPacket(uint16_t sequence_number, uint32_t timestamp);
// Get a list of "missing" packets which have expected time-to-play larger
// than the given round-trip-time (in milliseconds).
// Note: Late packets are not included.
std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const;
// Reset to default values. The NACK list is cleared.
// |nack_threshold_packets_| & |max_nack_list_size_| preserve their values.
void Reset();
private:
// This test needs to access the private method GetNackList().
FRIEND_TEST_ALL_PREFIXES(NackTrackerTest, EstimateTimestampAndTimeToPlay);
// Per-packet bookkeeping stored in the NACK list.
struct NackElement {
NackElement(int64_t initial_time_to_play_ms,
uint32_t initial_timestamp,
bool missing)
: time_to_play_ms(initial_time_to_play_ms),
estimated_timestamp(initial_timestamp),
is_missing(missing) {}
// Estimated time (ms) left for this packet to be decoded. This estimate is
// updated every time jitter buffer decodes a packet.
int64_t time_to_play_ms;
// A guess about the timestamp of the missing packet, it is used for
// estimation of |time_to_play_ms|. The estimate might be slightly wrong if
// there has been a frame-size change between the last received packet and
// the missing packet. However, the risk of this is low, and in case of such
// errors, there will be a minor misestimation in time-to-play of missing
// packets. This will have a very minor effect on NACK performance.
uint32_t estimated_timestamp;
// True if the packet is considered missing. Otherwise indicates packet is
// late.
bool is_missing;
};
// Ordering for the NACK list: a key sorts before another when the other is
// the newer sequence number according to IsNewerSequenceNumber().
class NackListCompare {
public:
bool operator()(uint16_t sequence_number_old,
uint16_t sequence_number_new) const {
return IsNewerSequenceNumber(sequence_number_new, sequence_number_old);
}
};
// Maps sequence number -> NackElement, ordered by NackListCompare.
typedef std::map<uint16_t, NackElement, NackListCompare> NackList;
// Constructor.
explicit NackTracker(int nack_threshold_packets);
// This API is used only for testing to assess whether time-to-play is
// computed correctly.
NackList GetNackList() const;
// Given the |sequence_number_current_received_rtp| of currently received RTP,
// recognize packets which have not arrived and add them to the list.
void AddToList(uint16_t sequence_number_current_received_rtp);
// This function subtracts 10 ms of time-to-play for all packets in NACK list.
// This is called when 10 ms elapsed with no new RTP packet decoded.
void UpdateEstimatedPlayoutTimeBy10ms();
// Given the |sequence_number_current_received_rtp| and
// |timestamp_current_received_rtp| of currently received RTP update number
// of samples per packet.
void UpdateSamplesPerPacket(uint16_t sequence_number_current_received_rtp,
uint32_t timestamp_current_received_rtp);
// Given the |sequence_number_current_received_rtp| of currently received RTP
// update the list. That is; some packets will change from late to missing,
// some packets are inserted as missing and some inserted as late.
void UpdateList(uint16_t sequence_number_current_received_rtp);
// Packets which are considered late for too long (according to
// |nack_threshold_packets_|) are flagged as missing.
void ChangeFromLateToMissing(uint16_t sequence_number_current_received_rtp);
// Packets which have sequence number older than
// |sequence_num_last_received_rtp_| - |max_nack_list_size_| are removed
// from the NACK list.
void LimitNackListSize();
// Estimate timestamp of a missing packet given its sequence number.
uint32_t EstimateTimestamp(uint16_t sequence_number);
// Compute time-to-play given a timestamp.
int64_t TimeToPlay(uint32_t timestamp) const;
// If packet N has arrived, any packet prior to N - |nack_threshold_packets_|
// which has not arrived is considered missing, and should be in NACK list.
// Also any packet in the range of N-1 and N - |nack_threshold_packets_|,
// exclusive, which has not arrived is considered late, and should be
// in the list of late packets.
const int nack_threshold_packets_;
// Valid if a packet is received.
uint16_t sequence_num_last_received_rtp_;
uint32_t timestamp_last_received_rtp_;
bool any_rtp_received_; // If any packet received.
// Valid if a packet is decoded.
uint16_t sequence_num_last_decoded_rtp_;
uint32_t timestamp_last_decoded_rtp_;
bool any_rtp_decoded_; // If any packet decoded.
int sample_rate_khz_; // Sample rate in kHz.
// Number of samples per packet. We update this every time we receive a
// packet, not only for consecutive packets.
int samples_per_packet_;
// A list of missing packets to be retransmitted. Components of the list
// contain the sequence number of missing packets and the estimated time that
// each packet is going to be played out.
NackList nack_list_;
// NACK list will not keep track of missing packets prior to
// |sequence_num_last_received_rtp_| - |max_nack_list_size_|.
size_t max_nack_list_size_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_

View File

@ -0,0 +1,483 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/nack_tracker.h"
#include <stdint.h>
#include <algorithm>
#include <memory>
#include "webrtc/modules/audio_coding/include/audio_coding_module_typedefs.h"
#include "webrtc/test/gtest.h"
#include "webrtc/typedefs.h"
namespace webrtc {
namespace {
// Parameters shared by all tests below.
constexpr int kNackThreshold = 3;
constexpr int kSampleRateHz = 16000;
constexpr int kPacketSizeMs = 30;
// RTP timestamp advance of one packet: 30 ms at 16 samples per ms = 480.
constexpr uint32_t kTimestampIncrement = kPacketSizeMs * (kSampleRateHz / 1000);
constexpr int64_t kShortRoundTripTimeMs = 1;
// Returns true when |nack_list| has exactly |num_lost_packets| entries and
// every entry occurs somewhere in the first |num_lost_packets| elements of
// |lost_sequence_numbers|. Order is not checked, and repeated entries in
// |nack_list| are not detected.
bool IsNackListCorrect(const std::vector<uint16_t>& nack_list,
                       const uint16_t* lost_sequence_numbers,
                       size_t num_lost_packets) {
  if (nack_list.size() != num_lost_packets)
    return false;
  if (num_lost_packets == 0)
    return true;  // Both are empty; the array is never touched.

  const uint16_t* const lost_begin = lost_sequence_numbers;
  const uint16_t* const lost_end = lost_sequence_numbers + num_lost_packets;
  return std::all_of(nack_list.begin(), nack_list.end(),
                     [lost_begin, lost_end](uint16_t seq_num) {
                       return std::find(lost_begin, lost_end, seq_num) !=
                              lost_end;
                     });
}
} // namespace
// Feeds 100 consecutive packets, with no gaps, and checks after each one that
// nothing is reported as missing.
TEST(NackTrackerTest, EmptyListWhenNoPacketLoss) {
  std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
  nack->UpdateSampleRate(kSampleRateHz);

  int seq_num = 1;
  uint32_t timestamp = 0;

  std::vector<uint16_t> nack_list;
  for (int n = 0; n < 100; n++) {
    nack->UpdateLastReceivedPacket(seq_num, timestamp);
    seq_num++;
    timestamp += kTimestampIncrement;
    // GetNackList() is const, so a single query per received packet is
    // enough. The earlier version queried twice and discarded the first
    // result.
    nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(nack_list.empty());
  }
}
// After an initial packet, the next |kNackThreshold| + 1 packets are
// delivered in reverse order. At no point may anything be reported as
// missing.
TEST(NackTrackerTest, NoNackIfReorderWithinNackThreshold) {
  std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
  nack->UpdateSampleRate(kSampleRateHz);

  int seq_num = 1;
  uint32_t timestamp = 0;
  nack->UpdateLastReceivedPacket(seq_num, timestamp);

  std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
  EXPECT_TRUE(nack_list.empty());

  // Deliver the newest packet first and walk back towards the initial one.
  for (int num_late_packets = kNackThreshold + 1; num_late_packets > 0;
       --num_late_packets) {
    nack->UpdateLastReceivedPacket(
        seq_num + num_late_packets,
        timestamp + num_late_packets * kTimestampIncrement);
    nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(nack_list.empty());
  }
}
// Loses a burst of packets, then keeps receiving. The newest
// |kNackThreshold| lost packets are expected to enter the NACK list one by
// one as further packets arrive; after that the NACK list must stay
// unchanged.
TEST(NackTrackerTest, LatePacketsMovedToNackThenNackListDoesNotChange) {
const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9};
static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) /
sizeof(kSequenceNumberLostPackets[0]);
for (int k = 0; k < 2; k++) { // Two iterations: without/with wrap around.
std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
nack->UpdateSampleRate(kSampleRateHz);
uint16_t sequence_num_lost_packets[kNumAllLostPackets];
for (int n = 0; n < kNumAllLostPackets; n++) {
sequence_num_lost_packets[n] =
kSequenceNumberLostPackets[n] +
k * 65531; // Have wrap around in sequence numbers for |k == 1|.
}
// Receive the packet right before the burst of lost packets.
uint16_t seq_num = sequence_num_lost_packets[0] - 1;
uint32_t timestamp = 0;
std::vector<uint16_t> nack_list;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
// Jump to the packet right after the burst.
seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1;
timestamp += kTimestampIncrement * (kNumAllLostPackets + 1);
// Expected NACK list size at the first check; the newest |kNackThreshold|
// lost packets are not expected in the list yet.
int num_lost_packets = std::max(0, kNumAllLostPackets - kNackThreshold);
// Each further received packet is expected to add one more lost packet to
// the NACK list.
for (int n = 0; n < kNackThreshold + 1; ++n) {
nack->UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets,
num_lost_packets));
seq_num++;
timestamp += kTimestampIncrement;
num_lost_packets++;
}
// From here on the NACK list must contain all lost packets and not change.
for (int n = 0; n < 100; ++n) {
nack->UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets,
kNumAllLostPackets));
seq_num++;
timestamp += kTimestampIncrement;
}
}
}
// Loses a burst of packets and then alternates between receiving a new packet
// and receiving a retransmission of the oldest outstanding lost packet. Each
// retransmitted packet must disappear from the NACK list, and the list must
// be empty at the end.
TEST(NackTrackerTest, ArrivedPacketsAreRemovedFromNackList) {
const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9};
static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) /
sizeof(kSequenceNumberLostPackets[0]);
for (int k = 0; k < 2; ++k) { // Two iterations: without/with wrap around.
std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
nack->UpdateSampleRate(kSampleRateHz);
uint16_t sequence_num_lost_packets[kNumAllLostPackets];
for (int n = 0; n < kNumAllLostPackets; ++n) {
sequence_num_lost_packets[n] = kSequenceNumberLostPackets[n] +
k * 65531; // Wrap around for |k == 1|.
}
// Receive the packet right before the burst of lost packets.
uint16_t seq_num = sequence_num_lost_packets[0] - 1;
uint32_t timestamp = 0;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
// Index and timestamp of the next lost packet to be "retransmitted".
size_t index_retransmitted_rtp = 0;
uint32_t timestamp_retransmitted_rtp = timestamp + kTimestampIncrement;
// Jump to the packet right after the burst.
seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1;
timestamp += kTimestampIncrement * (kNumAllLostPackets + 1);
size_t num_lost_packets = std::max(0, kNumAllLostPackets - kNackThreshold);
for (int n = 0; n < kNumAllLostPackets; ++n) {
// Number of lost packets does not change for the first
// |kNackThreshold + 1| packets, one is added to the list and one is
// removed. Thereafter, the list shrinks every iteration.
if (n >= kNackThreshold + 1)
num_lost_packets--;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &sequence_num_lost_packets[index_retransmitted_rtp],
num_lost_packets));
seq_num++;
timestamp += kTimestampIncrement;
// Retransmission of a lost RTP.
nack->UpdateLastReceivedPacket(
sequence_num_lost_packets[index_retransmitted_rtp],
timestamp_retransmitted_rtp);
index_retransmitted_rtp++;
timestamp_retransmitted_rtp += kTimestampIncrement;
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &sequence_num_lost_packets[index_retransmitted_rtp],
num_lost_packets - 1)); // One less lost packet in the list.
}
ASSERT_TRUE(nack_list.empty());
}
}
// Assess if estimation of timestamps and time-to-play is correct. Introduce
// all four combinations of sequence-number and timestamp wrap around.
TEST(NackTrackerTest, EstimateTimestampAndTimeToPlay) {
const uint16_t kLostPackets[] = {2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15};
static const int kNumAllLostPackets =
sizeof(kLostPackets) / sizeof(kLostPackets[0]);
for (int k = 0; k < 4; ++k) {
std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
nack->UpdateSampleRate(kSampleRateHz);
// Sequence number wrap around if |k| is 2 or 3;
int seq_num_offset = (k < 2) ? 0 : 65531;
// Timestamp wrap around if |k| is 1 or 3.
uint32_t timestamp_offset =
(k & 0x1) ? static_cast<uint32_t>(0xffffffff) - 6 : 0;
// Actual timestamps and sequence numbers of the packets that will be lost.
uint32_t timestamp_lost_packets[kNumAllLostPackets];
uint16_t seq_num_lost_packets[kNumAllLostPackets];
for (int n = 0; n < kNumAllLostPackets; ++n) {
timestamp_lost_packets[n] =
timestamp_offset + kLostPackets[n] * kTimestampIncrement;
seq_num_lost_packets[n] = seq_num_offset + kLostPackets[n];
}
// We want to push two packets before the lost burst starts.
uint16_t seq_num = seq_num_lost_packets[0] - 2;
uint32_t timestamp = timestamp_lost_packets[0] - 2 * kTimestampIncrement;
const uint16_t first_seq_num = seq_num;
const uint32_t first_timestamp = timestamp;
// Two consecutive packets to have a correct estimate of timestamp increase.
nack->UpdateLastReceivedPacket(seq_num, timestamp);
seq_num++;
timestamp += kTimestampIncrement;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
// A packet after the last one which is supposed to be lost.
seq_num = seq_num_lost_packets[kNumAllLostPackets - 1] + 1;
timestamp =
timestamp_lost_packets[kNumAllLostPackets - 1] + kTimestampIncrement;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
// The private GetNackList() returns late as well as missing entries, so all
// lost packets are expected here.
NackTracker::NackList nack_list = nack->GetNackList();
EXPECT_EQ(static_cast<size_t>(kNumAllLostPackets), nack_list.size());
// Pretend the first packet is decoded.
nack->UpdateLastDecodedPacket(first_seq_num, first_timestamp);
nack_list = nack->GetNackList();
NackTracker::NackList::iterator it = nack_list.begin();
while (it != nack_list.end()) {
// Map the list key back to an index into the lost-packet arrays.
seq_num = it->first - seq_num_offset;
int index = seq_num - kLostPackets[0];
EXPECT_EQ(timestamp_lost_packets[index], it->second.estimated_timestamp);
// Lost packet |index| is |index + 2| packets after the decoded one.
EXPECT_EQ((index + 2) * kPacketSizeMs, it->second.time_to_play_ms);
++it;
}
// Pretend 10 ms is passed, and we had pulled audio from NetEq, it still
// reports the same sequence number as decoded, time-to-play should be
// updated by 10 ms.
nack->UpdateLastDecodedPacket(first_seq_num, first_timestamp);
nack_list = nack->GetNackList();
it = nack_list.begin();
while (it != nack_list.end()) {
seq_num = it->first - seq_num_offset;
int index = seq_num - kLostPackets[0];
EXPECT_EQ((index + 2) * kPacketSizeMs - 10, it->second.time_to_play_ms);
++it;
}
}
}
// Once a packet newer than all lost packets has been decoded, neither the
// NACK list nor the late list may still contain the older lost packets.
TEST(NackTrackerTest,
MissingPacketsPriorToLastDecodedRtpShouldNotBeInNackList) {
for (int m = 0; m < 2; ++m) {
uint16_t seq_num_offset = (m == 0) ? 0 : 65531; // Wrap around if |m| is 1.
std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
nack->UpdateSampleRate(kSampleRateHz);
// Two consecutive packets to have a correct estimate of timestamp increase.
uint16_t seq_num = 0;
nack->UpdateLastReceivedPacket(seq_num_offset + seq_num,
seq_num * kTimestampIncrement);
seq_num++;
nack->UpdateLastReceivedPacket(seq_num_offset + seq_num,
seq_num * kTimestampIncrement);
// Skip 10 packets (larger than NACK threshold).
const int kNumLostPackets = 10;
seq_num += kNumLostPackets + 1;
nack->UpdateLastReceivedPacket(seq_num_offset + seq_num,
seq_num * kTimestampIncrement);
// The newest |kNackThreshold| lost packets are not expected in the NACK list.
const size_t kExpectedListSize = kNumLostPackets - kNackThreshold;
std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_EQ(kExpectedListSize, nack_list.size());
for (int k = 0; k < 2; ++k) {
// Decoding of the first and the second arrived packets.
// Each packet is reported once per 10 ms of its duration.
for (int n = 0; n < kPacketSizeMs / 10; ++n) {
nack->UpdateLastDecodedPacket(seq_num_offset + k,
k * kTimestampIncrement);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_EQ(kExpectedListSize, nack_list.size());
}
}
// Decoding of the last received packet.
nack->UpdateLastDecodedPacket(seq_num + seq_num_offset,
seq_num * kTimestampIncrement);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
// Make sure the list of late packets is also empty. To check that, push a
// few packets; if the late list is not empty its content will pop up in the
// NACK list.
for (int n = 0; n < kNackThreshold + 10; ++n) {
seq_num++;
nack->UpdateLastReceivedPacket(seq_num_offset + seq_num,
seq_num * kTimestampIncrement);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
}
}
}
// Builds up a non-empty NACK list and checks that Reset() empties it.
TEST(NackTrackerTest, Reset) {
  std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
  nack->UpdateSampleRate(kSampleRateHz);

  // Packets 0 and 1 arrive back to back, which gives a correct estimate of
  // the timestamp increase per packet. Then 10 packets (more than the NACK
  // threshold) are skipped before the next packet arrives.
  const int kNumLostPackets = 10;
  const uint16_t kReceivedSeqNums[] = {0, 1, 1 + kNumLostPackets + 1};
  for (const uint16_t seq_num : kReceivedSeqNums)
    nack->UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);

  const size_t kExpectedListSize = kNumLostPackets - kNackThreshold;
  std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
  EXPECT_EQ(kExpectedListSize, nack_list.size());

  nack->Reset();
  nack_list = nack->GetNackList(kShortRoundTripTimeMs);
  EXPECT_TRUE(nack_list.empty());
}
// With the maximum list size set before any packet arrives, a loss burst
// longer than that maximum must yield a NACK list capped accordingly.
TEST(NackTrackerTest, ListSizeAppliedFromBeginning) {
  const size_t kNackListSize = 10;
  // The second offset makes the sequence numbers wrap around.
  const uint16_t kSeqNumOffsets[] = {0, 65525};
  for (const uint16_t seq_num_offset : kSeqNumOffsets) {
    std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
    nack->UpdateSampleRate(kSampleRateHz);
    nack->SetMaxNackListSize(kNackListSize);

    const uint16_t first_seq_num = seq_num_offset;
    const uint32_t first_timestamp = 0x12345678;
    nack->UpdateLastReceivedPacket(first_seq_num, first_timestamp);

    // Lose more packets than the NACK-list size limit allows to track.
    const uint16_t num_lost_packets = kNackThreshold + kNackListSize + 5;
    const uint16_t next_seq_num = first_seq_num + num_lost_packets + 1;
    const uint32_t next_timestamp =
        first_timestamp + (num_lost_packets + 1) * kTimestampIncrement;
    nack->UpdateLastReceivedPacket(next_seq_num, next_timestamp);

    std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_EQ(kNackListSize - kNackThreshold, nack_list.size());
  }
}
// Shrinks the maximum list size while the list holds more entries than the
// new maximum: the oldest entries must be dropped at once, and the list must
// keep following the limit as further packets arrive.
TEST(NackTrackerTest, ChangeOfListSizeAppliedAndOldElementsRemoved) {
const size_t kNackListSize = 10;
for (int m = 0; m < 2; ++m) {
uint16_t seq_num_offset = (m == 0) ? 0 : 65525; // Wrap around if |m| is 1.
std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
nack->UpdateSampleRate(kSampleRateHz);
uint16_t seq_num = seq_num_offset;
uint32_t timestamp = 0x87654321;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
// Packet lost more than NACK-list size limit.
uint16_t num_lost_packets = kNackThreshold + kNackListSize + 5;
// Remember the sequence number of every lost packet, oldest first.
std::unique_ptr<uint16_t[]> seq_num_lost(new uint16_t[num_lost_packets]);
for (int n = 0; n < num_lost_packets; ++n) {
seq_num_lost[n] = ++seq_num;
}
// The first packet received after the burst.
++seq_num;
timestamp += (num_lost_packets + 1) * kTimestampIncrement;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
// No size limit was set by this test yet: all but the newest
// |kNackThreshold| lost packets are expected in the NACK list.
size_t expected_size = num_lost_packets - kNackThreshold;
std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_EQ(expected_size, nack_list.size());
// Shrinking the limit must drop the oldest entries right away.
nack->SetMaxNackListSize(kNackListSize);
expected_size = kNackListSize - kNackThreshold;
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &seq_num_lost[num_lost_packets - kNackListSize],
expected_size));
// NACK list does not change size but the content is changing. The oldest
// element is removed and one from late list is inserted.
size_t n;
for (n = 1; n <= static_cast<size_t>(kNackThreshold); ++n) {
++seq_num;
timestamp += kTimestampIncrement;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &seq_num_lost[num_lost_packets - kNackListSize + n],
expected_size));
}
// NACK list should shrink.
for (; n < kNackListSize; ++n) {
++seq_num;
timestamp += kTimestampIncrement;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
--expected_size;
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(IsNackListCorrect(
nack_list, &seq_num_lost[num_lost_packets - kNackListSize + n],
expected_size));
}
// After this packet, NACK list should be empty.
++seq_num;
timestamp += kTimestampIncrement;
nack->UpdateLastReceivedPacket(seq_num, timestamp);
nack_list = nack->GetNackList(kShortRoundTripTimeMs);
EXPECT_TRUE(nack_list.empty());
}
}
// Only missing packets whose time-to-play exceeds the round-trip time passed
// to GetNackList() may be reported.
TEST(NackTrackerTest, RoudTripTimeIsApplied) {
  const int kNackListSize = 200;
  std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
  nack->UpdateSampleRate(kSampleRateHz);
  nack->SetMaxNackListSize(kNackListSize);

  const uint16_t first_seq_num = 0;
  const uint32_t first_timestamp = 0x87654321;
  nack->UpdateLastReceivedPacket(first_seq_num, first_timestamp);

  // Lose |kNackThreshold| + 5 packets, i.e. sequence numbers 1 through 8.
  // This stays well below the NACK-list size limit set above.
  const uint16_t kNumLostPackets = kNackThreshold + 5;
  const uint16_t next_seq_num = first_seq_num + (1 + kNumLostPackets);
  const uint32_t next_timestamp =
      first_timestamp + (1 + kNumLostPackets) * kTimestampIncrement;
  nack->UpdateLastReceivedPacket(next_seq_num, next_timestamp);

  // With a round-trip time of 100 ms only sequence numbers 4 and 5 are
  // expected back; the other lost packets must be filtered out.
  std::vector<uint16_t> nack_list = nack->GetNackList(100);
  ASSERT_EQ(2u, nack_list.size());
  EXPECT_EQ(4, nack_list[0]);
  EXPECT_EQ(5, nack_list[1]);
}
} // namespace webrtc

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
#include <memory>
#include <sstream>
#include "webrtc/modules/audio_coding/neteq/neteq_impl.h"
namespace webrtc {
// Returns a human-readable, comma-separated list of "name=value" pairs
// describing this configuration. Flag fields are printed as "true" or
// "false"; all other fields are streamed with operator<<.
std::string NetEq::Config::ToString() const {
  std::stringstream ss;
  ss << "sample_rate_hz=" << sample_rate_hz
     << ", enable_post_decode_vad="
     << (enable_post_decode_vad ? "true" : "false")
     << ", max_packets_in_buffer=" << max_packets_in_buffer
     << ", background_noise_mode=" << background_noise_mode
     << ", playout_mode=" << playout_mode
     // These two flags used to print " true" with a stray leading space,
     // unlike enable_post_decode_vad above.
     << ", enable_fast_accelerate="
     << (enable_fast_accelerate ? "true" : "false")
     << ", enable_muted_state=" << (enable_muted_state ? "true" : "false");
  return ss.str();
}
// Creates all classes needed and injects them into a new NetEqImpl object.
// Returns the new object, which is allocated with new.
//
// |config| is passed both to the NetEqImpl constructor and to the
// NetEqImpl::Dependencies object; |decoder_factory| is forwarded to the
// Dependencies object.
NetEq* NetEq::Create(
const NetEq::Config& config,
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
// The Dependencies object is built as a temporary directly in the call.
return new NetEqImpl(config,
NetEqImpl::Dependencies(config, decoder_factory));
}
} // namespace webrtc

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <map>
#include <string>
#include "webrtc/modules/audio_coding/neteq/neteq_decoder_enum.h"
namespace webrtc {
// Translates a NetEqDecoder enumerator into the SdpAudioFormat constructed for
// it below (the arguments are presumably codec name, clock rate in Hz and
// number of channels). Enumerators without an explicit case, such as
// kDecoderArbitrary, fall through to the default label and yield an empty
// Optional.
// NOTE(review): G.722 is listed with 8000 and mono Opus with 2 channels; both
// look like deliberate RTP/SDP conventions rather than typos -- confirm before
// changing them.
rtc::Optional<SdpAudioFormat> NetEqDecoderToSdpAudioFormat(NetEqDecoder nd) {
  using OptFormat = rtc::Optional<SdpAudioFormat>;
  switch (nd) {
    // G.711.
    case NetEqDecoder::kDecoderPCMu:
      return OptFormat(SdpAudioFormat("pcmu", 8000, 1));
    case NetEqDecoder::kDecoderPCMa:
      return OptFormat(SdpAudioFormat("pcma", 8000, 1));
    case NetEqDecoder::kDecoderPCMu_2ch:
      return OptFormat(SdpAudioFormat("pcmu", 8000, 2));
    case NetEqDecoder::kDecoderPCMa_2ch:
      return OptFormat(SdpAudioFormat("pcma", 8000, 2));

    // iLBC and iSAC.
    case NetEqDecoder::kDecoderILBC:
      return OptFormat(SdpAudioFormat("ilbc", 8000, 1));
    case NetEqDecoder::kDecoderISAC:
      return OptFormat(SdpAudioFormat("isac", 16000, 1));
    case NetEqDecoder::kDecoderISACswb:
      return OptFormat(SdpAudioFormat("isac", 32000, 1));

    // Linear 16-bit PCM.
    case NetEqDecoder::kDecoderPCM16B:
      return OptFormat(SdpAudioFormat("l16", 8000, 1));
    case NetEqDecoder::kDecoderPCM16Bwb:
      return OptFormat(SdpAudioFormat("l16", 16000, 1));
    case NetEqDecoder::kDecoderPCM16Bswb32kHz:
      return OptFormat(SdpAudioFormat("l16", 32000, 1));
    case NetEqDecoder::kDecoderPCM16Bswb48kHz:
      return OptFormat(SdpAudioFormat("l16", 48000, 1));
    case NetEqDecoder::kDecoderPCM16B_2ch:
      return OptFormat(SdpAudioFormat("l16", 8000, 2));
    case NetEqDecoder::kDecoderPCM16Bwb_2ch:
      return OptFormat(SdpAudioFormat("l16", 16000, 2));
    case NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch:
      return OptFormat(SdpAudioFormat("l16", 32000, 2));
    case NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch:
      return OptFormat(SdpAudioFormat("l16", 48000, 2));
    case NetEqDecoder::kDecoderPCM16B_5ch:
      return OptFormat(SdpAudioFormat("l16", 8000, 5));

    // G.722.
    case NetEqDecoder::kDecoderG722:
      return OptFormat(SdpAudioFormat("g722", 8000, 1));
    case NetEqDecoder::kDecoderG722_2ch:
      return OptFormat(SdpAudioFormat("g722", 8000, 2));

    // Opus; the two-channel variant differs only by the "stereo" parameter.
    case NetEqDecoder::kDecoderOpus:
      return OptFormat(SdpAudioFormat("opus", 48000, 2));
    case NetEqDecoder::kDecoderOpus_2ch:
      return OptFormat(SdpAudioFormat(
          "opus", 48000, 2,
          std::map<std::string, std::string>{{"stereo", "1"}}));

    // Redundancy, telephone events and comfort noise.
    case NetEqDecoder::kDecoderRED:
      return OptFormat(SdpAudioFormat("red", 8000, 1));
    case NetEqDecoder::kDecoderAVT:
      return OptFormat(SdpAudioFormat("telephone-event", 8000, 1));
    case NetEqDecoder::kDecoderAVT16kHz:
      return OptFormat(SdpAudioFormat("telephone-event", 16000, 1));
    case NetEqDecoder::kDecoderAVT32kHz:
      return OptFormat(SdpAudioFormat("telephone-event", 32000, 1));
    case NetEqDecoder::kDecoderAVT48kHz:
      return OptFormat(SdpAudioFormat("telephone-event", 48000, 1));
    case NetEqDecoder::kDecoderCNGnb:
      return OptFormat(SdpAudioFormat("cn", 8000, 1));
    case NetEqDecoder::kDecoderCNGwb:
      return OptFormat(SdpAudioFormat("cn", 16000, 1));
    case NetEqDecoder::kDecoderCNGswb32kHz:
      return OptFormat(SdpAudioFormat("cn", 32000, 1));
    case NetEqDecoder::kDecoderCNGswb48kHz:
      return OptFormat(SdpAudioFormat("cn", 48000, 1));

    default:
      return OptFormat();
  }
}
} // namespace webrtc

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_DECODER_ENUM_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_DECODER_ENUM_H_
#include "webrtc/api/audio_codecs/audio_format.h"
#include "webrtc/api/optional.h"
namespace webrtc {
// Identifiers for the decoder types that can be referred to through this enum.
// NetEqDecoderToSdpAudioFormat(), declared below, translates an enumerator to
// an SdpAudioFormat where a translation exists.
// NOTE(review): no enumerator has an explicit value, so the underlying values
// follow declaration order; check for code that depends on those values before
// reordering or inserting entries.
enum class NetEqDecoder {
// G.711 u-law / A-law; "_2ch" marks the two-channel variants.
kDecoderPCMu,
kDecoderPCMa,
kDecoderPCMu_2ch,
kDecoderPCMa_2ch,
kDecoderILBC,
kDecoderISAC,
kDecoderISACswb,
// 16-bit linear PCM at various sample rates and channel counts.
kDecoderPCM16B,
kDecoderPCM16Bwb,
kDecoderPCM16Bswb32kHz,
kDecoderPCM16Bswb48kHz,
kDecoderPCM16B_2ch,
kDecoderPCM16Bwb_2ch,
kDecoderPCM16Bswb32kHz_2ch,
kDecoderPCM16Bswb48kHz_2ch,
kDecoderPCM16B_5ch,
kDecoderG722,
kDecoderG722_2ch,
kDecoderRED,
// Presumably AVT = telephone-event (DTMF) payloads -- confirm against the
// SDP mapping.
kDecoderAVT,
kDecoderAVT16kHz,
kDecoderAVT32kHz,
kDecoderAVT48kHz,
// Presumably CNG = comfort noise -- confirm against the SDP mapping.
kDecoderCNGnb,
kDecoderCNGwb,
kDecoderCNGswb32kHz,
kDecoderCNGswb48kHz,
// NOTE(review): appears to have no SdpAudioFormat translation; verify in the
// implementation of NetEqDecoderToSdpAudioFormat().
kDecoderArbitrary,
kDecoderOpus,
kDecoderOpus_2ch,
};
// Returns the SdpAudioFormat that corresponds to |nd|, wrapped in an
// rtc::Optional. NOTE(review): the Optional is presumably empty when |nd| has
// no corresponding format -- confirm in the implementation.
rtc::Optional<SdpAudioFormat> NetEqDecoderToSdpAudioFormat(NetEqDecoder nd);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_DECODER_ENUM_H_

View File

@ -0,0 +1,457 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Test to verify correct operation for externally created decoders.
#include <memory>
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/mock/mock_external_decoder_pcm16b.h"
#include "webrtc/modules/audio_coding/neteq/tools/input_audio_file.h"
#include "webrtc/modules/audio_coding/neteq/tools/neteq_external_decoder_test.h"
#include "webrtc/modules/audio_coding/neteq/tools/rtp_generator.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/test/gmock.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
using ::testing::_;
using ::testing::Return;
// Shared fixture logic for tests that push PCM16b-encoded packets into a NetEq
// instance that uses an externally created (mock) decoder. Subclasses supply
// the output verification (GetAndVerifyOutput) and the expected number of
// decode calls (NumExpectedDecodeCalls), and may override Lost() and
// InsertPacket().
//
// The sample and payload scratch buffers are held in std::unique_ptr<T[]> so
// that they are released automatically, like the other owned members.
class NetEqExternalDecoderUnitTest : public test::NetEqExternalDecoderTest {
 protected:
  static const int kFrameSizeMs = 10;  // Frame size of Pcm16B.

  // |decoder| is stored in a std::unique_ptr member, i.e. this object takes
  // ownership of it. |sample_rate_hz| determines the frame size in samples.
  NetEqExternalDecoderUnitTest(NetEqDecoder codec,
                               int sample_rate_hz,
                               MockExternalPcm16B* decoder)
      : NetEqExternalDecoderTest(codec, sample_rate_hz, decoder),
        external_decoder_(decoder),
        samples_per_ms_(sample_rate_hz / 1000),
        frame_size_samples_(kFrameSizeMs * samples_per_ms_),
        rtp_generator_(new test::RtpGenerator(samples_per_ms_)),
        input_(new int16_t[frame_size_samples_]),
        // Payload should be no larger than input.
        encoded_(new uint8_t[2 * frame_size_samples_]),
        payload_size_bytes_(0),
        last_send_time_(0),
        last_arrival_time_(0) {
    // NetEq is not allowed to delete the external decoder (hence Times(0)).
    EXPECT_CALL(*external_decoder_, Die()).Times(0);
    Init();

    const std::string file_name =
        webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
    input_file_.reset(new test::InputAudioFile(file_name));
  }

  virtual ~NetEqExternalDecoderUnitTest() {
    // |external_decoder_| is held by a std::unique_ptr member, which is
    // destroyed after this destructor body has run; that is when Die() is
    // expected to be called, exactly once.
    EXPECT_CALL(*external_decoder_, Die()).Times(1);
  }

  // Method to draw kFrameSizeMs audio and verify the output.
  // Use gTest methods. e.g. ASSERT_EQ() inside to trigger errors.
  virtual void GetAndVerifyOutput() = 0;

  // Method to get the number of calls to the Decode() method of the external
  // decoder.
  virtual int NumExpectedDecodeCalls(int num_loops) = 0;

  // Reads one frame from the input file, encodes it into |encoded_| and fills
  // in |rtp_header_|. Returns the value obtained from the RTP generator (used
  // by callers as the send time), or -1 if the file read fails.
  // NOTE(review): RunTest() stores the result in a uint32_t, so a -1 would
  // turn into a very large send time there.
  int GetNewPacket() {
    if (!input_file_->Read(frame_size_samples_, input_.get())) {
      return -1;
    }
    payload_size_bytes_ = WebRtcPcm16b_Encode(
        input_.get(), frame_size_samples_, encoded_.get());

    int next_send_time = rtp_generator_->GetRtpHeader(
        kPayloadType, frame_size_samples_, &rtp_header_);
    return next_send_time;
  }

  // Method to decide packet losses.
  virtual bool Lost() { return false; }

  // Method to calculate packet arrival time: the previous arrival time
  // advanced by the distance between this and the previous send time.
  int GetArrivalTime(int send_time) {
    int arrival_time = last_arrival_time_ + (send_time - last_send_time_);
    last_send_time_ = send_time;
    last_arrival_time_ = arrival_time;
    return arrival_time;
  }

  // Runs |num_loops| laps. In every lap, all packets whose arrival time has
  // been reached are inserted, then one output block is pulled and verified.
  void RunTest(int num_loops) {
    // Get the first input packet.
    uint32_t next_send_time;
    uint32_t next_arrival_time;
    do {
      next_send_time = GetNewPacket();
      next_arrival_time = GetArrivalTime(next_send_time);
    } while (Lost());  // If lost, immediately read the next packet.

    EXPECT_CALL(
        *external_decoder_,
        DecodeInternal(_, payload_size_bytes_, 1000 * samples_per_ms_, _, _))
        .Times(NumExpectedDecodeCalls(num_loops));

    uint32_t time_now = 0;
    for (int k = 0; k < num_loops; ++k) {
      while (time_now >= next_arrival_time) {
        InsertPacket(rtp_header_,
                     rtc::ArrayView<const uint8_t>(encoded_.get(),
                                                   payload_size_bytes_),
                     next_arrival_time);
        // Get next input packet.
        do {
          next_send_time = GetNewPacket();
          next_arrival_time = GetArrivalTime(next_send_time);
        } while (Lost());  // If lost, immediately read the next packet.
      }

      std::ostringstream ss;
      ss << "Lap number " << k << ".";
      SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
      // Pull one output block and let the subclass verify it.
      ASSERT_NO_FATAL_FAILURE(GetAndVerifyOutput());

      time_now += kOutputLengthMs;
    }
  }

  // Sets the expectation that the mock decoder is notified about the packet,
  // then forwards the packet to the base class.
  void InsertPacket(RTPHeader rtp_header,
                    rtc::ArrayView<const uint8_t> payload,
                    uint32_t receive_timestamp) override {
    EXPECT_CALL(*external_decoder_,
                IncomingPacket(_, payload.size(), rtp_header.sequenceNumber,
                               rtp_header.timestamp, receive_timestamp));
    NetEqExternalDecoderTest::InsertPacket(rtp_header, payload,
                                           receive_timestamp);
  }

  MockExternalPcm16B* external_decoder() { return external_decoder_.get(); }

  // Replaces the RTP generator; takes ownership of |rtp_generator|.
  void ResetRtpGenerator(test::RtpGenerator* rtp_generator) {
    rtp_generator_.reset(rtp_generator);
  }

  int samples_per_ms() const { return samples_per_ms_; }

 private:
  std::unique_ptr<MockExternalPcm16B> external_decoder_;
  int samples_per_ms_;
  size_t frame_size_samples_;
  std::unique_ptr<test::RtpGenerator> rtp_generator_;
  std::unique_ptr<int16_t[]> input_;    // One frame of input samples.
  std::unique_ptr<uint8_t[]> encoded_;  // Encoded payload of the frame.
  size_t payload_size_bytes_;
  uint32_t last_send_time_;
  uint32_t last_arrival_time_;
  std::unique_ptr<test::InputAudioFile> input_file_;
  RTPHeader rtp_header_;
};
// This test encodes a few packets of PCM16b 32 kHz data and inserts it into two
// different NetEq instances. The first instance uses the internal version of
// the decoder object, while the second one uses an externally created decoder
// object (ExternalPcm16B wrapped in MockExternalPcm16B, both defined above).
// The test verifies that the output from both instances match.
class NetEqExternalVsInternalDecoderTest : public NetEqExternalDecoderUnitTest,
public ::testing::Test {
protected:
static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz.
NetEqExternalVsInternalDecoderTest()
: NetEqExternalDecoderUnitTest(NetEqDecoder::kDecoderPCM16Bswb32kHz,
32000,
new MockExternalPcm16B(32000)),
sample_rate_hz_(32000) {
NetEq::Config config;
config.sample_rate_hz = sample_rate_hz_;
neteq_internal_.reset(
NetEq::Create(config, CreateBuiltinAudioDecoderFactory()));
}
void SetUp() override {
ASSERT_EQ(true, neteq_internal_->RegisterPayloadType(
kPayloadType, SdpAudioFormat("L16", 32000, 1)));
}
void GetAndVerifyOutput() override {
// Get audio from internal decoder instance.
bool muted;
EXPECT_EQ(NetEq::kOK, neteq_internal_->GetAudio(&output_internal_, &muted));
ASSERT_FALSE(muted);
EXPECT_EQ(1u, output_internal_.num_channels_);
EXPECT_EQ(static_cast<size_t>(kOutputLengthMs * sample_rate_hz_ / 1000),
output_internal_.samples_per_channel_);
// Get audio from external decoder instance.
GetOutputAudio(&output_);
const int16_t* output_data = output_.data();
const int16_t* output_internal_data = output_internal_.data();
for (size_t i = 0; i < output_.samples_per_channel_; ++i) {
ASSERT_EQ(output_data[i], output_internal_data[i])
<< "Diff in sample " << i << ".";
}
}
void InsertPacket(RTPHeader rtp_header,
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp) override {
// Insert packet in internal decoder.
ASSERT_EQ(NetEq::kOK, neteq_internal_->InsertPacket(rtp_header, payload,
receive_timestamp));
// Insert packet in external decoder instance.
NetEqExternalDecoderUnitTest::InsertPacket(rtp_header, payload,
receive_timestamp);
}
int NumExpectedDecodeCalls(int num_loops) override { return num_loops; }
private:
int sample_rate_hz_;
std::unique_ptr<NetEq> neteq_internal_;
AudioFrame output_internal_;
AudioFrame output_;
};
// Runs the external-versus-internal comparison over 100 laps of the test loop.
TEST_F(NetEqExternalVsInternalDecoderTest, RunTest) {
  const int kNumLaps = 100;  // 100 laps @ 10 ms each in the test loop.
  RunTest(kNumLaps);
}
class LargeTimestampJumpTest : public NetEqExternalDecoderUnitTest,
public ::testing::Test {
protected:
static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz.
enum TestStates {
kInitialPhase,
kNormalPhase,
kExpandPhase,
kFadedExpandPhase,
kRecovered
};
LargeTimestampJumpTest()
: NetEqExternalDecoderUnitTest(NetEqDecoder::kDecoderPCM16B,
8000,
new MockExternalPcm16B(8000)),
test_state_(kInitialPhase) {
EXPECT_CALL(*external_decoder(), HasDecodePlc())
.WillRepeatedly(Return(false));
}
virtual void UpdateState(AudioFrame::SpeechType output_type) {
switch (test_state_) {
case kInitialPhase: {
if (output_type == AudioFrame::kNormalSpeech) {
test_state_ = kNormalPhase;
}
break;
}
case kNormalPhase: {
if (output_type == AudioFrame::kPLC) {
test_state_ = kExpandPhase;
}
break;
}
case kExpandPhase: {
if (output_type == AudioFrame::kPLCCNG) {
test_state_ = kFadedExpandPhase;
} else if (output_type == AudioFrame::kNormalSpeech) {
test_state_ = kRecovered;
}
break;
}
case kFadedExpandPhase: {
if (output_type == AudioFrame::kNormalSpeech) {
test_state_ = kRecovered;
}
break;
}
case kRecovered: {
break;
}
}
}
void GetAndVerifyOutput() override {
AudioFrame output;
GetOutputAudio(&output);
UpdateState(output.speech_type_);
if (test_state_ == kExpandPhase || test_state_ == kFadedExpandPhase) {
// Don't verify the output in this phase of the test.
return;
}
ASSERT_EQ(1u, output.num_channels_);
const int16_t* output_data = output.data();
for (size_t i = 0; i < output.samples_per_channel_; ++i) {
if (output_data[i] != 0)
return;
}
EXPECT_TRUE(false)
<< "Expected at least one non-zero sample in each output block.";
}
int NumExpectedDecodeCalls(int num_loops) override {
// Some packets at the end of the stream won't be decoded. When the jump in
// timestamp happens, NetEq will do Expand during one GetAudio call. In the
// next call it will decode the packet after the jump, but the net result is
// that the delay increased by 1 packet. In another call, a Pre-emptive
// Expand operation is performed, leading to delay increase by 1 packet. In
// total, the test will end with a 2-packet delay, which results in the 2
// last packets not being decoded.
return num_loops - 2;
}
TestStates test_state_;
};
// Timestamps start at 2880, increase to 7200 and then jump to 2869342376, a
// forward jump of more than half the 32-bit range without wrap-around. The
// sequence numbers start at 42076 and increase by 1 for each packet, also
// across the jump.
TEST_F(LargeTimestampJumpTest, JumpLongerThanHalfRange) {
  constexpr uint16_t kFirstSequenceNumber = 42076;
  constexpr uint32_t kFirstTimestamp = 2880;
  constexpr uint32_t kTimestampBeforeJump = 7200;
  constexpr uint32_t kTimestampAfterJump = 2869342376;
  static_assert(kTimestampAfterJump > kTimestampBeforeJump,
                "timestamp jump should not result in wrap");
  static_assert(
      0x7FFFFFFF <
          static_cast<uint32_t>(kTimestampAfterJump - kTimestampBeforeJump),
      "jump should be larger than half range");

  // Swap the default RTP generator for one that performs the jump.
  auto* const jumping_generator = new test::TimestampJumpRtpGenerator(
      samples_per_ms(), kFirstSequenceNumber, kFirstTimestamp,
      kTimestampBeforeJump, kTimestampAfterJump);
  ResetRtpGenerator(jumping_generator);

  RunTest(130);  // Run 130 laps @ 10 ms each in the test loop.
  EXPECT_EQ(kRecovered, test_state_);
}
// Jump of slightly more than 2^31 (more than half the 32-bit timestamp range),
// with the start timestamp chosen so that the jump wraps around.
TEST_F(LargeTimestampJumpTest, JumpLongerThanHalfRangeAndWrap) {
  constexpr uint16_t kFirstSequenceNumber = 42076;
  constexpr uint32_t kFirstTimestamp = 3221223116;
  constexpr uint32_t kTimestampBeforeJump = 3221223216;
  constexpr uint32_t kTimestampAfterJump = 1073744278;
  static_assert(kTimestampBeforeJump > kTimestampAfterJump,
                "timestamp jump should result in wrap");
  static_assert(
      0x7FFFFFFF <
          static_cast<uint32_t>(kTimestampAfterJump - kTimestampBeforeJump),
      "jump should be larger than half range");

  // Swap the default RTP generator for one that performs the jump.
  auto* const jumping_generator = new test::TimestampJumpRtpGenerator(
      samples_per_ms(), kFirstSequenceNumber, kFirstTimestamp,
      kTimestampBeforeJump, kTimestampAfterJump);
  ResetRtpGenerator(jumping_generator);

  RunTest(130);  // Run 130 laps @ 10 ms each in the test loop.
  EXPECT_EQ(kRecovered, test_state_);
}
// Variant of LargeTimestampJumpTest for jumps shorter than half the timestamp
// range. Its state machine has no faded-expand step: reaching any state other
// than the four handled below is reported as a failure.
class ShortTimestampJumpTest : public LargeTimestampJumpTest {
 protected:
  // Transitions: initial -> normal (on normal speech) -> expand (on PLC) ->
  // recovered (on normal speech). kRecovered is terminal.
  void UpdateState(AudioFrame::SpeechType output_type) override {
    if (test_state_ == kInitialPhase) {
      if (output_type == AudioFrame::kNormalSpeech) {
        test_state_ = kNormalPhase;
      }
    } else if (test_state_ == kNormalPhase) {
      if (output_type == AudioFrame::kPLC) {
        test_state_ = kExpandPhase;
      }
    } else if (test_state_ == kExpandPhase) {
      if (output_type == AudioFrame::kNormalSpeech) {
        test_state_ = kRecovered;
      }
    } else if (test_state_ == kRecovered) {
      // Terminal state; nothing to update.
    } else {
      FAIL();
    }
  }

  int NumExpectedDecodeCalls(int num_loops) override {
    // Some packets won't be decoded because of the timestamp jump.
    const int kUndecodedPackets = 2;
    return num_loops - kUndecodedPackets;
  }
};
// Jump of slightly less than 2^31 (less than half the 32-bit timestamp range),
// with the start timestamp chosen so that the jump does not wrap around.
TEST_F(ShortTimestampJumpTest, JumpShorterThanHalfRange) {
  constexpr uint16_t kFirstSequenceNumber = 42076;
  constexpr uint32_t kFirstTimestamp = 4711;
  constexpr uint32_t kTimestampBeforeJump = 4811;
  constexpr uint32_t kTimestampAfterJump = 2147483747;
  static_assert(kTimestampAfterJump > kTimestampBeforeJump,
                "timestamp jump should not result in wrap");
  static_assert(
      0x7FFFFFFF >
          static_cast<uint32_t>(kTimestampAfterJump - kTimestampBeforeJump),
      "jump should be smaller than half range");

  // Swap the default RTP generator for one that performs the jump.
  auto* const jumping_generator = new test::TimestampJumpRtpGenerator(
      samples_per_ms(), kFirstSequenceNumber, kFirstTimestamp,
      kTimestampBeforeJump, kTimestampAfterJump);
  ResetRtpGenerator(jumping_generator);

  RunTest(130);  // Run 130 laps @ 10 ms each in the test loop.
  EXPECT_EQ(kRecovered, test_state_);
}
// Jump of slightly less than 2^31 (less than half the 32-bit timestamp range),
// with the start timestamp chosen so that the jump wraps around.
TEST_F(ShortTimestampJumpTest, JumpShorterThanHalfRangeAndWrap) {
  constexpr uint16_t kFirstSequenceNumber = 42076;
  constexpr uint32_t kFirstTimestamp = 3221227827;
  constexpr uint32_t kTimestampBeforeJump = 3221227927;
  constexpr uint32_t kTimestampAfterJump = 1073739567;
  static_assert(kTimestampBeforeJump > kTimestampAfterJump,
                "timestamp jump should result in wrap");
  static_assert(
      0x7FFFFFFF >
          static_cast<uint32_t>(kTimestampAfterJump - kTimestampBeforeJump),
      "jump should be smaller than half range");

  // Swap the default RTP generator for one that performs the jump.
  auto* const jumping_generator = new test::TimestampJumpRtpGenerator(
      samples_per_ms(), kFirstSequenceNumber, kFirstTimestamp,
      kTimestampBeforeJump, kTimestampAfterJump);
  ResetRtpGenerator(jumping_generator);

  RunTest(130);  // Run 130 laps @ 10 ms each in the test loop.
  EXPECT_EQ(kRecovered, test_state_);
}
} // namespace webrtc

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,449 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
#include <memory>
#include <string>
#include "webrtc/api/optional.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/defines.h"
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
#include "webrtc/modules/audio_coding/neteq/packet.h" // Declare PacketList.
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
#include "webrtc/modules/audio_coding/neteq/rtcp.h"
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/rtc_base/criticalsection.h"
#include "webrtc/rtc_base/thread_annotations.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class Accelerate;
class BackgroundNoise;
class BufferLevelFilter;
class ComfortNoise;
class DecisionLogic;
class DecoderDatabase;
class DelayManager;
class DelayPeakDetector;
class DtmfBuffer;
class DtmfToneGenerator;
class Expand;
class Merge;
class NackTracker;
class Normal;
class PacketBuffer;
class RedPayloadSplitter;
class PostDecodeVad;
class PreemptiveExpand;
class RandomVector;
class SyncBuffer;
class TimestampScaler;
struct AccelerateFactory;
struct DtmfEvent;
struct ExpandFactory;
struct PreemptiveExpandFactory;
class NetEqImpl : public webrtc::NetEq {
public:
enum class OutputType {
kNormalSpeech,
kPLC,
kCNG,
kPLCCNG,
kVadPassive
};
enum ErrorCodes {
kNoError = 0,
kOtherError,
kUnknownRtpPayloadType,
kDecoderNotFound,
kInvalidPointer,
kAccelerateError,
kPreemptiveExpandError,
kComfortNoiseErrorCode,
kDecoderErrorCode,
kOtherDecoderError,
kInvalidOperation,
kDtmfParsingError,
kDtmfInsertError,
kSampleUnderrun,
kDecodedTooMuch,
kRedundancySplitError,
kPacketBufferCorruption
};
struct Dependencies {
// The constructor populates the Dependencies struct with the default
// implementations of the objects. They can all be replaced by the user
// before sending the struct to the NetEqImpl constructor. However, there
// are dependencies between some of the classes inside the struct, so
// swapping out one may make it necessary to re-create another one.
explicit Dependencies(
const NetEq::Config& config,
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
~Dependencies();
std::unique_ptr<TickTimer> tick_timer;
std::unique_ptr<BufferLevelFilter> buffer_level_filter;
std::unique_ptr<DecoderDatabase> decoder_database;
std::unique_ptr<DelayPeakDetector> delay_peak_detector;
std::unique_ptr<DelayManager> delay_manager;
std::unique_ptr<DtmfBuffer> dtmf_buffer;
std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
std::unique_ptr<PacketBuffer> packet_buffer;
std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
std::unique_ptr<TimestampScaler> timestamp_scaler;
std::unique_ptr<AccelerateFactory> accelerate_factory;
std::unique_ptr<ExpandFactory> expand_factory;
std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
};
// Creates a new NetEqImpl object.
NetEqImpl(const NetEq::Config& config,
Dependencies&& deps,
bool create_components = true);
~NetEqImpl() override;
// Inserts a new packet into NetEq. The |receive_timestamp| is an indication
// of the time when the packet was received, and should be measured with
// the same tick rate as the RTP timestamp of the current payload.
// Returns 0 on success, -1 on failure.
int InsertPacket(const RTPHeader& rtp_header,
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp) override;
void InsertEmptyPacket(const RTPHeader& rtp_header) override;
int GetAudio(AudioFrame* audio_frame, bool* muted) override;
void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
int RegisterPayloadType(NetEqDecoder codec,
const std::string& codec_name,
uint8_t rtp_payload_type) override;
int RegisterExternalDecoder(AudioDecoder* decoder,
NetEqDecoder codec,
const std::string& codec_name,
uint8_t rtp_payload_type) override;
bool RegisterPayloadType(int rtp_payload_type,
const SdpAudioFormat& audio_format) override;
// Removes |rtp_payload_type| from the codec database. Returns 0 on success,
// -1 on failure.
int RemovePayloadType(uint8_t rtp_payload_type) override;
void RemoveAllPayloadTypes() override;
bool SetMinimumDelay(int delay_ms) override;
bool SetMaximumDelay(int delay_ms) override;
int LeastRequiredDelayMs() const override;
int SetTargetDelay() override;
int TargetDelayMs() override;
int CurrentDelayMs() const override;
int FilteredCurrentDelayMs() const override;
// Sets the playout mode to |mode|.
// Deprecated.
// TODO(henrik.lundin) Delete.
void SetPlayoutMode(NetEqPlayoutMode mode) override;
// Returns the current playout mode.
// Deprecated.
// TODO(henrik.lundin) Delete.
NetEqPlayoutMode PlayoutMode() const override;
// Writes the current network statistics to |stats|. The statistics are reset
// after the call.
int NetworkStatistics(NetEqNetworkStatistics* stats) override;
// Writes the current RTCP statistics to |stats|. The statistics are reset
// and a new report period is started with the call.
void GetRtcpStatistics(RtcpStatistics* stats) override;
NetEqLifetimeStatistics GetLifetimeStatistics() const override;
// Same as GetRtcpStatistics(), but does not reset anything.
void GetRtcpStatisticsNoReset(RtcpStatistics* stats) override;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
void EnableVad() override;
// Disables post-decode VAD.
void DisableVad() override;
rtc::Optional<uint32_t> GetPlayoutTimestamp() const override;
int last_output_sample_rate_hz() const override;
rtc::Optional<CodecInst> GetDecoder(int payload_type) const override;
rtc::Optional<SdpAudioFormat> GetDecoderFormat(
int payload_type) const override;
int SetTargetNumberOfChannels() override;
int SetTargetSampleRate() override;
// Flushes both the packet buffer and the sync buffer.
void FlushBuffers() override;
void PacketBufferStatistics(int* current_num_packets,
int* max_num_packets) const override;
void EnableNack(size_t max_nack_list_size) override;
void DisableNack() override;
std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
std::vector<uint32_t> LastDecodedTimestamps() const override;
int SyncBufferSizeMs() const override;
// This accessor method is only intended for testing purposes.
const SyncBuffer* sync_buffer_for_test() const;
Operations last_operation_for_test() const;
protected:
static const int kOutputSizeMs = 10;
static const size_t kMaxFrameSize = 5760; // 120 ms @ 48 kHz.
// TODO(hlundin): Provide a better value for kSyncBufferSize.
// Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for
// calculating correlations of current frame against history.
static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;
// Inserts a new packet into NetEq. This is used by the InsertPacket method
// above. Returns 0 on success, otherwise an error code.
// TODO(hlundin): Merge this with InsertPacket above?
int InsertPacketInternal(const RTPHeader& rtp_header,
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Delivers 10 ms of audio data. The data is written to |audio_frame|.
// Returns 0 on success, otherwise an error code.
int GetAudioInternal(AudioFrame* audio_frame, bool* muted)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Provides a decision to the GetAudioInternal method. The decision what to
// do is written to |operation|. Packets to decode are written to
// |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
// DTMF should be played, |play_dtmf| is set to true by the method.
// Returns 0 on success, otherwise an error code.
int GetDecision(Operations* operation,
PacketList* packet_list,
DtmfEvent* dtmf_event,
bool* play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Decodes the speech packets in |packet_list|, and writes the results to
// |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
// elements. The length of the decoded data is written to |decoded_length|.
// The speech type -- speech or (codec-internal) comfort noise -- is written
// to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
// comfort noise, those are not decoded.
int Decode(PacketList* packet_list,
Operations* operation,
int* decoded_length,
AudioDecoder::SpeechType* speech_type)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method to Decode(). Performs codec internal CNG.
int DecodeCng(AudioDecoder* decoder,
int* decoded_length,
AudioDecoder::SpeechType* speech_type)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method to Decode(). Performs the actual decoding.
int DecodeLoop(PacketList* packet_list,
const Operations& operation,
AudioDecoder* decoder,
int* decoded_length,
AudioDecoder::SpeechType* speech_type)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method which calls the Normal class to perform the normal operation.
void DoNormal(const int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method which calls the Merge class to perform the merge operation.
void DoMerge(int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method which calls the Expand class to perform the expand operation.
int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method which calls the Accelerate class to perform the accelerate
// operation.
int DoAccelerate(int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf,
bool fast_accelerate)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method which calls the PreemptiveExpand class to perform the
// preemptive expand operation.
int DoPreemptiveExpand(int16_t* decoded_buffer,
size_t decoded_length,
AudioDecoder::SpeechType speech_type,
bool play_dtmf)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
// noise. |packet_list| can either contain one SID frame to update the
// noise parameters, or no payload at all, in which case the previously
// received parameters are used.
int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Calls the audio decoder to generate codec-internal comfort noise when
// no packet was received.
void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Calls the DtmfToneGenerator class to generate DTMF tones.
int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Produces packet-loss concealment using alternative methods. If the codec
// has an internal PLC, it is called to generate samples. Otherwise, the
// method performs zero-stuffing.
void DoAlternativePlc(bool increase_timestamp)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Overdub DTMF on top of |output|.
int DtmfOverdub(const DtmfEvent& dtmf_event,
size_t num_channels,
int16_t* output) const
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Extracts packets from |packet_buffer_| to produce at least
// |required_samples| samples. The packets are inserted into |packet_list|.
// Returns the number of samples that the packets in the list will produce, or
// -1 in case of an error.
int ExtractPackets(size_t required_samples, PacketList* packet_list)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Resets various variables and objects to new values based on the sample rate
// |fs_hz| and the number of audio channels |channels|.
void SetSampleRateAndChannels(int fs_hz, size_t channels)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Returns the output type for the audio produced by the latest call to
// GetAudio().
OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Updates Expand and Merge.
virtual void UpdatePlcComponents(int fs_hz, size_t channels)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Creates DecisionLogic object with the mode given by |playout_mode_|.
virtual void CreateDecisionLogic() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
rtc::CriticalSection crit_sect_;
const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<BufferLevelFilter> buffer_level_filter_
RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<DecoderDatabase> decoder_database_
RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<DelayManager> delay_manager_ RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<DelayPeakDetector> delay_peak_detector_
RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<TimestampScaler> timestamp_scaler_
RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<ExpandFactory> expand_factory_
RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<AccelerateFactory> accelerate_factory_
RTC_GUARDED_BY(crit_sect_);
const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<DecisionLogic> decision_logic_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<AudioMultiVector> algorithm_buffer_
RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<PreemptiveExpand> preemptive_expand_
RTC_GUARDED_BY(crit_sect_);
RandomVector random_vector_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(crit_sect_);
Rtcp rtcp_ RTC_GUARDED_BY(crit_sect_);
StatisticsCalculator stats_ RTC_GUARDED_BY(crit_sect_);
int fs_hz_ RTC_GUARDED_BY(crit_sect_);
int fs_mult_ RTC_GUARDED_BY(crit_sect_);
int last_output_sample_rate_hz_ RTC_GUARDED_BY(crit_sect_);
size_t output_size_samples_ RTC_GUARDED_BY(crit_sect_);
size_t decoder_frame_length_ RTC_GUARDED_BY(crit_sect_);
Modes last_mode_ RTC_GUARDED_BY(crit_sect_);
Operations last_operation_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<int16_t[]> mute_factor_array_ RTC_GUARDED_BY(crit_sect_);
size_t decoded_buffer_length_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(crit_sect_);
uint32_t playout_timestamp_ RTC_GUARDED_BY(crit_sect_);
bool new_codec_ RTC_GUARDED_BY(crit_sect_);
uint32_t timestamp_ RTC_GUARDED_BY(crit_sect_);
bool reset_decoder_ RTC_GUARDED_BY(crit_sect_);
rtc::Optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(crit_sect_);
rtc::Optional<uint8_t> current_cng_rtp_payload_type_
RTC_GUARDED_BY(crit_sect_);
uint32_t ssrc_ RTC_GUARDED_BY(crit_sect_);
bool first_packet_ RTC_GUARDED_BY(crit_sect_);
const BackgroundNoiseMode background_noise_mode_ RTC_GUARDED_BY(crit_sect_);
NetEqPlayoutMode playout_mode_ RTC_GUARDED_BY(crit_sect_);
bool enable_fast_accelerate_ RTC_GUARDED_BY(crit_sect_);
std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(crit_sect_);
bool nack_enabled_ RTC_GUARDED_BY(crit_sect_);
const bool enable_muted_state_ RTC_GUARDED_BY(crit_sect_);
AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(crit_sect_) =
AudioFrame::kVadPassive;
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
RTC_GUARDED_BY(crit_sect_);
std::vector<uint32_t> last_decoded_timestamps_ RTC_GUARDED_BY(crit_sect_);
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,337 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <memory>
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/neteq/tools/neteq_external_decoder_test.h"
#include "webrtc/modules/audio_coding/neteq/tools/rtp_generator.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/test/gmock.h"
namespace webrtc {
namespace test {
using ::testing::_;
using ::testing::SetArgPointee;
using ::testing::Return;
// Mock decoder for the NetEq network statistics tests. Payloads are never
// decoded through the legacy DecodeInternal() path; instead ParsePayload()
// turns every payload into MockFrame objects, each of which decodes to
// kPacketDuration samples of silence per channel. When FEC is enabled, every
// payload additionally yields a secondary frame time-stamped one packet
// earlier.
class MockAudioDecoder final : public AudioDecoder {
 public:
  // TODO(nisse): Valid overrides commented out, because the gmock
  // methods don't use any override declarations, and we want to avoid
  // warnings from -Winconsistent-missing-override. See
  // http://crbug.com/428099.
  static const int kPacketDuration = 960;  // 48 kHz * 20 ms

  MockAudioDecoder(int sample_rate_hz, size_t num_channels)
      : sample_rate_hz_(sample_rate_hz),
        num_channels_(num_channels),
        fec_enabled_(false) {}

  // Die() is mocked so that tests can verify that the decoder is destroyed.
  ~MockAudioDecoder() /* override */ { Die(); }
  MOCK_METHOD0(Die, void());
  MOCK_METHOD0(Reset, void());

  // Encoded frame that always decodes to kPacketDuration zero-valued samples
  // per channel, classified as speech.
  class MockFrame : public AudioDecoder::EncodedAudioFrame {
   public:
    explicit MockFrame(size_t num_channels) : num_channels_(num_channels) {}

    size_t Duration() const override { return kPacketDuration; }

    // Writes silence to |decoded| and returns the number of samples written
    // (all channels included). Reports a test failure and returns an empty
    // Optional if |decoded| is too small.
    rtc::Optional<DecodeResult> Decode(
        rtc::ArrayView<int16_t> decoded) const override {
      // |decoded.size()| counts int16_t elements, so the required size must
      // be expressed in samples as well, not in bytes.
      const size_t output_size = kPacketDuration * num_channels_;
      if (decoded.size() < output_size) {
        ADD_FAILURE() << "Expected decoded.size() to be >= output_size ("
                      << decoded.size() << " vs. " << output_size << ")";
        return rtc::Optional<DecodeResult>();
      }
      memset(decoded.data(), 0, sizeof(int16_t) * output_size);
      return rtc::Optional<DecodeResult>({output_size, kSpeech});
    }

   private:
    const size_t num_channels_;
  };

  // Produces one primary frame for |timestamp| and, if FEC is enabled, one
  // secondary frame for the preceding packet (|timestamp| - kPacketDuration).
  // The payload content itself is ignored.
  std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
                                        uint32_t timestamp) /* override */ {
    std::vector<ParseResult> results;
    if (fec_enabled_) {
      std::unique_ptr<MockFrame> fec_frame(new MockFrame(num_channels_));
      // NOTE(review): the second argument (1 here, 0 below) is presumably the
      // frame priority, with 0 being the primary frame -- confirm against
      // AudioDecoder::ParseResult.
      results.emplace_back(timestamp - kPacketDuration, 1,
                           std::move(fec_frame));
    }
    std::unique_ptr<MockFrame> frame(new MockFrame(num_channels_));
    results.emplace_back(timestamp, 0, std::move(frame));
    return results;
  }

  // The following legacy entry points must never be reached, since all
  // packets go through ParsePayload(). Reaching them is a test failure.
  int PacketDuration(const uint8_t* encoded, size_t encoded_len) const
  /* override */ {
    ADD_FAILURE() << "Since going through ParsePayload, PacketDuration should "
                     "never get called.";
    return kPacketDuration;
  }

  bool PacketHasFec(
      const uint8_t* encoded, size_t encoded_len) const /* override */ {
    ADD_FAILURE() << "Since going through ParsePayload, PacketHasFec should "
                     "never get called.";
    return fec_enabled_;
  }

  int SampleRateHz() const /* override */ { return sample_rate_hz_; }
  size_t Channels() const /* override */ { return num_channels_; }

  // Controls whether ParsePayload() also emits a secondary (FEC) frame.
  void set_fec_enabled(bool enable_fec) { fec_enabled_ = enable_fec; }
  bool fec_enabled() const { return fec_enabled_; }

 protected:
  int DecodeInternal(const uint8_t* encoded,
                     size_t encoded_len,
                     int sample_rate_hz,
                     int16_t* decoded,
                     SpeechType* speech_type) /* override */ {
    ADD_FAILURE() << "Since going through ParsePayload, DecodeInternal should "
                     "never get called.";
    return -1;
  }

 private:
  const int sample_rate_hz_;
  const size_t num_channels_;
  bool fec_enabled_;
};
// Test fixture that pushes packets through NetEq using an external
// MockAudioDecoder, optionally dropping packets at a configurable rate, and
// then compares the reported NetEqNetworkStatistics with reference values.
class NetEqNetworkStatsTest : public NetEqExternalDecoderTest {
public:
static const int kPayloadSizeByte = 30;
static const int kFrameSizeMs = 20;
// Selects how one statistic is compared with its reference value in
// CheckNetworkStatistics().
enum logic {
kIgnore,
kEqual,
kSmallerThan,
kLargerThan,
};
// One comparison mode per checked statistic, followed by the reference
// values in |stats_ref|. The struct is initialized positionally in the tests
// below, so the member order must not be changed.
struct NetEqNetworkStatsCheck {
logic current_buffer_size_ms;
logic preferred_buffer_size_ms;
logic jitter_peaks_found;
logic packet_loss_rate;
logic expand_rate;
logic speech_expand_rate;
logic preemptive_rate;
logic accelerate_rate;
logic secondary_decoded_rate;
logic secondary_discarded_rate;
logic clockdrift_ppm;
logic added_zero_samples;
NetEqNetworkStatistics stats_ref;
};
// |decoder| is not owned by the fixture; only the raw pointer is stored.
NetEqNetworkStatsTest(NetEqDecoder codec,
int sample_rate_hz,
MockAudioDecoder* decoder)
: NetEqExternalDecoderTest(codec, sample_rate_hz, decoder),
external_decoder_(decoder),
samples_per_ms_(sample_rate_hz / 1000),
frame_size_samples_(kFrameSizeMs * samples_per_ms_),
rtp_generator_(new test::RtpGenerator(samples_per_ms_)),
last_lost_time_(0),
packet_loss_interval_(0xffffffff) {
Init();
}
// Returns true, and records |send_time| as the time of the latest loss, if
// at least |packet_loss_interval_| has passed since the previous loss.
bool Lost(uint32_t send_time) {
if (send_time - last_lost_time_ >= packet_loss_interval_) {
last_lost_time_ = send_time;
return true;
}
return false;
}
// Sets the interval between lost packets to kFrameSizeMs / |loss_rate|.
// Rates below 1e-3 set the interval to 0xffffffff, which is also the value
// the constructor starts with.
void SetPacketLossRate(double loss_rate) {
packet_loss_interval_ = (loss_rate >= 1e-3 ?
static_cast<double>(kFrameSizeMs) / loss_rate : 0xffffffff);
}
// Compares the current NetEq network statistics with |expects.stats_ref|.
// For every checked statistic 'x', |expects.x| selects the comparison:
// kIgnore: do not care
// kEqual: 'x' in current stats should equal 'x' in |stats_ref|
// kSmallerThan: 'x' in current stats should be < 'x' in |stats_ref|
// kLargerThan: 'x' in current stats should be > 'x' in |stats_ref|
void CheckNetworkStatistics(NetEqNetworkStatsCheck expects) {
NetEqNetworkStatistics stats;
neteq()->NetworkStatistics(&stats);
// Expands to the comparison selected by |expects.x| for statistic 'x'.
#define CHECK_NETEQ_NETWORK_STATS(x)\
switch (expects.x) {\
case kEqual:\
EXPECT_EQ(stats.x, expects.stats_ref.x);\
break;\
case kSmallerThan:\
EXPECT_LT(stats.x, expects.stats_ref.x);\
break;\
case kLargerThan:\
EXPECT_GT(stats.x, expects.stats_ref.x);\
break;\
default:\
break;\
}
CHECK_NETEQ_NETWORK_STATS(current_buffer_size_ms);
CHECK_NETEQ_NETWORK_STATS(preferred_buffer_size_ms);
CHECK_NETEQ_NETWORK_STATS(jitter_peaks_found);
CHECK_NETEQ_NETWORK_STATS(packet_loss_rate);
CHECK_NETEQ_NETWORK_STATS(expand_rate);
CHECK_NETEQ_NETWORK_STATS(speech_expand_rate);
CHECK_NETEQ_NETWORK_STATS(preemptive_rate);
CHECK_NETEQ_NETWORK_STATS(accelerate_rate);
CHECK_NETEQ_NETWORK_STATS(secondary_decoded_rate);
CHECK_NETEQ_NETWORK_STATS(secondary_discarded_rate);
CHECK_NETEQ_NETWORK_STATS(clockdrift_ppm);
CHECK_NETEQ_NETWORK_STATS(added_zero_samples);
#undef CHECK_NETEQ_NETWORK_STATS
// Compare with CurrentDelay, which should be identical.
EXPECT_EQ(stats.current_buffer_size_ms, neteq()->CurrentDelayMs());
}
// Runs |num_loops| rounds of "insert the packets that are due, then pull one
// block of output audio", checks the statistics against |expects| and
// finally flushes the NetEq buffers.
void RunTest(int num_loops, NetEqNetworkStatsCheck expects) {
uint32_t time_now;
uint32_t next_send_time;
// Initiate |last_lost_time_|.
time_now = next_send_time = last_lost_time_ =
rtp_generator_->GetRtpHeader(kPayloadType, frame_size_samples_,
&rtp_header_);
for (int k = 0; k < num_loops; ++k) {
// Delay by one frame such that the FEC can come in.
while (time_now + kFrameSizeMs >= next_send_time) {
next_send_time = rtp_generator_->GetRtpHeader(kPayloadType,
frame_size_samples_,
&rtp_header_);
// Packets flagged as lost are simply never inserted.
if (!Lost(next_send_time)) {
static const uint8_t payload[kPayloadSizeByte] = {0};
InsertPacket(rtp_header_, payload, next_send_time);
}
}
GetOutputAudio(&output_frame_);
time_now += kOutputLengthMs;
}
CheckNetworkStatistics(expects);
neteq()->FlushBuffers();
}
// Runs three phases: no loss without FEC, loss without FEC, and loss with
// FEC enabled, checking the statistics after each phase.
void DecodeFecTest() {
external_decoder_->set_fec_enabled(false);
NetEqNetworkStatsCheck expects = {
kIgnore, // current_buffer_size_ms
kIgnore, // preferred_buffer_size_ms
kIgnore, // jitter_peaks_found
kEqual, // packet_loss_rate
kEqual, // expand_rate
kEqual, // speech_expand_rate
kIgnore, // preemptive_rate
kEqual, // accelerate_rate
kEqual, // secondary_decoded_rate
kEqual, // secondary_discarded_rate
kIgnore, // clockdrift_ppm
kEqual, // added_zero_samples
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
};
RunTest(50, expects);
// Next we introduce packet losses.
SetPacketLossRate(0.1);
expects.stats_ref.packet_loss_rate = 1337;
expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 1065;
RunTest(50, expects);
// Next we enable FEC.
external_decoder_->set_fec_enabled(true);
// If FEC fills in the lost packets, no packet loss will be counted.
expects.stats_ref.packet_loss_rate = 0;
expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 0;
expects.stats_ref.secondary_decoded_rate = 2006;
expects.stats_ref.secondary_discarded_rate = 14336;
RunTest(50, expects);
}
// Runs a loss-free phase followed by a phase in which the loss interval
// equals one frame, and checks the expand statistics after each phase.
void NoiseExpansionTest() {
NetEqNetworkStatsCheck expects = {
kIgnore, // current_buffer_size_ms
kIgnore, // preferred_buffer_size_ms
kIgnore, // jitter_peaks_found
kEqual, // packet_loss_rate
kEqual, // expand_rate
kEqual, // speech_expand_rate
kIgnore, // preemptive_rate
kEqual, // accelerate_rate
kEqual, // secondary_decoded_rate
kEqual, // secondary_discarded_rate
kIgnore, // clockdrift_ppm
kEqual, // added_zero_samples
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
};
RunTest(50, expects);
SetPacketLossRate(1);
// NOTE(review): 16384 is presumably 1.0 in Q14, i.e. all output produced by
// expansion -- confirm against the NetEqNetworkStatistics documentation.
expects.stats_ref.expand_rate = 16384;
expects.stats_ref.speech_expand_rate = 5324;
RunTest(10, expects); // Lost 10 * 20ms in a row.
}
private:
MockAudioDecoder* external_decoder_;
const int samples_per_ms_;
const size_t frame_size_samples_;
std::unique_ptr<test::RtpGenerator> rtp_generator_;
RTPHeader rtp_header_;
// Send time of the most recent packet that was treated as lost.
uint32_t last_lost_time_;
// Minimum distance between two lost packets, compared against send times.
uint32_t packet_loss_interval_;
AudioFrame output_frame_;
};
// Runs the FEC statistics test with a mono 48 kHz mock decoder.
TEST(NetEqNetworkStatsTest, DecodeFec) {
MockAudioDecoder decoder(48000, 1);
NetEqNetworkStatsTest test(NetEqDecoder::kDecoderOpus, 48000, &decoder);
test.DecodeFecTest();
// Die() is called from ~MockAudioDecoder() when |decoder| goes out of scope.
EXPECT_CALL(decoder, Die()).Times(1);
}
// Runs the FEC statistics test with a two-channel 48 kHz mock decoder.
TEST(NetEqNetworkStatsTest, StereoDecodeFec) {
MockAudioDecoder decoder(48000, 2);
NetEqNetworkStatsTest test(NetEqDecoder::kDecoderOpus, 48000, &decoder);
test.DecodeFecTest();
// Die() is called from ~MockAudioDecoder() when |decoder| goes out of scope.
EXPECT_CALL(decoder, Die()).Times(1);
}
// Runs the noise expansion statistics test with a mono 48 kHz mock decoder.
TEST(NetEqNetworkStatsTest, NoiseExpansionTest) {
MockAudioDecoder decoder(48000, 1);
NetEqNetworkStatsTest test(NetEqDecoder::kDecoderOpus, 48000, &decoder);
test.NoiseExpansionTest();
// Die() is called from ~MockAudioDecoder() when |decoder| goes out of scope.
EXPECT_CALL(decoder, Die()).Times(1);
}
} // namespace test
} // namespace webrtc

View File

@ -0,0 +1,442 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Test to verify correct stereo and multi-channel operation.
#include <algorithm>
#include <memory>
#include <string>
#include <list>
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h"
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
#include "webrtc/modules/audio_coding/neteq/tools/input_audio_file.h"
#include "webrtc/modules/audio_coding/neteq/tools/rtp_generator.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/test/gtest.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
// Parameter set for one instantiation of the multi-channel NetEq tests.
struct TestParameters {
int frame_size;  // Frame size in ms.
int sample_rate;  // Sample rate in Hz.
size_t num_channels;  // Number of channels in the multi-channel stream.
};
// This is a parameterized test. The test parameters are supplied through a
// TestParameters struct, which is obtained through the GetParam() method.
//
// The objective of the test is to create a mono input signal and a
// multi-channel input signal, where each channel is identical to the mono
// input channel. The two input signals are processed through their respective
// NetEq instances. After that, the output signals are compared. The expected
// result is that each channel in the multi-channel output is identical to the
// mono output.
// Fixture that runs a mono NetEq instance and a multi-channel NetEq instance
// side by side. Every input frame is PCM16b-encoded once as mono and once
// duplicated into |num_channels_| interleaved channels, both packets are
// inserted at the same arrival time, and the two outputs are compared sample
// by sample. Subclasses customize the network model through GetArrivalTime()
// and Lost(), and the comparison through VerifyOutput().
class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
 protected:
  static const int kTimeStepMs = 10;
  static const size_t kMaxBlockSize = 480;  // 10 ms @ 48 kHz.
  static const uint8_t kPayloadTypeMono = 95;
  static const uint8_t kPayloadTypeMulti = 96;

  // Creates both NetEq instances and allocates the input and encoding
  // buffers. All of them are owned by smart pointers, so no destructor is
  // needed.
  NetEqStereoTest()
      : num_channels_(GetParam().num_channels),
        sample_rate_hz_(GetParam().sample_rate),
        samples_per_ms_(sample_rate_hz_ / 1000),
        frame_size_ms_(GetParam().frame_size),
        frame_size_samples_(
            static_cast<size_t>(frame_size_ms_ * samples_per_ms_)),
        output_size_samples_(10 * samples_per_ms_),
        rtp_generator_mono_(samples_per_ms_),
        rtp_generator_(samples_per_ms_),
        input_(new int16_t[frame_size_samples_]),
        input_multi_channel_(new int16_t[frame_size_samples_ * num_channels_]),
        // PCM16b uses two bytes per sample.
        encoded_(new uint8_t[2 * frame_size_samples_]),
        encoded_multi_channel_(
            new uint8_t[frame_size_samples_ * 2 * num_channels_]),
        payload_size_bytes_(0),
        multi_payload_size_bytes_(0),
        last_send_time_(0),
        last_arrival_time_(0) {
    NetEq::Config config;
    config.sample_rate_hz = sample_rate_hz_;
    rtc::scoped_refptr<AudioDecoderFactory> factory =
        CreateBuiltinAudioDecoderFactory();
    neteq_mono_.reset(NetEq::Create(config, factory));
    neteq_.reset(NetEq::Create(config, factory));
  }

  // Opens the input file and registers the PCM16b decoders matching the
  // sample rate and channel count of the current test parameters.
  void SetUp() override {
    const std::string file_name =
        webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
    input_file_.reset(new test::InputAudioFile(file_name));
    NetEqDecoder mono_decoder;
    NetEqDecoder multi_decoder;
    // Every unsupported combination ends in FAIL(), which returns from
    // SetUp(), so both decoders are always assigned before being used below.
    switch (sample_rate_hz_) {
      case 8000:
        mono_decoder = NetEqDecoder::kDecoderPCM16B;
        if (num_channels_ == 2) {
          multi_decoder = NetEqDecoder::kDecoderPCM16B_2ch;
        } else if (num_channels_ == 5) {
          multi_decoder = NetEqDecoder::kDecoderPCM16B_5ch;
        } else {
          FAIL() << "Only 2 and 5 channels supported for 8000 Hz.";
        }
        break;
      case 16000:
        mono_decoder = NetEqDecoder::kDecoderPCM16Bwb;
        if (num_channels_ == 2) {
          multi_decoder = NetEqDecoder::kDecoderPCM16Bwb_2ch;
        } else {
          FAIL() << "More than 2 channels is not supported for 16000 Hz.";
        }
        break;
      case 32000:
        mono_decoder = NetEqDecoder::kDecoderPCM16Bswb32kHz;
        if (num_channels_ == 2) {
          multi_decoder = NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch;
        } else {
          FAIL() << "More than 2 channels is not supported for 32000 Hz.";
        }
        break;
      case 48000:
        mono_decoder = NetEqDecoder::kDecoderPCM16Bswb48kHz;
        if (num_channels_ == 2) {
          multi_decoder = NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch;
        } else {
          FAIL() << "More than 2 channels is not supported for 48000 Hz.";
        }
        break;
      default:
        FAIL() << "We shouldn't get here.";
    }
    ASSERT_EQ(NetEq::kOK, neteq_mono_->RegisterPayloadType(mono_decoder, "mono",
                                                           kPayloadTypeMono));
    ASSERT_EQ(NetEq::kOK,
              neteq_->RegisterPayloadType(multi_decoder, "multi-channel",
                                          kPayloadTypeMulti));
  }

  void TearDown() override {}

  // Reads one frame from the input file and produces the next mono and
  // multi-channel packets (payloads and RTP headers). Returns the send time
  // reported by the mono RTP generator, or -1 if reading or encoding fails.
  int GetNewPackets() {
    if (!input_file_->Read(frame_size_samples_, input_.get())) {
      return -1;
    }
    payload_size_bytes_ = WebRtcPcm16b_Encode(input_.get(), frame_size_samples_,
                                              encoded_.get());
    if (frame_size_samples_ * 2 != payload_size_bytes_) {
      return -1;
    }
    int next_send_time = rtp_generator_mono_.GetRtpHeader(kPayloadTypeMono,
                                                          frame_size_samples_,
                                                          &rtp_header_mono_);
    test::InputAudioFile::DuplicateInterleaved(input_.get(),
                                               frame_size_samples_,
                                               num_channels_,
                                               input_multi_channel_.get());
    multi_payload_size_bytes_ = WebRtcPcm16b_Encode(
        input_multi_channel_.get(), frame_size_samples_ * num_channels_,
        encoded_multi_channel_.get());
    if (frame_size_samples_ * 2 * num_channels_ != multi_payload_size_bytes_) {
      return -1;
    }
    // Advance the multi-channel generator as well; its send time is not used.
    rtp_generator_.GetRtpHeader(kPayloadTypeMulti, frame_size_samples_,
                                &rtp_header_);
    return next_send_time;
  }

  // Requires every channel of the multi-channel output to be identical to the
  // mono output for the first |num_samples| samples.
  virtual void VerifyOutput(size_t num_samples) {
    const int16_t* output_data = output_.data();
    const int16_t* output_multi_channel_data = output_multi_channel_.data();
    for (size_t i = 0; i < num_samples; ++i) {
      for (size_t j = 0; j < num_channels_; ++j) {
        ASSERT_EQ(output_data[i],
                  output_multi_channel_data[i * num_channels_ + j])
            << "Diff in sample " << i << ", channel " << j << ".";
      }
    }
  }

  // Default network model: packets arrive with the same spacing as they were
  // sent with.
  virtual int GetArrivalTime(int send_time) {
    int arrival_time = last_arrival_time_ + (send_time - last_send_time_);
    last_send_time_ = send_time;
    last_arrival_time_ = arrival_time;
    return arrival_time;
  }

  // Default network model: no packets are lost.
  virtual bool Lost() { return false; }

  // Runs |num_loops| iterations of kTimeStepMs each. In every iteration all
  // packets that have arrived are inserted into both instances, one block of
  // audio is pulled from each, and the outputs are compared.
  void RunTest(int num_loops) {
    // Get next input packets (mono and multi-channel).
    int next_send_time;
    int next_arrival_time;
    do {
      next_send_time = GetNewPackets();
      ASSERT_NE(-1, next_send_time);
      next_arrival_time = GetArrivalTime(next_send_time);
    } while (Lost());  // If lost, immediately read the next packet.

    int time_now = 0;
    for (int k = 0; k < num_loops; ++k) {
      // Attach the lap number to every failure reported in this iteration.
      std::ostringstream ss;
      ss << "Lap number " << k << ".";
      SCOPED_TRACE(ss.str());

      while (time_now >= next_arrival_time) {
        // Insert packet in mono instance.
        ASSERT_EQ(NetEq::kOK,
                  neteq_mono_->InsertPacket(rtp_header_mono_,
                                            rtc::ArrayView<const uint8_t>(
                                                encoded_.get(),
                                                payload_size_bytes_),
                                            next_arrival_time));
        // Insert packet in multi-channel instance.
        ASSERT_EQ(NetEq::kOK,
                  neteq_->InsertPacket(
                      rtp_header_,
                      rtc::ArrayView<const uint8_t>(
                          encoded_multi_channel_.get(),
                          multi_payload_size_bytes_),
                      next_arrival_time));
        // Get next input packets (mono and multi-channel).
        do {
          next_send_time = GetNewPackets();
          ASSERT_NE(-1, next_send_time);
          next_arrival_time = GetArrivalTime(next_send_time);
        } while (Lost());  // If lost, immediately read the next packet.
      }
      // Get audio from mono instance.
      bool muted;
      EXPECT_EQ(NetEq::kOK, neteq_mono_->GetAudio(&output_, &muted));
      ASSERT_FALSE(muted);
      EXPECT_EQ(1u, output_.num_channels_);
      EXPECT_EQ(output_size_samples_, output_.samples_per_channel_);
      // Get audio from multi-channel instance.
      ASSERT_EQ(NetEq::kOK, neteq_->GetAudio(&output_multi_channel_, &muted));
      ASSERT_FALSE(muted);
      EXPECT_EQ(num_channels_, output_multi_channel_.num_channels_);
      EXPECT_EQ(output_size_samples_,
                output_multi_channel_.samples_per_channel_);
      // Compare mono and multi-channel.
      ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_));

      time_now += kTimeStepMs;
    }
  }

  const size_t num_channels_;
  const int sample_rate_hz_;
  const int samples_per_ms_;
  const int frame_size_ms_;
  const size_t frame_size_samples_;
  const size_t output_size_samples_;
  std::unique_ptr<NetEq> neteq_mono_;
  std::unique_ptr<NetEq> neteq_;
  test::RtpGenerator rtp_generator_mono_;
  test::RtpGenerator rtp_generator_;
  std::unique_ptr<int16_t[]> input_;
  std::unique_ptr<int16_t[]> input_multi_channel_;
  std::unique_ptr<uint8_t[]> encoded_;
  std::unique_ptr<uint8_t[]> encoded_multi_channel_;
  AudioFrame output_;
  AudioFrame output_multi_channel_;
  RTPHeader rtp_header_mono_;
  RTPHeader rtp_header_;
  size_t payload_size_bytes_;
  size_t multi_payload_size_bytes_;
  int last_send_time_;
  int last_arrival_time_;
  std::unique_ptr<test::InputAudioFile> input_file_;
};
// Variant that uses the base-class network model unchanged: no jitter and no
// losses.
class NetEqStereoTestNoJitter : public NetEqStereoTest {
 protected:
  NetEqStereoTestNoJitter() {
    // Start the sender 100 ms before the receiver to pre-fill the buffer.
    // This is to avoid doing preemptive expand early in the test.
    // TODO(hlundin): Mock the decision making instead to control the modes.
    const int kSenderHeadStartMs = 100;
    last_arrival_time_ = -kSenderHeadStartMs;
  }
};

TEST_P(NetEqStereoTestNoJitter, RunTest) {
  RunTest(8);
}
// Variant in which the spacing between arrivals is |drift_factor| times the
// spacing between sends. With the factor 0.9 used here, packets arrive
// faster than they were sent.
class NetEqStereoTestPositiveDrift : public NetEqStereoTest {
 protected:
  NetEqStereoTestPositiveDrift() : drift_factor(0.9) {
    // Start the sender 100 ms before the receiver to pre-fill the buffer.
    // This is to avoid doing preemptive expand early in the test.
    // TODO(hlundin): Mock the decision making instead to control the modes.
    last_arrival_time_ = -100;
  }

  // Scales the send interval by |drift_factor|; the result is truncated to
  // an integer when the new arrival time is stored.
  virtual int GetArrivalTime(int send_time) {
    const int send_interval = send_time - last_send_time_;
    const int scaled_arrival =
        static_cast<int>(last_arrival_time_ + drift_factor * send_interval);
    last_arrival_time_ = scaled_arrival;
    last_send_time_ = send_time;
    return scaled_arrival;
  }

  double drift_factor;
};

TEST_P(NetEqStereoTestPositiveDrift, RunTest) {
  RunTest(100);
}
// Same arrival model as the positive-drift variant, but with a drift factor
// of 1.1 (packets arrive slower than they were sent) and without the sender
// head start.
class NetEqStereoTestNegativeDrift : public NetEqStereoTestPositiveDrift {
 protected:
  NetEqStereoTestNegativeDrift() {
    last_arrival_time_ = 0;
    drift_factor = 1.1;
  }
};

TEST_P(NetEqStereoTestNegativeDrift, RunTest) {
  RunTest(100);
}
class NetEqStereoTestDelays : public NetEqStereoTest {
protected:
static const int kDelayInterval = 10;
static const int kDelay = 1000;
NetEqStereoTestDelays()
: NetEqStereoTest(),
frame_index_(0) {
}
virtual int GetArrivalTime(int send_time) {
// Deliver immediately, unless we have a back-log.
int arrival_time = std::min(last_arrival_time_, send_time);
if (++frame_index_ % kDelayInterval == 0) {
// Delay this packet.
arrival_time += kDelay;
}
last_send_time_ = send_time;
last_arrival_time_ = arrival_time;
return arrival_time;
}
int frame_index_;
};
TEST_P(NetEqStereoTestDelays, RunTest) {
RunTest(1000);
}
class NetEqStereoTestLosses : public NetEqStereoTest {
protected:
static const int kLossInterval = 10;
NetEqStereoTestLosses()
: NetEqStereoTest(),
frame_index_(0) {
}
virtual bool Lost() {
return (++frame_index_) % kLossInterval == 0;
}
// TODO(hlundin): NetEq is not giving bitexact results for these cases.
virtual void VerifyOutput(size_t num_samples) {
for (size_t i = 0; i < num_samples; ++i) {
const int16_t* output_data = output_.data();
const int16_t* output_multi_channel_data = output_multi_channel_.data();
auto first_channel_sample =
output_multi_channel_data[i * num_channels_];
for (size_t j = 0; j < num_channels_; ++j) {
const int kErrorMargin = 200;
EXPECT_NEAR(output_data[i],
output_multi_channel_data[i * num_channels_ + j],
kErrorMargin)
<< "Diff in sample " << i << ", channel " << j << ".";
EXPECT_EQ(first_channel_sample,
output_multi_channel_data[i * num_channels_ + j]);
}
}
}
int frame_index_;
};
TEST_P(NetEqStereoTestLosses, RunTest) {
RunTest(100);
}
// Creates a list of parameter sets.
std::list<TestParameters> GetTestParameters() {
std::list<TestParameters> l;
const int sample_rates[] = {8000, 16000, 32000};
const int num_rates = sizeof(sample_rates) / sizeof(sample_rates[0]);
// Loop through sample rates.
for (int rate_index = 0; rate_index < num_rates; ++rate_index) {
int sample_rate = sample_rates[rate_index];
// Loop through all frame sizes between 10 and 60 ms.
for (int frame_size = 10; frame_size <= 60; frame_size += 10) {
TestParameters p;
p.frame_size = frame_size;
p.sample_rate = sample_rate;
p.num_channels = 2;
l.push_back(p);
if (sample_rate == 8000) {
// Add a five-channel test for 8000 Hz.
p.num_channels = 5;
l.push_back(p);
}
}
}
return l;
}
// Streams a readable description of |p| to |os|, so that the parameter set
// is identifiable in test output.
void PrintTo(const TestParameters& p, ::std::ostream* os) {
  ::std::ostream& out = *os;
  out << "{frame_size = " << p.frame_size;
  out << ", num_channels = " << p.num_channels;
  out << ", sample_rate = " << p.sample_rate << "}";
}
// Instantiate the tests. Each fixture is instantiated with the complete list
// returned by GetTestParameters(), so that all different parameter
// combinations are tested for every network model.
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestNoJitter,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestPositiveDrift,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestNegativeDrift,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestDelays,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestLosses,
::testing::ValuesIn(GetTestParameters()));
} // namespace webrtc

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,32 @@
syntax = "proto2";
option optimize_for = LITE_RUNTIME;
package webrtc.neteq_unittest;
// Serialized form of NetEq network statistics in the neteq_unittest package.
// Field numbers are part of the serialized format and must not be reused or
// changed.
message NetEqNetworkStatistics {
// Next field number 18.
optional uint32 current_buffer_size_ms = 1;
optional uint32 preferred_buffer_size_ms = 2;
optional uint32 jitter_peaks_found = 3;
optional uint32 packet_loss_rate = 4;
// Deprecated; the field number stays reserved by this declaration.
optional uint32 packet_discard_rate = 5 [deprecated = true];
optional uint32 expand_rate = 6;
optional uint32 speech_expand_rate = 7;
optional uint32 preemptive_rate = 8;
optional uint32 accelerate_rate = 9;
optional uint32 secondary_decoded_rate = 10;
// Listed next to secondary_decoded_rate, but carries the highest field
// number in use (17).
optional uint32 secondary_discarded_rate = 17;
optional int32 clockdrift_ppm = 11;
optional uint64 added_zero_samples = 12;
optional int32 mean_waiting_time_ms = 13;
optional int32 median_waiting_time_ms = 14;
optional int32 min_waiting_time_ms = 15;
optional int32 max_waiting_time_ms = 16;
}
// Serialized form of RTCP statistics in the neteq_unittest package.
message RtcpStatistics {
optional uint32 fraction_lost = 1;
optional uint32 cumulative_lost = 2;
optional uint32 extended_max_sequence_number = 3;
optional uint32 jitter = 4;
}

View File

@ -0,0 +1,216 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/normal.h"
#include <string.h> // memset, memcpy
#include <algorithm> // min
#include "webrtc/api/audio_codecs/audio_decoder.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include "webrtc/rtc_base/checks.h"
namespace webrtc {
int Normal::Process(const int16_t* input,
size_t length,
Modes last_mode,
int16_t* external_mute_factor_array,
AudioMultiVector* output) {
if (length == 0) {
// Nothing to process.
output->Clear();
return static_cast<int>(length);
}
RTC_DCHECK(output->Empty());
// Output should be empty at this point.
if (length % output->Channels() != 0) {
// The length does not match the number of channels.
output->Clear();
return 0;
}
output->PushBackInterleaved(input, length);
const int fs_mult = fs_hz_ / 8000;
RTC_DCHECK_GT(fs_mult, 0);
// fs_shift = log2(fs_mult), rounded down.
// Note that |fs_shift| is not "exact" for 48 kHz.
// TODO(hlundin): Investigate this further.
const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);
// Check if last RecOut call resulted in an Expand. If so, we have to take
// care of some cross-fading and unmuting.
if (last_mode == kModeExpand) {
// Generate interpolation data using Expand.
// First, set Expand parameters to appropriate values.
expand_->SetParametersForNormalAfterExpand();
// Call Expand.
AudioMultiVector expanded(output->Channels());
expand_->Process(&expanded);
expand_->Reset();
size_t length_per_channel = length / output->Channels();
std::unique_ptr<int16_t[]> signal(new int16_t[length_per_channel]);
for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) {
// Adjust muting factor (main muting factor times expand muting factor).
external_mute_factor_array[channel_ix] = static_cast<int16_t>(
(external_mute_factor_array[channel_ix] *
expand_->MuteFactor(channel_ix)) >> 14);
(*output)[channel_ix].CopyTo(length_per_channel, 0, signal.get());
// Find largest absolute value in new data.
int16_t decoded_max =
WebRtcSpl_MaxAbsValueW16(signal.get(), length_per_channel);
// Adjust muting factor if needed (to BGN level).
size_t energy_length =
std::min(static_cast<size_t>(fs_mult * 64), length_per_channel);
int scaling = 6 + fs_shift
- WebRtcSpl_NormW32(decoded_max * decoded_max);
scaling = std::max(scaling, 0); // |scaling| should always be >= 0.
int32_t energy = WebRtcSpl_DotProductWithScale(signal.get(), signal.get(),
energy_length, scaling);
int32_t scaled_energy_length =
static_cast<int32_t>(energy_length >> scaling);
if (scaled_energy_length > 0) {
energy = energy / scaled_energy_length;
} else {
energy = 0;
}
int mute_factor;
if ((energy != 0) &&
(energy > background_noise_.Energy(channel_ix))) {
// Normalize new frame energy to 15 bits.
scaling = WebRtcSpl_NormW32(energy) - 16;
// We want background_noise_.energy() / energy in Q14.
int32_t bgn_energy = WEBRTC_SPL_SHIFT_W32(
background_noise_.Energy(channel_ix), scaling + 14);
int16_t energy_scaled =
static_cast<int16_t>(WEBRTC_SPL_SHIFT_W32(energy, scaling));
int32_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled);
mute_factor = WebRtcSpl_SqrtFloor(ratio << 14);
} else {
mute_factor = 16384; // 1.0 in Q14.
}
if (mute_factor > external_mute_factor_array[channel_ix]) {
external_mute_factor_array[channel_ix] =
static_cast<int16_t>(std::min(mute_factor, 16384));
}
// If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
int increment = 64 / fs_mult;
for (size_t i = 0; i < length_per_channel; i++) {
// Scale with mute factor.
RTC_DCHECK_LT(channel_ix, output->Channels());
RTC_DCHECK_LT(i, output->Size());
int32_t scaled_signal = (*output)[channel_ix][i] *
external_mute_factor_array[channel_ix];
// Shift 14 with proper rounding.
(*output)[channel_ix][i] =
static_cast<int16_t>((scaled_signal + 8192) >> 14);
// Increase mute_factor towards 16384.
external_mute_factor_array[channel_ix] = static_cast<int16_t>(std::min(
external_mute_factor_array[channel_ix] + increment, 16384));
}
// Interpolate the expanded data into the new vector.
// (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
size_t win_length = samples_per_ms_;
int16_t win_slope_Q14 = default_win_slope_Q14_;
RTC_DCHECK_LT(channel_ix, output->Channels());
if (win_length > output->Size()) {
win_length = output->Size();
win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
}
int16_t win_up_Q14 = 0;
for (size_t i = 0; i < win_length; i++) {
win_up_Q14 += win_slope_Q14;
(*output)[channel_ix][i] =
(win_up_Q14 * (*output)[channel_ix][i] +
((1 << 14) - win_up_Q14) * expanded[channel_ix][i] + (1 << 13)) >>
14;
}
RTC_DCHECK_GT(win_up_Q14,
(1 << 14) - 32); // Worst case rouding is a length of 34
}
} else if (last_mode == kModeRfc3389Cng) {
RTC_DCHECK_EQ(output->Channels(), 1); // Not adapted for multi-channel yet.
static const size_t kCngLength = 48;
RTC_DCHECK_LE(8 * fs_mult, kCngLength);
int16_t cng_output[kCngLength];
// Reset mute factor and start up fresh.
external_mute_factor_array[0] = 16384;
ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
if (cng_decoder) {
// Generate long enough for 48kHz.
if (!cng_decoder->Generate(cng_output, 0)) {
// Error returned; set return vector to all zeros.
memset(cng_output, 0, sizeof(cng_output));
}
} else {
// If no CNG instance is defined, just copy from the decoded data.
// (This will result in interpolating the decoded with itself.)
(*output)[0].CopyTo(fs_mult * 8, 0, cng_output);
}
// Interpolate the CNG into the new vector.
// (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
size_t win_length = samples_per_ms_;
int16_t win_slope_Q14 = default_win_slope_Q14_;
if (win_length > kCngLength) {
win_length = kCngLength;
win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
}
int16_t win_up_Q14 = 0;
for (size_t i = 0; i < win_length; i++) {
win_up_Q14 += win_slope_Q14;
(*output)[0][i] =
(win_up_Q14 * (*output)[0][i] +
((1 << 14) - win_up_Q14) * cng_output[i] + (1 << 13)) >>
14;
}
RTC_DCHECK_GT(win_up_Q14,
(1 << 14) - 32); // Worst case rouding is a length of 34
} else if (external_mute_factor_array[0] < 16384) {
// Previous was neither of Expand, FadeToBGN or RFC3389_CNG, but we are
// still ramping up from previous muting.
// If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
int increment = 64 / fs_mult;
size_t length_per_channel = length / output->Channels();
for (size_t i = 0; i < length_per_channel; i++) {
for (size_t channel_ix = 0; channel_ix < output->Channels();
++channel_ix) {
// Scale with mute factor.
RTC_DCHECK_LT(channel_ix, output->Channels());
RTC_DCHECK_LT(i, output->Size());
int32_t scaled_signal = (*output)[channel_ix][i] *
external_mute_factor_array[channel_ix];
// Shift 14 with proper rounding.
(*output)[channel_ix][i] =
static_cast<int16_t>((scaled_signal + 8192) >> 14);
// Increase mute_factor towards 16384.
external_mute_factor_array[channel_ix] = static_cast<int16_t>(std::min(
16384, external_mute_factor_array[channel_ix] + increment));
}
}
}
return static_cast<int>(length);
}
} // namespace webrtc

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
#include <string.h> // Access to size_t.
#include <vector>
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/defines.h"
#include "webrtc/rtc_base/checks.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/rtc_base/safe_conversions.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
class DecoderDatabase;
class Expand;
// This class provides the "Normal" DSP operation, that is performed when
// there is no data loss, no need to stretch the timing of the signal, and
// no other "special circumstances" are at hand.
class Normal {
 public:
  // |fs_hz| is the sample rate in Hz; the number of samples per millisecond is
  // derived from it with rtc::CheckedDivExact(fs_hz, 1000).
  // |decoder_database| and |expand| are stored as raw pointers and
  // |background_noise| as a reference. None of them is owned (or deleted) by
  // this object, so they must outlive it.
  Normal(int fs_hz,
         DecoderDatabase* decoder_database,
         const BackgroundNoise& background_noise,
         Expand* expand)
      : fs_hz_(fs_hz),
        decoder_database_(decoder_database),
        background_noise_(background_noise),
        expand_(expand),
        samples_per_ms_(rtc::CheckedDivExact(fs_hz_, 1000)),
        // Q14 increment per sample for a ramp that reaches 1.0 (1 << 14) over
        // one millisecond. The value is range-checked against int16_t, which
        // is the type of the member it initializes.
        default_win_slope_Q14_(
            rtc::dchecked_cast<int16_t>((1 << 14) / samples_per_ms_)) {}

  virtual ~Normal() {}

  // Performs the "Normal" operation. The decoder data is supplied in |input|,
  // having |length| samples in total for all channels (interleaved). The
  // result is written to |output|. The number of channels allocated in
  // |output| defines the number of channels that will be used when
  // de-interleaving |input|. |last_mode| contains the mode used in the previous
  // GetAudio call (i.e., not the current one), and
  // |external_mute_factor_array| points to the per-channel mute factors (Q14)
  // in the NetEqImpl class; the factors are updated by this call.
  // On the regular path the return value is |length| cast to int.
  int Process(const int16_t* input, size_t length,
              Modes last_mode,
              int16_t* external_mute_factor_array,
              AudioMultiVector* output);

 private:
  int fs_hz_;
  DecoderDatabase* decoder_database_;
  const BackgroundNoise& background_noise_;
  Expand* expand_;
  const size_t samples_per_ms_;
  const int16_t default_win_slope_Q14_;

  RTC_DISALLOW_COPY_AND_ASSIGN(Normal);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NORMAL_H_

View File

@ -0,0 +1,176 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Normal class.
#include "webrtc/modules/audio_coding/neteq/normal.h"
#include <memory>
#include <vector>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
#include "webrtc/modules/audio_coding/neteq/expand.h"
#include "webrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/mock/mock_expand.h"
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
#include "webrtc/test/gtest.h"
using ::testing::_;
using ::testing::Invoke;
namespace webrtc {
namespace {
// Replacement action for the mocked Expand::Process(): copies a
// single-channel vector of 11520 samples into |output| and returns 0.
int ExpandProcess120ms(AudioMultiVector* output) {
  const size_t kNumSamples = 11520u;
  AudioMultiVector filler(1, kNumSamples);
  filler.CopyTo(output);
  return 0;
}
} // namespace
// Verifies that a Normal object can be constructed and destroyed together
// with its (real) collaborators.
TEST(Normal, CreateAndDestroy) {
  const int kSampleRateHz = 8000;
  const size_t kNumChannels = 1;
  MockDecoderDatabase db;
  BackgroundNoise bgn(kNumChannels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, kSampleRateHz,
                kNumChannels);
  Normal normal(kSampleRateHz, &db, bgn, &expand);
  // |db| reports Die() when it goes out of scope at the end of the test.
  EXPECT_CALL(db, Die());
}
// Exercises Normal::Process() with inputs that make the scaled energy length
// zero, which must not be used as a divisor.
TEST(Normal, AvoidDivideByZero) {
  WebRtcSpl_Init();
  const int kSampleRateHz = 8000;
  const size_t kNumChannels = 1;
  MockDecoderDatabase db;
  BackgroundNoise bgn(kNumChannels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics,
                    kSampleRateHz, kNumChannels);
  Normal normal(kSampleRateHz, &db, bgn, &expand);

  int16_t input[1000] = {0};
  // Every channel starts with a mute factor of 16384.
  std::vector<int16_t> mute_factors(kNumChannels, 16384);
  AudioMultiVector output(kNumChannels);

  // Zero input length: nothing is returned and nothing is written.
  const int zero_length_result =
      normal.Process(input, 0, kModeExpand, mute_factors.data(), &output);
  EXPECT_EQ(0, zero_length_result);
  EXPECT_EQ(0u, output.Size());

  // Try to make energy_length >> scaling = 0;
  EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
  EXPECT_CALL(expand, Process(_));
  EXPECT_CALL(expand, Reset());
  // With fewer than 64 input samples, energy_length in Normal::Process()
  // equals the input size. The all-zero input gives decoded_max == 0, so
  // scaling is >= 6 and energy_length >> scaling becomes 0. Dividing by that
  // value would be a problem.
  const int kInputSizeSamples = 63;
  const int short_input_result = normal.Process(
      input, kInputSizeSamples, kModeExpand, mute_factors.data(), &output);
  EXPECT_EQ(kInputSizeSamples, short_input_result);

  EXPECT_CALL(db, Die());      // Called when |db| goes out of scope.
  EXPECT_CALL(expand, Die());  // Called when |expand| goes out of scope.
}
// Feeds Normal::Process() an interleaved input whose length is one sample
// short of a whole number of samples per channel, and expects no output.
TEST(Normal, InputLengthAndChannelsDoNotMatch) {
  WebRtcSpl_Init();
  const int kSampleRateHz = 8000;
  const size_t kNumChannels = 2;
  MockDecoderDatabase db;
  BackgroundNoise bgn(kNumChannels);
  SyncBuffer sync_buffer(kNumChannels, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics,
                    kSampleRateHz, kNumChannels);
  Normal normal(kSampleRateHz, &db, bgn, &expand);

  int16_t input[1000] = {0};
  // Every channel starts with a mute factor of 16384.
  std::vector<int16_t> mute_factors(kNumChannels, 16384);
  AudioMultiVector output(kNumChannels);

  // One sample less than 80 samples per channel.
  const size_t kInputLength = 80 * kNumChannels - 1;
  const int result = normal.Process(input, kInputLength, kModeExpand,
                                    mute_factors.data(), &output);
  EXPECT_EQ(0, result);
  EXPECT_EQ(0u, output.Size());

  EXPECT_CALL(db, Die());      // Called when |db| goes out of scope.
  EXPECT_CALL(expand, Die());  // Called when |expand| goes out of scope.
}
// Runs Normal::Process() after an expand period with an 11520-sample input at
// 48 kHz, with the mocked Expand::Process() producing an equally long vector.
TEST(Normal, LastModeExpand120msPacket) {
  WebRtcSpl_Init();
  const int kFs = 48000;
  const size_t kPacketsizeBytes = 11520u;
  const size_t kChannels = 1;
  MockDecoderDatabase db;
  BackgroundNoise bgn(kChannels);
  SyncBuffer sync_buffer(kChannels, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs,
                    kChannels);
  Normal normal(kFs, &db, bgn, &expand);

  int16_t input[kPacketsizeBytes] = {0};
  // Every channel starts with a mute factor of 16384.
  std::vector<int16_t> mute_factors(kChannels, 16384);
  AudioMultiVector output(kChannels);

  EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
  EXPECT_CALL(expand, Process(_)).WillOnce(Invoke(ExpandProcess120ms));
  EXPECT_CALL(expand, Reset());

  const int result = normal.Process(input, kPacketsizeBytes, kModeExpand,
                                    mute_factors.data(), &output);
  EXPECT_EQ(static_cast<int>(kPacketsizeBytes), result);
  EXPECT_EQ(kPacketsizeBytes, output.Size());

  EXPECT_CALL(db, Die());      // Called when |db| goes out of scope.
  EXPECT_CALL(expand, Die());  // Called when |expand| goes out of scope.
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/packet.h"
namespace webrtc {
// The special member functions are declared in packet.h and defaulted here,
// out of line. Only move construction and move assignment are provided; use
// Clone() when an explicit copy is needed.
Packet::Packet() = default;
Packet::Packet(Packet&& b) = default;
Packet::~Packet() = default;
Packet& Packet::operator=(Packet&& b) = default;
// Returns a copy of this packet. Only un-parsed packets can be cloned: the
// call aborts (RTC_CHECK) if |frame| is set. The payload bytes are duplicated;
// |waiting_time| and |frame| are left unset in the copy.
Packet Packet::Clone() const {
  RTC_CHECK(!frame);

  Packet copy;
  copy.payload.SetData(payload.data(), payload.size());
  copy.payload_type = payload_type;
  copy.sequence_number = sequence_number;
  copy.timestamp = timestamp;
  copy.priority = priority;
  return copy;
}
} // namespace webrtc

View File

@ -0,0 +1,124 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_H_
#include <list>
#include <memory>
#include "webrtc/api/audio_codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
#include "webrtc/rtc_base/buffer.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Struct for holding RTP packets.
struct Packet {
  // Two-level priority of a payload. Lower numbers mean higher priority, and
  // {0, 0} is the highest priority.
  struct Priority {
    Priority() : codec_level(0), red_level(0) {}
    Priority(int codec_level, int red_level)
        : codec_level(codec_level), red_level(red_level) {
      CheckInvariant();
    }

    int codec_level;
    int red_level;

    // Priorities are sorted low-to-high, first on the level the codec
    // prioritizes it, then on the level of RED packet it is; i.e. if it is a
    // primary or secondary payload of a RED packet. For example: with Opus, an
    // Fec packet (which the decoder prioritizes lower than a regular packet)
    // will not be used if there is _any_ RED payload for the same
    // timeframe. The highest priority packet will have levels {0, 0}. Negative
    // priorities are not allowed.
    bool operator<(const Priority& b) const {
      CheckInvariant();
      b.CheckInvariant();
      if (codec_level == b.codec_level)
        return red_level < b.red_level;
      return codec_level < b.codec_level;
    }
    bool operator==(const Priority& b) const {
      CheckInvariant();
      b.CheckInvariant();
      return codec_level == b.codec_level && red_level == b.red_level;
    }
    // The remaining operators are all derived from operator< and operator==.
    bool operator!=(const Priority& b) const { return !(*this == b); }
    bool operator>(const Priority& b) const { return b < *this; }
    // a <= b holds exactly when b is not smaller than a.
    bool operator<=(const Priority& b) const { return !(b < *this); }
    // a >= b holds exactly when a is not smaller than b.
    bool operator>=(const Priority& b) const { return !(*this < b); }

   private:
    // Both levels must be non-negative (checked in debug builds only).
    void CheckInvariant() const {
      RTC_DCHECK_GE(codec_level, 0);
      RTC_DCHECK_GE(red_level, 0);
    }
  };

  uint32_t timestamp;
  uint16_t sequence_number;
  uint8_t payload_type;
  // Datagram excluding RTP header and header extension.
  rtc::Buffer payload;
  Priority priority;
  std::unique_ptr<TickTimer::Stopwatch> waiting_time;
  std::unique_ptr<AudioDecoder::EncodedAudioFrame> frame;

  Packet();
  Packet(Packet&& b);
  ~Packet();

  // Packets should generally be moved around but sometimes it's useful to make
  // a copy, for example for testing purposes. NOTE: Will only work for
  // un-parsed packets, i.e. |frame| must be unset. The payload will, however,
  // be copied. |waiting_time| will also not be copied.
  Packet Clone() const;

  Packet& operator=(Packet&& b);

  // Comparison operators. Establish a packet ordering based on (1) timestamp,
  // (2) sequence number and (3) redundancy.
  // Timestamp and sequence numbers are compared taking wrap-around into
  // account. For two packets with the same sequence number and timestamp a
  // primary payload is considered "smaller" than a secondary.
  bool operator==(const Packet& rhs) const {
    return (this->timestamp == rhs.timestamp &&
            this->sequence_number == rhs.sequence_number &&
            this->priority == rhs.priority);
  }
  bool operator!=(const Packet& rhs) const { return !operator==(rhs); }
  bool operator<(const Packet& rhs) const {
    if (this->timestamp == rhs.timestamp) {
      if (this->sequence_number == rhs.sequence_number) {
        // Timestamp and sequence numbers are identical - deem the left hand
        // side to be "smaller" (i.e., "earlier") if it has higher priority.
        return this->priority < rhs.priority;
      }
      // Wrap-around aware: |this| is earlier if |rhs| is less than half the
      // 16-bit range ahead of it.
      return (static_cast<uint16_t>(rhs.sequence_number -
                                    this->sequence_number) < 0xFFFF / 2);
    }
    // Same idea for the 32-bit timestamp.
    return (static_cast<uint32_t>(rhs.timestamp - this->timestamp) <
            0xFFFFFFFF / 2);
  }
  bool operator>(const Packet& rhs) const { return rhs.operator<(*this); }
  bool operator<=(const Packet& rhs) const { return !operator>(rhs); }
  bool operator>=(const Packet& rhs) const { return !operator<(rhs); }

  // True when the packet carries neither a parsed |frame| nor payload bytes.
  bool empty() const { return !frame && payload.empty(); }
};
// A list of packets.
using PacketList = std::list<Packet>;
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_H_

View File

@ -0,0 +1,294 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This is the implementation of the PacketBuffer class. It is mostly based on
// an STL list. The list is kept sorted at all times so that the next packet to
// decode is at the beginning of the list.
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
#include <algorithm> // find_if()
#include "webrtc/api/audio_codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
#include "webrtc/rtc_base/logging.h"
namespace webrtc {
namespace {
// Predicate used to locate the insertion point for a new packet in the buffer
// list. Operator() returns true for an existing |packet| that is not ordered
// after |new_packet|, i.e. one that |new_packet| should not precede.
// Only a reference to |new_packet| is kept, so it must outlive the predicate.
class NewTimestampIsLarger {
 public:
  explicit NewTimestampIsLarger(const Packet& new_packet)
      : new_packet_(new_packet) {}

  bool operator()(const Packet& packet) {
    // Equivalent to new_packet_ >= packet.
    return !(new_packet_ < packet);
  }

 private:
  const Packet& new_packet_;
};
// Returns true only when the decoder database has info for both payload types
// |pt1| and |pt2| and the two entries report the same sample rate.
bool EqualSampleRates(uint8_t pt1,
                      uint8_t pt2,
                      const DecoderDatabase& decoder_database) {
  // Both lookups are always performed, in this order.
  const auto* first_info = decoder_database.GetDecoderInfo(pt1);
  const auto* second_info = decoder_database.GetDecoderInfo(pt2);
  if (!first_info || !second_info) {
    return false;
  }
  return first_info->SampleRateHz() == second_info->SampleRateHz();
}
// Reports one discarded packet to |stats|, which must be non-null. A packet
// with |codec_level| above zero is counted as a secondary packet; any other
// packet is counted as a regular one.
void LogPacketDiscarded(int codec_level, StatisticsCalculator* stats) {
  RTC_CHECK(stats);
  const bool is_secondary = codec_level > 0;
  if (!is_secondary) {
    stats->PacketsDiscarded(1);
    return;
  }
  stats->SecondaryPacketsDiscarded(1);
}
} // namespace
// Creates an empty buffer limited to |max_number_of_packets| packets.
// |tick_timer| is stored as a pointer and used to create a stopwatch for
// every inserted packet; it is not owned by the buffer.
PacketBuffer::PacketBuffer(size_t max_number_of_packets,
                           const TickTimer* tick_timer)
    : max_number_of_packets_(max_number_of_packets),
      buffer_(),
      tick_timer_(tick_timer) {}
// Destructor. Empties the buffer, destroying every packet still in it.
PacketBuffer::~PacketBuffer() {
  // Inside the destructor a call to the virtual Flush() always resolves to
  // this class's implementation; the qualified name makes that explicit.
  PacketBuffer::Flush();
}
// Flush the buffer. Every packet held in the list is destroyed.
void PacketBuffer::Flush() {
  buffer_.erase(buffer_.begin(), buffer_.end());
}
// Returns true when the buffer holds no packets.
bool PacketBuffer::Empty() const {
  const bool has_no_packets = buffer_.empty();
  return has_no_packets;
}
// Inserts |packet| into the sorted buffer and takes ownership of it.
//
// Returns kInvalidPacket (without touching the buffer) if |packet| is empty,
// kFlushed if the buffer was full and had to be flushed before the insertion,
// and kOK otherwise. At most one packet per timestamp is kept:
//  - if the buffer already holds a packet with the same timestamp that sorts
//    before or equal to the new one, the new packet is dropped;
//  - if the buffer holds a packet with the same timestamp that sorts after the
//    new one, that packet is erased and the new one takes its place.
// Each dropped packet is reported to |stats| using the codec level of the
// packet that was actually dropped. |stats| must be non-null whenever a packet
// ends up being dropped.
int PacketBuffer::InsertPacket(Packet&& packet, StatisticsCalculator* stats) {
  if (packet.empty()) {
    LOG(LS_WARNING) << "InsertPacket invalid packet";
    return kInvalidPacket;
  }

  RTC_DCHECK_GE(packet.priority.codec_level, 0);
  RTC_DCHECK_GE(packet.priority.red_level, 0);

  int return_val = kOK;

  // Start measuring the time this packet spends in the buffer.
  packet.waiting_time = tick_timer_->GetNewStopwatch();

  if (buffer_.size() >= max_number_of_packets_) {
    // Buffer is full. Flush it.
    Flush();
    LOG(LS_WARNING) << "Packet buffer flushed";
    return_val = kFlushed;
  }

  // Get an iterator pointing to the place in the buffer where the new packet
  // should be inserted. The list is searched from the back, since the most
  // likely case is that the new packet should be near the end of the list.
  PacketList::reverse_iterator rit = std::find_if(
      buffer_.rbegin(), buffer_.rend(),
      NewTimestampIsLarger(packet));

  // The new packet is to be inserted to the right of |rit|. If it has the same
  // timestamp as |rit|, which has a higher priority, do not insert the new
  // packet to list.
  if (rit != buffer_.rend() && packet.timestamp == rit->timestamp) {
    // The new packet is the one being dropped here.
    LogPacketDiscarded(packet.priority.codec_level, stats);
    return return_val;
  }

  // The new packet is to be inserted to the left of |it|. If it has the same
  // timestamp as |it|, which has a lower priority, replace |it| with the new
  // packet.
  PacketList::iterator it = rit.base();
  if (it != buffer_.end() && packet.timestamp == it->timestamp) {
    // The packet already in the buffer is the one being dropped, so its codec
    // level (not the new packet's) decides which discard counter is used.
    LogPacketDiscarded(it->priority.codec_level, stats);
    it = buffer_.erase(it);
  }
  buffer_.insert(it, std::move(packet));  // Insert the packet at that position.

  return return_val;
}
// Moves every packet in |packet_list| into the buffer, in list order, and
// leaves |packet_list| empty.
//
// While doing so, the current speech and comfort-noise payload types are
// tracked through |current_rtp_payload_type| and
// |current_cng_rtp_payload_type|. A change of CNG payload type, a change of
// speech payload type, or a speech packet whose sample rate does not match the
// current CNG payload type flushes the buffer. DTMF packets affect neither
// tracked type.
//
// Returns kFlushed if the buffer was flushed at any point (for one of the
// reasons above or because it was full), kOK if not, or the error code from
// InsertPacket() as soon as an insertion fails.
int PacketBuffer::InsertPacketList(
    PacketList* packet_list,
    const DecoderDatabase& decoder_database,
    rtc::Optional<uint8_t>* current_rtp_payload_type,
    rtc::Optional<uint8_t>* current_cng_rtp_payload_type,
    StatisticsCalculator* stats) {
  RTC_DCHECK(stats);
  bool flushed = false;
  for (auto& packet : *packet_list) {
    const uint8_t payload_type = packet.payload_type;
    if (decoder_database.IsComfortNoise(payload_type)) {
      const bool cng_type_changed =
          *current_cng_rtp_payload_type &&
          **current_cng_rtp_payload_type != payload_type;
      if (cng_type_changed) {
        // New CNG payload type implies new codec type.
        *current_rtp_payload_type = rtc::Optional<uint8_t>();
        Flush();
        flushed = true;
      }
      *current_cng_rtp_payload_type = rtc::Optional<uint8_t>(payload_type);
    } else if (!decoder_database.IsDtmf(payload_type)) {
      // Neither CNG nor DTMF, so this must be speech.
      bool codec_changed = *current_rtp_payload_type &&
                           **current_rtp_payload_type != payload_type;
      if (!codec_changed) {
        // Only compare sample rates when the payload type itself is unchanged.
        codec_changed = *current_cng_rtp_payload_type &&
                        !EqualSampleRates(payload_type,
                                          **current_cng_rtp_payload_type,
                                          decoder_database);
      }
      if (codec_changed) {
        *current_cng_rtp_payload_type = rtc::Optional<uint8_t>();
        Flush();
        flushed = true;
      }
      *current_rtp_payload_type = rtc::Optional<uint8_t>(payload_type);
    }

    const int insert_result = InsertPacket(std::move(packet), stats);
    if (insert_result == kFlushed) {
      // The buffer flushed, but this is not an error. We can still continue.
      flushed = true;
      continue;
    }
    if (insert_result != kOK) {
      // An error occurred. Delete remaining packets in list and return.
      packet_list->clear();
      return insert_result;
    }
  }
  packet_list->clear();
  if (flushed) {
    return kFlushed;
  }
  return kOK;
}
int PacketBuffer::NextTimestamp(uint32_t* next_timestamp) const {
if (Empty()) {
return kBufferEmpty;
}
if (!next_timestamp) {
return kInvalidPointer;
}
*next_timestamp = buffer_.front().timestamp;
return kOK;
}
int PacketBuffer::NextHigherTimestamp(uint32_t timestamp,
uint32_t* next_timestamp) const {
if (Empty()) {
return kBufferEmpty;
}
if (!next_timestamp) {
return kInvalidPointer;
}
PacketList::const_iterator it;
for (it = buffer_.begin(); it != buffer_.end(); ++it) {
if (it->timestamp >= timestamp) {
// Found a packet matching the search.
*next_timestamp = it->timestamp;
return kOK;
}
}
return kNotFound;
}
// Returns a pointer to the first packet in the buffer without removing it, or
// nullptr if the buffer is empty.
const Packet* PacketBuffer::PeekNextPacket() const {
  if (buffer_.empty()) {
    return nullptr;
  }
  return &buffer_.front();
}
// Removes the first packet from the buffer and returns it. Returns an empty
// optional if the buffer holds no packets.
rtc::Optional<Packet> PacketBuffer::GetNextPacket() {
  if (Empty()) {
    // Nothing to extract.
    return rtc::Optional<Packet>();
  }

  // Move the packet out of the list node before the node is dropped.
  rtc::Optional<Packet> extracted(std::move(buffer_.front()));
  // InsertPacket() rejects empty packets, so none should ever come out.
  RTC_DCHECK(!extracted->empty());
  buffer_.pop_front();
  return extracted;
}
// Drops the first packet in the buffer and reports the discard to |stats|.
// Returns kBufferEmpty if there is nothing to drop, kOK otherwise.
int PacketBuffer::DiscardNextPacket(StatisticsCalculator* stats) {
  if (Empty()) {
    return kBufferEmpty;
  }
  const Packet& oldest = buffer_.front();
  // InsertPacket() rejects empty packets, so none should be stored here.
  RTC_DCHECK(!oldest.empty());
  const int codec_level = oldest.priority.codec_level;
  LogPacketDiscarded(codec_level, stats);
  buffer_.pop_front();
  return kOK;
}
// Removes every packet whose timestamp differs from |timestamp_limit| and is
// obsolete according to IsObsoleteTimestamp() with the given
// |horizon_samples|. Each removed packet is reported to |stats|.
void PacketBuffer::DiscardOldPackets(uint32_t timestamp_limit,
                                     uint32_t horizon_samples,
                                     StatisticsCalculator* stats) {
  const auto should_discard = [timestamp_limit, horizon_samples,
                               stats](const Packet& p) {
    const bool obsolete =
        timestamp_limit != p.timestamp &&
        IsObsoleteTimestamp(p.timestamp, timestamp_limit, horizon_samples);
    if (obsolete) {
      LogPacketDiscarded(p.priority.codec_level, stats);
    }
    return obsolete;
  };
  buffer_.remove_if(should_discard);
}
// Removes every packet that is obsolete relative to |timestamp_limit| by
// forwarding to DiscardOldPackets() with a horizon of zero samples.
void PacketBuffer::DiscardAllOldPackets(uint32_t timestamp_limit,
                                        StatisticsCalculator* stats) {
  const uint32_t kNoHorizon = 0;
  DiscardOldPackets(timestamp_limit, kNoHorizon, stats);
}
// Removes every packet carrying |payload_type| from the buffer and reports
// each removal to |stats|.
void PacketBuffer::DiscardPacketsWithPayloadType(uint8_t payload_type,
                                                 StatisticsCalculator* stats) {
  const auto has_payload_type = [payload_type, stats](const Packet& p) {
    const bool match = p.payload_type == payload_type;
    if (match) {
      LogPacketDiscarded(p.priority.codec_level, stats);
    }
    return match;
  };
  buffer_.remove_if(has_payload_type);
}
// Returns the number of packets currently stored in the buffer.
size_t PacketBuffer::NumPacketsInBuffer() const {
  const size_t packet_count = buffer_.size();
  return packet_count;
}
size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const {
size_t num_samples = 0;
size_t last_duration = last_decoded_length;
for (const Packet& packet : buffer_) {
if (packet.frame) {
// TODO(hlundin): Verify that it's fine to count all packets and remove
// this check.
if (packet.priority != Packet::Priority(0, 0)) {
continue;
}
size_t duration = packet.frame->Duration();
if (duration > 0) {
last_duration = duration; // Save the most up-to-date (valid) duration.
}
}
num_samples += last_duration;
}
return num_samples;
}
// Writes the current number of packets to |num_packets| and the configured
// capacity to |max_num_packets|. Both pointers are dereferenced without a
// null check.
void PacketBuffer::BufferStat(int* num_packets, int* max_num_packets) const {
  const size_t current_size = buffer_.size();
  *num_packets = static_cast<int>(current_size);
  *max_num_packets = static_cast<int>(max_number_of_packets_);
}
} // namespace webrtc

View File

@ -0,0 +1,148 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
#include "webrtc/api/optional.h"
#include "webrtc/modules/audio_coding/neteq/packet.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class DecoderDatabase;
class StatisticsCalculator;
class TickTimer;
// This is the actual buffer holding the packets before decoding.
class PacketBuffer {
 public:
  enum BufferReturnCodes {
    kOK = 0,
    kFlushed,
    kNotFound,
    kBufferEmpty,
    kInvalidPacket,
    kInvalidPointer
  };

  // Creates a buffer that holds at most |max_number_of_packets| packets.
  // |tick_timer| is kept as a pointer and is not owned by the buffer.
  PacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer);

  // Flushes the buffer, destroying all packets in it.
  virtual ~PacketBuffer();

  // Removes and destroys all packets in the buffer.
  virtual void Flush();

  // Returns true if the buffer holds no packets.
  virtual bool Empty() const;

  // Inserts |packet| into the buffer, which takes over ownership of it.
  // Returns PacketBuffer::kInvalidPacket if |packet| is empty,
  // PacketBuffer::kFlushed if the buffer was full and was flushed before the
  // insertion, and PacketBuffer::kOK otherwise. Only one packet per timestamp
  // is kept; the packet that loses is reported to |stats| as discarded.
  virtual int InsertPacket(Packet&& packet, StatisticsCalculator* stats);

  // Inserts all packets in |packet_list| into the buffer, which takes over
  // ownership of them; |packet_list| is empty when the call returns.
  // Returns PacketBuffer::kOK if all packets were handled without a flush,
  // PacketBuffer::kFlushed if the buffer was flushed along the way (because it
  // was full or because the payload type changed), or the error code of the
  // first failing InsertPacket() call.
  // |decoder_database| and the two current_*_payload_type parameters are
  // included for legacy compatibility.
  // TODO(hlundin): Redesign to not use current_*_payload_type and
  // decoder_database.
  virtual int InsertPacketList(
      PacketList* packet_list,
      const DecoderDatabase& decoder_database,
      rtc::Optional<uint8_t>* current_rtp_payload_type,
      rtc::Optional<uint8_t>* current_cng_rtp_payload_type,
      StatisticsCalculator* stats);

  // Writes the timestamp of the first packet in the buffer to the output
  // variable |next_timestamp|.
  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
  // PacketBuffer::kInvalidPointer if |next_timestamp| is null, and
  // PacketBuffer::kOK otherwise.
  virtual int NextTimestamp(uint32_t* next_timestamp) const;

  // Writes to |next_timestamp| the timestamp of the first packet in the buffer
  // whose timestamp is no lower than the input limit |timestamp|.
  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
  // PacketBuffer::kInvalidPointer if |next_timestamp| is null,
  // PacketBuffer::kNotFound if no such packet exists, and PacketBuffer::kOK
  // otherwise.
  virtual int NextHigherTimestamp(uint32_t timestamp,
                                  uint32_t* next_timestamp) const;

  // Returns a (constant) pointer to the first packet in the buffer, or
  // nullptr if the buffer is empty.
  virtual const Packet* PeekNextPacket() const;

  // Removes the first packet from the buffer and returns it.
  // Returns an empty optional if the buffer is empty.
  virtual rtc::Optional<Packet> GetNextPacket();

  // Discards the first packet in the buffer and reports it to |stats|.
  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
  // PacketBuffer::kOK otherwise.
  virtual int DiscardNextPacket(StatisticsCalculator* stats);

  // Discards all packets that are (strictly) older than timestamp_limit,
  // but newer than timestamp_limit - horizon_samples. Setting horizon_samples
  // to zero implies that the horizon is set to half the timestamp range. That
  // is, if a packet is more than 2^31 timestamps into the future compared with
  // timestamp_limit (including wrap-around), it is considered old.
  virtual void DiscardOldPackets(uint32_t timestamp_limit,
                                 uint32_t horizon_samples,
                                 StatisticsCalculator* stats);

  // Discards all packets that are (strictly) older than timestamp_limit.
  virtual void DiscardAllOldPackets(uint32_t timestamp_limit,
                                    StatisticsCalculator* stats);

  // Removes all packets with a specific payload type from the buffer.
  virtual void DiscardPacketsWithPayloadType(uint8_t payload_type,
                                             StatisticsCalculator* stats);

  // Returns the number of packets currently stored in the buffer.
  virtual size_t NumPacketsInBuffer() const;

  // Returns an estimate of the number of samples in the buffer.
  // |last_decoded_length| is used as the duration of packets until a parsed
  // packet provides a positive frame duration. Parsed packets with a priority
  // other than {0, 0} are not counted.
  virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;

  // Writes the current number of packets and the maximum number of packets
  // to |num_packets| and |max_num_packets|, respectively.
  virtual void BufferStat(int* num_packets, int* max_num_packets) const;

  // Static method returning true if |timestamp| is older than |timestamp_limit|
  // but less than |horizon_samples| behind |timestamp_limit|. For instance,
  // with timestamp_limit = 100 and horizon_samples = 10, a timestamp in the
  // range (90, 100) is considered obsolete, and will yield true.
  // Setting |horizon_samples| to 0 is the same as setting it to 2^31, i.e.,
  // half the 32-bit timestamp range.
  static bool IsObsoleteTimestamp(uint32_t timestamp,
                                  uint32_t timestamp_limit,
                                  uint32_t horizon_samples) {
    // Not older than the limit: cannot be obsolete.
    if (!IsNewerTimestamp(timestamp_limit, timestamp)) {
      return false;
    }
    // A zero horizon means that any older timestamp qualifies.
    if (horizon_samples == 0) {
      return true;
    }
    return IsNewerTimestamp(timestamp, timestamp_limit - horizon_samples);
  }

 private:
  size_t max_number_of_packets_;
  PacketList buffer_;
  const TickTimer* tick_timer_;
  RTC_DISALLOW_COPY_AND_ASSIGN(PacketBuffer);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_

View File

@ -0,0 +1,737 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PacketBuffer class.
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
#include "webrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h"
#include "webrtc/modules/audio_coding/neteq/packet.h"
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
#include "webrtc/test/gmock.h"
#include "webrtc/test/gtest.h"
using ::testing::Return;
using ::testing::StrictMock;
using ::testing::_;
using ::testing::InSequence;
using ::testing::MockFunction;
namespace webrtc {
// Helper class that produces a stream of packets. Each generated packet gets
// the current sequence number, timestamp and payload type; the sequence number
// and timestamp are then advanced for the next packet. Packets are returned by
// value.
class PacketGenerator {
 public:
  PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);
  virtual ~PacketGenerator() = default;

  // Restarts the generator with new values for all four fields.
  void Reset(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);

  // Returns a packet whose payload has |payload_size_bytes| bytes.
  Packet NextPacket(int payload_size_bytes);

  uint16_t seq_no_;  // Sequence number of the next packet.
  uint32_t ts_;      // Timestamp of the next packet.
  uint8_t pt_;       // Payload type given to every packet.
  int frame_size_;   // Timestamp increment between consecutive packets.
};
// Creates a generator whose first packet has sequence number |seq_no|,
// timestamp |ts| and payload type |pt|; |frame_size| is the timestamp step
// between consecutive packets.
PacketGenerator::PacketGenerator(uint16_t seq_no,
                                 uint32_t ts,
                                 uint8_t pt,
                                 int frame_size)
    : seq_no_(seq_no), ts_(ts), pt_(pt), frame_size_(frame_size) {}
// Overwrites the complete generator state, so that the next generated packet
// uses |seq_no|, |ts| and |pt|, and subsequent packets advance the timestamp
// by |frame_size|.
void PacketGenerator::Reset(uint16_t seq_no,
                            uint32_t ts,
                            uint8_t pt,
                            int frame_size) {
  frame_size_ = frame_size;
  pt_ = pt;
  ts_ = ts;
  seq_no_ = seq_no;
}
// Builds the next packet in the stream with a payload of |payload_size_bytes|
// bytes, then advances the sequence number by one and the timestamp by
// |frame_size_|.
Packet PacketGenerator::NextPacket(int payload_size_bytes) {
  Packet next;
  next.payload_type = pt_;
  next.sequence_number = seq_no_++;
  next.timestamp = ts_;
  ts_ += frame_size_;
  next.payload.SetSize(payload_size_bytes);
  return next;
}
// Describes one packet to insert into the buffer in a test, together with the
// position at which it is expected to come out again. Instances are created
// with aggregate initialization, so the member order must not change.
struct PacketsToInsert {
// RTP sequence number given to the packet.
uint16_t sequence_number;
// RTP timestamp given to the packet.
uint32_t timestamp;
// Payload type given to the packet.
uint8_t payload_type;
// True for a primary payload, false for a secondary (redundant) one.
bool primary;
// Order of this packet to appear upon extraction, after inserting a series
// of packets. A negative number means that it should have been discarded
// before extraction.
int extract_order;
};
// Start of test definitions.
// Verifies that a PacketBuffer can be created on the heap, starts out empty,
// and can be destroyed again.
TEST(PacketBuffer, CreateAndDestroy) {
  TickTimer tick_timer;
  PacketBuffer* const packet_buffer =
      new PacketBuffer(10, &tick_timer);  // Capacity: 10 packets.
  EXPECT_TRUE(packet_buffer->Empty());
  delete packet_buffer;
}
// Inserts a single packet and verifies that the buffer reports it as the next
// packet, with the right timestamp and contents.
TEST(PacketBuffer, InsertPacket) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
  PacketGenerator gen(17u, 4711u, 0, 10);
  StrictMock<MockStatisticsCalculator> mock_stats;

  const int payload_len = 100;
  const Packet packet = gen.NextPacket(payload_len);
  // Insert a clone so that |packet| remains available for comparison below.
  EXPECT_EQ(PacketBuffer::kOK,
            buffer.InsertPacket(packet.Clone(), &mock_stats));

  // Initialized so that a failing NextTimestamp() does not lead to reading an
  // indeterminate value in the comparison that follows.
  uint32_t next_ts = 0;
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
  EXPECT_EQ(4711u, next_ts);
  EXPECT_FALSE(buffer.Empty());
  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());

  const Packet* next_packet = buffer.PeekNextPacket();
  // Abort the test instead of dereferencing a null pointer if the buffer
  // unexpectedly has no packet to offer.
  ASSERT_TRUE(next_packet != nullptr);
  EXPECT_EQ(packet, *next_packet);  // Compare contents.

  // Do not explicitly flush buffer or delete packet to test that it is deleted
  // with the buffer. (Tested with Valgrind or similar tool.)
}
// Verifies that Flush() removes every packet from a filled buffer.
TEST(PacketBuffer, FlushBuffer) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // Capacity: 10 packets.
  StrictMock<MockStatisticsCalculator> mock_stats;
  PacketGenerator gen(0, 0, 0, 10);
  const int payload_len = 10;

  // Fill the buffer with 10 small packets; each insertion should succeed.
  int inserted = 0;
  while (inserted < 10) {
    EXPECT_EQ(PacketBuffer::kOK,
              buffer.InsertPacket(gen.NextPacket(payload_len), &mock_stats));
    ++inserted;
  }
  EXPECT_FALSE(buffer.Empty());
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());

  // The buffer is expected to dispose of the payloads on its own.
  buffer.Flush();
  EXPECT_TRUE(buffer.Empty());
  EXPECT_EQ(0u, buffer.NumPacketsInBuffer());
}
// Fills the buffer to its capacity, inserts one more packet, and verifies that
// the buffer reports a flush and afterwards holds only the new packet.
TEST(PacketBuffer, OverfillBuffer) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // Capacity: 10 packets.
  PacketGenerator gen(0, 0, 0, 10);
  StrictMock<MockStatisticsCalculator> mock_stats;
  const int payload_len = 10;

  // The first 10 small packets fit.
  for (int count = 0; count < 10; ++count) {
    EXPECT_EQ(PacketBuffer::kOK,
              buffer.InsertPacket(gen.NextPacket(payload_len), &mock_stats));
  }
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());

  // The first inserted packet (timestamp 0) should be first in line.
  uint32_t front_ts;
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&front_ts));
  EXPECT_EQ(0u, front_ts);

  // The 11th packet should flush the buffer and then be inserted.
  const Packet overflow_packet = gen.NextPacket(payload_len);
  EXPECT_EQ(PacketBuffer::kFlushed,
            buffer.InsertPacket(overflow_packet.Clone(), &mock_stats));
  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());

  // The packet inserted last is now first in line.
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&front_ts));
  EXPECT_EQ(overflow_packet.timestamp, front_ts);

  // Flush buffer to delete all packets.
  buffer.Flush();
}
// Inserts a list of 10 packets in a single call and verifies that all of them
// end up in the buffer and that the current payload type is updated.
TEST(PacketBuffer, InsertPacketList) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // Capacity: 10 packets.
  PacketGenerator gen(0, 0, 0, 10);
  PacketList packets;
  const int payload_len = 10;

  // Prepare 10 small packets.
  for (int count = 0; count < 10; ++count) {
    packets.push_back(gen.NextPacket(payload_len));
  }

  // Payload type 0 is reported as PCMu by the mocked decoder database.
  MockDecoderDatabase decoder_database;
  auto factory = CreateBuiltinAudioDecoderFactory();
  const DecoderDatabase::DecoderInfo info(NetEqDecoder::kDecoderPCMu, factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(0))
      .WillRepeatedly(Return(&info));

  StrictMock<MockStatisticsCalculator> mock_stats;
  rtc::Optional<uint8_t> current_pt;
  rtc::Optional<uint8_t> current_cng_pt;
  EXPECT_EQ(PacketBuffer::kOK,
            buffer.InsertPacketList(&packets, decoder_database, &current_pt,
                                    &current_cng_pt, &mock_stats));

  // The PacketBuffer should have depleted the list.
  EXPECT_TRUE(packets.empty());
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
  // Current payload type changed to 0.
  EXPECT_EQ(rtc::Optional<uint8_t>(0), current_pt);
  // CNG payload type not changed.
  EXPECT_FALSE(current_cng_pt);

  buffer.Flush();  // Clean up.
  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
// Test inserting a list of packets. Last packet is of a different payload type.
// Expecting the buffer to flush.
// TODO(hlundin): Remove this test when legacy operation is no longer needed.
TEST(PacketBuffer, InsertPacketListChangePayloadType) {
TickTimer tick_timer;
PacketBuffer buffer(10, &tick_timer); // 10 packets.
PacketGenerator gen(0, 0, 0, 10);
PacketList list;
const int payload_len = 10;
// Insert 10 small packets.
for (int i = 0; i < 10; ++i) {
list.push_back(gen.NextPacket(payload_len));
}
// Insert 11th packet of another payload type (not CNG).
{
Packet packet = gen.NextPacket(payload_len);
packet.payload_type = 1;
list.push_back(std::move(packet));
}
// The mocked decoder database maps payload type 0 to PCMu and payload type 1
// to PCMa.
MockDecoderDatabase decoder_database;
auto factory = CreateBuiltinAudioDecoderFactory();
const DecoderDatabase::DecoderInfo info0(NetEqDecoder::kDecoderPCMu, factory);
EXPECT_CALL(decoder_database, GetDecoderInfo(0))
.WillRepeatedly(Return(&info0));
const DecoderDatabase::DecoderInfo info1(NetEqDecoder::kDecoderPCMa, factory);
EXPECT_CALL(decoder_database, GetDecoderInfo(1))
.WillRepeatedly(Return(&info1));
StrictMock<MockStatisticsCalculator> mock_stats;
rtc::Optional<uint8_t> current_pt;
rtc::Optional<uint8_t> current_cng_pt;
// The change of payload type within the list is expected to be reported as a
// flush.
EXPECT_EQ(PacketBuffer::kFlushed,
buffer.InsertPacketList(&list, decoder_database, &current_pt,
&current_cng_pt, &mock_stats));
EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list.
EXPECT_EQ(1u, buffer.NumPacketsInBuffer()); // Only the last packet.
EXPECT_EQ(rtc::Optional<uint8_t>(1),
current_pt); // Current payload type changed to 1.
EXPECT_FALSE(current_cng_pt); // CNG payload type not changed.
buffer.Flush(); // Clean up.
EXPECT_CALL(decoder_database, Die()); // Called when object is deleted.
}
// Inserts a mix of primary and secondary (redundant) packets, with timestamps
// and sequence numbers that wrap around, and verifies both which packets are
// discarded on insertion and the order in which the rest are extracted.
TEST(PacketBuffer, ExtractOrderRedundancy) {
TickTimer tick_timer;
PacketBuffer buffer(100, &tick_timer); // 100 packets.
const int kPackets = 18;
const int kFrameSize = 10;
const int kPayloadLength = 10;
// Columns: sequence number, timestamp, payload type, primary, extract order
// (negative means the insertion is expected to cause one discard).
PacketsToInsert packet_facts[kPackets] = {
{0xFFFD, 0xFFFFFFD7, 0, true, 0},
{0xFFFE, 0xFFFFFFE1, 0, true, 1},
{0xFFFE, 0xFFFFFFD7, 1, false, -1},
{0xFFFF, 0xFFFFFFEB, 0, true, 2},
{0xFFFF, 0xFFFFFFE1, 1, false, -1},
{0x0000, 0xFFFFFFF5, 0, true, 3},
{0x0000, 0xFFFFFFEB, 1, false, -1},
{0x0001, 0xFFFFFFFF, 0, true, 4},
{0x0001, 0xFFFFFFF5, 1, false, -1},
{0x0002, 0x0000000A, 0, true, 5},
{0x0002, 0xFFFFFFFF, 1, false, -1},
{0x0003, 0x0000000A, 1, false, -1},
{0x0004, 0x0000001E, 0, true, 7},
{0x0004, 0x00000014, 1, false, 6},
{0x0005, 0x0000001E, 0, true, -1},
{0x0005, 0x00000014, 1, false, -1},
{0x0006, 0x00000028, 0, true, 8},
{0x0006, 0x0000001E, 1, false, -1},
};
// Number of entries above with a non-negative extract order.
const size_t kExpectPacketsInBuffer = 9;
std::vector<Packet> expect_order(kExpectPacketsInBuffer);
PacketGenerator gen(0, 0, 0, kFrameSize);
StrictMock<MockStatisticsCalculator> mock_stats;
// Interleaving the EXPECT_CALL sequence with expectations on the MockFunction
// check ensures that the expected discard notification (if any) happens
// during the corresponding InsertPacket call, before its check point.
InSequence s;
MockFunction<void(int check_point_id)> check;
for (int i = 0; i < kPackets; ++i) {
gen.Reset(packet_facts[i].sequence_number,
packet_facts[i].timestamp,
packet_facts[i].payload_type,
kFrameSize);
Packet packet = gen.NextPacket(kPayloadLength);
// Codec level 0 marks a primary packet, level 1 a secondary one.
packet.priority.codec_level = packet_facts[i].primary ? 0 : 1;
if (packet_facts[i].extract_order < 0) {
// A discard is expected; which statistics callback fires depends on whether
// the inserted packet is secondary or primary.
if (packet.priority.codec_level > 0) {
EXPECT_CALL(mock_stats, SecondaryPacketsDiscarded(1));
} else {
EXPECT_CALL(mock_stats, PacketsDiscarded(1));
}
}
EXPECT_CALL(check, Call(i));
EXPECT_EQ(PacketBuffer::kOK,
buffer.InsertPacket(packet.Clone(), &mock_stats));
if (packet_facts[i].extract_order >= 0) {
expect_order[packet_facts[i].extract_order] = std::move(packet);
}
check.Call(i);
}
EXPECT_EQ(kExpectPacketsInBuffer, buffer.NumPacketsInBuffer());
for (size_t i = 0; i < kExpectPacketsInBuffer; ++i) {
const rtc::Optional<Packet> packet = buffer.GetNextPacket();
EXPECT_EQ(packet, expect_order[i]); // Compare contents.
}
EXPECT_TRUE(buffer.Empty());
}
// Exercises DiscardNextPacket(), DiscardOldPackets() and
// DiscardAllOldPackets(), verifying after each step which packet is at the
// front of the buffer and that every discard is reported to the statistics
// calculator.
TEST(PacketBuffer, DiscardPackets) {
  TickTimer tick_timer;
  PacketBuffer buffer(100, &tick_timer);  // 100 packets.
  const uint16_t start_seq_no = 17;
  const uint32_t start_ts = 4711;
  const uint32_t ts_increment = 10;
  PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
  const int payload_len = 10;
  StrictMock<MockStatisticsCalculator> mock_stats;

  constexpr int kTotalPackets = 10;
  // Insert 10 small packets. Each insertion is verified, so that a failure
  // here is not mistaken for a failure of the discard logic below.
  for (int i = 0; i < kTotalPackets; ++i) {
    EXPECT_EQ(PacketBuffer::kOK,
              buffer.InsertPacket(gen.NextPacket(payload_len), &mock_stats));
  }
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());

  uint32_t current_ts = start_ts;

  // Discard them one by one and make sure that the right packets are at the
  // front of the buffer.
  constexpr int kDiscardPackets = 5;

  // Interleaving the EXPECT_CALL sequence with expectations on the MockFunction
  // check ensures that exactly one call to PacketsDiscarded happens in each
  // DiscardNextPacket call.
  InSequence s;
  MockFunction<void(int check_point_id)> check;
  for (int i = 0; i < kDiscardPackets; ++i) {
    uint32_t ts;
    EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts));
    EXPECT_EQ(current_ts, ts);
    EXPECT_CALL(mock_stats, PacketsDiscarded(1));
    EXPECT_CALL(check, Call(i));
    EXPECT_EQ(PacketBuffer::kOK, buffer.DiscardNextPacket(&mock_stats));
    current_ts += ts_increment;
    check.Call(i);
  }

  constexpr int kRemainingPackets = kTotalPackets - kDiscardPackets;
  // This will discard all remaining packets but one. The oldest packet is older
  // than the indicated horizon_samples, and will thus be left in the buffer.
  constexpr size_t kSkipPackets = 1;
  EXPECT_CALL(mock_stats, PacketsDiscarded(1))
      .Times(kRemainingPackets - kSkipPackets);
  EXPECT_CALL(check, Call(17));  // Arbitrary id number.
  buffer.DiscardOldPackets(start_ts + kTotalPackets * ts_increment,
                           kRemainingPackets * ts_increment, &mock_stats);
  check.Call(17);  // Same arbitrary id number.

  EXPECT_EQ(kSkipPackets, buffer.NumPacketsInBuffer());
  uint32_t ts;
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts));
  EXPECT_EQ(current_ts, ts);

  // Discard all remaining packets.
  EXPECT_CALL(mock_stats, PacketsDiscarded(kSkipPackets));
  buffer.DiscardAllOldPackets(start_ts + kTotalPackets * ts_increment,
                              &mock_stats);

  EXPECT_TRUE(buffer.Empty());
}
// Inserts a list of packets that arrive out of order and verifies that the
// buffer hands them back sorted by timestamp.
TEST(PacketBuffer, Reordering) {
  TickTimer tick_timer;
  PacketBuffer buffer(100, &tick_timer);  // Capacity: 100 packets.
  const uint16_t start_seq_no = 17;
  const uint32_t start_ts = 4711;
  const uint32_t ts_increment = 10;
  PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
  const int payload_len = 10;

  // Generate 10 small packets. Odd-numbered packets are prepended to the list
  // and even-numbered packets are appended, which yields a (rather strange)
  // reordering.
  PacketList shuffled;
  for (int index = 0; index < 10; ++index) {
    Packet packet = gen.NextPacket(payload_len);
    const bool is_odd = (index % 2) != 0;
    if (is_odd) {
      shuffled.push_front(std::move(packet));
    } else {
      shuffled.push_back(std::move(packet));
    }
  }

  // Payload type 0 is reported as PCMu by the mocked decoder database.
  MockDecoderDatabase decoder_database;
  auto factory = CreateBuiltinAudioDecoderFactory();
  const DecoderDatabase::DecoderInfo info(NetEqDecoder::kDecoderPCMu, factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(0))
      .WillRepeatedly(Return(&info));

  rtc::Optional<uint8_t> current_pt;
  rtc::Optional<uint8_t> current_cng_pt;
  StrictMock<MockStatisticsCalculator> mock_stats;
  EXPECT_EQ(PacketBuffer::kOK,
            buffer.InsertPacketList(&shuffled, decoder_database, &current_pt,
                                    &current_cng_pt, &mock_stats));
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());

  // Extract all packets; timestamps must increase by |ts_increment| each time.
  uint32_t expected_ts = start_ts;
  for (int index = 0; index < 10; ++index, expected_ts += ts_increment) {
    const rtc::Optional<Packet> packet = buffer.GetNextPacket();
    ASSERT_TRUE(packet);
    EXPECT_EQ(expected_ts, packet->timestamp);
  }
  EXPECT_TRUE(buffer.Empty());

  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
// The test first inserts a packet with narrow-band CNG, then a packet with
// wide-band speech. The expected behavior of the packet buffer is to detect a
// change in sample rate, even though no speech packet has been inserted before,
// and flush out the CNG packet.
TEST(PacketBuffer, CngFirstThenSpeechWithNewSampleRate) {
TickTimer tick_timer;
PacketBuffer buffer(10, &tick_timer); // 10 packets.
const uint8_t kCngPt = 13;
const int kPayloadLen = 10;
const uint8_t kSpeechPt = 100;
// The mocked decoder database maps |kCngPt| to narrow-band CNG and
// |kSpeechPt| to wide-band PCM16b.
MockDecoderDatabase decoder_database;
auto factory = CreateBuiltinAudioDecoderFactory();
const DecoderDatabase::DecoderInfo info_cng(NetEqDecoder::kDecoderCNGnb,
factory);
EXPECT_CALL(decoder_database, GetDecoderInfo(kCngPt))
.WillRepeatedly(Return(&info_cng));
const DecoderDatabase::DecoderInfo info_speech(NetEqDecoder::kDecoderPCM16Bwb,
factory);
EXPECT_CALL(decoder_database, GetDecoderInfo(kSpeechPt))
.WillRepeatedly(Return(&info_speech));
// Insert first packet, which is narrow-band CNG.
PacketGenerator gen(0, 0, kCngPt, 10);
PacketList list;
list.push_back(gen.NextPacket(kPayloadLen));
rtc::Optional<uint8_t> current_pt;
rtc::Optional<uint8_t> current_cng_pt;
StrictMock<MockStatisticsCalculator> mock_stats;
EXPECT_EQ(PacketBuffer::kOK,
buffer.InsertPacketList(&list, decoder_database, &current_pt,
&current_cng_pt, &mock_stats));
EXPECT_TRUE(list.empty());
EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
// Stop here if there is no packet, to avoid dereferencing a null pointer.
ASSERT_TRUE(buffer.PeekNextPacket());
EXPECT_EQ(kCngPt, buffer.PeekNextPacket()->payload_type);
EXPECT_FALSE(current_pt); // Current payload type not set.
EXPECT_EQ(rtc::Optional<uint8_t>(kCngPt),
current_cng_pt); // CNG payload type set.
// Insert second packet, which is wide-band speech.
{
Packet packet = gen.NextPacket(kPayloadLen);
packet.payload_type = kSpeechPt;
list.push_back(std::move(packet));
}
// Expect the buffer to flush out the CNG packet, since it does not match the
// new speech sample rate.
EXPECT_EQ(PacketBuffer::kFlushed,
buffer.InsertPacketList(&list, decoder_database, &current_pt,
&current_cng_pt, &mock_stats));
EXPECT_TRUE(list.empty());
EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
// Stop here if there is no packet, to avoid dereferencing a null pointer.
ASSERT_TRUE(buffer.PeekNextPacket());
EXPECT_EQ(kSpeechPt, buffer.PeekNextPacket()->payload_type);
EXPECT_EQ(rtc::Optional<uint8_t>(kSpeechPt),
current_pt); // Current payload type set.
EXPECT_FALSE(current_cng_pt); // CNG payload type reset.
buffer.Flush(); // Clean up.
EXPECT_CALL(decoder_database, Die()); // Called when object is deleted.
}
// Exercises the error paths of PacketBuffer: inserting a packet with an empty
// payload, querying and discarding from an empty buffer, passing null output
// pointers, and inserting a packet list that contains an invalid packet.
TEST(PacketBuffer, Failures) {
  const uint16_t start_seq_no = 17;
  const uint32_t start_ts = 4711;
  const uint32_t ts_increment = 10;
  int payload_len = 100;
  PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
  TickTimer tick_timer;
  StrictMock<MockStatisticsCalculator> mock_stats;

  PacketBuffer* buffer = new PacketBuffer(100, &tick_timer);  // 100 packets.
  {
    // A packet without payload is invalid and must be rejected.
    Packet packet = gen.NextPacket(payload_len);
    packet.payload.Clear();
    EXPECT_EQ(PacketBuffer::kInvalidPacket,
              buffer->InsertPacket(std::move(packet), &mock_stats));
  }
  // Buffer should still be empty. Test all empty-checks.
  uint32_t temp_ts;
  EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->NextTimestamp(&temp_ts));
  EXPECT_EQ(PacketBuffer::kBufferEmpty,
            buffer->NextHigherTimestamp(0, &temp_ts));
  EXPECT_EQ(nullptr, buffer->PeekNextPacket());
  EXPECT_FALSE(buffer->GetNextPacket());

  // Discarding packets will not invoke mock_stats.PacketsDiscarded() because
  // the packet buffer is empty.
  EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->DiscardNextPacket(&mock_stats));
  buffer->DiscardAllOldPackets(0, &mock_stats);

  // Insert one packet to make the buffer non-empty.
  EXPECT_EQ(PacketBuffer::kOK,
            buffer->InsertPacket(gen.NextPacket(payload_len), &mock_stats));
  EXPECT_EQ(PacketBuffer::kInvalidPointer, buffer->NextTimestamp(nullptr));
  EXPECT_EQ(PacketBuffer::kInvalidPointer,
            buffer->NextHigherTimestamp(0, nullptr));
  delete buffer;

  // Insert packet list of three packets, where the second packet has an invalid
  // payload. Expect first packet to be inserted, and the remaining two to be
  // discarded.
  buffer = new PacketBuffer(100, &tick_timer);  // 100 packets.
  PacketList list;
  list.push_back(gen.NextPacket(payload_len));  // Valid packet.
  {
    Packet packet = gen.NextPacket(payload_len);
    packet.payload.Clear();  // Invalid.
    list.push_back(std::move(packet));
  }
  list.push_back(gen.NextPacket(payload_len));  // Valid packet.
  MockDecoderDatabase decoder_database;
  auto factory = CreateBuiltinAudioDecoderFactory();
  const DecoderDatabase::DecoderInfo info(NetEqDecoder::kDecoderPCMu, factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(0))
      .WillRepeatedly(Return(&info));
  rtc::Optional<uint8_t> current_pt;
  rtc::Optional<uint8_t> current_cng_pt;
  EXPECT_EQ(PacketBuffer::kInvalidPacket,
            buffer->InsertPacketList(&list, decoder_database, &current_pt,
                                     &current_cng_pt, &mock_stats));
  EXPECT_TRUE(list.empty());  // The PacketBuffer should have depleted the list.
  EXPECT_EQ(1u, buffer->NumPacketsInBuffer());
  delete buffer;
  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
// Test the packet comparison operators.
// |x < y| should be true if packet x "goes before" packet y.
TEST(PacketBuffer, ComparePackets) {
PacketGenerator gen(0, 0, 0, 10);
Packet a(gen.NextPacket(10)); // SN = 0, TS = 0.
Packet b(gen.NextPacket(10)); // SN = 1, TS = 10.
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_TRUE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a >= b);
// Testing wrap-around case; 'a' is earlier but has a larger timestamp value.
a.timestamp = 0xFFFFFFFF - 10;
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_TRUE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a >= b);
// Test equal packets.
EXPECT_TRUE(a == a);
EXPECT_FALSE(a != a);
EXPECT_FALSE(a < a);
EXPECT_FALSE(a > a);
EXPECT_TRUE(a <= a);
EXPECT_TRUE(a >= a);
// Test equal timestamps but different sequence numbers (0 and 1).
a.timestamp = b.timestamp;
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_TRUE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a >= b);
// Test equal timestamps but different sequence numbers (65535 and 1); 'a' is
// still expected to go first, i.e., the sequence number wrap-around case.
a.sequence_number = 0xFFFF;
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_TRUE(a < b);
EXPECT_FALSE(a > b);
EXPECT_TRUE(a <= b);
EXPECT_FALSE(a >= b);
// Test equal timestamps and sequence numbers, but differing priorities.
a.sequence_number = b.sequence_number;
a.priority = {1, 0};
b.priority = {0, 0};
// a after b
EXPECT_FALSE(a == b);
EXPECT_TRUE(a != b);
EXPECT_FALSE(a < b);
EXPECT_TRUE(a > b);
EXPECT_FALSE(a <= b);
EXPECT_TRUE(a >= b);
// The timestamps and sequence numbers of c and d are overwritten below, so
// that only the priorities differ.
Packet c(gen.NextPacket(0)); // SN = 2, TS = 20.
Packet d(gen.NextPacket(0)); // SN = 3, TS = 30.
c.timestamp = b.timestamp;
d.timestamp = b.timestamp;
c.sequence_number = b.sequence_number;
d.sequence_number = b.sequence_number;
c.priority = {1, 1};
d.priority = {0, 1};
// c after d
EXPECT_FALSE(c == d);
EXPECT_TRUE(c != d);
EXPECT_FALSE(c < d);
EXPECT_TRUE(c > d);
EXPECT_FALSE(c <= d);
EXPECT_TRUE(c >= d);
// c after a
EXPECT_FALSE(c == a);
EXPECT_TRUE(c != a);
EXPECT_FALSE(c < a);
EXPECT_TRUE(c > a);
EXPECT_FALSE(c <= a);
EXPECT_TRUE(c >= a);
// c after b
EXPECT_FALSE(c == b);
EXPECT_TRUE(c != b);
EXPECT_FALSE(c < b);
EXPECT_TRUE(c > b);
EXPECT_FALSE(c <= b);
EXPECT_TRUE(c >= b);
// a after d
EXPECT_FALSE(a == d);
EXPECT_TRUE(a != d);
EXPECT_FALSE(a < d);
EXPECT_TRUE(a > d);
EXPECT_FALSE(a <= d);
EXPECT_TRUE(a >= d);
// d after b
EXPECT_FALSE(d == b);
EXPECT_TRUE(d != b);
EXPECT_FALSE(d < b);
EXPECT_TRUE(d > b);
EXPECT_FALSE(d <= b);
EXPECT_TRUE(d >= b);
}
namespace {
// Checks PacketBuffer::IsObsoleteTimestamp() around |limit_timestamp|, first
// with a zero horizon and then with a fixed horizon of 10 samples. All
// timestamp arithmetic is done in uint32_t and is expected to wrap around.
void TestIsObsoleteTimestamp(uint32_t limit_timestamp) {
  // Check with zero horizon, which implies that the horizon is at 2^31, i.e.,
  // half the timestamp range.
  static const uint32_t kZeroHorizon = 0;
  static const uint32_t k2Pow31Minus1 = 0x7FFFFFFF;
  // Written as an unsigned constant; the expression 1 << 31 would shift into
  // the sign bit of a signed int.
  static const uint32_t k2Pow31 = 0x80000000;
  // Timestamp on the limit is not old.
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp, limit_timestamp, kZeroHorizon));
  // 1 sample behind is old.
  EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp - 1, limit_timestamp, kZeroHorizon));
  // 2^31 - 1 samples behind is old.
  EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp - k2Pow31Minus1, limit_timestamp, kZeroHorizon));
  // 1 sample ahead is not old.
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp + 1, limit_timestamp, kZeroHorizon));
  // If |t1-t2|=2^31 and t1>t2, t2 is older than t1 but not the opposite.
  uint32_t other_timestamp = limit_timestamp + k2Pow31;
  uint32_t lowest_timestamp = std::min(limit_timestamp, other_timestamp);
  uint32_t highest_timestamp = std::max(limit_timestamp, other_timestamp);
  EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(
      lowest_timestamp, highest_timestamp, kZeroHorizon));
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      highest_timestamp, lowest_timestamp, kZeroHorizon));

  // Fixed horizon at 10 samples.
  static const uint32_t kHorizon = 10;
  // Timestamp on the limit is not old.
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp, limit_timestamp, kHorizon));
  // 1 sample behind is old.
  EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp - 1, limit_timestamp, kHorizon));
  // 9 samples behind is old.
  EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp - 9, limit_timestamp, kHorizon));
  // 10 samples behind is not old.
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp - 10, limit_timestamp, kHorizon));
  // 2^31 - 1 samples behind is not old.
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp - k2Pow31Minus1, limit_timestamp, kHorizon));
  // 1 sample ahead is not old.
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp + 1, limit_timestamp, kHorizon));
  // 2^31 samples ahead is not old.
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      limit_timestamp + k2Pow31, limit_timestamp, kHorizon));
}
} // namespace
// Runs the IsObsoleteTimestamp checks for a set of limit timestamps located
// around the interesting boundaries of the 32-bit timestamp range.
TEST(PacketBuffer, IsObsoleteTimestamp) {
  const uint32_t kLimitTimestamps[] = {
      0,
      1,
      0xFFFFFFFF,  // -1 in uint32_t.
      0x80000000,  // 2^31.
      0x80000001,  // 2^31 + 1.
      0x7FFFFFFF,  // 2^31 - 1.
  };
  for (const uint32_t limit_timestamp : kLimitTimestamps) {
    TestIsObsoleteTimestamp(limit_timestamp);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
namespace webrtc {
// Frees the VAD instance, if one was ever created.
PostDecodeVad::~PostDecodeVad() {
  if (vad_instance_ != nullptr) {
    WebRtcVad_Free(vad_instance_);
  }
}
// Enables post-decode VAD. The VAD instance is created on first use; if the
// creation fails, the VAD is left disabled.
void PostDecodeVad::Enable() {
  if (vad_instance_ == nullptr) {
    // No instance yet; try to create one.
    vad_instance_ = WebRtcVad_Create();
  }
  if (vad_instance_ == nullptr) {
    // Failed to create instance.
    Disable();
    return;
  }
  Init();
  enabled_ = true;
}
// Disables post-decode VAD by clearing both the running and the enabled flag.
// The VAD instance itself is kept.
void PostDecodeVad::Disable() {
  running_ = false;
  enabled_ = false;
}
// (Re-)initializes the VAD instance and marks the VAD as running. Without an
// instance, the VAD is only marked as not running.
void PostDecodeVad::Init() {
  running_ = false;
  if (vad_instance_ == nullptr) {
    return;
  }
  WebRtcVad_Init(vad_instance_);
  WebRtcVad_set_mode(vad_instance_, kVadMode);
  running_ = true;
}
// Updates the VAD decision with |length| samples of decoded audio in |signal|,
// sampled at |fs_hz|.
//
// Does nothing unless the VAD is enabled and has an instance. Comfort noise,
// SID frames and sample rates above 16 kHz stop the VAD, force
// |active_speech_| to true and reset the SID interval counter. While the VAD
// is stopped, every other call increments the counter, and the VAD is
// re-initialized once the counter reaches |kVadAutoEnable|. While the VAD is
// running, the signal is processed in frames of 30, 20 and 10 ms, and
// |active_speech_| is set if any frame is classified as speech.
void PostDecodeVad::Update(int16_t* signal, size_t length,
                           AudioDecoder::SpeechType speech_type,
                           bool sid_frame,
                           int fs_hz) {
  if (!vad_instance_ || !enabled_) {
    return;
  }

  if (speech_type == AudioDecoder::kComfortNoise || sid_frame ||
      fs_hz > 16000) {
    // TODO(hlundin): Remove restriction on fs_hz.
    running_ = false;
    active_speech_ = true;
    sid_interval_counter_ = 0;
  } else if (!running_) {
    ++sid_interval_counter_;
  }

  if (sid_interval_counter_ >= kVadAutoEnable) {
    Init();
  }

  if (length > 0 && running_) {
    size_t vad_sample_index = 0;
    active_speech_ = false;
    // Loop through frame sizes 30, 20, and 10 ms.
    for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10;
         vad_frame_size_ms -= 10) {
      const size_t vad_frame_size_samples =
          static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000);
      if (vad_frame_size_samples == 0) {
        // The sample rate is too low for this frame size to contain any
        // samples. The loop below would never advance |vad_sample_index| and
        // would spin forever, so skip this frame size.
        continue;
      }
      // Consume as many frames of the current size as still fit.
      while (length - vad_sample_index >= vad_frame_size_samples) {
        int vad_return = WebRtcVad_Process(
            vad_instance_, fs_hz, &signal[vad_sample_index],
            vad_frame_size_samples);
        active_speech_ |= (vad_return == 1);
        vad_sample_index += vad_frame_size_samples;
      }
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
#include <string> // size_t
#include "webrtc/api/audio_codecs/audio_decoder.h"
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/common_types.h" // NULL
#include "webrtc/modules/audio_coding/neteq/defines.h"
#include "webrtc/modules/audio_coding/neteq/packet.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Voice activity detector that runs on decoded audio. The detector starts out
// disabled and not running, with the speech decision set to "active".
class PostDecodeVad {
 public:
  PostDecodeVad()
      : enabled_(false),
        running_(false),
        active_speech_(true),
        sid_interval_counter_(0),
        vad_instance_(nullptr) {}

  virtual ~PostDecodeVad();

  // Enables post-decode VAD.
  void Enable();

  // Disables post-decode VAD.
  void Disable();

  // Initializes post-decode VAD.
  void Init();

  // Updates post-decode VAD with the audio data in |signal| having |length|
  // samples. The data is of type |speech_type|, at the sample rate |fs_hz|.
  // |sid_frame| tells whether the data originates from a SID frame.
  void Update(int16_t* signal, size_t length,
              AudioDecoder::SpeechType speech_type, bool sid_frame, int fs_hz);

  // Accessors.
  bool enabled() const { return enabled_; }
  bool running() const { return running_; }
  bool active_speech() const { return active_speech_; }

 private:
  static const int kVadMode = 0;  // Sets aggressiveness to "Normal".
  // Number of Update() calls without CNG/SID before re-enabling VAD.
  static const int kVadAutoEnable = 3000;

  bool enabled_;
  bool running_;
  bool active_speech_;
  int sid_interval_counter_;
  ::VadInst* vad_instance_;

  RTC_DISALLOW_COPY_AND_ASSIGN(PostDecodeVad);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PostDecodeVad class.
#include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
// Verifies that a PostDecodeVad can be constructed and destroyed without ever
// being enabled.
TEST(PostDecodeVad, CreateAndDestroy) {
PostDecodeVad vad;
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,114 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/preemptive_expand.h"
#include <algorithm> // min, max
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
// Entry point for the preemptive expand operation. Stores |old_data_length|
// and checks that the input is long enough; if it is not, |input| is copied to
// |output| unchanged and kError is returned. Otherwise the work is delegated
// to TimeStretch::Process().
PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(
    const int16_t* input,
    size_t input_length,
    size_t old_data_length,
    AudioMultiVector* output,
    size_t* length_change_samples) {
  old_data_length_per_channel_ = old_data_length;

  // Input length must be (almost) 30 ms.
  // Also, the new part must be at least |overlap_samples_| elements.
  static const size_t k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
  bool input_is_usable = false;
  if (num_channels_ != 0) {
    const size_t length_per_channel = input_length / num_channels_;
    input_is_usable =
        length_per_channel >= (2 * k15ms - 1) * fs_mult_ &&
        old_data_length < length_per_channel - overlap_samples_;
  }
  if (!input_is_usable) {
    // Length of input data too short to do preemptive expand. Simply move all
    // data from input to output.
    output->PushBackInterleaved(input, input_length);
    return kError;
  }

  const bool kFastMode = false;  // Fast mode is not available for PE Expand.
  return TimeStretch::Process(input, input_length, kFastMode, output,
                              length_change_samples);
}
// Sets the stretch parameters used when the signal holds no active speech:
// |best_correlation| is zeroed and |peak_index| is capped.
void PreemptiveExpand::SetParametersForPassiveSpeech(size_t len,
                                                     int16_t* best_correlation,
                                                     size_t* peak_index) const {
  // When the signal does not contain any active speech, the correlation does
  // not matter. Simply set it to zero.
  *best_correlation = 0;

  // For low energy expansion, the new data can be less than 15 ms, so make
  // sure that |peak_index| is not larger than the length of the new data,
  // i.e., |len| minus the length of the old data.
  const size_t new_data_length = len - old_data_length_per_channel_;
  *peak_index = std::min(*peak_index, new_data_length);
}
// Decides whether the signal may be stretched and, if so, writes the stretched
// signal to |output| by repeating |peak_index| samples with a cross-fade.
// Returns kNoStretch (with |input| copied unchanged) when the criteria are not
// met, kSuccess for stretched active speech, and kSuccessLowEnergy for
// stretched passive speech.
PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
    const int16_t* input,
    size_t input_length,
    size_t peak_index,
    int16_t best_correlation,
    bool active_speech,
    bool /*fast_mode*/,
    AudioMultiVector* output) const {
  // Pre-calculate common multiplication with |fs_mult_|.
  // 120 corresponds to 15 ms.
  const size_t samples_15ms = static_cast<size_t>(fs_mult_ * 120);

  // Stretching requires strong correlation (>0.9 in Q14) together with at most
  // 15 ms of old data, or passive speech.
  const bool strong_correlation = best_correlation > kCorrelationThreshold;
  const bool old_data_within_15ms =
      old_data_length_per_channel_ <= samples_15ms;
  const bool may_stretch =
      (strong_correlation && old_data_within_15ms) || !active_speech;
  if (!may_stretch) {
    // Preemptive expand not allowed. Simply move all data from |input| to
    // |output|.
    output->PushBackInterleaved(input, input_length);
    return kNoStretch;
  }

  // Do the preemptive expand operation by overlap add.
  // Length of the first part, which is not to be modified.
  const size_t unmodified_length =
      std::max(old_data_length_per_channel_, samples_15ms);
  // Copy first part, including cross-fade region.
  output->PushBackInterleaved(
      input, (unmodified_length + peak_index) * num_channels_);
  // Copy the |peak_index| samples that precede |unmodified_length| to
  // |temp_vector|.
  AudioMultiVector temp_vector(num_channels_);
  temp_vector.PushBackInterleaved(
      &input[(unmodified_length - peak_index) * num_channels_],
      peak_index * num_channels_);
  // Cross-fade |temp_vector| onto the end of |output|.
  output->CrossFade(temp_vector, peak_index);
  // Copy the remaining input, from |unmodified_length| until the end.
  output->PushBackInterleaved(
      &input[unmodified_length * num_channels_],
      input_length - unmodified_length * num_channels_);

  return active_speech ? kSuccess : kSuccessLowEnergy;
}
// Creates a new PreemptiveExpand object on the heap from the given
// parameters. The object is returned as a raw pointer and is not deleted
// here.
PreemptiveExpand* PreemptiveExpandFactory::Create(
    int sample_rate_hz,
    size_t num_channels,
    const BackgroundNoise& background_noise,
    size_t overlap_samples) const {
  PreemptiveExpand* const instance = new PreemptiveExpand(
      sample_rate_hz, num_channels, background_noise, overlap_samples);
  return instance;
}
} // namespace webrtc

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq/time_stretch.h"
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
// This class implements the PreemptiveExpand operation. Most of the work is
// done in the base class TimeStretch, which is shared with the Accelerate
// operation. In the PreemptiveExpand class, the operations that are specific to
// PreemptiveExpand are implemented.
class PreemptiveExpand : public TimeStretch {
public:
// Forwards |sample_rate_hz|, |num_channels| and |background_noise| to the
// TimeStretch base class, stores |overlap_samples|, and starts with an old
// data length of zero.
PreemptiveExpand(int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
size_t overlap_samples)
: TimeStretch(sample_rate_hz, num_channels, background_noise),
old_data_length_per_channel_(0),
overlap_samples_(overlap_samples) {
}
// This method performs the actual PreemptiveExpand operation. The samples are
// read from |pw16_decoded|, of length |len| elements, and are written to
// |output|. The number of samples added through time-stretching is provided
// in the output |length_change_samples|. The method returns the outcome of
// the operation as an enumerator value.
// NOTE(review): |old_data_len| presumably tells how much of the input is old
// (already played) data; the definition is not visible here -- confirm.
ReturnCodes Process(const int16_t *pw16_decoded,
size_t len,
size_t old_data_len,
AudioMultiVector* output,
size_t* length_change_samples);
protected:
// Sets the parameters |best_correlation| and |peak_index| to suitable
// values when the signal contains no active speech.
void SetParametersForPassiveSpeech(size_t input_length,
int16_t* best_correlation,
size_t* peak_index) const override;
// Checks the criteria for performing the time-stretching operation and,
// if possible, performs the time-stretching. Returns kNoStretch, after
// copying |input| to |output| unchanged, when the criteria are not met.
ReturnCodes CheckCriteriaAndStretch(const int16_t* input,
size_t input_length,
size_t peak_index,
int16_t best_correlation,
bool active_speech,
bool /*fast_mode*/,
AudioMultiVector* output) const override;
private:
// Length, per channel, of the old data. CheckCriteriaAndStretch() only
// stretches active speech when this is at most 15 ms, and never modifies the
// first max(this, 15 ms) samples of the input.
size_t old_data_length_per_channel_;
// Value passed to the constructor as |overlap_samples|.
size_t overlap_samples_;
RTC_DISALLOW_COPY_AND_ASSIGN(PreemptiveExpand);
};
// Factory for PreemptiveExpand objects. Create() is virtual, so a subclass can
// substitute the object that is produced.
struct PreemptiveExpandFactory {
PreemptiveExpandFactory() {}
virtual ~PreemptiveExpandFactory() {}
// Returns a pointer to a newly allocated PreemptiveExpand constructed from
// the given arguments.
virtual PreemptiveExpand* Create(
int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
size_t overlap_samples) const;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
namespace webrtc {
// Table of 16-bit sample values from which RandomVector::Generate() picks its
// output. Generate() indexes the table with |seed_| masked by
// (kRandomTableSize - 1).
const int16_t RandomVector::kRandomTable[RandomVector::kRandomTableSize] = {
2680, 5532, 441, 5520, 16170, -5146, -1024, -8733, 3115, 9598, -10380,
-4959, -1280, -21716, 7133, -1522, 13458, -3902, 2789, -675, 3441, 5016,
-13599, -4003, -2739, 3922, -7209, 13352, -11617, -7241, 12905, -2314, 5426,
10121, -9702, 11207, -13542, 1373, 816, -5934, -12504, 4798, 1811, 4112,
-613, 201, -10367, -2960, -2419, 3442, 4299, -6116, -6092, 1552, -1650,
-480, -1237, 18720, -11858, -8303, -8212, 865, -2890, -16968, 12052, -5845,
-5912, 9777, -5665, -6294, 5426, -4737, -6335, 1652, 761, 3832, 641, -8552,
-9084, -5753, 8146, 12156, -4915, 15086, -1231, -1869, 11749, -9319, -6403,
11407, 6232, -1683, 24340, -11166, 4017, -10448, 3153, -2936, 6212, 2891,
-866, -404, -4807, -2324, -1917, -2388, -6470, -3895, -10300, 5323, -5403,
2205, 4640, 7022, -21186, -6244, -882, -10031, -3395, -12885, 7155, -5339,
5079, -2645, -9515, 6622, 14651, 15852, 359, 122, 8246, -3502, -6696, -3679,
-13535, -1409, -704, -7403, -4007, 1798, 279, -420, -12796, -14219, 1141,
3359, 11434, 7049, -6684, -7473, 14283, -4115, -9123, -8969, 4152, 4117,
13792, 5742, 16168, 8661, -1609, -6095, 1881, 14380, -5588, 6758, -6425,
-22969, -7269, 7031, 1119, -1611, -5850, -11281, 3559, -8952, -10146, -4667,
-16251, -1538, 2062, -1012, -13073, 227, -3142, -5265, 20, 5770, -7559,
4740, -4819, 992, -8208, -7130, -4652, 6725, 7369, -1036, 13144, -1588,
-5304, -2344, -449, -5705, -8894, 5205, -17904, -11188, -1022, 4852, 10101,
-5255, -4200, -752, 7941, -1543, 5959, 14719, 13346, 17045, -15605, -1678,
-1600, -9230, 68, 23348, 1172, 7750, 11212, -18227, 9956, 4161, 883, 3947,
4341, 1014, -4889, -2603, 1246, -5630, -3596, -870, -1298, 2784, -3317,
-6612, -20541, 4166, 4181, -8625, 3562, 12890, 4761, 3205, -12259, -8579 };
// Puts the generator back into its initial state: seed 777, increment 1.
void RandomVector::Reset() {
  seed_increment_ = 1;
  seed_ = 777;
}
// Writes |length| pseudo-random samples to |output|. For every sample the seed
// is advanced by the current seed increment, and the low bits of the seed
// select an entry of |kRandomTable|.
void RandomVector::Generate(size_t length, int16_t* output) {
  const size_t index_mask = kRandomTableSize - 1;
  for (size_t remaining = length; remaining > 0; --remaining) {
    seed_ += seed_increment_;
    *output++ = kRandomTable[seed_ & index_mask];
  }
}
// Adds |increase_by| to the seed increment and keeps only the bits selected by
// (kRandomTableSize - 1), so the result stays within the table index range.
void RandomVector::IncreaseSeedIncrement(int16_t increase_by) {
  const int widened_sum = seed_increment_ + increase_by;
  seed_increment_ = static_cast<int16_t>(
      widened_sum & static_cast<int>(kRandomTableSize - 1));
}
} // namespace webrtc

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
#include <string.h> // size_t
#include "webrtc/rtc_base/constructormagic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class generates pseudo-random samples by stepping through a fixed
// table of values. The position in the table is derived from a seed that is
// advanced by a configurable increment for every generated sample.
class RandomVector {
 public:
  // Number of entries in |kRandomTable|. Index masking with
  // (kRandomTableSize - 1) is used to stay inside the table.
  static const size_t kRandomTableSize = 256;
  static const int16_t kRandomTable[kRandomTableSize];

  RandomVector()
      : seed_(777),
        seed_increment_(1) {
  }

  // Restores the seed and the seed increment to their initial values.
  void Reset();

  // Writes |length| pseudo-random samples to |output|.
  void Generate(size_t length, int16_t* output);

  // Adds |increase_by| to the seed increment; the result is masked with
  // (kRandomTableSize - 1).
  void IncreaseSeedIncrement(int16_t increase_by);

  // Accessors and mutators.
  // The getter does not modify the object, so it is const-qualified and can
  // be used through const references.
  int16_t seed_increment() const { return seed_increment_; }
  void set_seed_increment(int16_t value) { seed_increment_ = value; }

 private:
  uint32_t seed_;           // Running seed; its low bits index the table.
  int16_t seed_increment_;  // Amount added to |seed_| per generated sample.

  RTC_DISALLOW_COPY_AND_ASSIGN(RandomVector);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for RandomVector class.
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
#include "webrtc/test/gtest.h"
namespace webrtc {
// Smoke test: a RandomVector can be default-constructed and destroyed.
TEST(RandomVector, CreateAndDestroy) {
  RandomVector vector_under_test;
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,162 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/red_payload_splitter.h"
#include <assert.h>
#include <vector>
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
#include "webrtc/rtc_base/checks.h"
#include "webrtc/rtc_base/logging.h"
#include "webrtc/rtc_base/safe_conversions.h"
namespace webrtc {
// The method loops through a list of packets {A, B, C, ...}. Each packet is
// split into its corresponding RED payloads, {A1, A2, ...}, which are
// temporarily held in the list |new_packets|.
// When the first packet in |packet_list| has been processed, the original
// packet is replaced by the new ones in |new_packets|, so that |packet_list|
// becomes: {A1, A2, ..., B, C, ...}. The method then continues with B, and C,
// until all the original packets have been replaced by their split payloads.
//
// Returns true if every packet could be split without problems, and false if
// at least one packet was malformed: truncated RED headers, block lengths
// that do not fit in the packet, or too many blocks. A malformed packet is
// always removed from the list. When only the block lengths are inconsistent,
// the blocks preceding the first inconsistent one are still delivered.
bool RedPayloadSplitter::SplitRed(PacketList* packet_list) {
  // Too many RED blocks indicates that something is wrong. Clamp it at some
  // reasonable value.
  const size_t kMaxRedBlocks = 32;
  // Header sizes according to RFC 2198.
  const size_t kRedHeaderLength = 4;      // Header of a redundant block.
  const size_t kRedLastHeaderLength = 1;  // Header of the final block.
  bool ret = true;
  PacketList::iterator it = packet_list->begin();
  while (it != packet_list->end()) {
    const Packet& red_packet = *it;
    assert(!red_packet.payload.empty());
    const uint8_t* payload_ptr = red_packet.payload.data();
    const size_t total_length = red_packet.payload.size();
    // Number of bytes from |payload_ptr| to the end of the packet. The
    // payload comes from the network, so every read is checked against this.
    size_t bytes_left = total_length;

    // Read RED headers (according to RFC 2198):
    //
    //    0                   1                   2                   3
    //    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    //   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    //   |F|   block PT  |  timestamp offset         |   block length    |
    //   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    // Last RED header:
    //    0 1 2 3 4 5 6 7
    //   +-+-+-+-+-+-+-+-+
    //   |0|   Block PT  |
    //   +-+-+-+-+-+-+-+-+

    struct RedHeader {
      uint8_t payload_type;
      uint32_t timestamp;
      size_t payload_length;
    };

    std::vector<RedHeader> new_headers;
    bool headers_complete = true;
    bool last_block = false;
    // Bytes accounted for so far: all headers read plus the payload lengths
    // they announce.
    size_t sum_length = 0;
    while (!last_block) {
      if (bytes_left < kRedLastHeaderLength) {
        // The packet ended before a header with F == 0 was found.
        headers_complete = false;
        break;
      }
      RedHeader new_header;
      // Check the F bit. If F == 0, this was the last block.
      last_block = ((payload_ptr[0] & 0x80) == 0);
      // Bits 1 through 7 are payload type.
      new_header.payload_type = payload_ptr[0] & 0x7F;
      if (last_block) {
        // No more header data to read.
        sum_length += kRedLastHeaderLength;
        new_header.timestamp = red_packet.timestamp;
        // The last block takes whatever remains. If the announced lengths
        // already exceed the packet size, an earlier block fails the bounds
        // check below, so this header is never used; 0 merely avoids an
        // unsigned underflow.
        new_header.payload_length =
            (sum_length <= total_length) ? total_length - sum_length : 0;
        payload_ptr += kRedLastHeaderLength;  // Advance to first payload byte.
        bytes_left -= kRedLastHeaderLength;
      } else {
        if (bytes_left < kRedHeaderLength) {
          // F == 1 promises a 4-byte header, but the packet is too short.
          headers_complete = false;
          break;
        }
        // Bits 8 through 21 are timestamp offset.
        int timestamp_offset =
            (payload_ptr[1] << 6) + ((payload_ptr[2] & 0xFC) >> 2);
        new_header.timestamp = red_packet.timestamp - timestamp_offset;
        // Bits 22 through 31 are payload length.
        new_header.payload_length =
            ((payload_ptr[2] & 0x03) << 8) + payload_ptr[3];
        sum_length += new_header.payload_length + kRedHeaderLength;
        payload_ptr += kRedHeaderLength;  // Advance to next RED header.
        bytes_left -= kRedHeaderLength;
      }
      // Store in new list of packets.
      new_headers.push_back(new_header);
    }

    if (!headers_complete) {
      // Nothing can be trusted in this packet; drop it entirely.
      LOG(LS_WARNING) << "SplitRed header too short";
      ret = false;
    } else if (new_headers.size() <= kMaxRedBlocks) {
      // Populate the new packets with payload data.
      // |payload_ptr| now points at the first payload byte.
      PacketList new_packets;  // An empty list to store the split packets in.
      for (size_t i = 0; i != new_headers.size(); ++i) {
        const auto& new_header = new_headers[i];
        size_t payload_length = new_header.payload_length;
        if (payload_length > bytes_left) {
          // The block lengths in the RED headers do not match the overall
          // packet length. Something is corrupt. Discard this and the remaining
          // payloads from this packet.
          LOG(LS_WARNING) << "SplitRed length mismatch";
          ret = false;
          break;
        }

        Packet new_packet;
        new_packet.timestamp = new_header.timestamp;
        new_packet.payload_type = new_header.payload_type;
        new_packet.sequence_number = red_packet.sequence_number;
        // The last block is the primary payload (level 0); earlier blocks are
        // increasingly redundant.
        new_packet.priority.red_level =
            rtc::dchecked_cast<int>((new_headers.size() - 1) - i);
        new_packet.payload.SetData(payload_ptr, payload_length);
        // Push to the front so that the primary payload ends up first.
        new_packets.push_front(std::move(new_packet));
        payload_ptr += payload_length;
        bytes_left -= payload_length;
      }
      // Insert new packets into original list, before the element pointed to by
      // iterator |it|.
      packet_list->splice(it, std::move(new_packets));
    } else {
      LOG(LS_WARNING) << "SplitRed too many blocks: " << new_headers.size();
      ret = false;
    }
    // Remove |it| from the packet list. This operation effectively moves the
    // iterator |it| to the next packet in the list. Thus, we do not have to
    // increment it manually.
    it = packet_list->erase(it);
  }
  return ret;
}
// Walks through |packet_list| and removes every packet whose payload type
// differs from that of the first packet that is neither DTMF nor comfort
// noise. DTMF and comfort noise packets are always kept. Returns the number
// of packets removed.
int RedPayloadSplitter::CheckRedPayloads(
    PacketList* packet_list,
    const DecoderDatabase& decoder_database) {
  int discarded_count = 0;
  int main_payload_type = -1;  // -1 until the first speech packet is seen.
  auto it = packet_list->begin();
  while (it != packet_list->end()) {
    const uint8_t candidate_type = it->payload_type;
    const bool always_kept = decoder_database.IsDtmf(candidate_type) ||
                             decoder_database.IsComfortNoise(candidate_type);
    if (always_kept) {
      ++it;
      continue;
    }
    if (main_payload_type == -1) {
      // First non-DTMF, non-CNG packet: it defines the accepted type.
      main_payload_type = candidate_type;
      ++it;
      continue;
    }
    if (candidate_type == main_payload_type) {
      ++it;
      continue;
    }
    // Redundant payloads of a different type are not allowed. erase()
    // returns the iterator to the following packet, so no manual increment
    // is needed.
    it = packet_list->erase(it);
    ++discarded_count;
  }
  return discarded_count;
}
} // namespace webrtc

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
#include "webrtc/modules/audio_coding/neteq/packet.h"
#include "webrtc/rtc_base/constructormagic.h"
namespace webrtc {
// Forward declarations.
class DecoderDatabase;
// This class handles splitting of RED payloads into smaller parts.
// Codec-specific packet splitting can be performed by
// AudioDecoder::ParsePayload.
class RedPayloadSplitter {
public:
RedPayloadSplitter() {}
virtual ~RedPayloadSplitter() {}
// Splits each packet in |packet_list| into its separate RED payloads. Each
// RED payload is packetized into a Packet. The original elements in
// |packet_list| are properly deleted, and replaced by the new packets.
// Note that all packets in |packet_list| must be RED payloads, i.e., have
// RED headers according to RFC 2198 at the very beginning of the payload.
// Returns true if all packets were split without problems, and false if any
// packet had block lengths that did not match the packet size or contained
// too many RED blocks.
virtual bool SplitRed(PacketList* packet_list);
// Checks all packets in |packet_list|. Packets that are DTMF events or
// comfort noise payloads are kept. Apart from those, only a single payload
// type is accepted: that of the first other packet in the list. Any packet
// with another payload type is discarded. Returns the number of discarded
// packets.
virtual int CheckRedPayloads(PacketList* packet_list,
const DecoderDatabase& decoder_database);
private:
RTC_DISALLOW_COPY_AND_ASSIGN(RedPayloadSplitter);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_

View File

@ -0,0 +1,344 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for RedPayloadSplitter class.
#include "webrtc/modules/audio_coding/neteq/red_payload_splitter.h"
#include <assert.h>
#include <memory>
#include <utility> // pair
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
#include "webrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq/packet.h"
#include "webrtc/test/gtest.h"
#include "webrtc/test/mock_audio_decoder_factory.h"
using ::testing::Return;
using ::testing::ReturnNull;
namespace webrtc {
// Payload type given to the RED packets built by CreateRedPayload().
static const int kRedPayloadType = 100;
// Size in bytes of each block payload written by CreateRedPayload().
static const size_t kPayloadLength = 10;
static const size_t kRedHeaderLength = 4; // 4 bytes RED header.
// Sequence number and timestamp assigned to packets created by the helpers
// CreateRedPayload() and CreatePacket().
static const uint16_t kSequenceNumber = 0;
static const uint32_t kBaseTimestamp = 0x12345678;
// A possible Opus packet that contains FEC is the following.
// The frame is 20 ms in duration.
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |0|0|0|0|1|0|0|0|x|1|x|x|x|x|x|x|x| |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
// | Compressed frame 1 (N-2 bytes)... :
// : |
// | |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// Fills |payload| (of |payload_length| bytes) with the two fixed leading bytes
// 0x08 and 0x40 followed by |payload_value| in every remaining byte. Buffers
// shorter than two bytes are left untouched.
void CreateOpusFecPayload(uint8_t* payload,
                          size_t payload_length,
                          uint8_t payload_value) {
  const size_t kFixedBytes = 2;
  if (payload_length >= kFixedBytes) {
    payload[0] = 0x08;
    payload[1] = 0x40;
    for (size_t pos = kFixedBytes; pos < payload_length; ++pos) {
      payload[pos] = payload_value;
    }
  }
}
// RED headers (according to RFC 2198):
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |F| block PT | timestamp offset | block length |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// Last RED header:
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |0| Block PT |
// +-+-+-+-+-+-+-+-+
// Creates a RED packet, with |num_payloads| payloads, with payload types given
// by the values in array |payload_types| (which must be of length
// |num_payloads|). Each redundant payload is |timestamp_offset| samples
// "behind" the previous payload.
// Builds a RED packet: first all block headers (4 bytes for every block but
// the last, 1 byte for the last), then |num_payloads| payloads of
// kPayloadLength bytes each. Payload number i is filled with the byte value i,
// or with an Opus FEC style payload when |embed_opus_fec| is true.
Packet CreateRedPayload(size_t num_payloads,
                        uint8_t* payload_types,
                        int timestamp_offset,
                        bool embed_opus_fec = false) {
  Packet packet;
  packet.sequence_number = kSequenceNumber;
  packet.timestamp = kBaseTimestamp;
  packet.payload_type = kRedPayloadType;
  packet.payload.SetSize((kPayloadLength + 1) +
                         (num_payloads - 1) *
                             (kPayloadLength + kRedHeaderLength));
  uint8_t* write_ptr = packet.payload.data();

  // Headers of all blocks except the last one: F = 1, payload type,
  // 14-bit timestamp offset and 10-bit block length.
  for (size_t block = 0; block + 1 < num_payloads; ++block) {
    *write_ptr++ = (payload_types[block] & 0x7F) | 0x80;
    const int this_offset = (num_payloads - block - 1) * timestamp_offset;
    *write_ptr++ = this_offset >> 6;
    assert(kPayloadLength <= 1023);  // Max length described by 10 bits.
    *write_ptr++ = ((this_offset & 0x3F) << 2) | (kPayloadLength >> 8);
    *write_ptr++ = kPayloadLength & 0xFF;
  }
  // Header of the last block: F = 0 and the payload type only.
  if (num_payloads > 0) {
    *write_ptr++ = payload_types[num_payloads - 1] & 0x7F;
  }

  // Payload data, one chunk of kPayloadLength bytes per block.
  for (size_t block = 0; block < num_payloads;
       ++block, write_ptr += kPayloadLength) {
    if (!embed_opus_fec) {
      memset(write_ptr, static_cast<int>(block), kPayloadLength);
    } else {
      CreateOpusFecPayload(write_ptr, kPayloadLength,
                           static_cast<uint8_t>(block));
    }
  }
  return packet;
}
// Creates a packet of type |payload_type| carrying |payload_length| bytes.
// All payload bytes are set to |payload_value|, unless |opus_fec| is true, in
// which case the payload is produced by CreateOpusFecPayload().
Packet CreatePacket(uint8_t payload_type,
                    size_t payload_length,
                    uint8_t payload_value,
                    bool opus_fec = false) {
  Packet packet;
  packet.sequence_number = kSequenceNumber;
  packet.timestamp = kBaseTimestamp;
  packet.payload_type = payload_type;
  packet.payload.SetSize(payload_length);
  if (!opus_fec) {
    memset(packet.payload.data(), payload_value, packet.payload.size());
    return packet;
  }
  CreateOpusFecPayload(packet.payload.data(), packet.payload.size(),
                       payload_value);
  return packet;
}
// Checks that |packet| has the attributes given in the remaining parameters.
// Payload size, payload type, sequence number, timestamp and priority are
// checked with non-fatal expectations. The payload must also be non-empty and
// every payload byte must equal |payload_value|; those checks are fatal
// assertions, which make this helper return at the first failure.
void VerifyPacket(const Packet& packet,
size_t payload_length,
uint8_t payload_type,
uint16_t sequence_number,
uint32_t timestamp,
uint8_t payload_value,
Packet::Priority priority) {
EXPECT_EQ(payload_length, packet.payload.size());
EXPECT_EQ(payload_type, packet.payload_type);
EXPECT_EQ(sequence_number, packet.sequence_number);
EXPECT_EQ(timestamp, packet.timestamp);
EXPECT_EQ(priority, packet.priority);
ASSERT_FALSE(packet.payload.empty());
for (size_t i = 0; i < packet.payload.size(); ++i) {
ASSERT_EQ(payload_value, packet.payload.data()[i]);
}
}
void VerifyPacket(const Packet& packet,
size_t payload_length,
uint8_t payload_type,
uint16_t sequence_number,
uint32_t timestamp,
uint8_t payload_value,
bool primary) {
return VerifyPacket(packet, payload_length, payload_type, sequence_number,
timestamp, payload_value,
Packet::Priority{0, primary ? 0 : 1});
}
// Start of test definitions.
// Smoke test: a heap-allocated RedPayloadSplitter can be created and deleted.
TEST(RedPayloadSplitter, CreateAndDestroy) {
  std::unique_ptr<RedPayloadSplitter> splitter(new RedPayloadSplitter);
  splitter.reset();
}
// Packet A is split into A1 and A2.
TEST(RedPayloadSplitter, OnePacketTwoPayloads) {
uint8_t payload_types[] = {0, 0};
const int kTimestampOffset = 160;
PacketList packet_list;
packet_list.push_back(CreateRedPayload(2, payload_types, kTimestampOffset));
RedPayloadSplitter splitter;
EXPECT_TRUE(splitter.SplitRed(&packet_list));
ASSERT_EQ(2u, packet_list.size());
// Check first packet. The first in list should always be the primary payload.
// The primary is the last block in the RED packet (index 1), which
// CreateRedPayload() filled with the byte value 1.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
kSequenceNumber, kBaseTimestamp, 1, true);
packet_list.pop_front();
// Check second packet. This is the redundant block (index 0, byte value 0),
// one timestamp offset behind the primary.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber, kBaseTimestamp - kTimestampOffset, 0, false);
}
// Packets A and B are not split at all. Only the RED header in each packet is
// removed.
TEST(RedPayloadSplitter, TwoPacketsOnePayload) {
uint8_t payload_types[] = {0};
const int kTimestampOffset = 160;
// Create first packet, with a single RED payload.
PacketList packet_list;
packet_list.push_back(CreateRedPayload(1, payload_types, kTimestampOffset));
// Create second packet, with a single RED payload.
{
Packet packet = CreateRedPayload(1, payload_types, kTimestampOffset);
// Manually change timestamp and sequence number of second packet.
packet.timestamp += kTimestampOffset;
packet.sequence_number++;
packet_list.push_back(std::move(packet));
}
RedPayloadSplitter splitter;
EXPECT_TRUE(splitter.SplitRed(&packet_list));
// One output packet per input packet, both primary.
ASSERT_EQ(2u, packet_list.size());
// Check first packet.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber, kBaseTimestamp, 0, true);
packet_list.pop_front();
// Check second packet.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 0, true);
}
// Packets A and B are split into packets A1, A2, A3, B1, B2, B3, with
// attributes as follows:
//
// A1* A2 A3 B1* B2 B3
// Payload type 0 1 2 0 1 2
// Timestamp b b-o b-2o b+o b b-o
// Sequence number 0 0 0 1 1 1
//
// b = kBaseTimestamp, o = kTimestampOffset, * = primary.
TEST(RedPayloadSplitter, TwoPacketsThreePayloads) {
uint8_t payload_types[] = {2, 1, 0}; // Primary is the last one.
const int kTimestampOffset = 160;
// Create first packet, with 3 RED payloads.
PacketList packet_list;
packet_list.push_back(CreateRedPayload(3, payload_types, kTimestampOffset));
// Create second packet, with 3 RED payloads.
{
Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset);
// Manually change timestamp and sequence number of second packet.
packet.timestamp += kTimestampOffset;
packet.sequence_number++;
packet_list.push_back(std::move(packet));
}
RedPayloadSplitter splitter;
EXPECT_TRUE(splitter.SplitRed(&packet_list));
ASSERT_EQ(6u, packet_list.size());
// In the checks below, the expected payload byte value equals the index of
// the block inside the RED packet, and the priority is {0, red level}, where
// the primary has red level 0.
// Check first packet, A1.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2],
kSequenceNumber, kBaseTimestamp, 2, {0, 0});
packet_list.pop_front();
// Check second packet, A2.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
kSequenceNumber, kBaseTimestamp - kTimestampOffset, 1, {0, 1});
packet_list.pop_front();
// Check third packet, A3.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0,
{0, 2});
packet_list.pop_front();
// Check fourth packet, B1.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2],
kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 2,
{0, 0});
packet_list.pop_front();
// Check fifth packet, B2.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
kSequenceNumber + 1, kBaseTimestamp, 1, {0, 1});
packet_list.pop_front();
// Check sixth packet, B3.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber + 1, kBaseTimestamp - kTimestampOffset, 0,
{0, 2});
}
// Creates a list with 4 packets with these payload types:
// 0 = CNGnb
// 1 = PCMu
// 2 = DTMF (AVT)
// 3 = iLBC
// We expect the method CheckRedPayloads to discard the iLBC packet, since it
// is a non-CNG, non-DTMF payload of another type than the first speech payload
// found in the list (which is PCMu). The method must also report exactly one
// discarded packet through its return value.
TEST(RedPayloadSplitter, CheckRedPayloads) {
  PacketList packet_list;
  for (uint8_t i = 0; i <= 3; ++i) {
    // Create packet with payload type |i|, payload length 10 bytes, all 0.
    packet_list.push_back(CreatePacket(i, 10, 0));
  }

  // Use a real DecoderDatabase object here instead of a mock, since it is
  // easier to just register the payload types and let the actual implementation
  // do its job.
  DecoderDatabase decoder_database(
      new rtc::RefCountedObject<MockAudioDecoderFactory>);
  decoder_database.RegisterPayload(0, NetEqDecoder::kDecoderCNGnb, "cng-nb");
  decoder_database.RegisterPayload(1, NetEqDecoder::kDecoderPCMu, "pcmu");
  decoder_database.RegisterPayload(2, NetEqDecoder::kDecoderAVT, "avt");
  decoder_database.RegisterPayload(3, NetEqDecoder::kDecoderILBC, "ilbc");

  RedPayloadSplitter splitter;
  // The return value is the number of discarded packets: only the iLBC one.
  EXPECT_EQ(1, splitter.CheckRedPayloads(&packet_list, decoder_database));

  ASSERT_EQ(3u, packet_list.size());  // Should have dropped the last packet.
  // Verify packets. The loop verifies that payload types 0, 1, and 2 are in the
  // list.
  for (int i = 0; i <= 2; ++i) {
    VerifyPacket(packet_list.front(), 10, i, kSequenceNumber, kBaseTimestamp, 0,
                 true);
    packet_list.pop_front();
  }
  EXPECT_TRUE(packet_list.empty());
}
// Packet A is split into A1, A2 and A3. But the length parameter is off, so
// the last payloads should be discarded.
TEST(RedPayloadSplitter, WrongPayloadLength) {
uint8_t payload_types[] = {0, 0, 0};
const int kTimestampOffset = 160;
PacketList packet_list;
{
Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset);
// Manually tamper with the payload length of the packet.
// This is one byte too short for the second payload (out of three).
// We expect only the first payload to be returned.
packet.payload.SetSize(packet.payload.size() - (kPayloadLength + 1));
packet_list.push_back(std::move(packet));
}
RedPayloadSplitter splitter;
// The mismatch between block lengths and packet size must be reported.
EXPECT_FALSE(splitter.SplitRed(&packet_list));
ASSERT_EQ(1u, packet_list.size());
// Check first packet. It is the oldest redundant block (index 0), so its
// timestamp is two offsets behind the base and its red level is 2.
VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0,
{0, 2});
packet_list.pop_front();
}
} // namespace webrtc

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq/rtcp.h"
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include "webrtc/modules/include/module_common_types.h"
namespace webrtc {
// Resets all statistics state. Both the base and the highest sequence number
// are set to |start_sequence_number|; every counter and the jitter/transit
// estimates are cleared.
void Rtcp::Init(uint16_t start_sequence_number) {
  // Sequence number tracking.
  base_seq_no_ = start_sequence_number;
  max_seq_no_ = start_sequence_number;
  cycles_ = 0;

  // Packet counters.
  received_packets_ = 0;
  received_packets_prior_ = 0;
  expected_prior_ = 0;

  // Jitter estimation.
  transit_ = 0;
  jitter_ = 0;
}
// Updates the statistics with one received packet, described by |rtp_header|
// and the local |receive_timestamp|. Tracks the number of received packets,
// the highest sequence number (including wrap-arounds) and the jitter
// estimate.
void Rtcp::Update(const RTPHeader& rtp_header, uint32_t receive_timestamp) {
// Update number of received packets, and largest packet number received.
received_packets_++;
// The difference is stored in 16 bits, so a packet that is slightly ahead of
// |max_seq_no_| yields a non-negative value even across a wrap-around.
int16_t sn_diff = rtp_header.sequenceNumber - max_seq_no_;
if (sn_diff >= 0) {
if (rtp_header.sequenceNumber < max_seq_no_) {
// Wrap-around detected.
cycles_++;
}
max_seq_no_ = rtp_header.sequenceNumber;
}
// Calculate jitter according to RFC 3550, and update previous timestamps.
// Note that the value in |jitter_| is in Q4.
// The very first packet only initializes |transit_| below; there is no
// previous transit value to compare against.
if (received_packets_ > 1) {
int32_t ts_diff = receive_timestamp - (rtp_header.timestamp - transit_);
int64_t jitter_diff = (std::abs(int64_t{ts_diff}) << 4) - jitter_;
// Calculate 15 * jitter_ / 16 + jitter_diff / 16 (with proper rounding).
jitter_ = jitter_ + ((jitter_diff + 8) >> 4);
RTC_DCHECK_GE(jitter_, 0);
}
// Remember the difference between RTP and receive timestamps for the next
// packet.
transit_ = rtp_header.timestamp - receive_timestamp;
}
// Fills |stats| with the extended highest sequence number, the cumulative
// number of lost packets, the fraction lost since the previous report and the
// jitter. Unless |no_reset| is true, the "prior" counters are updated so that
// the next call reports the fraction lost relative to this call.
void Rtcp::GetStatistics(bool no_reset, RtcpStatistics* stats) {
// Extended highest sequence number received.
stats->extended_highest_sequence_number =
(static_cast<int>(cycles_) << 16) + max_seq_no_;
// Calculate expected number of packets and compare it with the number of
// packets that were actually received. The cumulative number of lost packets
// can be extracted.
uint32_t expected_packets =
stats->extended_highest_sequence_number - base_seq_no_ + 1;
if (received_packets_ == 0) {
// No packets received, assume none lost.
stats->packets_lost = 0;
} else if (expected_packets > received_packets_) {
stats->packets_lost = expected_packets - received_packets_;
// Saturate at 0xFFFFFF (24 bits).
if (stats->packets_lost > 0xFFFFFF) {
stats->packets_lost = 0xFFFFFF;
}
} else {
stats->packets_lost = 0;
}
// Fraction lost since last report.
uint32_t expected_since_last = expected_packets - expected_prior_;
uint32_t received_since_last = received_packets_ - received_packets_prior_;
if (!no_reset) {
expected_prior_ = expected_packets;
received_packets_prior_ = received_packets_;
}
int32_t lost = expected_since_last - received_since_last;
if (expected_since_last == 0 || lost <= 0 || received_packets_ == 0) {
stats->fraction_lost = 0;
} else {
// Lost fraction scaled by 256 and capped at 0xFF.
// NOTE(review): |lost << 8| is a signed 32-bit shift; it looks like very
// large loss counts could overflow here -- confirm the expected range.
stats->fraction_lost = std::min(0xFFU, (lost << 8) / expected_since_last);
}
stats->jitter = jitter_ >> 4; // Scaling from Q4.
}
} // namespace webrtc

Some files were not shown because too many files have changed in this diff Show More