Moving src/webrtc into src/.
In order to eliminate the WebRTC Subtree mirror in Chromium, WebRTC is moving the content of the src/webrtc directory up to the src/ directory. NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true TBR=tommi@webrtc.org Bug: chromium:611808 Change-Id: Iac59c5b51b950f174119565bac87955a7994bc38 Reviewed-on: https://webrtc-review.googlesource.com/1560 Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org> Reviewed-by: Henrik Kjellander <kjellander@webrtc.org> Cr-Commit-Position: refs/heads/master@{#19845}
This commit is contained in:
committed by
Commit Bot
parent
6674846b4a
commit
bb547203bf
101
modules/audio_coding/neteq/accelerate.cc
Normal file
101
modules/audio_coding/neteq/accelerate.cc
Normal file
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/accelerate.h"
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Runs the Accelerate operation on |input| (|input_length| interleaved
// samples), appending the result to |output| and reporting the number of
// removed samples in |length_change_samples|. Returns kError (with the input
// passed through unmodified) when there is not enough data to operate on;
// otherwise defers to the shared TimeStretch::Process() machinery.
Accelerate::ReturnCodes Accelerate::Process(const int16_t* input,
                                            size_t input_length,
                                            bool fast_accelerate,
                                            AudioMultiVector* output,
                                            size_t* length_change_samples) {
  // The operation needs (almost) 30 ms of audio: 15 ms is 120 samples at the
  // 8 kHz base rate, scaled by |fs_mult_| for higher sample rates.
  static const size_t k15ms = 120;
  const size_t min_samples_per_channel = (2 * k15ms - 1) * fs_mult_;
  const bool too_short =
      (num_channels_ == 0) ||
      (input_length / num_channels_ < min_samples_per_channel);
  if (too_short) {
    // Not enough data to accelerate. Pass all input samples straight through
    // to |output| and signal the failure to the caller.
    output->PushBackInterleaved(input, input_length);
    return kError;
  }
  // Delegate the actual time-stretching to the base class, which is shared
  // with the PreemptiveExpand operation.
  return TimeStretch::Process(input, input_length, fast_accelerate, output,
                              length_change_samples);
}
|
||||
|
||||
// Called by the TimeStretch base class when the frame contains no active
// speech. In that case there is no meaningful pitch peak to preserve, so the
// correlation is forced to zero, which makes CheckCriteriaAndStretch() take
// its passive-speech path. |len| and |peak_index| are intentionally unused.
void Accelerate::SetParametersForPassiveSpeech(size_t /*len*/,
                                               int16_t* best_correlation,
                                               size_t* /*peak_index*/) const {
  // When the signal does not contain any active speech, the correlation does
  // not matter. Simply set it to zero.
  *best_correlation = 0;
}
|
||||
|
||||
// Decides whether one pitch period (|peak_index| samples per channel) may be
// removed from |input|, and if so performs the removal by overlap-add:
// the first 15 ms is copied verbatim, the following |peak_index| samples are
// cross-faded onto the tail of that copy, and the remainder is appended
// unmodified. If the criteria are not met, the input is passed through
// untouched. Returns kSuccess / kSuccessLowEnergy when samples were removed,
// kNoStretch otherwise.
// NOTE(review): in fast mode this divides by |peak_index|; presumably the
// base class guarantees peak_index > 0 here — confirm against
// TimeStretch::Process().
Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch(
    const int16_t* input,
    size_t input_length,
    size_t peak_index,
    int16_t best_correlation,
    bool active_speech,
    bool fast_mode,
    AudioMultiVector* output) const {
  // Check for strong correlation or passive speech.
  // Use 8192 (0.5 in Q14) in fast mode.
  const int correlation_threshold = fast_mode ? 8192 : kCorrelationThreshold;
  if ((best_correlation > correlation_threshold) || !active_speech) {
    // Do accelerate operation by overlap add.

    // Pre-calculate common multiplication with |fs_mult_|.
    // 120 corresponds to 15 ms.
    size_t fs_mult_120 = fs_mult_ * 120;

    if (fast_mode) {
      // Fit as many multiples of |peak_index| as possible in fs_mult_120.
      // TODO(henrik.lundin) Consider finding multiple correlation peaks and
      // pick the one with the longest correlation lag in this case.
      peak_index = (fs_mult_120 / peak_index) * peak_index;
    }

    assert(fs_mult_120 >= peak_index);  // Should be handled in Process().
    // Copy first part; 0 to 15 ms.
    output->PushBackInterleaved(input, fs_mult_120 * num_channels_);
    // Copy the |peak_index| starting at 15 ms to |temp_vector|.
    AudioMultiVector temp_vector(num_channels_);
    temp_vector.PushBackInterleaved(&input[fs_mult_120 * num_channels_],
                                    peak_index * num_channels_);
    // Cross-fade |temp_vector| onto the end of |output|. This is where the
    // pitch period is effectively removed.
    output->CrossFade(temp_vector, peak_index);
    // Copy the last unmodified part, 15 ms + pitch period until the end.
    output->PushBackInterleaved(
        &input[(fs_mult_120 + peak_index) * num_channels_],
        input_length - (fs_mult_120 + peak_index) * num_channels_);

    // Distinguish a regular success from a low-energy (passive) one so the
    // caller can treat them differently.
    if (active_speech) {
      return kSuccess;
    } else {
      return kSuccessLowEnergy;
    }
  } else {
    // Accelerate not allowed. Simply move all data from decoded to outData.
    output->PushBackInterleaved(input, input_length);
    return kNoStretch;
  }
}
|
||||
|
||||
// Creates a new Accelerate object on the heap. Ownership of the returned raw
// pointer is transferred to the caller, who is responsible for deleting it.
Accelerate* AccelerateFactory::Create(
    int sample_rate_hz,
    size_t num_channels,
    const BackgroundNoise& background_noise) const {
  return new Accelerate(sample_rate_hz, num_channels, background_noise);
}
|
||||
|
||||
} // namespace webrtc
|
||||
81
modules/audio_coding/neteq/accelerate.h
Normal file
81
modules/audio_coding/neteq/accelerate.h
Normal file
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/time_stretch.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class BackgroundNoise;
|
||||
|
||||
// This class implements the Accelerate operation. Most of the work is done
|
||||
// in the base class TimeStretch, which is shared with the PreemptiveExpand
|
||||
// operation. In the Accelerate class, the operations that are specific to
|
||||
// Accelerate are implemented.
|
||||
// This class implements the Accelerate operation. Most of the work is done
// in the base class TimeStretch, which is shared with the PreemptiveExpand
// operation. In the Accelerate class, the operations that are specific to
// Accelerate are implemented.
class Accelerate : public TimeStretch {
 public:
  Accelerate(int sample_rate_hz, size_t num_channels,
             const BackgroundNoise& background_noise)
      : TimeStretch(sample_rate_hz, num_channels, background_noise) {
  }

  // This method performs the actual Accelerate operation. The samples are
  // read from |input|, of length |input_length| elements, and are written to
  // |output|. The number of samples removed through time-stretching is
  // provided in the output |length_change_samples|. The method returns
  // the outcome of the operation as an enumerator value. If |fast_accelerate|
  // is true, the algorithm will relax the requirements on finding strong
  // correlations, and may remove multiple pitch periods if possible.
  ReturnCodes Process(const int16_t* input,
                      size_t input_length,
                      bool fast_accelerate,
                      AudioMultiVector* output,
                      size_t* length_change_samples);

 protected:
  // Sets the parameters |best_correlation| and |peak_index| to suitable
  // values when the signal contains no active speech.
  void SetParametersForPassiveSpeech(size_t len,
                                     int16_t* best_correlation,
                                     size_t* peak_index) const override;

  // Checks the criteria for performing the time-stretching operation and,
  // if possible, performs the time-stretching.
  ReturnCodes CheckCriteriaAndStretch(const int16_t* input,
                                      size_t input_length,
                                      size_t peak_index,
                                      int16_t best_correlation,
                                      bool active_speech,
                                      bool fast_mode,
                                      AudioMultiVector* output) const override;

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(Accelerate);
};
|
||||
|
||||
struct AccelerateFactory {
|
||||
AccelerateFactory() {}
|
||||
virtual ~AccelerateFactory() {}
|
||||
|
||||
virtual Accelerate* Create(int sample_rate_hz,
|
||||
size_t num_channels,
|
||||
const BackgroundNoise& background_noise) const;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_ACCELERATE_H_
|
||||
88
modules/audio_coding/neteq/audio_decoder_impl.cc
Normal file
88
modules/audio_coding/neteq/audio_decoder_impl.cc
Normal file
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
#ifdef WEBRTC_CODEC_G722
|
||||
#include "webrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h"
|
||||
#endif
|
||||
#ifdef WEBRTC_CODEC_ILBC
|
||||
#include "webrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h"
|
||||
#endif
|
||||
#ifdef WEBRTC_CODEC_ISACFX
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/include/audio_decoder_isacfix.h" // nogncheck
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/include/audio_encoder_isacfix.h" // nogncheck
|
||||
#endif
|
||||
#ifdef WEBRTC_CODEC_ISAC
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/include/audio_decoder_isac.h" // nogncheck
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/include/audio_encoder_isac.h" // nogncheck
|
||||
#endif
|
||||
#ifdef WEBRTC_CODEC_OPUS
|
||||
#include "webrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h"
|
||||
#endif
|
||||
#include "webrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Returns true if |codec_type| is supported in this build. The set of
// supported codecs depends on the WEBRTC_CODEC_* compile-time flags; codecs
// whose flag is not defined fall through to the default case and report
// false.
bool CodecSupported(NetEqDecoder codec_type) {
  switch (codec_type) {
    case NetEqDecoder::kDecoderPCMu:
    case NetEqDecoder::kDecoderPCMa:
    case NetEqDecoder::kDecoderPCMu_2ch:
    case NetEqDecoder::kDecoderPCMa_2ch:
#ifdef WEBRTC_CODEC_ILBC
    case NetEqDecoder::kDecoderILBC:
#endif
#if defined(WEBRTC_CODEC_ISACFX) || defined(WEBRTC_CODEC_ISAC)
    case NetEqDecoder::kDecoderISAC:
#endif
#ifdef WEBRTC_CODEC_ISAC
    case NetEqDecoder::kDecoderISACswb:
#endif
    case NetEqDecoder::kDecoderPCM16B:
    case NetEqDecoder::kDecoderPCM16Bwb:
    case NetEqDecoder::kDecoderPCM16Bswb32kHz:
    case NetEqDecoder::kDecoderPCM16Bswb48kHz:
    case NetEqDecoder::kDecoderPCM16B_2ch:
    case NetEqDecoder::kDecoderPCM16Bwb_2ch:
    case NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch:
    case NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch:
    case NetEqDecoder::kDecoderPCM16B_5ch:
#ifdef WEBRTC_CODEC_G722
    case NetEqDecoder::kDecoderG722:
    case NetEqDecoder::kDecoderG722_2ch:
#endif
#ifdef WEBRTC_CODEC_OPUS
    case NetEqDecoder::kDecoderOpus:
    case NetEqDecoder::kDecoderOpus_2ch:
#endif
    case NetEqDecoder::kDecoderRED:
    case NetEqDecoder::kDecoderAVT:
    case NetEqDecoder::kDecoderAVT16kHz:
    case NetEqDecoder::kDecoderAVT32kHz:
    case NetEqDecoder::kDecoderAVT48kHz:
    case NetEqDecoder::kDecoderCNGnb:
    case NetEqDecoder::kDecoderCNGwb:
    case NetEqDecoder::kDecoderCNGswb32kHz:
    case NetEqDecoder::kDecoderCNGswb48kHz:
    case NetEqDecoder::kDecoderArbitrary: {
      return true;
    }
    default: {
      return false;
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
31
modules/audio_coding/neteq/audio_decoder_impl.h
Normal file
31
modules/audio_coding/neteq/audio_decoder_impl.h
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_DECODER_IMPL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_DECODER_IMPL_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/neteq_decoder_enum.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
#ifdef WEBRTC_CODEC_G722
|
||||
#include "webrtc/modules/audio_coding/codecs/g722/g722_interface.h"
|
||||
#endif
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Returns true if |codec_type| is supported.
|
||||
bool CodecSupported(NetEqDecoder codec_type);
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_DECODER_IMPL_H_
|
||||
693
modules/audio_coding/neteq/audio_decoder_unittest.cc
Normal file
693
modules/audio_coding/neteq/audio_decoder_unittest.cc
Normal file
@ -0,0 +1,693 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/api/audio_codecs/opus/audio_encoder_opus.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/g711/audio_decoder_pcm.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/g711/audio_encoder_pcm.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/g722/audio_decoder_g722.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/ilbc/audio_encoder_ilbc.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/include/audio_decoder_isacfix.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/fix/include/audio_encoder_isacfix.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/include/audio_decoder_isac.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/include/audio_encoder_isac.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/pcm16b/audio_encoder_pcm16b.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
// The absolute difference between the input and output (the first channel) is
|
||||
// compared vs |tolerance|. The parameter |delay| is used to correct for codec
|
||||
// delays.
|
||||
void CompareInputOutput(const std::vector<int16_t>& input,
|
||||
const std::vector<int16_t>& output,
|
||||
size_t num_samples,
|
||||
size_t channels,
|
||||
int tolerance,
|
||||
int delay) {
|
||||
ASSERT_LE(num_samples, input.size());
|
||||
ASSERT_LE(num_samples * channels, output.size());
|
||||
for (unsigned int n = 0; n < num_samples - delay; ++n) {
|
||||
ASSERT_NEAR(input[n], output[channels * n + delay], tolerance)
|
||||
<< "Exit test on first diff; n = " << n;
|
||||
}
|
||||
}
|
||||
|
||||
// The absolute difference between the first two channels in |output| is
|
||||
// compared vs |tolerance|.
|
||||
// The absolute difference between the first two channels in |output| is
// compared vs |tolerance|. |output| holds interleaved samples, so frame n
// contributes output[channels * n] (channel 0) and output[channels * n + 1]
// (channel 1). Fails the enclosing gtest on the first differing frame.
void CompareTwoChannels(const std::vector<int16_t>& output,
                        size_t samples_per_channel,
                        size_t channels,
                        int tolerance) {
  ASSERT_GE(channels, 2u);
  ASSERT_LE(samples_per_channel * channels, output.size());
  for (unsigned int n = 0; n < samples_per_channel; ++n)
    ASSERT_NEAR(output[channels * n], output[channels * n + 1], tolerance)
        << "Stereo samples differ.";
}
|
||||
|
||||
// Calculates mean-squared error between input and output (the first channel).
|
||||
// The parameter |delay| is used to correct for codec delays.
|
||||
// Calculates mean-squared error between input and output (the first channel).
// The parameter |delay| is used to correct for codec delays: input sample n
// is compared with output sample channels * n + delay.
// Note: the |delay| precondition is asserted before the num_samples == 0
// early-out, so calling with num_samples == 0 trips the assert in debug
// builds (preserved from the original behavior).
double MseInputOutput(const std::vector<int16_t>& input,
                      const std::vector<int16_t>& output,
                      size_t num_samples,
                      size_t channels,
                      int delay) {
  assert(delay < static_cast<int>(num_samples));
  assert(num_samples <= input.size());
  assert(num_samples * channels <= output.size());
  if (num_samples == 0)
    return 0.0;
  double squared_sum = 0.0;
  for (size_t n = 0; n < num_samples - delay; ++n) {
    // Compute the difference in double before squaring. Squaring in int, as
    // the original code did, overflows (UB) once |diff| exceeds 46340, which
    // is reachable with int16 inputs (max |diff| is 65535).
    const double diff = input[n] - output[channels * n + delay];
    squared_sum += diff * diff;
  }
  return squared_sum / (num_samples - delay);
}
|
||||
} // namespace
|
||||
|
||||
// Base gtest fixture for codec encode/decode round-trip tests. A subclass
// configures |frame_size_|, |data_length_|, |channels_|, |decoder_| and
// |audio_encoder_| in its constructor, then the individual tests drive the
// EncodeDecodeTest/ReInitTest/DecodePlcTest helpers below. Input audio is
// read from the shared 32 kHz resource file and resampled to the codec rate.
class AudioDecoderTest : public ::testing::Test {
 protected:
  AudioDecoderTest()
      : input_audio_(
            webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
            32000),
        codec_input_rate_hz_(32000),  // Legacy default value.
        frame_size_(0),
        data_length_(0),
        channels_(1),
        payload_type_(17),
        decoder_(NULL) {}

  virtual ~AudioDecoderTest() {}

  virtual void SetUp() {
    // Pick up the true codec rate from the encoder the subclass installed.
    if (audio_encoder_)
      codec_input_rate_hz_ = audio_encoder_->SampleRateHz();
    // Create arrays.
    ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0";
  }

  virtual void TearDown() {
    // |decoder_| is a raw owning pointer set by the subclass constructor.
    delete decoder_;
    decoder_ = NULL;
  }

  virtual void InitEncoder() { }

  // Encodes |input_len_samples| mono samples from |input| in 10 ms chunks,
  // duplicating the signal to |channels_| channels, and appends the payload
  // to |output|. Returns the number of encoded bytes.
  // TODO(henrik.lundin) Change return type to size_t once most/all overriding
  // implementations are gone.
  virtual int EncodeFrame(const int16_t* input,
                          size_t input_len_samples,
                          rtc::Buffer* output) {
    AudioEncoder::EncodedInfo encoded_info;
    const size_t samples_per_10ms = audio_encoder_->SampleRateHz() / 100;
    RTC_CHECK_EQ(samples_per_10ms * audio_encoder_->Num10MsFramesInNextPacket(),
                 input_len_samples);
    std::unique_ptr<int16_t[]> interleaved_input(
        new int16_t[channels_ * samples_per_10ms]);
    for (size_t i = 0; i < audio_encoder_->Num10MsFramesInNextPacket(); ++i) {
      // The encoder must not emit a packet before the full frame is in.
      EXPECT_EQ(0u, encoded_info.encoded_bytes);

      // Duplicate the mono input signal to however many channels the test
      // wants.
      test::InputAudioFile::DuplicateInterleaved(input + i * samples_per_10ms,
                                                 samples_per_10ms, channels_,
                                                 interleaved_input.get());

      encoded_info = audio_encoder_->Encode(
          0, rtc::ArrayView<const int16_t>(interleaved_input.get(),
                                           audio_encoder_->NumChannels() *
                                               audio_encoder_->SampleRateHz() /
                                               100),
          output);
    }
    EXPECT_EQ(payload_type_, encoded_info.payload_type);
    return static_cast<int>(encoded_info.encoded_bytes);
  }

  // Encodes and decodes audio. The absolute difference between the input and
  // output is compared vs |tolerance|, and the mean-squared error is compared
  // with |mse|. The encoded stream should contain |expected_bytes|. For stereo
  // audio, the absolute difference between the two channels is compared vs
  // |channel_diff_tolerance|.
  void EncodeDecodeTest(size_t expected_bytes, int tolerance, double mse,
                        int delay = 0, int channel_diff_tolerance = 0) {
    ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0";
    ASSERT_GE(channel_diff_tolerance, 0) <<
        "Test must define a channel_diff_tolerance >= 0";
    size_t processed_samples = 0u;
    rtc::Buffer encoded;
    size_t encoded_bytes = 0u;
    InitEncoder();
    std::vector<int16_t> input;
    std::vector<int16_t> decoded;
    while (processed_samples + frame_size_ <= data_length_) {
      // Extend input vector with |frame_size_|.
      input.resize(input.size() + frame_size_, 0);
      // Read from input file.
      ASSERT_GE(input.size() - processed_samples, frame_size_);
      ASSERT_TRUE(input_audio_.Read(
          frame_size_, codec_input_rate_hz_, &input[processed_samples]));
      size_t enc_len = EncodeFrame(
          &input[processed_samples], frame_size_, &encoded);
      // Make sure that frame_size_ * channels_ samples are allocated and free.
      decoded.resize((processed_samples + frame_size_) * channels_, 0);
      AudioDecoder::SpeechType speech_type;
      size_t dec_len = decoder_->Decode(
          &encoded.data()[encoded_bytes], enc_len, codec_input_rate_hz_,
          frame_size_ * channels_ * sizeof(int16_t),
          &decoded[processed_samples * channels_], &speech_type);
      EXPECT_EQ(frame_size_ * channels_, dec_len);
      encoded_bytes += enc_len;
      processed_samples += frame_size_;
    }
    // For some codecs it doesn't make sense to check expected number of bytes,
    // since the number can vary for different platforms. Opus and iSAC are
    // such codecs. In this case expected_bytes is set to 0.
    if (expected_bytes) {
      EXPECT_EQ(expected_bytes, encoded_bytes);
    }
    CompareInputOutput(
        input, decoded, processed_samples, channels_, tolerance, delay);
    if (channels_ == 2)
      CompareTwoChannels(
          decoded, processed_samples, channels_, channel_diff_tolerance);
    EXPECT_LE(
        MseInputOutput(input, decoded, processed_samples, channels_, delay),
        mse);
  }

  // Encodes a payload and decodes it twice with decoder re-init before each
  // decode. Verifies that the decoded result is the same.
  void ReInitTest() {
    InitEncoder();
    std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
    ASSERT_TRUE(
        input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
    rtc::Buffer encoded;
    size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded);
    size_t dec_len;
    AudioDecoder::SpeechType speech_type1, speech_type2;
    decoder_->Reset();
    std::unique_ptr<int16_t[]> output1(new int16_t[frame_size_ * channels_]);
    dec_len = decoder_->Decode(encoded.data(), enc_len, codec_input_rate_hz_,
                               frame_size_ * channels_ * sizeof(int16_t),
                               output1.get(), &speech_type1);
    ASSERT_LE(dec_len, frame_size_ * channels_);
    EXPECT_EQ(frame_size_ * channels_, dec_len);
    // Re-init decoder and decode again.
    decoder_->Reset();
    std::unique_ptr<int16_t[]> output2(new int16_t[frame_size_ * channels_]);
    dec_len = decoder_->Decode(encoded.data(), enc_len, codec_input_rate_hz_,
                               frame_size_ * channels_ * sizeof(int16_t),
                               output2.get(), &speech_type2);
    ASSERT_LE(dec_len, frame_size_ * channels_);
    EXPECT_EQ(frame_size_ * channels_, dec_len);
    for (unsigned int n = 0; n < frame_size_; ++n) {
      ASSERT_EQ(output1[n], output2[n]) << "Exit test on first diff; n = " << n;
    }
    EXPECT_EQ(speech_type1, speech_type2);
  }

  // Call DecodePlc and verify that the correct number of samples is produced.
  void DecodePlcTest() {
    InitEncoder();
    std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
    ASSERT_TRUE(
        input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
    rtc::Buffer encoded;
    size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded);
    AudioDecoder::SpeechType speech_type;
    decoder_->Reset();
    std::unique_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
    size_t dec_len = decoder_->Decode(encoded.data(), enc_len,
                                      codec_input_rate_hz_,
                                      frame_size_ * channels_ * sizeof(int16_t),
                                      output.get(), &speech_type);
    EXPECT_EQ(frame_size_ * channels_, dec_len);
    // Call DecodePlc and verify that we get one frame of data.
    // (Overwrite the output from the above Decode call, but that does not
    // matter.)
    dec_len = decoder_->DecodePlc(1, output.get());
    EXPECT_EQ(frame_size_ * channels_, dec_len);
  }

  test::ResampleInputAudioFile input_audio_;
  int codec_input_rate_hz_;   // Sample rate fed to the codec, in Hz.
  size_t frame_size_;         // Samples per channel per encoded frame.
  size_t data_length_;        // Total number of samples to process.
  size_t channels_;           // Number of interleaved channels.
  const int payload_type_;    // RTP payload type expected from the encoder.
  AudioDecoder* decoder_;     // Owned; deleted in TearDown().
  std::unique_ptr<AudioEncoder> audio_encoder_;
};
|
||||
|
||||
// Fixture for G.711 mu-law: mono, 20 ms frames (160 samples at the legacy
// 8-samples-per-ms rate implied by frame_size_ / 8 below).
class AudioDecoderPcmUTest : public AudioDecoderTest {
 protected:
  AudioDecoderPcmUTest() : AudioDecoderTest() {
    frame_size_ = 160;
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderPcmU(1);
    AudioEncoderPcmU::Config config;
    config.frame_size_ms = static_cast<int>(frame_size_ / 8);
    config.payload_type = payload_type_;
    audio_encoder_.reset(new AudioEncoderPcmU(config));
  }
};
|
||||
|
||||
// Fixture for G.711 A-law: identical setup to the mu-law fixture above,
// but with the PCMa encoder/decoder pair.
class AudioDecoderPcmATest : public AudioDecoderTest {
 protected:
  AudioDecoderPcmATest() : AudioDecoderTest() {
    frame_size_ = 160;
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderPcmA(1);
    AudioEncoderPcmA::Config config;
    config.frame_size_ms = static_cast<int>(frame_size_ / 8);
    config.payload_type = payload_type_;
    audio_encoder_.reset(new AudioEncoderPcmA(config));
  }
};
|
||||
|
||||
// Fixture for linear PCM16b: mono, 16 kHz, 20 ms frames.
class AudioDecoderPcm16BTest : public AudioDecoderTest {
 protected:
  AudioDecoderPcm16BTest() : AudioDecoderTest() {
    codec_input_rate_hz_ = 16000;
    frame_size_ = 20 * codec_input_rate_hz_ / 1000;  // 20 ms.
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderPcm16B(codec_input_rate_hz_, 1);
    assert(decoder_);
    AudioEncoderPcm16B::Config config;
    config.sample_rate_hz = codec_input_rate_hz_;
    config.frame_size_ms =
        static_cast<int>(frame_size_ / (config.sample_rate_hz / 1000));
    config.payload_type = payload_type_;
    audio_encoder_.reset(new AudioEncoderPcm16B(config));
  }
};
|
||||
|
||||
// Fixture for iLBC: mono, 8 kHz, 30 ms frames (240 samples).
class AudioDecoderIlbcTest : public AudioDecoderTest {
 protected:
  AudioDecoderIlbcTest() : AudioDecoderTest() {
    codec_input_rate_hz_ = 8000;
    frame_size_ = 240;
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderIlbcImpl;
    assert(decoder_);
    AudioEncoderIlbcConfig config;
    config.frame_size_ms = 30;
    audio_encoder_.reset(new AudioEncoderIlbcImpl(config, payload_type_));
  }

  // Overload the default test since iLBC's function WebRtcIlbcfix_NetEqPlc does
  // not return any data. It simply resets a few states and returns 0.
  // NOTE(review): this shadows the base-class DecodePlcTest (which is
  // non-virtual), so it only takes effect when called through this type.
  void DecodePlcTest() {
    InitEncoder();
    std::unique_ptr<int16_t[]> input(new int16_t[frame_size_]);
    ASSERT_TRUE(
        input_audio_.Read(frame_size_, codec_input_rate_hz_, input.get()));
    rtc::Buffer encoded;
    size_t enc_len = EncodeFrame(input.get(), frame_size_, &encoded);
    AudioDecoder::SpeechType speech_type;
    decoder_->Reset();
    std::unique_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]);
    size_t dec_len = decoder_->Decode(encoded.data(), enc_len,
                                      codec_input_rate_hz_,
                                      frame_size_ * channels_ * sizeof(int16_t),
                                      output.get(), &speech_type);
    EXPECT_EQ(frame_size_, dec_len);
    // Simply call DecodePlc and verify that we get 0 as return value.
    EXPECT_EQ(0U, decoder_->DecodePlc(1, output.get()));
  }
};
|
||||
|
||||
// Fixture for floating-point iSAC: mono, 16 kHz, 30 ms frames (480 samples),
// non-adaptive bitrate mode.
class AudioDecoderIsacFloatTest : public AudioDecoderTest {
 protected:
  AudioDecoderIsacFloatTest() : AudioDecoderTest() {
    codec_input_rate_hz_ = 16000;
    frame_size_ = 480;
    data_length_ = 10 * frame_size_;
    AudioEncoderIsacFloatImpl::Config config;
    config.payload_type = payload_type_;
    config.sample_rate_hz = codec_input_rate_hz_;
    config.adaptive_mode = false;
    config.frame_size_ms =
        1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
    audio_encoder_.reset(new AudioEncoderIsacFloatImpl(config));
    decoder_ = new AudioDecoderIsacFloatImpl(codec_input_rate_hz_);
  }
};
|
||||
|
||||
// Fixture for super-wideband iSAC: same as the float fixture but at 32 kHz
// with 30 ms frames (960 samples).
class AudioDecoderIsacSwbTest : public AudioDecoderTest {
 protected:
  AudioDecoderIsacSwbTest() : AudioDecoderTest() {
    codec_input_rate_hz_ = 32000;
    frame_size_ = 960;
    data_length_ = 10 * frame_size_;
    AudioEncoderIsacFloatImpl::Config config;
    config.payload_type = payload_type_;
    config.sample_rate_hz = codec_input_rate_hz_;
    config.adaptive_mode = false;
    config.frame_size_ms =
        1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
    audio_encoder_.reset(new AudioEncoderIsacFloatImpl(config));
    decoder_ = new AudioDecoderIsacFloatImpl(codec_input_rate_hz_);
  }
};
|
||||
|
||||
// Fixture for fixed-point iSAC: mono, 16 kHz, 30 ms frames (480 samples),
// non-adaptive bitrate mode.
class AudioDecoderIsacFixTest : public AudioDecoderTest {
 protected:
  AudioDecoderIsacFixTest() : AudioDecoderTest() {
    codec_input_rate_hz_ = 16000;
    frame_size_ = 480;
    data_length_ = 10 * frame_size_;
    AudioEncoderIsacFixImpl::Config config;
    config.payload_type = payload_type_;
    config.sample_rate_hz = codec_input_rate_hz_;
    config.adaptive_mode = false;
    config.frame_size_ms =
        1000 * static_cast<int>(frame_size_) / codec_input_rate_hz_;
    audio_encoder_.reset(new AudioEncoderIsacFixImpl(config));
    decoder_ = new AudioDecoderIsacFixImpl(codec_input_rate_hz_);
  }
};
|
||||
|
||||
// Fixture for G.722: mono, 16 kHz, 10 ms frames (160 samples).
class AudioDecoderG722Test : public AudioDecoderTest {
 protected:
  AudioDecoderG722Test() : AudioDecoderTest() {
    codec_input_rate_hz_ = 16000;
    frame_size_ = 160;
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderG722Impl;
    assert(decoder_);
    AudioEncoderG722Config config;
    config.frame_size_ms = 10;
    config.num_channels = 1;
    audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_));
  }
};
|
||||
|
||||
// Fixture for stereo G.722: same as the mono fixture but with two channels.
class AudioDecoderG722StereoTest : public AudioDecoderTest {
 protected:
  AudioDecoderG722StereoTest() : AudioDecoderTest() {
    channels_ = 2;
    codec_input_rate_hz_ = 16000;
    frame_size_ = 160;
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderG722StereoImpl;
    assert(decoder_);
    AudioEncoderG722Config config;
    config.frame_size_ms = 10;
    config.num_channels = 2;
    audio_encoder_.reset(new AudioEncoderG722Impl(config, payload_type_));
  }
};
|
||||
|
||||
class AudioDecoderOpusTest : public AudioDecoderTest {
|
||||
protected:
|
||||
AudioDecoderOpusTest() : AudioDecoderTest() {
|
||||
codec_input_rate_hz_ = 48000;
|
||||
frame_size_ = 480;
|
||||
data_length_ = 10 * frame_size_;
|
||||
decoder_ = new AudioDecoderOpusImpl(1);
|
||||
AudioEncoderOpusConfig config;
|
||||
config.frame_size_ms = static_cast<int>(frame_size_) / 48;
|
||||
config.application = AudioEncoderOpusConfig::ApplicationMode::kVoip;
|
||||
audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder(config, payload_type_);
|
||||
}
|
||||
};
|
||||
|
||||
class AudioDecoderOpusStereoTest : public AudioDecoderOpusTest {
|
||||
protected:
|
||||
AudioDecoderOpusStereoTest() : AudioDecoderOpusTest() {
|
||||
channels_ = 2;
|
||||
delete decoder_;
|
||||
decoder_ = new AudioDecoderOpusImpl(2);
|
||||
AudioEncoderOpusConfig config;
|
||||
config.frame_size_ms = static_cast<int>(frame_size_) / 48;
|
||||
config.num_channels = 2;
|
||||
config.application = AudioEncoderOpusConfig::ApplicationMode::kAudio;
|
||||
audio_encoder_ = AudioEncoderOpus::MakeAudioEncoder(config, payload_type_);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(AudioDecoderPcmUTest, EncodeDecode) {
  // Tolerances account for u-law quantization noise.
  const int kTolerance = 251;
  const double kMse = 1734.0;
  EncodeDecodeTest(data_length_, kTolerance, kMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
|
||||
|
||||
namespace {
|
||||
int SetAndGetTargetBitrate(AudioEncoder* audio_encoder, int rate) {
|
||||
audio_encoder->OnReceivedUplinkBandwidth(rate, rtc::Optional<int64_t>());
|
||||
return audio_encoder->GetTargetBitrate();
|
||||
}
|
||||
void TestSetAndGetTargetBitratesWithFixedCodec(AudioEncoder* audio_encoder,
|
||||
int fixed_rate) {
|
||||
EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, 32000));
|
||||
EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate - 1));
|
||||
EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate));
|
||||
EXPECT_EQ(fixed_rate, SetAndGetTargetBitrate(audio_encoder, fixed_rate + 1));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST_F(AudioDecoderPcmUTest, SetTargetBitrate) {
  // G.711 u-law always runs at 64 kbps.
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
}

TEST_F(AudioDecoderPcmATest, EncodeDecode) {
  const int kTolerance = 308;
  const double kMse = 1931.0;
  EncodeDecodeTest(data_length_, kTolerance, kMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}

TEST_F(AudioDecoderPcmATest, SetTargetBitrate) {
  // G.711 A-law always runs at 64 kbps.
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
}

TEST_F(AudioDecoderPcm16BTest, EncodeDecode) {
  // Linear PCM is lossless, so the round trip must be bit-exact.
  const int kTolerance = 0;
  const double kMse = 0.0;
  EncodeDecodeTest(2 * data_length_, kTolerance, kMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}

TEST_F(AudioDecoderPcm16BTest, SetTargetBitrate) {
  // 16 bits per sample at the codec rate.
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(),
                                            codec_input_rate_hz_ * 16);
}

TEST_F(AudioDecoderIlbcTest, EncodeDecode) {
  const int kTolerance = 6808;
  const double kMse = 2.13e6;
  const int kDelaySamples = 80;  // Delay from input to output.
  EncodeDecodeTest(500, kTolerance, kMse, kDelaySamples);
  ReInitTest();
  // iLBC provides packet-loss concealment; exercise it too.
  EXPECT_TRUE(decoder_->HasDecodePlc());
  DecodePlcTest();
}

TEST_F(AudioDecoderIlbcTest, SetTargetBitrate) {
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 13333);
}
|
||||
|
||||
TEST_F(AudioDecoderIsacFloatTest, EncodeDecode) {
  const int kTolerance = 3399;
  const double kMse = 434951.0;
  const int kDelaySamples = 48;  // Delay from input to output.
  EncodeDecodeTest(0, kTolerance, kMse, kDelaySamples);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}

TEST_F(AudioDecoderIsacFloatTest, SetTargetBitrate) {
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 32000);
}

TEST_F(AudioDecoderIsacSwbTest, EncodeDecode) {
  const int kTolerance = 19757;
  const double kMse = 8.18e6;
  const int kDelaySamples = 160;  // Delay from input to output.
  EncodeDecodeTest(0, kTolerance, kMse, kDelaySamples);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}

TEST_F(AudioDecoderIsacSwbTest, SetTargetBitrate) {
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 32000);
}

TEST_F(AudioDecoderIsacFixTest, EncodeDecode) {
  const int kTolerance = 11034;
  const double kMse = 3.46e6;
  const int kDelaySamples = 54;  // Delay from input to output.
  // The encoded size differs slightly between platforms.
#if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM)
  static const int kEncodedBytes = 685;
#elif defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM64)
  static const int kEncodedBytes = 673;
#else
  static const int kEncodedBytes = 671;
#endif
  EncodeDecodeTest(kEncodedBytes, kTolerance, kMse, kDelaySamples);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}

TEST_F(AudioDecoderIsacFixTest, SetTargetBitrate) {
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 32000);
}

TEST_F(AudioDecoderG722Test, EncodeDecode) {
  const int kTolerance = 6176;
  const double kMse = 238630.0;
  const int kDelaySamples = 22;  // Delay from input to output.
  // G.722 compresses 2:1, hence half the input length of encoded bytes.
  EncodeDecodeTest(data_length_ / 2, kTolerance, kMse, kDelaySamples);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}

TEST_F(AudioDecoderG722Test, SetTargetBitrate) {
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 64000);
}

TEST_F(AudioDecoderG722StereoTest, EncodeDecode) {
  const int kTolerance = 6176;
  const int kChannelDiffTolerance = 0;
  const double kMse = 238630.0;
  const int kDelaySamples = 22;  // Delay from input to output.
  EncodeDecodeTest(data_length_, kTolerance, kMse, kDelaySamples,
                   kChannelDiffTolerance);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}

TEST_F(AudioDecoderG722StereoTest, SetTargetBitrate) {
  // Two channels at 64 kbps each.
  TestSetAndGetTargetBitratesWithFixedCodec(audio_encoder_.get(), 128000);
}

TEST_F(AudioDecoderOpusTest, EncodeDecode) {
  const int kTolerance = 6176;
  const double kMse = 238630.0;
  const int kDelaySamples = 22;  // Delay from input to output.
  EncodeDecodeTest(0, kTolerance, kMse, kDelaySamples);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
|
||||
|
||||
namespace {
|
||||
void TestOpusSetTargetBitrates(AudioEncoder* audio_encoder) {
|
||||
EXPECT_EQ(6000, SetAndGetTargetBitrate(audio_encoder, 5999));
|
||||
EXPECT_EQ(6000, SetAndGetTargetBitrate(audio_encoder, 6000));
|
||||
EXPECT_EQ(32000, SetAndGetTargetBitrate(audio_encoder, 32000));
|
||||
EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder, 510000));
|
||||
EXPECT_EQ(510000, SetAndGetTargetBitrate(audio_encoder, 511000));
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST_F(AudioDecoderOpusTest, SetTargetBitrate) {
  TestOpusSetTargetBitrates(audio_encoder_.get());
}

TEST_F(AudioDecoderOpusStereoTest, EncodeDecode) {
  const int kTolerance = 6176;
  const int kChannelDiffTolerance = 0;
  const double kMse = 238630.0;
  const int kDelaySamples = 22;  // Delay from input to output.
  EncodeDecodeTest(0, kTolerance, kMse, kDelaySamples, kChannelDiffTolerance);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}

TEST_F(AudioDecoderOpusStereoTest, SetTargetBitrate) {
  TestOpusSetTargetBitrates(audio_encoder_.get());
}
|
||||
|
||||
namespace {
// Compile-time flags recording which optional codecs this build includes.
// Each mirrors the corresponding WEBRTC_CODEC_* build define.
#ifdef WEBRTC_CODEC_ILBC
const bool has_ilbc = true;
#else
const bool has_ilbc = false;
#endif
#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
const bool has_isac = true;
#else
const bool has_isac = false;
#endif
// Super-wideband iSAC requires the floating-point build.
#ifdef WEBRTC_CODEC_ISAC
const bool has_isac_swb = true;
#else
const bool has_isac_swb = false;
#endif
#ifdef WEBRTC_CODEC_G722
const bool has_g722 = true;
#else
const bool has_g722 = false;
#endif
#ifdef WEBRTC_CODEC_OPUS
const bool has_opus = true;
#else
const bool has_opus = false;
#endif
}  // namespace
|
||||
|
||||
// Verifies that CodecSupported() reflects exactly the set of codecs compiled
// into this build: always-available codecs return true, optional ones follow
// their build flag.
TEST(AudioDecoder, CodecSupported) {
  // G.711 variants are always available.
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCMu));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCMa));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCMu_2ch));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCMa_2ch));
  // Optional codecs follow their build flags.
  EXPECT_EQ(has_ilbc, CodecSupported(NetEqDecoder::kDecoderILBC));
  EXPECT_EQ(has_isac, CodecSupported(NetEqDecoder::kDecoderISAC));
  EXPECT_EQ(has_isac_swb, CodecSupported(NetEqDecoder::kDecoderISACswb));
  // Linear PCM at all rates and channel counts is always available.
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16B));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16Bwb));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16Bswb32kHz));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16Bswb48kHz));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16B_2ch));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16Bwb_2ch));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderPCM16B_5ch));
  EXPECT_EQ(has_g722, CodecSupported(NetEqDecoder::kDecoderG722));
  EXPECT_EQ(has_g722, CodecSupported(NetEqDecoder::kDecoderG722_2ch));
  // Pseudo-codecs (RED, DTMF/AVT, comfort noise) are always available.
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderRED));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderAVT));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderAVT16kHz));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderAVT32kHz));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderAVT48kHz));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderCNGnb));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderCNGwb));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderCNGswb32kHz));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderCNGswb48kHz));
  EXPECT_TRUE(CodecSupported(NetEqDecoder::kDecoderArbitrary));
  EXPECT_EQ(has_opus, CodecSupported(NetEqDecoder::kDecoderOpus));
  EXPECT_EQ(has_opus, CodecSupported(NetEqDecoder::kDecoderOpus_2ch));
}
|
||||
|
||||
} // namespace webrtc
|
||||
222
modules/audio_coding/neteq/audio_multi_vector.cc
Normal file
222
modules/audio_coding/neteq/audio_multi_vector.cc
Normal file
@ -0,0 +1,222 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"

#include <assert.h>

#include <algorithm>
#include <vector>

#include "webrtc/rtc_base/checks.h"
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates an empty AudioMultiVector with |N| channels. |N| must be > 0; in
// release builds an invalid value is clamped to one channel.
AudioMultiVector::AudioMultiVector(size_t N) {
  assert(N > 0);
  if (N < 1)
    N = 1;
  for (size_t i = 0; i < N; ++i)
    channels_.push_back(new AudioVector);
  num_channels_ = N;
}

// Same as above, but every channel starts out with |initial_size| elements.
AudioMultiVector::AudioMultiVector(size_t N, size_t initial_size) {
  assert(N > 0);
  if (N < 1)
    N = 1;
  for (size_t i = 0; i < N; ++i)
    channels_.push_back(new AudioVector(initial_size));
  num_channels_ = N;
}
|
||||
|
||||
AudioMultiVector::~AudioMultiVector() {
  // The channel vectors are owned by this object; release them all.
  for (AudioVector* channel : channels_)
    delete channel;
}
|
||||
|
||||
void AudioMultiVector::Clear() {
|
||||
for (size_t i = 0; i < num_channels_; ++i) {
|
||||
channels_[i]->Clear();
|
||||
}
|
||||
}
|
||||
|
||||
void AudioMultiVector::Zeros(size_t length) {
|
||||
for (size_t i = 0; i < num_channels_; ++i) {
|
||||
channels_[i]->Clear();
|
||||
channels_[i]->Extend(length);
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces the contents of |copy_to| with a copy of this object. Copying to
// a null pointer is a no-op.
void AudioMultiVector::CopyTo(AudioMultiVector* copy_to) const {
  if (!copy_to)
    return;
  for (size_t i = 0; i < num_channels_; ++i)
    channels_[i]->CopyTo(&(*copy_to)[i]);
}
|
||||
|
||||
void AudioMultiVector::PushBackInterleaved(const int16_t* append_this,
|
||||
size_t length) {
|
||||
assert(length % num_channels_ == 0);
|
||||
if (num_channels_ == 1) {
|
||||
// Special case to avoid extra allocation and data shuffling.
|
||||
channels_[0]->PushBack(append_this, length);
|
||||
return;
|
||||
}
|
||||
size_t length_per_channel = length / num_channels_;
|
||||
int16_t* temp_array = new int16_t[length_per_channel]; // Temporary storage.
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
// Copy elements to |temp_array|.
|
||||
// Set |source_ptr| to first element of this channel.
|
||||
const int16_t* source_ptr = &append_this[channel];
|
||||
for (size_t i = 0; i < length_per_channel; ++i) {
|
||||
temp_array[i] = *source_ptr;
|
||||
source_ptr += num_channels_; // Jump to next element of this channel.
|
||||
}
|
||||
channels_[channel]->PushBack(temp_array, length_per_channel);
|
||||
}
|
||||
delete [] temp_array;
|
||||
}
|
||||
|
||||
void AudioMultiVector::PushBack(const AudioMultiVector& append_this) {
|
||||
assert(num_channels_ == append_this.num_channels_);
|
||||
if (num_channels_ == append_this.num_channels_) {
|
||||
for (size_t i = 0; i < num_channels_; ++i) {
|
||||
channels_[i]->PushBack(append_this[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AudioMultiVector::PushBackFromIndex(const AudioMultiVector& append_this,
|
||||
size_t index) {
|
||||
assert(index < append_this.Size());
|
||||
index = std::min(index, append_this.Size() - 1);
|
||||
size_t length = append_this.Size() - index;
|
||||
assert(num_channels_ == append_this.num_channels_);
|
||||
if (num_channels_ == append_this.num_channels_) {
|
||||
for (size_t i = 0; i < num_channels_; ++i) {
|
||||
channels_[i]->PushBack(append_this[i], length, index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AudioMultiVector::PopFront(size_t length) {
|
||||
for (size_t i = 0; i < num_channels_; ++i) {
|
||||
channels_[i]->PopFront(length);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioMultiVector::PopBack(size_t length) {
|
||||
for (size_t i = 0; i < num_channels_; ++i) {
|
||||
channels_[i]->PopBack(length);
|
||||
}
|
||||
}
|
||||
|
||||
size_t AudioMultiVector::ReadInterleaved(size_t length,
|
||||
int16_t* destination) const {
|
||||
return ReadInterleavedFromIndex(0, length, destination);
|
||||
}
|
||||
|
||||
size_t AudioMultiVector::ReadInterleavedFromIndex(size_t start_index,
|
||||
size_t length,
|
||||
int16_t* destination) const {
|
||||
RTC_DCHECK(destination);
|
||||
size_t index = 0; // Number of elements written to |destination| so far.
|
||||
RTC_DCHECK_LE(start_index, Size());
|
||||
start_index = std::min(start_index, Size());
|
||||
if (length + start_index > Size()) {
|
||||
length = Size() - start_index;
|
||||
}
|
||||
if (num_channels_ == 1) {
|
||||
// Special case to avoid the nested for loop below.
|
||||
(*this)[0].CopyTo(length, start_index, destination);
|
||||
return length;
|
||||
}
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
destination[index] = (*this)[channel][i + start_index];
|
||||
++index;
|
||||
}
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
size_t AudioMultiVector::ReadInterleavedFromEnd(size_t length,
|
||||
int16_t* destination) const {
|
||||
length = std::min(length, Size()); // Cannot read more than Size() elements.
|
||||
return ReadInterleavedFromIndex(Size() - length, length, destination);
|
||||
}
|
||||
|
||||
void AudioMultiVector::OverwriteAt(const AudioMultiVector& insert_this,
|
||||
size_t length,
|
||||
size_t position) {
|
||||
assert(num_channels_ == insert_this.num_channels_);
|
||||
// Cap |length| at the length of |insert_this|.
|
||||
assert(length <= insert_this.Size());
|
||||
length = std::min(length, insert_this.Size());
|
||||
if (num_channels_ == insert_this.num_channels_) {
|
||||
for (size_t i = 0; i < num_channels_; ++i) {
|
||||
channels_[i]->OverwriteAt(insert_this[i], length, position);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AudioMultiVector::CrossFade(const AudioMultiVector& append_this,
|
||||
size_t fade_length) {
|
||||
assert(num_channels_ == append_this.num_channels_);
|
||||
if (num_channels_ == append_this.num_channels_) {
|
||||
for (size_t i = 0; i < num_channels_; ++i) {
|
||||
channels_[i]->CrossFade(append_this[i], fade_length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t AudioMultiVector::Channels() const {
|
||||
return num_channels_;
|
||||
}
|
||||
|
||||
size_t AudioMultiVector::Size() const {
|
||||
assert(channels_[0]);
|
||||
return channels_[0]->Size();
|
||||
}
|
||||
|
||||
void AudioMultiVector::AssertSize(size_t required_size) {
|
||||
if (Size() < required_size) {
|
||||
size_t extend_length = required_size - Size();
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
channels_[channel]->Extend(extend_length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool AudioMultiVector::Empty() const {
|
||||
assert(channels_[0]);
|
||||
return channels_[0]->Empty();
|
||||
}
|
||||
|
||||
void AudioMultiVector::CopyChannel(size_t from_channel, size_t to_channel) {
|
||||
assert(from_channel < num_channels_);
|
||||
assert(to_channel < num_channels_);
|
||||
channels_[from_channel]->CopyTo(channels_[to_channel]);
|
||||
}
|
||||
|
||||
const AudioVector& AudioMultiVector::operator[](size_t index) const {
  return *channels_[index];
}

AudioVector& AudioMultiVector::operator[](size_t index) {
  return *channels_[index];
}
|
||||
|
||||
} // namespace webrtc
|
||||
139
modules/audio_coding/neteq/audio_multi_vector.h
Normal file
139
modules/audio_coding/neteq/audio_multi_vector.h
Normal file
@ -0,0 +1,139 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_
|
||||
|
||||
#include <string.h> // Access to size_t.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_vector.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioMultiVector {
|
||||
public:
|
||||
// Creates an empty AudioMultiVector with |N| audio channels. |N| must be
|
||||
// larger than 0.
|
||||
explicit AudioMultiVector(size_t N);
|
||||
|
||||
// Creates an AudioMultiVector with |N| audio channels, each channel having
|
||||
// an initial size. |N| must be larger than 0.
|
||||
AudioMultiVector(size_t N, size_t initial_size);
|
||||
|
||||
virtual ~AudioMultiVector();
|
||||
|
||||
// Deletes all values and make the vector empty.
|
||||
virtual void Clear();
|
||||
|
||||
// Clears the vector and inserts |length| zeros into each channel.
|
||||
virtual void Zeros(size_t length);
|
||||
|
||||
// Copies all values from this vector to |copy_to|. Any contents in |copy_to|
|
||||
// are deleted. After the operation is done, |copy_to| will be an exact
|
||||
// replica of this object. The source and the destination must have the same
|
||||
// number of channels.
|
||||
virtual void CopyTo(AudioMultiVector* copy_to) const;
|
||||
|
||||
// Appends the contents of array |append_this| to the end of this
|
||||
// object. The array is assumed to be channel-interleaved. |length| must be
|
||||
// an even multiple of this object's number of channels.
|
||||
// The length of this object is increased with the |length| divided by the
|
||||
// number of channels.
|
||||
virtual void PushBackInterleaved(const int16_t* append_this, size_t length);
|
||||
|
||||
// Appends the contents of AudioMultiVector |append_this| to this object. The
|
||||
// length of this object is increased with the length of |append_this|.
|
||||
virtual void PushBack(const AudioMultiVector& append_this);
|
||||
|
||||
// Appends the contents of AudioMultiVector |append_this| to this object,
|
||||
// taken from |index| up until the end of |append_this|. The length of this
|
||||
// object is increased.
|
||||
virtual void PushBackFromIndex(const AudioMultiVector& append_this,
|
||||
size_t index);
|
||||
|
||||
// Removes |length| elements from the beginning of this object, from each
|
||||
// channel.
|
||||
virtual void PopFront(size_t length);
|
||||
|
||||
// Removes |length| elements from the end of this object, from each
|
||||
// channel.
|
||||
virtual void PopBack(size_t length);
|
||||
|
||||
// Reads |length| samples from each channel and writes them interleaved to
|
||||
// |destination|. The total number of elements written to |destination| is
|
||||
// returned, i.e., |length| * number of channels. If the AudioMultiVector
|
||||
// contains less than |length| samples per channel, this is reflected in the
|
||||
// return value.
|
||||
virtual size_t ReadInterleaved(size_t length, int16_t* destination) const;
|
||||
|
||||
// Like ReadInterleaved() above, but reads from |start_index| instead of from
|
||||
// the beginning.
|
||||
virtual size_t ReadInterleavedFromIndex(size_t start_index,
|
||||
size_t length,
|
||||
int16_t* destination) const;
|
||||
|
||||
// Like ReadInterleaved() above, but reads from the end instead of from
|
||||
// the beginning.
|
||||
virtual size_t ReadInterleavedFromEnd(size_t length,
|
||||
int16_t* destination) const;
|
||||
|
||||
// Overwrites each channel in this AudioMultiVector with values taken from
|
||||
// |insert_this|. The values are taken from the beginning of |insert_this| and
|
||||
// are inserted starting at |position|. |length| values are written into each
|
||||
// channel. If |length| and |position| are selected such that the new data
|
||||
// extends beyond the end of the current AudioVector, the vector is extended
|
||||
// to accommodate the new data. |length| is limited to the length of
|
||||
// |insert_this|.
|
||||
virtual void OverwriteAt(const AudioMultiVector& insert_this,
|
||||
size_t length,
|
||||
size_t position);
|
||||
|
||||
// Appends |append_this| to the end of the current vector. Lets the two
|
||||
// vectors overlap by |fade_length| samples (per channel), and cross-fade
|
||||
// linearly in this region.
|
||||
virtual void CrossFade(const AudioMultiVector& append_this,
|
||||
size_t fade_length);
|
||||
|
||||
// Returns the number of channels.
|
||||
virtual size_t Channels() const;
|
||||
|
||||
// Returns the number of elements per channel in this AudioMultiVector.
|
||||
virtual size_t Size() const;
|
||||
|
||||
// Verify that each channel can hold at least |required_size| elements. If
|
||||
// not, extend accordingly.
|
||||
virtual void AssertSize(size_t required_size);
|
||||
|
||||
virtual bool Empty() const;
|
||||
|
||||
// Copies the data between two channels in the AudioMultiVector. The method
|
||||
// does not add any new channel. Thus, |from_channel| and |to_channel| must
|
||||
// both be valid channel numbers.
|
||||
virtual void CopyChannel(size_t from_channel, size_t to_channel);
|
||||
|
||||
// Accesses and modifies a channel (i.e., an AudioVector object) of this
|
||||
// AudioMultiVector.
|
||||
const AudioVector& operator[](size_t index) const;
|
||||
AudioVector& operator[](size_t index);
|
||||
|
||||
protected:
|
||||
std::vector<AudioVector*> channels_;
|
||||
size_t num_channels_;
|
||||
|
||||
private:
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(AudioMultiVector);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_MULTI_VECTOR_H_
|
||||
323
modules/audio_coding/neteq/audio_multi_vector_unittest.cc
Normal file
323
modules/audio_coding/neteq/audio_multi_vector_unittest.cc
Normal file
@ -0,0 +1,323 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This is a value-parameterized test. The test cases are instantiated with
|
||||
// different values for the test parameter, which is used to determine the
|
||||
// number of channels in the AudioMultiBuffer. Note that it is not possible
|
||||
// to combine typed testing with value-parameterized testing, and since the
|
||||
// tests for AudioVector already covers a number of different type parameters,
|
||||
// this test focuses on testing different number of channels, and keeping the
|
||||
// value type constant.
|
||||
|
||||
// Value-parameterized fixture: the parameter is the number of channels of
// the AudioMultiVector under test.
class AudioMultiVectorTest : public ::testing::TestWithParam<size_t> {
 protected:
  AudioMultiVectorTest()
      : num_channels_(GetParam()),  // Test parameter.
        interleaved_length_(num_channels_ * array_length()) {
    array_interleaved_ = new int16_t[num_channels_ * array_length()];
  }

  ~AudioMultiVectorTest() {
    delete[] array_interleaved_;
  }

  virtual void SetUp() {
    // Mono reference array: 0, 1, 2, ...
    for (size_t i = 0; i < array_length(); ++i) {
      array_[i] = static_cast<int16_t>(i);
    }
    // Interleaved reference array: 100, 101, ... for the first channel,
    // 200, 201, ... for the second, and so on.
    int16_t* ptr = array_interleaved_;
    for (size_t i = 0; i < array_length(); ++i) {
      for (size_t j = 1; j <= num_channels_; ++j) {
        *ptr = j * 100 + i;
        ++ptr;
      }
    }
  }

  size_t array_length() const { return sizeof(array_) / sizeof(array_[0]); }

  const size_t num_channels_;
  size_t interleaved_length_;
  int16_t array_[10];
  int16_t* array_interleaved_;  // Owned; released in the destructor.
};
|
||||
|
||||
// Create and destroy AudioMultiVector objects, both empty and with a predefined
|
||||
// length.
|
||||
// Create and destroy AudioMultiVector objects, both empty and with a
// predefined length.
TEST_P(AudioMultiVectorTest, CreateAndDestroy) {
  AudioMultiVector vec1(num_channels_);
  EXPECT_TRUE(vec1.Empty());
  EXPECT_EQ(num_channels_, vec1.Channels());
  EXPECT_EQ(0u, vec1.Size());

  const size_t kInitialSize = 17;
  AudioMultiVector vec2(num_channels_, kInitialSize);
  EXPECT_FALSE(vec2.Empty());
  EXPECT_EQ(num_channels_, vec2.Channels());
  EXPECT_EQ(kInitialSize, vec2.Size());
}

// Test the subscript operator [] for getting and setting.
TEST_P(AudioMultiVectorTest, SubscriptOperator) {
  AudioMultiVector vec(num_channels_, array_length());
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    for (size_t i = 0; i < array_length(); ++i) {
      vec[channel][i] = static_cast<int16_t>(i);
      // Read back through the const overload.
      const AudioVector& audio_vec = vec[channel];
      EXPECT_EQ(static_cast<int16_t>(i), audio_vec[i]);
    }
  }
}

// Test PushBackInterleaved and CopyTo; Clear is exercised as well.
TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector vec_copy(num_channels_);
  vec.CopyTo(&vec_copy);  // Copy from |vec| to |vec_copy|.
  ASSERT_EQ(num_channels_, vec.Channels());
  ASSERT_EQ(array_length(), vec.Size());
  ASSERT_EQ(num_channels_, vec_copy.Channels());
  ASSERT_EQ(array_length(), vec_copy.Size());
  for (size_t channel = 0; channel < vec.Channels(); ++channel) {
    for (size_t i = 0; i < array_length(); ++i) {
      EXPECT_EQ(static_cast<int16_t>((channel + 1) * 100 + i), vec[channel][i]);
      EXPECT_EQ(vec[channel][i], vec_copy[channel][i]);
    }
  }

  // Clear |vec| and verify that it is empty.
  vec.Clear();
  EXPECT_TRUE(vec.Empty());

  // Copying an empty vector must empty the destination too.
  vec.CopyTo(&vec_copy);
  EXPECT_TRUE(vec_copy.Empty());
}

// Copying to a NULL pointer must be a harmless no-op.
TEST_P(AudioMultiVectorTest, CopyToNull) {
  AudioMultiVector vec(num_channels_);
  AudioMultiVector* vec_copy = NULL;
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  vec.CopyTo(vec_copy);
}
|
||||
|
||||
// Test the PushBack method with another AudioMultiVector as input argument.
|
||||
TEST_P(AudioMultiVectorTest, PushBackVector) {
|
||||
AudioMultiVector vec1(num_channels_, array_length());
|
||||
AudioMultiVector vec2(num_channels_, array_length());
|
||||
// Set the first vector to [0, 1, ..., array_length() - 1] +
|
||||
// 100 * channel_number.
|
||||
// Set the second vector to [array_length(), array_length() + 1, ...,
|
||||
// 2 * array_length() - 1] + 100 * channel_number.
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
for (size_t i = 0; i < array_length(); ++i) {
|
||||
vec1[channel][i] = static_cast<int16_t>(i + 100 * channel);
|
||||
vec2[channel][i] =
|
||||
static_cast<int16_t>(i + 100 * channel + array_length());
|
||||
}
|
||||
}
|
||||
// Append vec2 to the back of vec1.
|
||||
vec1.PushBack(vec2);
|
||||
ASSERT_EQ(2u * array_length(), vec1.Size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
for (size_t i = 0; i < 2 * array_length(); ++i) {
|
||||
EXPECT_EQ(static_cast<int16_t>(i + 100 * channel), vec1[channel][i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test the PushBackFromIndex method.
|
||||
TEST_P(AudioMultiVectorTest, PushBackFromIndex) {
|
||||
AudioMultiVector vec1(num_channels_);
|
||||
vec1.PushBackInterleaved(array_interleaved_, interleaved_length_);
|
||||
AudioMultiVector vec2(num_channels_);
|
||||
|
||||
// Append vec1 to the back of vec2 (which is empty). Read vec1 from the second
|
||||
// last element.
|
||||
vec2.PushBackFromIndex(vec1, array_length() - 2);
|
||||
ASSERT_EQ(2u, vec2.Size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
for (size_t i = 0; i < 2; ++i) {
|
||||
EXPECT_EQ(array_interleaved_[channel + num_channels_ *
|
||||
(array_length() - 2 + i)], vec2[channel][i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Starts with pushing some values to the vector, then test the Zeros method.
TEST_P(AudioMultiVectorTest, Zeros) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  // Zeros() should discard the pushed content and replace it with zeros.
  vec.Zeros(2 * array_length());
  ASSERT_EQ(num_channels_, vec.Channels());
  ASSERT_EQ(2u * array_length(), vec.Size());
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    for (size_t i = 0; i < 2 * array_length(); ++i) {
      EXPECT_EQ(0, vec[channel][i]);
    }
  }
}
|
||||
|
||||
// Test the ReadInterleaved method
TEST_P(AudioMultiVectorTest, ReadInterleaved) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  int16_t* output = new int16_t[interleaved_length_];
  // Read 5 samples.
  size_t read_samples = 5;
  EXPECT_EQ(num_channels_ * read_samples,
            vec.ReadInterleaved(read_samples, output));
  EXPECT_EQ(0,
            memcmp(array_interleaved_, output, read_samples * sizeof(int16_t)));

  // Read too many samples. Expect to get all samples from the vector.
  EXPECT_EQ(interleaved_length_,
            vec.ReadInterleaved(array_length() + 1, output));
  // NOTE(review): this memcmp only verifies the first |read_samples| values
  // even though the full vector was read — presumably intentional, but
  // comparing |interleaved_length_| bytes would be stronger; confirm.
  EXPECT_EQ(0,
            memcmp(array_interleaved_, output, read_samples * sizeof(int16_t)));

  delete [] output;
}
|
||||
|
||||
// Test the PopFront method.
TEST_P(AudioMultiVectorTest, PopFront) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  vec.PopFront(1);  // Remove one element from each channel.
  ASSERT_EQ(array_length() - 1u, vec.Size());
  // Let |ptr| point to the second element of the first channel in the
  // interleaved array.
  int16_t* ptr = &array_interleaved_[num_channels_];
  for (size_t i = 0; i < array_length() - 1; ++i) {
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      EXPECT_EQ(*ptr, vec[channel][i]);
      ++ptr;
    }
  }
  vec.PopFront(array_length());  // Remove more elements than vector size.
  EXPECT_EQ(0u, vec.Size());
}
|
||||
|
||||
// Test the PopBack method.
TEST_P(AudioMultiVectorTest, PopBack) {
  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  vec.PopBack(1);  // Remove one element from each channel.
  ASSERT_EQ(array_length() - 1u, vec.Size());
  // Let |ptr| point to the first element of the first channel in the
  // interleaved array.
  int16_t* ptr = array_interleaved_;
  for (size_t i = 0; i < array_length() - 1; ++i) {
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      EXPECT_EQ(*ptr, vec[channel][i]);
      ++ptr;
    }
  }
  vec.PopBack(array_length());  // Remove more elements than vector size.
  EXPECT_EQ(0u, vec.Size());
}
|
||||
|
||||
// Test the AssertSize method.
TEST_P(AudioMultiVectorTest, AssertSize) {
  AudioMultiVector vec(num_channels_, array_length());
  EXPECT_EQ(array_length(), vec.Size());
  // Start with asserting with smaller sizes than already allocated.
  vec.AssertSize(0);
  vec.AssertSize(array_length() - 1);
  // Nothing should have changed.
  EXPECT_EQ(array_length(), vec.Size());
  // Assert with one element longer than already allocated.
  vec.AssertSize(array_length() + 1);
  // Expect vector to have grown.
  EXPECT_EQ(array_length() + 1, vec.Size());
  // Also check the individual AudioVectors.
  for (size_t channel = 0; channel < vec.Channels(); ++channel) {
    EXPECT_EQ(array_length() + 1u, vec[channel].Size());
  }
}
|
||||
|
||||
// Test the OverwriteAt method. (The original comment here said "PushBack";
// that was a copy-paste leftover from the test above.)
TEST_P(AudioMultiVectorTest, OverwriteAt) {
  AudioMultiVector vec1(num_channels_);
  vec1.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector vec2(num_channels_);
  vec2.Zeros(3);  // 3 zeros in each channel.
  // Overwrite 3 samples of vec1, starting at position 5, with the zeros in
  // vec2.
  vec1.OverwriteAt(vec2, 3, 5);
  // Verify result.
  // Length remains the same.
  ASSERT_EQ(array_length(), vec1.Size());
  int16_t* ptr = array_interleaved_;
  // NOTE(review): the loop stops at array_length() - 1, so the very last
  // sample of each channel is never verified — confirm this is intentional.
  for (size_t i = 0; i < array_length() - 1; ++i) {
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      if (i >= 5 && i <= 7) {
        // Elements 5, 6, 7 should have been replaced with zeros.
        EXPECT_EQ(0, vec1[channel][i]);
      } else {
        EXPECT_EQ(*ptr, vec1[channel][i]);
      }
      ++ptr;
    }
  }
}
|
||||
|
||||
// Test the CopyChannel method, when the test is instantiated with at least two
// channels.
TEST_P(AudioMultiVectorTest, CopyChannel) {
  if (num_channels_ < 2)
    return;

  AudioMultiVector vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  // Create a reference copy.
  AudioMultiVector ref(num_channels_);
  ref.PushBack(vec);
  // Copy from first to last channel.
  vec.CopyChannel(0, num_channels_ - 1);
  // Verify that the first and last channels are identical; the others should
  // be left untouched.
  for (size_t i = 0; i < array_length(); ++i) {
    // Verify that all but the last channel are untouched.
    for (size_t channel = 0; channel < num_channels_ - 1; ++channel) {
      EXPECT_EQ(ref[channel][i], vec[channel][i]);
    }
    // Verify that the last and the first channels are identical.
    EXPECT_EQ(vec[0][i], vec[num_channels_ - 1][i]);
  }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(TestNumChannels,
|
||||
AudioMultiVectorTest,
|
||||
::testing::Values(static_cast<size_t>(1),
|
||||
static_cast<size_t>(2),
|
||||
static_cast<size_t>(5)));
|
||||
} // namespace webrtc
|
||||
386
modules/audio_coding/neteq/audio_vector.cc
Normal file
386
modules/audio_coding/neteq/audio_vector.cc
Normal file
@ -0,0 +1,386 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_vector.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Default constructor: delegates to the sized constructor for the default
// capacity, then clears so the vector starts out empty.
AudioVector::AudioVector()
    : AudioVector(kDefaultInitialSize) {
  Clear();
}
|
||||
|
||||
// Sized constructor. One extra slot is allocated beyond |initial_size| so the
// ring buffer can distinguish "full" from "empty" (see Reserve()). The buffer
// is zero-filled, and end_index_ is placed so that Size() == initial_size.
AudioVector::AudioVector(size_t initial_size)
    : array_(new int16_t[initial_size + 1]),
      capacity_(initial_size + 1),
      begin_index_(0),
      end_index_(capacity_ - 1) {
  memset(array_.get(), 0, capacity_ * sizeof(int16_t));
}
|
||||
|
||||
// Out-of-line defaulted destructor (declared virtual in the header).
AudioVector::~AudioVector() = default;
|
||||
|
||||
// Empties the vector. In the ring-buffer representation, equal begin and end
// indices denote an empty buffer; no storage is released.
void AudioVector::Clear() {
  begin_index_ = 0;
  end_index_ = 0;
}
|
||||
|
||||
// Replaces the contents of |copy_to| with an exact replica of this vector.
// The copy is linearized: it always starts at index 0 in the destination,
// regardless of where this vector's data wraps.
void AudioVector::CopyTo(AudioVector* copy_to) const {
  RTC_DCHECK(copy_to);
  copy_to->Reserve(Size());
  CopyTo(Size(), 0, copy_to->array_.get());
  copy_to->begin_index_ = 0;
  copy_to->end_index_ = Size();
}
|
||||
|
||||
void AudioVector::CopyTo(
|
||||
size_t length, size_t position, int16_t* copy_to) const {
|
||||
if (length == 0)
|
||||
return;
|
||||
length = std::min(length, Size() - position);
|
||||
const size_t copy_index = (begin_index_ + position) % capacity_;
|
||||
const size_t first_chunk_length =
|
||||
std::min(length, capacity_ - copy_index);
|
||||
memcpy(copy_to, &array_[copy_index],
|
||||
first_chunk_length * sizeof(int16_t));
|
||||
const size_t remaining_length = length - first_chunk_length;
|
||||
if (remaining_length > 0) {
|
||||
memcpy(©_to[first_chunk_length], array_.get(),
|
||||
remaining_length * sizeof(int16_t));
|
||||
}
|
||||
}
|
||||
|
||||
// Prepends the contents of |prepend_this|. Because the source may wrap in its
// own ring buffer, it is prepended in up to two chunks — the wrapped tail
// first, then the head chunk, so the final order is preserved.
void AudioVector::PushFront(const AudioVector& prepend_this) {
  const size_t length = prepend_this.Size();
  if (length == 0)
    return;

  // Although the subsequent calling to PushFront does Reserve in it, it is
  // always more efficient to do a big Reserve first.
  Reserve(Size() + length);

  const size_t first_chunk_length =
      std::min(length, prepend_this.capacity_ - prepend_this.begin_index_);
  const size_t remaining_length = length - first_chunk_length;
  if (remaining_length > 0)
    PushFront(prepend_this.array_.get(), remaining_length);
  PushFront(&prepend_this.array_[prepend_this.begin_index_],
            first_chunk_length);
}
|
||||
|
||||
// Prepends |length| samples from the raw array |prepend_this|. Writes in at
// most two memcpy chunks: first into the space just before |begin_index_|,
// then (if the write wraps) into the tail end of the physical buffer.
void AudioVector::PushFront(const int16_t* prepend_this, size_t length) {
  if (length == 0)
    return;
  Reserve(Size() + length);
  const size_t first_chunk_length = std::min(length, begin_index_);
  memcpy(&array_[begin_index_ - first_chunk_length],
         &prepend_this[length - first_chunk_length],
         first_chunk_length * sizeof(int16_t));
  const size_t remaining_length = length - first_chunk_length;
  if (remaining_length > 0) {
    memcpy(&array_[capacity_ - remaining_length], prepend_this,
           remaining_length * sizeof(int16_t));
  }
  // Move the logical start back by |length|, modulo the buffer size.
  begin_index_ = (begin_index_ + capacity_ - length) % capacity_;
}
|
||||
|
||||
void AudioVector::PushBack(const AudioVector& append_this) {
|
||||
PushBack(append_this, append_this.Size(), 0);
|
||||
}
|
||||
|
||||
// Appends |length| samples of |append_this|, starting at its logical index
// |position|. The source segment may wrap in the source's ring buffer, so it
// is appended in up to two chunks.
void AudioVector::PushBack(
    const AudioVector& append_this, size_t length, size_t position) {
  RTC_DCHECK_LE(position, append_this.Size());
  RTC_DCHECK_LE(length, append_this.Size() - position);

  if (length == 0)
    return;

  // Although the subsequent calling to PushBack does Reserve in it, it is
  // always more efficient to do a big Reserve first.
  Reserve(Size() + length);

  const size_t start_index =
      (append_this.begin_index_ + position) % append_this.capacity_;
  const size_t first_chunk_length = std::min(
      length, append_this.capacity_ - start_index);
  PushBack(&append_this.array_[start_index], first_chunk_length);

  const size_t remaining_length = length - first_chunk_length;
  if (remaining_length > 0)
    PushBack(append_this.array_.get(), remaining_length);
}
|
||||
|
||||
// Appends |length| samples from the raw array |append_this|. Writes in at
// most two memcpy chunks: first up to the physical end of the buffer, then
// (if the write wraps) from the start of the buffer.
void AudioVector::PushBack(const int16_t* append_this, size_t length) {
  if (length == 0)
    return;
  Reserve(Size() + length);
  const size_t first_chunk_length = std::min(length, capacity_ - end_index_);
  memcpy(&array_[end_index_], append_this,
         first_chunk_length * sizeof(int16_t));
  const size_t remaining_length = length - first_chunk_length;
  if (remaining_length > 0) {
    memcpy(array_.get(), &append_this[first_chunk_length],
           remaining_length * sizeof(int16_t));
  }
  end_index_ = (end_index_ + length) % capacity_;
}
|
||||
|
||||
// Drops up to |length| samples from the front of the vector. Only the logical
// start index moves; no data is touched. Removing more than Size() samples
// simply empties the vector.
void AudioVector::PopFront(size_t length) {
  if (length == 0)
    return;
  const size_t to_remove = std::min(length, Size());
  begin_index_ = (begin_index_ + to_remove) % capacity_;
}
|
||||
|
||||
// Drops up to |length| samples from the back of the vector by moving the
// logical end index backwards (modulo capacity); no data is touched.
void AudioVector::PopBack(size_t length) {
  if (length == 0)
    return;
  // Never remove more than what is in the array.
  length = std::min(length, Size());
  end_index_ = (end_index_ + capacity_ - length) % capacity_;
}
|
||||
|
||||
// Grows the vector by |extra_length| zero samples at the end. Implemented as
// a zero-insertion at position Size(), i.e. a pure append of zeros.
void AudioVector::Extend(size_t extra_length) {
  if (extra_length == 0)
    return;
  InsertZerosByPushBack(extra_length, Size());
}
|
||||
|
||||
// Inserts |length| samples from |insert_this| at logical index |position|,
// growing the vector by |length|. The insertion strategy (push-front vs
// push-back) is chosen to move the smaller half of the existing data.
void AudioVector::InsertAt(const int16_t* insert_this,
                           size_t length,
                           size_t position) {
  if (length == 0)
    return;
  // Cap the insert position at the current array length.
  position = std::min(Size(), position);

  // When inserting to a position closer to the beginning, it is more efficient
  // to insert by pushing front than to insert by pushing back, since less data
  // will be moved, vice versa.
  if (position <= Size() - position) {
    InsertByPushFront(insert_this, length, position);
  } else {
    InsertByPushBack(insert_this, length, position);
  }
}
|
||||
|
||||
// Inserts |length| zero samples at logical index |position|, growing the
// vector by |length|. Same strategy choice as InsertAt(): move whichever half
// of the existing data is smaller.
void AudioVector::InsertZerosAt(size_t length,
                                size_t position) {
  if (length == 0)
    return;
  // Cap the insert position at the current array length.
  position = std::min(Size(), position);

  // When inserting to a position closer to the beginning, it is more efficient
  // to insert by pushing front than to insert by pushing back, since less data
  // will be moved, vice versa.
  if (position <= Size() - position) {
    InsertZerosByPushFront(length, position);
  } else {
    InsertZerosByPushBack(length, position);
  }
}
|
||||
|
||||
// Overwrites |length| samples of this vector, starting at |position|, with
// the first |length| samples of |insert_this|. The vector grows if the write
// extends past the current end. The source is read in up to two chunks since
// it may wrap in its own ring buffer.
void AudioVector::OverwriteAt(const AudioVector& insert_this,
                              size_t length,
                              size_t position) {
  RTC_DCHECK_LE(length, insert_this.Size());
  if (length == 0)
    return;

  // Cap the insert position at the current array length.
  position = std::min(Size(), position);

  // Although the subsequent calling to OverwriteAt does Reserve in it, it is
  // always more efficient to do a big Reserve first.
  size_t new_size = std::max(Size(), position + length);
  Reserve(new_size);

  const size_t first_chunk_length =
      std::min(length, insert_this.capacity_ - insert_this.begin_index_);
  OverwriteAt(&insert_this.array_[insert_this.begin_index_], first_chunk_length,
              position);
  const size_t remaining_length = length - first_chunk_length;
  if (remaining_length > 0) {
    OverwriteAt(insert_this.array_.get(), remaining_length,
                position + first_chunk_length);
  }
}
|
||||
|
||||
// Overwrites |length| samples, starting at |position|, with values from the
// raw array |insert_this|, growing the vector if the write extends past the
// current end. The destination write is split into at most two memcpy chunks
// because it may wrap around the physical buffer.
void AudioVector::OverwriteAt(const int16_t* insert_this,
                              size_t length,
                              size_t position) {
  if (length == 0)
    return;
  // Cap the insert position at the current array length.
  position = std::min(Size(), position);

  size_t new_size = std::max(Size(), position + length);
  Reserve(new_size);

  const size_t overwrite_index = (begin_index_ + position) % capacity_;
  const size_t first_chunk_length =
      std::min(length, capacity_ - overwrite_index);
  memcpy(&array_[overwrite_index], insert_this,
         first_chunk_length * sizeof(int16_t));
  const size_t remaining_length = length - first_chunk_length;
  if (remaining_length > 0) {
    memcpy(array_.get(), &insert_this[first_chunk_length],
           remaining_length * sizeof(int16_t));
  }

  // Recompute the end index so Size() == new_size.
  end_index_ = (begin_index_ + new_size) % capacity_;
}
|
||||
|
||||
// Appends |append_this|, overlapping the last |fade_length| samples of this
// vector with the first |fade_length| samples of |append_this| using a linear
// cross-fade in Q14 fixed point.
void AudioVector::CrossFade(const AudioVector& append_this,
                            size_t fade_length) {
  // Fade length cannot be longer than the current vector or |append_this|.
  assert(fade_length <= Size());
  assert(fade_length <= append_this.Size());
  fade_length = std::min(fade_length, Size());
  fade_length = std::min(fade_length, append_this.Size());
  size_t position = Size() - fade_length + begin_index_;
  // Cross fade the overlapping regions.
  // |alpha| is the mixing factor in Q14.
  // TODO(hlundin): Consider skipping +1 in the denominator to produce a
  // smoother cross-fade, in particular at the end of the fade.
  int alpha_step = 16384 / (static_cast<int>(fade_length) + 1);
  int alpha = 16384;
  for (size_t i = 0; i < fade_length; ++i) {
    alpha -= alpha_step;
    // Mix old and new samples; +8192 rounds before the Q14 shift.
    array_[(position + i) % capacity_] =
        (alpha * array_[(position + i) % capacity_] +
            (16384 - alpha) * append_this[i] + 8192) >> 14;
  }
  assert(alpha >= 0);  // Verify that the slope was correct.
  // Append what is left of |append_this|.
  size_t samples_to_push_back = append_this.Size() - fade_length;
  if (samples_to_push_back > 0)
    PushBack(append_this, samples_to_push_back, fade_length);
}
|
||||
|
||||
// Returns the number of elements in this AudioVector. The modulo arithmetic
// handles the wrapped case where end_index_ < begin_index_.
size_t AudioVector::Size() const {
  return (end_index_ + capacity_ - begin_index_) % capacity_;
}
|
||||
|
||||
// Returns true if this AudioVector is empty. By construction, equal begin and
// end indices denote the empty buffer.
bool AudioVector::Empty() const {
  return begin_index_ == end_index_;
}
|
||||
|
||||
// Ensures capacity for at least |n| samples, reallocating and linearizing the
// data (begin_index_ reset to 0) when growth is needed. No-op if the current
// capacity already suffices.
void AudioVector::Reserve(size_t n) {
  if (capacity_ > n)
    return;
  const size_t length = Size();
  // Reserve one more sample to remove the ambiguity between empty vector and
  // full vector. Therefore |begin_index_| == |end_index_| indicates empty
  // vector, and |begin_index_| == (|end_index_| + 1) % capacity indicates
  // full vector.
  std::unique_ptr<int16_t[]> temp_array(new int16_t[n + 1]);
  CopyTo(length, 0, temp_array.get());
  array_.swap(temp_array);
  begin_index_ = 0;
  end_index_ = length;
  capacity_ = n + 1;
}
|
||||
|
||||
// Insert helper used when |position| is in the back half: temporarily pops the
// tail (everything from |position| on) into a scratch buffer, appends the new
// samples, then re-appends the saved tail.
void AudioVector::InsertByPushBack(const int16_t* insert_this,
                                   size_t length,
                                   size_t position) {
  const size_t move_chunk_length = Size() - position;
  std::unique_ptr<int16_t[]> temp_array(nullptr);
  if (move_chunk_length > 0) {
    // TODO(minyue): see if it is possible to avoid copying to a buffer.
    temp_array.reset(new int16_t[move_chunk_length]);
    CopyTo(move_chunk_length, position, temp_array.get());
    PopBack(move_chunk_length);
  }

  Reserve(Size() + length + move_chunk_length);
  PushBack(insert_this, length);
  if (move_chunk_length > 0)
    PushBack(temp_array.get(), move_chunk_length);
}
|
||||
|
||||
// Insert helper used when |position| is in the front half: temporarily pops
// the head (the first |position| samples) into a scratch buffer, prepends the
// new samples, then re-prepends the saved head.
void AudioVector::InsertByPushFront(const int16_t* insert_this,
                                    size_t length,
                                    size_t position) {
  std::unique_ptr<int16_t[]> temp_array(nullptr);
  if (position > 0) {
    // TODO(minyue): see if it is possible to avoid copying to a buffer.
    temp_array.reset(new int16_t[position]);
    CopyTo(position, 0, temp_array.get());
    PopFront(position);
  }

  Reserve(Size() + length + position);
  PushFront(insert_this, length);
  if (position > 0)
    PushFront(temp_array.get(), position);
}
|
||||
|
||||
// Zero-insert helper for back-half positions: saves and pops the tail, writes
// the zeros directly into the buffer (in up to two memset chunks, since the
// write may wrap), then re-appends the saved tail.
void AudioVector::InsertZerosByPushBack(size_t length,
                                        size_t position) {
  const size_t move_chunk_length = Size() - position;
  std::unique_ptr<int16_t[]> temp_array(nullptr);
  if (move_chunk_length > 0) {
    temp_array.reset(new int16_t[move_chunk_length]);
    CopyTo(move_chunk_length, position, temp_array.get());
    PopBack(move_chunk_length);
  }

  Reserve(Size() + length + move_chunk_length);

  const size_t first_zero_chunk_length =
      std::min(length, capacity_ - end_index_);
  memset(&array_[end_index_], 0, first_zero_chunk_length * sizeof(int16_t));
  const size_t remaining_zero_length = length - first_zero_chunk_length;
  if (remaining_zero_length > 0)
    memset(array_.get(), 0, remaining_zero_length * sizeof(int16_t));
  end_index_ = (end_index_ + length) % capacity_;

  if (move_chunk_length > 0)
    PushBack(temp_array.get(), move_chunk_length);
}
|
||||
|
||||
// Zero-insert helper for front-half positions: saves and pops the head, writes
// the zeros directly into the buffer just before |begin_index_| (in up to two
// memset chunks, since the write may wrap), then re-prepends the saved head.
void AudioVector::InsertZerosByPushFront(size_t length,
                                         size_t position) {
  std::unique_ptr<int16_t[]> temp_array(nullptr);
  if (position > 0) {
    temp_array.reset(new int16_t[position]);
    CopyTo(position, 0, temp_array.get());
    PopFront(position);
  }

  Reserve(Size() + length + position);

  const size_t first_zero_chunk_length = std::min(length, begin_index_);
  memset(&array_[begin_index_ - first_zero_chunk_length], 0,
         first_zero_chunk_length * sizeof(int16_t));
  const size_t remaining_zero_length = length - first_zero_chunk_length;
  if (remaining_zero_length > 0)
    memset(&array_[capacity_ - remaining_zero_length], 0,
           remaining_zero_length * sizeof(int16_t));
  begin_index_ = (begin_index_ + capacity_ - length) % capacity_;

  if (position > 0)
    PushFront(temp_array.get(), position);
}
|
||||
|
||||
} // namespace webrtc
|
||||
168
modules/audio_coding/neteq/audio_vector.h
Normal file
168
modules/audio_coding/neteq/audio_vector.h
Normal file
@ -0,0 +1,168 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_
|
||||
|
||||
#include <string.h> // Access to size_t.
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A dynamically sized vector of int16_t audio samples, implemented as a
// circular (ring) buffer with one spare slot to disambiguate full from empty
// (see the .cc file's Reserve()). Supports efficient push/pop at both ends.
class AudioVector {
 public:
  // Creates an empty AudioVector.
  AudioVector();

  // Creates an AudioVector with an initial size.
  explicit AudioVector(size_t initial_size);

  virtual ~AudioVector();

  // Deletes all values and makes the vector empty.
  virtual void Clear();

  // Copies all values from this vector to |copy_to|. Any contents in |copy_to|
  // are deleted before the copy operation. After the operation is done,
  // |copy_to| will be an exact replica of this object.
  virtual void CopyTo(AudioVector* copy_to) const;

  // Copies |length| values from |position| in this vector to |copy_to|.
  virtual void CopyTo(size_t length, size_t position, int16_t* copy_to) const;

  // Prepends the contents of AudioVector |prepend_this| to this object. The
  // length of this object is increased with the length of |prepend_this|.
  virtual void PushFront(const AudioVector& prepend_this);

  // Same as above, but with an array |prepend_this| with |length| elements as
  // source.
  virtual void PushFront(const int16_t* prepend_this, size_t length);

  // Same as PushFront but will append to the end of this object.
  virtual void PushBack(const AudioVector& append_this);

  // Appends a segment of |append_this| to the end of this object. The segment
  // starts from |position| and has |length| samples.
  virtual void PushBack(const AudioVector& append_this,
                        size_t length,
                        size_t position);

  // Same as PushFront but will append to the end of this object.
  virtual void PushBack(const int16_t* append_this, size_t length);

  // Removes |length| elements from the beginning of this object.
  virtual void PopFront(size_t length);

  // Removes |length| elements from the end of this object.
  virtual void PopBack(size_t length);

  // Extends this object with |extra_length| elements at the end. The new
  // elements are initialized to zero.
  virtual void Extend(size_t extra_length);

  // Inserts |length| elements taken from the array |insert_this| and insert
  // them at |position|. The length of the AudioVector is increased by |length|.
  // |position| = 0 means that the new values are prepended to the vector.
  // |position| = Size() means that the new values are appended to the vector.
  virtual void InsertAt(const int16_t* insert_this, size_t length,
                        size_t position);

  // Like InsertAt, but inserts |length| zero elements at |position|.
  virtual void InsertZerosAt(size_t length, size_t position);

  // Overwrites |length| elements of this AudioVector starting from |position|
  // with the first values in |insert_this|. The definition of |position|
  // is the same as for InsertAt(). If |length| and |position| are selected
  // such that the new data extends beyond the end of the current AudioVector,
  // the vector is extended to accommodate the new data.
  virtual void OverwriteAt(const AudioVector& insert_this,
                           size_t length,
                           size_t position);

  // Overwrites |length| elements of this AudioVector with values taken from the
  // array |insert_this|, starting at |position|. The definition of |position|
  // is the same as for InsertAt(). If |length| and |position| are selected
  // such that the new data extends beyond the end of the current AudioVector,
  // the vector is extended to accommodate the new data.
  virtual void OverwriteAt(const int16_t* insert_this,
                           size_t length,
                           size_t position);

  // Appends |append_this| to the end of the current vector. Lets the two
  // vectors overlap by |fade_length| samples, and cross-fade linearly in this
  // region.
  virtual void CrossFade(const AudioVector& append_this, size_t fade_length);

  // Returns the number of elements in this AudioVector.
  virtual size_t Size() const;

  // Returns true if this AudioVector is empty.
  virtual bool Empty() const;

  // Accesses and modifies an element of AudioVector.
  inline const int16_t& operator[](size_t index) const {
    return array_[WrapIndex(index, begin_index_, capacity_)];
  }

  inline int16_t& operator[](size_t index) {
    return array_[WrapIndex(index, begin_index_, capacity_)];
  }

 private:
  static const size_t kDefaultInitialSize = 10;

  // This method is used by the [] operators to calculate an index within the
  // capacity of the array, but without using the modulo operation (%).
  static inline size_t WrapIndex(size_t index,
                                 size_t begin_index,
                                 size_t capacity) {
    RTC_DCHECK_LT(index, capacity);
    RTC_DCHECK_LT(begin_index, capacity);
    size_t ix = begin_index + index;
    RTC_DCHECK_GE(ix, index);  // Check for overflow.
    if (ix >= capacity) {
      ix -= capacity;
    }
    RTC_DCHECK_LT(ix, capacity);
    return ix;
  }

  void Reserve(size_t n);

  // Insert helpers; see the .cc file. The ByPushFront/ByPushBack variants are
  // chosen so that the smaller half of the existing data is moved.
  void InsertByPushBack(const int16_t* insert_this, size_t length,
                        size_t position);

  void InsertByPushFront(const int16_t* insert_this, size_t length,
                         size_t position);

  void InsertZerosByPushBack(size_t length, size_t position);

  void InsertZerosByPushFront(size_t length, size_t position);

  std::unique_ptr<int16_t[]> array_;

  size_t capacity_;  // Allocated number of samples in the array.

  // The index of the first sample in |array_|, except when
  // |begin_index_ == end_index_|, which indicates an empty buffer.
  size_t begin_index_;

  // The index of the sample after the last sample in |array_|.
  size_t end_index_;

  RTC_DISALLOW_COPY_AND_ASSIGN(AudioVector);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_AUDIO_VECTOR_H_
|
||||
386
modules/audio_coding/neteq/audio_vector_unittest.cc
Normal file
386
modules/audio_coding/neteq/audio_vector_unittest.cc
Normal file
@ -0,0 +1,386 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_vector.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Test fixture providing a small ramp array [0, 1, ..., 9] used as input data
// by the AudioVector tests below.
class AudioVectorTest : public ::testing::Test {
 protected:
  virtual void SetUp() {
    // Populate test array.
    for (size_t i = 0; i < array_length(); ++i) {
      array_[i] = i;
    }
  }

  size_t array_length() const {
    return sizeof(array_) / sizeof(array_[0]);
  }

  int16_t array_[10];
};
|
||||
|
||||
// Create and destroy AudioVector objects, both empty and with a predefined
// length.
TEST_F(AudioVectorTest, CreateAndDestroy) {
  AudioVector vec1;
  EXPECT_TRUE(vec1.Empty());
  EXPECT_EQ(0u, vec1.Size());

  // The sized constructor produces a (zero-filled) vector of that size.
  size_t initial_size = 17;
  AudioVector vec2(initial_size);
  EXPECT_FALSE(vec2.Empty());
  EXPECT_EQ(initial_size, vec2.Size());
}
|
||||
|
||||
// Test the subscript operator [] for getting and setting.
TEST_F(AudioVectorTest, SubscriptOperator) {
  AudioVector vec(array_length());
  for (size_t i = 0; i < array_length(); ++i) {
    vec[i] = static_cast<int16_t>(i);
    const int16_t& value = vec[i];  // Make sure to use the const version.
    EXPECT_EQ(static_cast<int16_t>(i), value);
  }
}
|
||||
|
||||
// Test the PushBack method and the CopyFrom method. The Clear method is also
// invoked.
TEST_F(AudioVectorTest, PushBackAndCopy) {
  AudioVector vec;
  AudioVector vec_copy;
  vec.PushBack(array_, array_length());
  vec.CopyTo(&vec_copy);  // Copy from |vec| to |vec_copy|.
  ASSERT_EQ(array_length(), vec.Size());
  ASSERT_EQ(array_length(), vec_copy.Size());
  for (size_t i = 0; i < array_length(); ++i) {
    EXPECT_EQ(array_[i], vec[i]);
    EXPECT_EQ(array_[i], vec_copy[i]);
  }

  // Clear |vec| and verify that it is empty.
  vec.Clear();
  EXPECT_TRUE(vec.Empty());

  // Now copy the empty vector and verify that the copy becomes empty too.
  vec.CopyTo(&vec_copy);
  EXPECT_TRUE(vec_copy.Empty());
}
|
||||
|
||||
// Test the PushBack method with another AudioVector as input argument.
TEST_F(AudioVectorTest, PushBackVector) {
  static const size_t kLength = 10;
  AudioVector vec1(kLength);
  AudioVector vec2(kLength);
  // Set the first vector to [0, 1, ..., kLength - 1].
  // Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1].
  for (size_t i = 0; i < kLength; ++i) {
    vec1[i] = static_cast<int16_t>(i);
    vec2[i] = static_cast<int16_t>(i + kLength);
  }
  // Append vec2 to the back of vec1.
  vec1.PushBack(vec2);
  ASSERT_EQ(2 * kLength, vec1.Size());
  // The result should be the contiguous ramp [0, ..., 2 * kLength - 1].
  for (size_t i = 0; i < 2 * kLength; ++i) {
    EXPECT_EQ(static_cast<int16_t>(i), vec1[i]);
  }
}
|
||||
|
||||
// Test the PushFront method.
|
||||
TEST_F(AudioVectorTest, PushFront) {
  // Pushing onto the front of an empty vector is equivalent to PushBack.
  AudioVector vec;
  vec.PushFront(array_, array_length());
  ASSERT_EQ(array_length(), vec.Size());
  size_t i = 0;
  while (i < array_length()) {
    EXPECT_EQ(array_[i], vec[i]);
    ++i;
  }
}
|
||||
|
||||
// Test the PushFront method with another AudioVector as input argument.
|
||||
TEST_F(AudioVectorTest, PushFrontVector) {
  static const size_t kLength = 10;
  AudioVector head(kLength);
  AudioVector tail(kLength);
  // head = [0 .. kLength - 1]; tail = [kLength .. 2 * kLength - 1].
  for (size_t i = 0; i < kLength; ++i) {
    head[i] = static_cast<int16_t>(i);
    tail[i] = static_cast<int16_t>(i + kLength);
  }
  // Prepending |head| to |tail| must yield [0 .. 2 * kLength - 1] in |tail|.
  tail.PushFront(head);
  ASSERT_EQ(2 * kLength, tail.Size());
  for (size_t i = 0; i < 2 * kLength; ++i) {
    EXPECT_EQ(static_cast<int16_t>(i), tail[i]);
  }
}
|
||||
|
||||
// Test the PopFront method.
|
||||
TEST_F(AudioVectorTest, PopFront) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  // Removing the first element shifts the remaining samples down by one.
  vec.PopFront(1);
  EXPECT_EQ(array_length() - 1u, vec.Size());
  for (size_t i = 0; i + 1 < array_length(); ++i) {
    EXPECT_EQ(static_cast<int16_t>(i + 1), vec[i]);
  }
  // Popping more elements than remain must simply empty the vector.
  vec.PopFront(array_length());
  EXPECT_EQ(0u, vec.Size());
}
|
||||
|
||||
// Test the PopBack method.
|
||||
TEST_F(AudioVectorTest, PopBack) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  // Removing the last element leaves the leading samples untouched.
  vec.PopBack(1);
  EXPECT_EQ(array_length() - 1u, vec.Size());
  for (size_t i = 0; i + 1 < array_length(); ++i) {
    EXPECT_EQ(static_cast<int16_t>(i), vec[i]);
  }
  // Popping more elements than remain must simply empty the vector.
  vec.PopBack(array_length());
  EXPECT_EQ(0u, vec.Size());
}
|
||||
|
||||
// Test the Extend method.
|
||||
TEST_F(AudioVectorTest, Extend) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  const size_t old_size = array_length();
  vec.Extend(5);  // The five appended samples must read back as zeros.
  ASSERT_EQ(old_size + 5u, vec.Size());
  for (size_t i = old_size; i < old_size + 5; ++i) {
    EXPECT_EQ(0, vec[i]);
  }
}
|
||||
|
||||
// Test the InsertAt method with an insert position in the middle of the vector.
|
||||
TEST_F(AudioVectorTest, InsertAt) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t new_array[kNewLength];
  // Set array elements to {100, 101, 102, ... }.
  for (int i = 0; i < kNewLength; ++i) {
    new_array[i] = 100 + i;
  }
  int insert_position = 5;
  vec.InsertAt(new_array, kNewLength, insert_position);
  // Verify that the vector looks as follows:
  // {0, 1, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1,
  // |insert_position|, |insert_position| + 1, ..., kLength - 1}.
  size_t pos = 0;
  // Leading part: unchanged original samples.
  for (int i = 0; i < insert_position; ++i) {
    EXPECT_EQ(array_[i], vec[pos]);
    ++pos;
  }
  // Middle part: the inserted values.
  for (int i = 0; i < kNewLength; ++i) {
    EXPECT_EQ(new_array[i], vec[pos]);
    ++pos;
  }
  // Trailing part: the rest of the original samples, shifted right.
  for (size_t i = insert_position; i < array_length(); ++i) {
    EXPECT_EQ(array_[i], vec[pos]);
    ++pos;
  }
}
|
||||
|
||||
// Test the InsertZerosAt method with an insert position in the middle of the
|
||||
// vector. Use the InsertAt method as reference.
|
||||
TEST_F(AudioVectorTest, InsertZerosAt) {
  static const int kNewLength = 5;
  const int insert_position = 5;
  AudioVector vec;
  AudioVector reference;
  vec.PushBack(array_, array_length());
  reference.PushBack(array_, array_length());
  // Insert zeros directly, and build the reference by inserting an explicit
  // all-zero buffer through InsertAt.
  vec.InsertZerosAt(kNewLength, insert_position);
  int16_t zeros[kNewLength] = {0};
  reference.InsertAt(zeros, kNewLength, insert_position);
  // The two vectors must now be identical.
  ASSERT_EQ(reference.Size(), vec.Size());
  for (size_t i = 0; i < vec.Size(); ++i) {
    EXPECT_EQ(reference[i], vec[i]);
  }
}
|
||||
|
||||
// Test the InsertAt method with an insert position at the start of the vector.
|
||||
TEST_F(AudioVectorTest, InsertAtBeginning) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t new_array[kNewLength];
  // Set array elements to {100, 101, 102, ... }.
  for (int i = 0; i < kNewLength; ++i) {
    new_array[i] = 100 + i;
  }
  // Inserting at position 0 behaves like a PushFront of |new_array|.
  int insert_position = 0;
  vec.InsertAt(new_array, kNewLength, insert_position);
  // Verify that the vector looks as follows:
  // {100, 101, ..., 100 + kNewLength - 1,
  // 0, 1, ..., kLength - 1}.
  size_t pos = 0;
  // The inserted values come first.
  for (int i = 0; i < kNewLength; ++i) {
    EXPECT_EQ(new_array[i], vec[pos]);
    ++pos;
  }
  // All original samples follow, shifted right by kNewLength.
  for (size_t i = insert_position; i < array_length(); ++i) {
    EXPECT_EQ(array_[i], vec[pos]);
    ++pos;
  }
}
|
||||
|
||||
// Test the InsertAt method with an insert position at the end of the vector.
|
||||
TEST_F(AudioVectorTest, InsertAtEnd) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t new_array[kNewLength];
  // Set array elements to {100, 101, 102, ... }.
  for (int i = 0; i < kNewLength; ++i) {
    new_array[i] = 100 + i;
  }
  // Inserting at position Size() behaves like a PushBack of |new_array|.
  int insert_position = array_length();
  vec.InsertAt(new_array, kNewLength, insert_position);
  // Verify that the vector looks as follows:
  // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }.
  size_t pos = 0;
  // Original samples remain untouched at the front.
  for (size_t i = 0; i < array_length(); ++i) {
    EXPECT_EQ(array_[i], vec[pos]);
    ++pos;
  }
  // Inserted values are appended at the back.
  for (int i = 0; i < kNewLength; ++i) {
    EXPECT_EQ(new_array[i], vec[pos]);
    ++pos;
  }
}
|
||||
|
||||
// Test the InsertAt method with an insert position beyond the end of the
|
||||
// vector. Verify that a position beyond the end of the vector does not lead to
|
||||
// an error. The expected outcome is the same as if the vector end was used as
|
||||
// input position. That is, the input position should be capped at the maximum
|
||||
// allowed value.
|
||||
TEST_F(AudioVectorTest, InsertBeyondEnd) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t new_array[kNewLength];
  // Set array elements to {100, 101, 102, ... }.
  for (int i = 0; i < kNewLength; ++i) {
    new_array[i] = 100 + i;
  }
  // An out-of-range position must be capped to Size(), not cause an error.
  int insert_position = array_length() + 10;  // Too large.
  vec.InsertAt(new_array, kNewLength, insert_position);
  // Verify that the vector looks as follows:
  // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }.
  size_t pos = 0;
  // Original samples remain untouched at the front.
  for (size_t i = 0; i < array_length(); ++i) {
    EXPECT_EQ(array_[i], vec[pos]);
    ++pos;
  }
  // Inserted values end up appended at the back, as if position were Size().
  for (int i = 0; i < kNewLength; ++i) {
    EXPECT_EQ(new_array[i], vec[pos]);
    ++pos;
  }
}
|
||||
|
||||
// Test the OverwriteAt method with a position such that all of the new values
|
||||
// fit within the old vector.
|
||||
TEST_F(AudioVectorTest, OverwriteAt) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t new_array[kNewLength];
  // Set array elements to {100, 101, 102, ... }.
  for (int i = 0; i < kNewLength; ++i) {
    new_array[i] = 100 + i;
  }
  size_t insert_position = 2;
  vec.OverwriteAt(new_array, kNewLength, insert_position);
  // Verify that the vector looks as follows:
  // {0, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1,
  // |insert_position|, |insert_position| + 1, ..., kLength - 1}.
  size_t pos = 0;
  // Samples before the overwrite window are untouched.
  for (pos = 0; pos < insert_position; ++pos) {
    EXPECT_EQ(array_[pos], vec[pos]);
  }
  // The overwrite window holds the new values.
  for (int i = 0; i < kNewLength; ++i) {
    EXPECT_EQ(new_array[i], vec[pos]);
    ++pos;
  }
  // Samples after the window keep their original values (no shifting).
  for (; pos < array_length(); ++pos) {
    EXPECT_EQ(array_[pos], vec[pos]);
  }
}
|
||||
|
||||
// Test the OverwriteAt method with a position such that some of the new values
|
||||
// extend beyond the end of the current vector. This is valid, and the vector is
|
||||
// expected to expand to accommodate the new values.
|
||||
TEST_F(AudioVectorTest, OverwriteBeyondEnd) {
  AudioVector vec;
  vec.PushBack(array_, array_length());
  static const int kNewLength = 5;
  int16_t new_array[kNewLength];
  // Set array elements to {100, 101, 102, ... }.
  for (int i = 0; i < kNewLength; ++i) {
    new_array[i] = 100 + i;
  }
  // Start the overwrite 2 samples before the end, so 3 of the 5 new values
  // extend past the old end and the vector must grow by 3.
  int insert_position = array_length() - 2;
  vec.OverwriteAt(new_array, kNewLength, insert_position);
  ASSERT_EQ(array_length() - 2u + kNewLength, vec.Size());
  // Verify that the vector looks as follows:
  // {0, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1,
  // |insert_position|, |insert_position| + 1, ..., kLength - 1}.
  int pos = 0;
  // Samples before the overwrite window are untouched.
  for (pos = 0; pos < insert_position; ++pos) {
    EXPECT_EQ(array_[pos], vec[pos]);
  }
  // The window (including the grown tail) holds the new values.
  for (int i = 0; i < kNewLength; ++i) {
    EXPECT_EQ(new_array[i], vec[pos]);
    ++pos;
  }
  // Verify that we checked to the end of |vec|.
  EXPECT_EQ(vec.Size(), static_cast<size_t>(pos));
}
|
||||
|
||||
// Cross-fading a constant-0 vector into a constant-100 vector must produce
// an untouched head, a monotonic mixing zone, and an untouched tail.
TEST_F(AudioVectorTest, CrossFade) {
  static const size_t kLength = 100;
  static const size_t kFadeLength = 10;
  AudioVector vec1(kLength);
  AudioVector vec2(kLength);
  // Set all vector elements to 0 in |vec1| and 100 in |vec2|.
  for (size_t i = 0; i < kLength; ++i) {
    vec1[i] = 0;
    vec2[i] = 100;
  }
  vec1.CrossFade(vec2, kFadeLength);
  // The result overlaps the two vectors by |kFadeLength| samples.
  ASSERT_EQ(2 * kLength - kFadeLength, vec1.Size());
  // First part untouched.
  for (size_t i = 0; i < kLength - kFadeLength; ++i) {
    EXPECT_EQ(0, vec1[i]);
  }
  // Check mixing zone: values ramp linearly from 0 toward 100, within +/-1
  // to allow for fixed-point rounding.
  for (size_t i = 0 ; i < kFadeLength; ++i) {
    EXPECT_NEAR((i + 1) * 100 / (kFadeLength + 1),
                vec1[kLength - kFadeLength + i], 1);
  }
  // Second part untouched.
  for (size_t i = kLength; i < vec1.Size(); ++i) {
    EXPECT_EQ(100, vec1[i]);
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
255
modules/audio_coding/neteq/background_noise.cc
Normal file
255
modules/audio_coding/neteq/background_noise.cc
Normal file
@ -0,0 +1,255 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#include <algorithm> // min, max
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// static
// Out-of-class definition required (pre-C++17) for the in-class constant.
const size_t BackgroundNoise::kMaxLpcOrder;

// Allocates one parameter set per channel and resets them all.
BackgroundNoise::BackgroundNoise(size_t num_channels)
    : num_channels_(num_channels),
      channel_parameters_(new ChannelParameters[num_channels_]),
      mode_(NetEq::kBgnOn) {
  Reset();
}

BackgroundNoise::~BackgroundNoise() {}

// Marks the estimator uninitialized and resets every channel's parameters.
void BackgroundNoise::Reset() {
  initialized_ = false;
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    channel_parameters_[channel].Reset();
  }
  // Keep _bgnMode as it is.
}
|
||||
|
||||
void BackgroundNoise::Update(const AudioMultiVector& input,
|
||||
const PostDecodeVad& vad) {
|
||||
if (vad.running() && vad.active_speech()) {
|
||||
// Do not update the background noise parameters if we know that the signal
|
||||
// is active speech.
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t auto_correlation[kMaxLpcOrder + 1];
|
||||
int16_t fiter_output[kMaxLpcOrder + kResidualLength];
|
||||
int16_t reflection_coefficients[kMaxLpcOrder];
|
||||
int16_t lpc_coefficients[kMaxLpcOrder + 1];
|
||||
|
||||
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
|
||||
ChannelParameters& parameters = channel_parameters_[channel_ix];
|
||||
int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
|
||||
int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
|
||||
input[channel_ix].CopyTo(kVecLen, input.Size() - kVecLen, temp_signal);
|
||||
int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen,
|
||||
auto_correlation);
|
||||
|
||||
if ((!vad.running() &&
|
||||
sample_energy < parameters.energy_update_threshold) ||
|
||||
(vad.running() && !vad.active_speech())) {
|
||||
// Generate LPC coefficients.
|
||||
if (auto_correlation[0] > 0) {
|
||||
// Regardless of whether the filter is actually updated or not,
|
||||
// update energy threshold levels, since we have in fact observed
|
||||
// a low energy signal.
|
||||
if (sample_energy < parameters.energy_update_threshold) {
|
||||
// Never go under 1.0 in average sample energy.
|
||||
parameters.energy_update_threshold = std::max(sample_energy, 1);
|
||||
parameters.low_energy_update_threshold = 0;
|
||||
}
|
||||
|
||||
// Only update BGN if filter is stable, i.e., if return value from
|
||||
// Levinson-Durbin function is 1.
|
||||
if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients,
|
||||
reflection_coefficients,
|
||||
kMaxLpcOrder) != 1) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// Center value in auto-correlation is not positive. Do not update.
|
||||
return;
|
||||
}
|
||||
|
||||
// Generate the CNG gain factor by looking at the energy of the residual.
|
||||
WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
|
||||
fiter_output, lpc_coefficients,
|
||||
kMaxLpcOrder + 1, kResidualLength);
|
||||
int32_t residual_energy = WebRtcSpl_DotProductWithScale(fiter_output,
|
||||
fiter_output,
|
||||
kResidualLength,
|
||||
0);
|
||||
|
||||
// Check spectral flatness.
|
||||
// Comparing the residual variance with the input signal variance tells
|
||||
// if the spectrum is flat or not.
|
||||
// If 5 * residual_energy >= 16 * sample_energy, the spectrum is flat
|
||||
// enough. Also ensure that the energy is non-zero.
|
||||
if ((sample_energy > 0) &&
|
||||
(int64_t{5} * residual_energy >= int64_t{16} * sample_energy)) {
|
||||
// Spectrum is flat enough; save filter parameters.
|
||||
// |temp_signal| + |kVecLen| - |kMaxLpcOrder| points at the first of the
|
||||
// |kMaxLpcOrder| samples in the residual signal, which will form the
|
||||
// filter state for the next noise generation.
|
||||
SaveParameters(channel_ix, lpc_coefficients,
|
||||
temp_signal + kVecLen - kMaxLpcOrder, sample_energy,
|
||||
residual_energy);
|
||||
}
|
||||
} else {
|
||||
// Will only happen if post-decode VAD is disabled and |sample_energy| is
|
||||
// not low enough. Increase the threshold for update so that it increases
|
||||
// by a factor 4 in 4 seconds.
|
||||
IncrementEnergyThreshold(channel_ix, sample_energy);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Returns the stored average sample energy for |channel|.
int32_t BackgroundNoise::Energy(size_t channel) const {
  assert(channel < num_channels_);
  return channel_parameters_[channel].energy;
}

// Sets the mute (fade) factor for |channel|.
void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) {
  assert(channel < num_channels_);
  channel_parameters_[channel].mute_factor = value;
}

// Returns the mute (fade) factor for |channel|.
int16_t BackgroundNoise::MuteFactor(size_t channel) const {
  assert(channel < num_channels_);
  return channel_parameters_[channel].mute_factor;
}

// Returns a pointer to the |kMaxLpcOrder + 1| LPC filter coefficients for
// |channel|. The pointer is owned by this object.
const int16_t* BackgroundNoise::Filter(size_t channel) const {
  assert(channel < num_channels_);
  return channel_parameters_[channel].filter;
}

// Returns a pointer to the |kMaxLpcOrder| filter-state samples for |channel|.
// The pointer is owned by this object.
const int16_t* BackgroundNoise::FilterState(size_t channel) const {
  assert(channel < num_channels_);
  return channel_parameters_[channel].filter_state;
}

// Copies up to |kMaxLpcOrder| samples from |input| into the filter state for
// |channel|; longer inputs are silently truncated.
void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input,
                                     size_t length) {
  assert(channel < num_channels_);
  length = std::min(length, kMaxLpcOrder);
  memcpy(channel_parameters_[channel].filter_state, input,
         length * sizeof(int16_t));
}

// Returns the CNG gain scale factor for |channel|.
int16_t BackgroundNoise::Scale(size_t channel) const {
  assert(channel < num_channels_);
  return channel_parameters_[channel].scale;
}

// Returns the right-shift applied together with |scale| for |channel|.
int16_t BackgroundNoise::ScaleShift(size_t channel) const {
  assert(channel < num_channels_);
  return channel_parameters_[channel].scale_shift;
}
|
||||
|
||||
// Computes the auto-correlation of |signal| (lags 0..kMaxLpcOrder) into
// |auto_correlation| and returns the per-sample energy, i.e. lag-0 energy
// normalized by the (power-of-two) signal length.
int32_t BackgroundNoise::CalculateAutoCorrelation(
    const int16_t* signal, size_t length, int32_t* auto_correlation) const {
  // Negative step makes the helper compute auto-correlation lags.
  static const int kCorrelationStep = -1;
  const int correlation_scale =
      CrossCorrelationWithAutoShift(signal, signal, length, kMaxLpcOrder + 1,
                                    kCorrelationStep, auto_correlation);

  // Number of shifts to normalize energy to energy/sample. The helper already
  // down-scaled by |correlation_scale| bits, so shift by the remainder.
  int energy_sample_shift = kLogVecLen - correlation_scale;
  return auto_correlation[0] >> energy_sample_shift;
}
|
||||
|
||||
// Raises the adaptive energy threshold for |channel| by roughly a factor of
// (1 + kThresholdIncrement / 2^16), carrying the fractional part in
// |low_energy_update_threshold|, and caps the threshold from below at 60 dB
// under the (slowly decaying) maximum observed energy.
void BackgroundNoise::IncrementEnergyThreshold(size_t channel,
                                               int32_t sample_energy) {
  // TODO(hlundin): Simplify the below threshold update. What this code
  // does is simply "threshold += (increment * threshold) >> 16", but due
  // to the limited-width operations, it is not exactly the same. The
  // difference should be inaudible, but bit-exactness would not be
  // maintained.
  assert(channel < num_channels_);
  ChannelParameters& parameters = channel_parameters_[channel];
  // Accumulate the increment's fractional contribution in Q16, split over
  // the low and mid bytes of the threshold to avoid overflow.
  int32_t temp_energy =
      (kThresholdIncrement * parameters.low_energy_update_threshold) >> 16;
  temp_energy += kThresholdIncrement *
      (parameters.energy_update_threshold & 0xFF);
  temp_energy += (kThresholdIncrement *
      ((parameters.energy_update_threshold>>8) & 0xFF)) << 8;
  parameters.low_energy_update_threshold += temp_energy;

  // Integer part of the increment, plus carry from the fractional part.
  parameters.energy_update_threshold += kThresholdIncrement *
      (parameters.energy_update_threshold>>16);
  parameters.energy_update_threshold +=
      parameters.low_energy_update_threshold >> 16;
  // Keep only the fractional 16 bits for the next call.
  parameters.low_energy_update_threshold =
      parameters.low_energy_update_threshold & 0x0FFFF;

  // Update maximum energy.
  // Decrease by a factor 1/1024 each time.
  parameters.max_energy = parameters.max_energy -
      (parameters.max_energy >> 10);
  if (sample_energy > parameters.max_energy) {
    parameters.max_energy = sample_energy;
  }

  // Set |energy_update_threshold| to no less than 60 dB lower than
  // |max_energy_|. Adding 524288 assures proper rounding.
  int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20;
  if (energy_update_threshold > parameters.energy_update_threshold) {
    parameters.energy_update_threshold = energy_update_threshold;
  }
}
|
||||
|
||||
// Stores a freshly estimated noise model for |channel|: LPC coefficients,
// filter state, energy thresholds, and the CNG gain (scale + shift) derived
// from the residual energy. Marks the estimator initialized.
void BackgroundNoise::SaveParameters(size_t channel,
                                     const int16_t* lpc_coefficients,
                                     const int16_t* filter_state,
                                     int32_t sample_energy,
                                     int32_t residual_energy) {
  assert(channel < num_channels_);
  ChannelParameters& parameters = channel_parameters_[channel];
  memcpy(parameters.filter, lpc_coefficients,
         (kMaxLpcOrder+1) * sizeof(int16_t));
  memcpy(parameters.filter_state, filter_state,
         kMaxLpcOrder * sizeof(int16_t));
  // Save energy level and update energy threshold levels.
  // Never get under 1.0 in average sample energy.
  parameters.energy = std::max(sample_energy, 1);
  parameters.energy_update_threshold = parameters.energy;
  parameters.low_energy_update_threshold = 0;

  // Normalize residual_energy to 29 or 30 bits before sqrt.
  int16_t norm_shift = WebRtcSpl_NormW32(residual_energy) - 1;
  if (norm_shift & 0x1) {
    norm_shift -= 1;  // Even number of shifts required.
  }
  residual_energy = WEBRTC_SPL_SHIFT_W32(residual_energy, norm_shift);

  // Calculate scale and shift factor.
  parameters.scale = static_cast<int16_t>(WebRtcSpl_SqrtFloor(residual_energy));
  // Add 13 to the |scale_shift_|, since the random numbers table is in
  // Q13.
  // TODO(hlundin): Move the "13" to where the |scale_shift_| is used?
  parameters.scale_shift =
      static_cast<int16_t>(13 + ((kLogResidualLength + norm_shift) / 2));

  initialized_ = true;
}
|
||||
|
||||
} // namespace webrtc
|
||||
137
modules/audio_coding/neteq/background_noise.h
Normal file
137
modules/audio_coding/neteq/background_noise.h
Normal file
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_
|
||||
|
||||
#include <string.h> // size_t
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class PostDecodeVad;
|
||||
|
||||
// This class handles estimation of background noise parameters.
|
||||
class BackgroundNoise {
|
||||
public:
|
||||
// TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10.
|
||||
// Will work anyway, but probably sound a little worse.
|
||||
static const size_t kMaxLpcOrder = 8; // 32000 / 8000 + 4.
|
||||
|
||||
explicit BackgroundNoise(size_t num_channels);
|
||||
virtual ~BackgroundNoise();
|
||||
|
||||
void Reset();
|
||||
|
||||
// Updates the parameter estimates based on the signal currently in the
|
||||
// |sync_buffer|, and on the latest decision in |vad| if it is running.
|
||||
void Update(const AudioMultiVector& sync_buffer,
|
||||
const PostDecodeVad& vad);
|
||||
|
||||
// Returns |energy_| for |channel|.
|
||||
int32_t Energy(size_t channel) const;
|
||||
|
||||
// Sets the value of |mute_factor_| for |channel| to |value|.
|
||||
void SetMuteFactor(size_t channel, int16_t value);
|
||||
|
||||
// Returns |mute_factor_| for |channel|.
|
||||
int16_t MuteFactor(size_t channel) const;
|
||||
|
||||
// Returns a pointer to |filter_| for |channel|.
|
||||
const int16_t* Filter(size_t channel) const;
|
||||
|
||||
// Returns a pointer to |filter_state_| for |channel|.
|
||||
const int16_t* FilterState(size_t channel) const;
|
||||
|
||||
// Copies |length| elements from |input| to the filter state. Will not copy
|
||||
// more than |kMaxLpcOrder| elements.
|
||||
void SetFilterState(size_t channel, const int16_t* input, size_t length);
|
||||
|
||||
// Returns |scale_| for |channel|.
|
||||
int16_t Scale(size_t channel) const;
|
||||
|
||||
// Returns |scale_shift_| for |channel|.
|
||||
int16_t ScaleShift(size_t channel) const;
|
||||
|
||||
// Accessors.
|
||||
bool initialized() const { return initialized_; }
|
||||
NetEq::BackgroundNoiseMode mode() const { return mode_; }
|
||||
|
||||
// Sets the mode of the background noise playout for cases when there is long
|
||||
// duration of packet loss.
|
||||
void set_mode(NetEq::BackgroundNoiseMode mode) { mode_ = mode; }
|
||||
|
||||
private:
|
||||
static const int kThresholdIncrement = 229; // 0.0035 in Q16.
|
||||
static const size_t kVecLen = 256;
|
||||
static const int kLogVecLen = 8; // log2(kVecLen).
|
||||
static const size_t kResidualLength = 64;
|
||||
static const int16_t kLogResidualLength = 6; // log2(kResidualLength)
|
||||
|
||||
struct ChannelParameters {
|
||||
// Constructor.
|
||||
ChannelParameters() {
|
||||
Reset();
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
energy = 2500;
|
||||
max_energy = 0;
|
||||
energy_update_threshold = 500000;
|
||||
low_energy_update_threshold = 0;
|
||||
memset(filter_state, 0, sizeof(filter_state));
|
||||
memset(filter, 0, sizeof(filter));
|
||||
filter[0] = 4096;
|
||||
mute_factor = 0,
|
||||
scale = 20000;
|
||||
scale_shift = 24;
|
||||
}
|
||||
|
||||
int32_t energy;
|
||||
int32_t max_energy;
|
||||
int32_t energy_update_threshold;
|
||||
int32_t low_energy_update_threshold;
|
||||
int16_t filter_state[kMaxLpcOrder];
|
||||
int16_t filter[kMaxLpcOrder + 1];
|
||||
int16_t mute_factor;
|
||||
int16_t scale;
|
||||
int16_t scale_shift;
|
||||
};
|
||||
|
||||
int32_t CalculateAutoCorrelation(const int16_t* signal,
|
||||
size_t length,
|
||||
int32_t* auto_correlation) const;
|
||||
|
||||
// Increments the energy threshold by a factor 1 + |kThresholdIncrement|.
|
||||
void IncrementEnergyThreshold(size_t channel, int32_t sample_energy);
|
||||
|
||||
// Updates the filter parameters.
|
||||
void SaveParameters(size_t channel,
|
||||
const int16_t* lpc_coefficients,
|
||||
const int16_t* filter_state,
|
||||
int32_t sample_energy,
|
||||
int32_t residual_energy);
|
||||
|
||||
size_t num_channels_;
|
||||
std::unique_ptr<ChannelParameters[]> channel_parameters_;
|
||||
bool initialized_;
|
||||
NetEq::BackgroundNoiseMode mode_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(BackgroundNoise);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_BACKGROUND_NOISE_H_
|
||||
26
modules/audio_coding/neteq/background_noise_unittest.cc
Normal file
26
modules/audio_coding/neteq/background_noise_unittest.cc
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for BackgroundNoise class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
TEST(BackgroundNoise, CreateAndDestroy) {
  // Construction and destruction alone must not crash or leak.
  const size_t kNumChannels = 1;
  BackgroundNoise bgn(kNumChannels);
}
|
||||
|
||||
// TODO(hlundin): Write more tests.
|
||||
|
||||
} // namespace webrtc
|
||||
65
modules/audio_coding/neteq/buffer_level_filter.cc
Normal file
65
modules/audio_coding/neteq/buffer_level_filter.cc
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
|
||||
|
||||
#include <algorithm> // Provide access to std::max.
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Starts from a freshly reset state.
BufferLevelFilter::BufferLevelFilter() {
  Reset();
}

// Restores the default state: empty filtered level and the default filter
// coefficient (253 in Q8, i.e. the coefficient for target levels 4..7).
void BufferLevelFilter::Reset() {
  filtered_current_level_ = 0;
  level_factor_ = 253;
}
|
||||
|
||||
void BufferLevelFilter::Update(size_t buffer_size_packets,
                               int time_stretched_samples,
                               size_t packet_len_samples) {
  // First-order IIR smoothing in Q8:
  // |filtered_current_level_| = |level_factor_| * |filtered_current_level_| +
  // (1 - |level_factor_|) * |buffer_size_packets|,
  // with |level_factor_| and |filtered_current_level_| in Q8 and
  // |buffer_size_packets| in Q0.
  const int decayed_level =
      (level_factor_ * filtered_current_level_) >> 8;
  const int new_contribution =
      (256 - level_factor_) * static_cast<int>(buffer_size_packets);
  filtered_current_level_ = decayed_level + new_contribution;

  // Account for time-scale operations (accelerate and pre-emptive expand):
  // subtract the stretched amount, converted from samples to packets in Q8,
  // directly from the filtered value, clamping at zero.
  if (time_stretched_samples && packet_len_samples > 0) {
    const int stretched_packets_q8 =
        (time_stretched_samples << 8) / static_cast<int>(packet_len_samples);
    filtered_current_level_ =
        std::max(0, filtered_current_level_ - stretched_packets_q8);
  }
}
|
||||
|
||||
void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level) {
|
||||
if (target_buffer_level <= 1) {
|
||||
level_factor_ = 251;
|
||||
} else if (target_buffer_level <= 3) {
|
||||
level_factor_ = 252;
|
||||
} else if (target_buffer_level <= 7) {
|
||||
level_factor_ = 253;
|
||||
} else {
|
||||
level_factor_ = 254;
|
||||
}
|
||||
}
|
||||
|
||||
int BufferLevelFilter::filtered_current_level() const {
|
||||
return filtered_current_level_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
49
modules/audio_coding/neteq/buffer_level_filter.h
Normal file
49
modules/audio_coding/neteq/buffer_level_filter.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Smooths the (packet) buffer level with a first-order IIR filter, so that
// delay decisions are based on a stable estimate rather than the momentary
// buffer size. Virtual methods allow mocking in tests.
class BufferLevelFilter {
 public:
  BufferLevelFilter();
  virtual ~BufferLevelFilter() {}
  // Resets the filtered level to zero and the coefficient to its default.
  virtual void Reset();

  // Updates the filter. Current buffer size is |buffer_size_packets| (Q0).
  // If |time_stretched_samples| is non-zero, the value is converted to the
  // corresponding number of packets, and is subtracted from the filtered
  // value (thus bypassing the filter operation). |packet_len_samples| is the
  // number of audio samples carried in each incoming packet.
  virtual void Update(size_t buffer_size_packets, int time_stretched_samples,
                      size_t packet_len_samples);

  // Set the current target buffer level (obtained from
  // DelayManager::base_target_level()). Used to select the appropriate
  // filter coefficient.
  virtual void SetTargetBufferLevel(int target_buffer_level);

  // Returns the filtered level in Q8.
  virtual int filtered_current_level() const;

 private:
  int level_factor_;  // Filter factor for the buffer level filter in Q8.
  int filtered_current_level_;  // Filtered current buffer level in Q8.

  RTC_DISALLOW_COPY_AND_ASSIGN(BufferLevelFilter);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_BUFFER_LEVEL_FILTER_H_
|
||||
162
modules/audio_coding/neteq/buffer_level_filter_unittest.cc
Normal file
162
modules/audio_coding/neteq/buffer_level_filter_unittest.cc
Normal file
@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for BufferLevelFilter class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"

#include <math.h>  // Access to pow function.

#include <sstream>

#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Verifies that a newly constructed filter reports a zero level, and that it
// can be destroyed cleanly. Automatic storage replaces the original raw
// new/delete pair; destruction is exercised when |filter| leaves scope.
TEST(BufferLevelFilter, CreateAndDestroy) {
  BufferLevelFilter filter;
  EXPECT_EQ(0, filter.filtered_current_level());
}
|
||||
|
||||
// Verifies that, for a range of iteration counts and constant inputs, the
// filter output matches the theoretical step response of a first-order
// exponential filter with coefficient 251/256.
TEST(BufferLevelFilter, ConvergenceTest) {
  BufferLevelFilter filter;
  for (int times = 10; times <= 50; times += 10) {
    for (int value = 100; value <= 200; value += 10) {
      filter.Reset();
      filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.
      std::ostringstream ss;
      ss << "times = " << times << ", value = " << value;
      SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
      for (int i = 0; i < times; ++i) {
        filter.Update(value, 0 /* time_stretched_samples */,
                      160 /* packet_len_samples */);
      }
      // Expect the filtered value to be (theoretically)
      // (1 - (251/256) ^ |times|) * |value|.
      double expected_value_double =
          (1 - pow(251.0 / 256.0, times)) * value;
      int expected_value = static_cast<int>(expected_value_double);
      // filtered_current_level() returns the value in Q8.
      // The actual value may differ slightly from the expected value due to
      // intermediate-stage rounding errors in the filter implementation.
      // This is why we have to use EXPECT_NEAR with a tolerance of +/-1.
      EXPECT_NEAR(expected_value, filter.filtered_current_level() >> 8, 1);
    }
  }
}
|
||||
|
||||
// Verify that target buffer level impacts on the filter convergence.
|
||||
// Verify that target buffer level impacts on the filter convergence.
TEST(BufferLevelFilter, FilterFactor) {
  BufferLevelFilter filter;
  // Update 10 times with value 100.
  const int kTimes = 10;
  const int kValue = 100;

  // Feeds |kValue| into the filter |kTimes| times and returns the filtered
  // level with the Q8 fraction shifted away.
  auto run_updates = [&]() {
    for (int i = 0; i < kTimes; ++i) {
      filter.Update(kValue, 0 /* time_stretched_samples */,
                    160 /* packet_len_samples */);
    }
    return filter.filtered_current_level() >> 8;
  };

  // Coefficient 252/256; expect (1 - (252/256) ^ |kTimes|) * |kValue| = 14.
  filter.SetTargetBufferLevel(3);
  EXPECT_EQ(14, run_updates());

  // Coefficient 253/256; expect (1 - (253/256) ^ |kTimes|) * |kValue| = 11.
  filter.Reset();
  filter.SetTargetBufferLevel(7);
  EXPECT_EQ(11, run_updates());

  // Coefficient 254/256; expect (1 - (254/256) ^ |kTimes|) * |kValue| = 7.
  filter.Reset();
  filter.SetTargetBufferLevel(8);
  EXPECT_EQ(7, run_updates());
}
|
||||
|
||||
|
||||
// Verifies that |time_stretched_samples| bypasses the filter: once converted
// from samples to packets, it is subtracted directly from the filtered level.
TEST(BufferLevelFilter, TimeStretchedSamples) {
  BufferLevelFilter filter;
  filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.
  // Update 10 times with value 100.
  const int kTimes = 10;
  const int kValue = 100;
  const int kPacketSizeSamples = 160;
  const int kNumPacketsStretched = 2;
  const int kTimeStretchedSamples = kNumPacketsStretched * kPacketSizeSamples;
  for (int i = 0; i < kTimes; ++i) {
    // Packet size set to 0. Do not expect the parameter
    // |kTimeStretchedSamples| to have any effect.
    filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */);
  }
  // Expect the filtered value to be
  // (1 - (251/256) ^ |kTimes|) * |kValue|.
  const int kExpectedValue = 17;
  // filtered_current_level() returns the value in Q8.
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);

  // Update filter again, now with non-zero value for packet length.
  // Set the current filtered value to be the input, in order to isolate the
  // impact of |kTimeStretchedSamples|.
  filter.Update(filter.filtered_current_level() >> 8, kTimeStretchedSamples,
                kPacketSizeSamples);
  // Stretching by two packets' worth of samples should lower the level by
  // exactly two packets.
  EXPECT_EQ(kExpectedValue - kNumPacketsStretched,
            filter.filtered_current_level() >> 8);
  // Try negative value and verify that we come back to the previous result.
  filter.Update(filter.filtered_current_level() >> 8, -kTimeStretchedSamples,
                kPacketSizeSamples);
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
}
|
||||
|
||||
// Same as TimeStretchedSamples, but with a negative, non-integer number of
// stretched packets. Note that -3.1415 * 160 truncates to -502 samples when
// converted to the int constant below, deliberately not a whole number of
// packets.
TEST(BufferLevelFilter, TimeStretchedSamplesNegativeUnevenFrames) {
  BufferLevelFilter filter;
  filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.
  // Update 10 times with value 100.
  const int kTimes = 10;
  const int kValue = 100;
  const int kPacketSizeSamples = 160;
  const int kTimeStretchedSamples = -3.1415 * kPacketSizeSamples;
  for (int i = 0; i < kTimes; ++i) {
    // Packet size set to 0. Do not expect the parameter
    // |kTimeStretchedSamples| to have any effect.
    filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */);
  }
  // Expect the filtered value to be
  // (1 - (251/256) ^ |kTimes|) * |kValue|.
  const int kExpectedValue = 17;
  // filtered_current_level() returns the value in Q8.
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);

  // Update filter again, now with non-zero value for packet length.
  // Set the current filtered value to be the input, in order to isolate the
  // impact of |kTimeStretchedSamples|.
  filter.Update(filter.filtered_current_level() >> 8, kTimeStretchedSamples,
                kPacketSizeSamples);
  EXPECT_EQ(21, filter.filtered_current_level() >> 8);
  // Try negative value and verify that we come back to the previous result.
  filter.Update(filter.filtered_current_level() >> 8, -kTimeStretchedSamples,
                kPacketSizeSamples);
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
}
|
||||
|
||||
} // namespace webrtc
|
||||
124
modules/audio_coding/neteq/comfort_noise.cc
Normal file
124
modules/audio_coding/neteq/comfort_noise.cc
Normal file
@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/comfort_noise.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
#include "webrtc/rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Resets the state so that the next call to Generate() starts a new comfort
// noise period, including the overlap-add against the sync buffer.
void ComfortNoise::Reset() {
  first_call_ = true;
}
|
||||
|
||||
// Feeds the SID payload in |packet| to the active CNG decoder. Returns kOK on
// success, or kUnknownPayloadType if no CNG decoder matches the packet's
// payload type.
int ComfortNoise::UpdateParameters(const Packet& packet) {
  // Get comfort noise decoder.
  if (decoder_database_->SetActiveCngDecoder(packet.payload_type) != kOK) {
    return kUnknownPayloadType;
  }
  ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
  RTC_DCHECK(cng_decoder);
  cng_decoder->UpdateSid(packet.payload);
  return kOK;
}
|
||||
|
||||
int ComfortNoise::Generate(size_t requested_length,
|
||||
AudioMultiVector* output) {
|
||||
// TODO(hlundin): Change to an enumerator and skip assert.
|
||||
assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
|
||||
fs_hz_ == 48000);
|
||||
// Not adapted for multi-channel yet.
|
||||
if (output->Channels() != 1) {
|
||||
LOG(LS_ERROR) << "No multi-channel support";
|
||||
return kMultiChannelNotSupported;
|
||||
}
|
||||
|
||||
size_t number_of_samples = requested_length;
|
||||
bool new_period = false;
|
||||
if (first_call_) {
|
||||
// Generate noise and overlap slightly with old data.
|
||||
number_of_samples = requested_length + overlap_length_;
|
||||
new_period = true;
|
||||
}
|
||||
output->AssertSize(number_of_samples);
|
||||
// Get the decoder from the database.
|
||||
ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
|
||||
if (!cng_decoder) {
|
||||
LOG(LS_ERROR) << "Unknwown payload type";
|
||||
return kUnknownPayloadType;
|
||||
}
|
||||
|
||||
std::unique_ptr<int16_t[]> temp(new int16_t[number_of_samples]);
|
||||
if (!cng_decoder->Generate(
|
||||
rtc::ArrayView<int16_t>(temp.get(), number_of_samples),
|
||||
new_period)) {
|
||||
// Error returned.
|
||||
output->Zeros(requested_length);
|
||||
LOG(LS_ERROR) <<
|
||||
"ComfortNoiseDecoder::Genererate failed to generate comfort noise";
|
||||
return kInternalError;
|
||||
}
|
||||
(*output)[0].OverwriteAt(temp.get(), number_of_samples, 0);
|
||||
|
||||
if (first_call_) {
|
||||
// Set tapering window parameters. Values are in Q15.
|
||||
int16_t muting_window; // Mixing factor for overlap data.
|
||||
int16_t muting_window_increment; // Mixing factor increment (negative).
|
||||
int16_t unmuting_window; // Mixing factor for comfort noise.
|
||||
int16_t unmuting_window_increment; // Mixing factor increment.
|
||||
if (fs_hz_ == 8000) {
|
||||
muting_window = DspHelper::kMuteFactorStart8kHz;
|
||||
muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
|
||||
unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
|
||||
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
|
||||
} else if (fs_hz_ == 16000) {
|
||||
muting_window = DspHelper::kMuteFactorStart16kHz;
|
||||
muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
|
||||
unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
|
||||
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
|
||||
} else if (fs_hz_ == 32000) {
|
||||
muting_window = DspHelper::kMuteFactorStart32kHz;
|
||||
muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
|
||||
unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
|
||||
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
|
||||
} else { // fs_hz_ == 48000
|
||||
muting_window = DspHelper::kMuteFactorStart48kHz;
|
||||
muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
|
||||
unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
|
||||
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
|
||||
}
|
||||
|
||||
// Do overlap-add between new vector and overlap.
|
||||
size_t start_ix = sync_buffer_->Size() - overlap_length_;
|
||||
for (size_t i = 0; i < overlap_length_; i++) {
|
||||
/* overlapVec[i] = WinMute * overlapVec[i] + WinUnMute * outData[i] */
|
||||
// The expression (*output)[0][i] is the i-th element in the first
|
||||
// channel.
|
||||
(*sync_buffer_)[0][start_ix + i] =
|
||||
(((*sync_buffer_)[0][start_ix + i] * muting_window) +
|
||||
((*output)[0][i] * unmuting_window) + 16384) >> 15;
|
||||
muting_window += muting_window_increment;
|
||||
unmuting_window += unmuting_window_increment;
|
||||
}
|
||||
// Remove |overlap_length_| samples from the front of |output| since they
|
||||
// were mixed into |sync_buffer_| above.
|
||||
output->PopFront(overlap_length_);
|
||||
}
|
||||
first_call_ = false;
|
||||
return kOK;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
71
modules/audio_coding/neteq/comfort_noise.h
Normal file
71
modules/audio_coding/neteq/comfort_noise.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class DecoderDatabase;
|
||||
class SyncBuffer;
|
||||
struct Packet;
|
||||
|
||||
// This class acts as an interface to the CNG generator.
|
||||
class ComfortNoise {
|
||||
public:
|
||||
enum ReturnCodes {
|
||||
kOK = 0,
|
||||
kUnknownPayloadType,
|
||||
kInternalError,
|
||||
kMultiChannelNotSupported
|
||||
};
|
||||
|
||||
ComfortNoise(int fs_hz, DecoderDatabase* decoder_database,
|
||||
SyncBuffer* sync_buffer)
|
||||
: fs_hz_(fs_hz),
|
||||
first_call_(true),
|
||||
overlap_length_(5 * fs_hz_ / 8000),
|
||||
decoder_database_(decoder_database),
|
||||
sync_buffer_(sync_buffer) {
|
||||
}
|
||||
|
||||
// Resets the state. Should be called before each new comfort noise period.
|
||||
void Reset();
|
||||
|
||||
// Update the comfort noise generator with the parameters in |packet|.
|
||||
int UpdateParameters(const Packet& packet);
|
||||
|
||||
// Generates |requested_length| samples of comfort noise and writes to
|
||||
// |output|. If this is the first in call after Reset (or first after creating
|
||||
// the object), it will also mix in comfort noise at the end of the
|
||||
// SyncBuffer object provided in the constructor.
|
||||
int Generate(size_t requested_length, AudioMultiVector* output);
|
||||
|
||||
// Returns the last error code that was produced by the comfort noise
|
||||
// decoder. Returns 0 if no error has been encountered since the last reset.
|
||||
int internal_error_code() { return internal_error_code_; }
|
||||
|
||||
private:
|
||||
int fs_hz_;
|
||||
bool first_call_;
|
||||
size_t overlap_length_;
|
||||
DecoderDatabase* decoder_database_;
|
||||
SyncBuffer* sync_buffer_;
|
||||
int internal_error_code_;
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(ComfortNoise);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_COMFORT_NOISE_H_
|
||||
31
modules/audio_coding/neteq/comfort_noise_unittest.cc
Normal file
31
modules/audio_coding/neteq/comfort_noise_unittest.cc
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for ComfortNoise class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/comfort_noise.h"
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Smoke test: a ComfortNoise object can be created and destroyed against a
// mock decoder database and a real sync buffer.
TEST(ComfortNoise, CreateAndDestroy) {
  int fs = 8000;
  MockDecoderDatabase db;
  SyncBuffer sync_buffer(1, 1000);
  ComfortNoise cn(fs, &db, &sync_buffer);
  EXPECT_CALL(db, Die());  // Called when |db| goes out of scope.
}
|
||||
|
||||
// TODO(hlundin): Write more tests.
|
||||
|
||||
} // namespace webrtc
|
||||
62
modules/audio_coding/neteq/cross_correlation.cc
Normal file
62
modules/audio_coding/neteq/cross_correlation.cc
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <limits>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This function decides the overflow-protecting scaling and calls
|
||||
// WebRtcSpl_CrossCorrelation.
|
||||
// This function decides the overflow-protecting scaling and calls
// WebRtcSpl_CrossCorrelation. Returns the number of right shifts applied to
// the result.
int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
                                  const int16_t* sequence_2,
                                  size_t sequence_1_length,
                                  size_t cross_correlation_length,
                                  int cross_correlation_step,
                                  int32_t* cross_correlation) {
  // Find the maximum absolute value of sequence_1 and 2.
  const int16_t max_1 = WebRtcSpl_MaxAbsValueW16(sequence_1, sequence_1_length);
  // Total displacement of the sliding window over |sequence_2|; negative when
  // |cross_correlation_step| is negative.
  const int sequence_2_shift =
      cross_correlation_step * (static_cast<int>(cross_correlation_length) - 1);
  const int16_t* sequence_2_start =
      sequence_2_shift >= 0 ? sequence_2 : sequence_2 + sequence_2_shift;
  const size_t sequence_2_length =
      sequence_1_length + std::abs(sequence_2_shift);
  const int16_t max_2 =
      WebRtcSpl_MaxAbsValueW16(sequence_2_start, sequence_2_length);

  // In order to avoid overflow when computing the sum we should scale the
  // samples so that (in_vector_length * max_1 * max_2) will not overflow.
  // Expected scaling fulfills
  // 1) sufficient:
  //    sequence_1_length * (max_1 * max_2 >> scaling) <= 0x7fffffff;
  // 2) necessary:
  //    if (scaling > 0)
  //      sequence_1_length * (max_1 * max_2 >> (scaling - 1)) > 0x7fffffff;
  // The following calculation fulfills 1) and almost fulfills 2).
  // There are some corner cases that 2) is not satisfied, e.g.,
  // max_1 = 17, max_2 = 30848, sequence_1_length = 4095, in such case,
  // optimal scaling is 0, while the following calculation results in 1.
  const int32_t factor = (max_1 * max_2) / (std::numeric_limits<int32_t>::max()
      / static_cast<int32_t>(sequence_1_length));
  const int scaling = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);

  WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2,
                             sequence_1_length, cross_correlation_length,
                             scaling, cross_correlation_step);

  return scaling;
}
|
||||
|
||||
} // namespace webrtc
|
||||
50
modules/audio_coding/neteq/cross_correlation.h
Normal file
50
modules/audio_coding/neteq/cross_correlation.h
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
|
||||
|
||||
#include "webrtc/common_types.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// The function calculates the cross-correlation between two sequences
|
||||
// |sequence_1| and |sequence_2|. |sequence_1| is taken as reference, with
|
||||
// |sequence_1_length| as its length. |sequence_2| slides for the calculation of
|
||||
// cross-correlation. The result will be saved in |cross_correlation|.
|
||||
// |cross_correlation_length| correlation points are calculated.
|
||||
// The corresponding lag starts from 0, and increases with a step of
|
||||
// |cross_correlation_step|. The result is without normalization. To avoid
|
||||
// overflow, the result will be right shifted. The amount of shifts will be
|
||||
// returned.
|
||||
//
|
||||
// Input:
|
||||
// - sequence_1 : First sequence (reference).
|
||||
// - sequence_2 : Second sequence (sliding during calculation).
|
||||
// - sequence_1_length : Length of |sequence_1|.
|
||||
// - cross_correlation_length : Number of cross-correlations to calculate.
|
||||
// - cross_correlation_step : Step in the lag for the cross-correlation.
|
||||
//
|
||||
// Output:
|
||||
// - cross_correlation : The cross-correlation in Q(-right_shifts)
|
||||
//
|
||||
// Return:
|
||||
// Number of right shifts in cross_correlation.
|
||||
|
||||
int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
|
||||
const int16_t* sequence_2,
|
||||
size_t sequence_1_length,
|
||||
size_t cross_correlation_length,
|
||||
int cross_correlation_step,
|
||||
int32_t* cross_correlation);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
|
||||
170
modules/audio_coding/neteq/decision_logic.cc
Normal file
170
modules/audio_coding/neteq/decision_logic.cc
Normal file
@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/decision_logic_fax.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/decision_logic_normal.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Factory method: returns a DecisionLogicNormal for the "on"/"streaming"
// playout modes, and a DecisionLogicFax for the "fax"/"off" modes. Ownership
// of the returned object passes to the caller; the pointer arguments are not
// owned.
DecisionLogic* DecisionLogic::Create(int fs_hz,
                                     size_t output_size_samples,
                                     NetEqPlayoutMode playout_mode,
                                     DecoderDatabase* decoder_database,
                                     const PacketBuffer& packet_buffer,
                                     DelayManager* delay_manager,
                                     BufferLevelFilter* buffer_level_filter,
                                     const TickTimer* tick_timer) {
  switch (playout_mode) {
    case kPlayoutOn:
    case kPlayoutStreaming:
      return new DecisionLogicNormal(
          fs_hz, output_size_samples, playout_mode, decoder_database,
          packet_buffer, delay_manager, buffer_level_filter, tick_timer);
    case kPlayoutFax:
    case kPlayoutOff:
      return new DecisionLogicFax(
          fs_hz, output_size_samples, playout_mode, decoder_database,
          packet_buffer, delay_manager, buffer_level_filter, tick_timer);
  }
  // This line cannot be reached, but must be here to avoid compiler errors.
  assert(false);
  return NULL;
}
|
||||
|
||||
// Constructor. Pointer arguments are borrowed, not owned. Configures the
// delay manager for streaming mode when applicable and sets the sample rate.
// NOTE(review): |noise_fast_forward_| is zeroed in Reset() but is absent from
// this init list — presumably initialized in the class declaration; verify.
DecisionLogic::DecisionLogic(int fs_hz,
                             size_t output_size_samples,
                             NetEqPlayoutMode playout_mode,
                             DecoderDatabase* decoder_database,
                             const PacketBuffer& packet_buffer,
                             DelayManager* delay_manager,
                             BufferLevelFilter* buffer_level_filter,
                             const TickTimer* tick_timer)
    : decoder_database_(decoder_database),
      packet_buffer_(packet_buffer),
      delay_manager_(delay_manager),
      buffer_level_filter_(buffer_level_filter),
      tick_timer_(tick_timer),
      cng_state_(kCngOff),
      packet_length_samples_(0),
      sample_memory_(0),
      prev_time_scale_(false),
      timescale_countdown_(
          tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
      num_consecutive_expands_(0),
      playout_mode_(playout_mode) {
  delay_manager_->set_streaming_mode(playout_mode_ == kPlayoutStreaming);
  SetSampleRate(fs_hz, output_size_samples);
}
|
||||
|
||||
DecisionLogic::~DecisionLogic() = default;
|
||||
|
||||
// Resets the full decision state, including CNG tracking and the time-scale
// countdown.
void DecisionLogic::Reset() {
  cng_state_ = kCngOff;
  noise_fast_forward_ = 0;
  packet_length_samples_ = 0;
  sample_memory_ = 0;
  prev_time_scale_ = false;
  timescale_countdown_.reset();
  num_consecutive_expands_ = 0;
}
|
||||
|
||||
// Resets only the per-codec state (typically done when switching codecs);
// CNG state is intentionally preserved, unlike in Reset().
void DecisionLogic::SoftReset() {
  packet_length_samples_ = 0;
  sample_memory_ = 0;
  prev_time_scale_ = false;
  timescale_countdown_ =
      tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
}
|
||||
|
||||
// Sets the sample rate (stored as a multiple of 8 kHz) and the output block
// size. Only 8/16/32/48 kHz are supported.
void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
  // TODO(hlundin): Change to an enumerator and skip assert.
  assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
  fs_mult_ = fs_hz / 8000;
  output_size_samples_ = output_size_samples;
}
|
||||
|
||||
// Top-level decision entry point: updates CNG tracking and the filtered
// buffer level, then delegates the actual decision to the subclass hook
// GetDecisionSpecialized(). See the header for parameter semantics.
Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
                                      const Expand& expand,
                                      size_t decoder_frame_length,
                                      const Packet* next_packet,
                                      Modes prev_mode,
                                      bool play_dtmf,
                                      size_t generated_noise_samples,
                                      bool* reset_decoder) {
  // If last mode was CNG (or Expand, since this could be covering up for
  // a lost CNG packet), remember that CNG is on. This is needed if comfort
  // noise is interrupted by DTMF.
  if (prev_mode == kModeRfc3389Cng) {
    cng_state_ = kCngRfc3389On;
  } else if (prev_mode == kModeCodecInternalCng) {
    cng_state_ = kCngInternalOn;
  }

  // Samples available for playout, excluding the expand overlap region.
  const size_t samples_left =
      sync_buffer.FutureLength() - expand.overlap_length();
  const size_t cur_size_samples =
      samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);

  // Keep |prev_time_scale_| only if the last operation was a successful
  // time-scale modification (accelerate or preemptive expand).
  prev_time_scale_ = prev_time_scale_ &&
      (prev_mode == kModeAccelerateSuccess ||
       prev_mode == kModeAccelerateLowEnergy ||
       prev_mode == kModePreemptiveExpandSuccess ||
       prev_mode == kModePreemptiveExpandLowEnergy);

  FilterBufferLevel(cur_size_samples, prev_mode);

  return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
                                next_packet, prev_mode, play_dtmf,
                                reset_decoder, generated_noise_samples);
}
|
||||
|
||||
// Tracks the run length of consecutive expand operations: an expand
// increments the counter, any other operation resets it.
void DecisionLogic::ExpandDecision(Operations operation) {
  num_consecutive_expands_ =
      (operation == kExpand) ? num_consecutive_expands_ + 1 : 0;
}
|
||||
|
||||
// Feeds the current buffer size (in samples) through the buffer level filter,
// converting to packets first. Skipped entirely during CNG playout, and
// compensates for a preceding time-scale operation via |sample_memory_|.
void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples,
                                      Modes prev_mode) {
  // Do not update buffer history if currently playing CNG since it will bias
  // the filtered buffer level.
  if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
    buffer_level_filter_->SetTargetBufferLevel(
        delay_manager_->base_target_level());

    size_t buffer_size_packets = 0;
    if (packet_length_samples_ > 0) {
      // Calculate size in packets.
      buffer_size_packets = buffer_size_samples / packet_length_samples_;
    }
    int sample_memory_local = 0;
    if (prev_time_scale_) {
      // A time-scale operation just happened; pass its sample count to the
      // filter and restart the minimum-interval countdown.
      sample_memory_local = sample_memory_;
      timescale_countdown_ =
          tick_timer_->GetNewCountdown(kMinTimescaleInterval);
    }
    buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
                                 packet_length_samples_);
    prev_time_scale_ = false;
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
168
modules/audio_coding/neteq/decision_logic.h
Normal file
168
modules/audio_coding/neteq/decision_logic.h
Normal file
@ -0,0 +1,168 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/defines.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class BufferLevelFilter;
|
||||
class DecoderDatabase;
|
||||
class DelayManager;
|
||||
class Expand;
|
||||
class PacketBuffer;
|
||||
class SyncBuffer;
|
||||
struct Packet;
|
||||
|
||||
// This is the base class for the decision tree implementations. Derived classes
|
||||
// must implement the method GetDecisionSpecialized().
|
||||
class DecisionLogic {
|
||||
public:
|
||||
// Static factory function which creates different types of objects depending
|
||||
// on the |playout_mode|.
|
||||
static DecisionLogic* Create(int fs_hz,
|
||||
size_t output_size_samples,
|
||||
NetEqPlayoutMode playout_mode,
|
||||
DecoderDatabase* decoder_database,
|
||||
const PacketBuffer& packet_buffer,
|
||||
DelayManager* delay_manager,
|
||||
BufferLevelFilter* buffer_level_filter,
|
||||
const TickTimer* tick_timer);
|
||||
|
||||
// Constructor.
|
||||
DecisionLogic(int fs_hz,
|
||||
size_t output_size_samples,
|
||||
NetEqPlayoutMode playout_mode,
|
||||
DecoderDatabase* decoder_database,
|
||||
const PacketBuffer& packet_buffer,
|
||||
DelayManager* delay_manager,
|
||||
BufferLevelFilter* buffer_level_filter,
|
||||
const TickTimer* tick_timer);
|
||||
|
||||
virtual ~DecisionLogic();
|
||||
|
||||
// Resets object to a clean state.
|
||||
void Reset();
|
||||
|
||||
// Resets parts of the state. Typically done when switching codecs.
|
||||
void SoftReset();
|
||||
|
||||
// Sets the sample rate and the output block size.
|
||||
void SetSampleRate(int fs_hz, size_t output_size_samples);
|
||||
|
||||
// Returns the operation that should be done next. |sync_buffer| and |expand|
|
||||
// are provided for reference. |decoder_frame_length| is the number of samples
|
||||
// obtained from the last decoded frame. If there is a packet available, it
|
||||
// should be supplied in |next_packet|; otherwise it should be NULL. The mode
|
||||
// resulting from the last call to NetEqImpl::GetAudio is supplied in
|
||||
// |prev_mode|. If there is a DTMF event to play, |play_dtmf| should be set to
|
||||
// true. The output variable |reset_decoder| will be set to true if a reset is
|
||||
// required; otherwise it is left unchanged (i.e., it can remain true if it
|
||||
// was true before the call). This method end with calling
|
||||
// GetDecisionSpecialized to get the actual return value.
|
||||
Operations GetDecision(const SyncBuffer& sync_buffer,
|
||||
const Expand& expand,
|
||||
size_t decoder_frame_length,
|
||||
const Packet* next_packet,
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
size_t generated_noise_samples,
|
||||
bool* reset_decoder);
|
||||
|
||||
// These methods test the |cng_state_| for different conditions.
|
||||
bool CngRfc3389On() const { return cng_state_ == kCngRfc3389On; }
|
||||
bool CngOff() const { return cng_state_ == kCngOff; }
|
||||
|
||||
// Resets the |cng_state_| to kCngOff.
|
||||
void SetCngOff() { cng_state_ = kCngOff; }
|
||||
|
||||
// Reports back to DecisionLogic whether the decision to do expand remains or
|
||||
// not. Note that this is necessary, since an expand decision can be changed
|
||||
// to kNormal in NetEqImpl::GetDecision if there is still enough data in the
|
||||
// sync buffer.
|
||||
virtual void ExpandDecision(Operations operation);
|
||||
|
||||
// Adds |value| to |sample_memory_|.
|
||||
void AddSampleMemory(int32_t value) {
|
||||
sample_memory_ += value;
|
||||
}
|
||||
|
||||
// Accessors and mutators.
|
||||
void set_sample_memory(int32_t value) { sample_memory_ = value; }
|
||||
size_t noise_fast_forward() const { return noise_fast_forward_; }
|
||||
size_t packet_length_samples() const { return packet_length_samples_; }
|
||||
void set_packet_length_samples(size_t value) {
|
||||
packet_length_samples_ = value;
|
||||
}
|
||||
void set_prev_time_scale(bool value) { prev_time_scale_ = value; }
|
||||
NetEqPlayoutMode playout_mode() const { return playout_mode_; }
|
||||
|
||||
protected:
|
||||
// The value 5 sets maximum time-stretch rate to about 100 ms/s.
|
||||
static const int kMinTimescaleInterval = 5;
|
||||
|
||||
enum CngState {
|
||||
kCngOff,
|
||||
kCngRfc3389On,
|
||||
kCngInternalOn
|
||||
};
|
||||
|
||||
// Returns the operation that should be done next. |sync_buffer| and |expand|
|
||||
// are provided for reference. |decoder_frame_length| is the number of samples
|
||||
// obtained from the last decoded frame. If there is a packet available, it
|
||||
// should be supplied in |next_packet|; otherwise it should be NULL. The mode
|
||||
// resulting from the last call to NetEqImpl::GetAudio is supplied in
|
||||
// |prev_mode|. If there is a DTMF event to play, |play_dtmf| should be set to
|
||||
// true. The output variable |reset_decoder| will be set to true if a reset is
|
||||
// required; otherwise it is left unchanged (i.e., it can remain true if it
|
||||
// was true before the call). Should be implemented by derived classes.
|
||||
virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
|
||||
const Expand& expand,
|
||||
size_t decoder_frame_length,
|
||||
const Packet* next_packet,
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) = 0;
|
||||
|
||||
// Updates the |buffer_level_filter_| with the current buffer level
|
||||
// |buffer_size_packets|.
|
||||
void FilterBufferLevel(size_t buffer_size_packets, Modes prev_mode);
|
||||
|
||||
DecoderDatabase* decoder_database_;
|
||||
const PacketBuffer& packet_buffer_;
|
||||
DelayManager* delay_manager_;
|
||||
BufferLevelFilter* buffer_level_filter_;
|
||||
const TickTimer* tick_timer_;
|
||||
int fs_mult_;
|
||||
size_t output_size_samples_;
|
||||
CngState cng_state_; // Remember if comfort noise is interrupted by other
|
||||
// event (e.g., DTMF).
|
||||
size_t noise_fast_forward_ = 0;
|
||||
size_t packet_length_samples_;
|
||||
int sample_memory_;
|
||||
bool prev_time_scale_;
|
||||
std::unique_ptr<TickTimer::Countdown> timescale_countdown_;
|
||||
int num_consecutive_expands_;
|
||||
const NetEqPlayoutMode playout_mode_;
|
||||
|
||||
private:
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogic);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_H_
|
||||
103
modules/audio_coding/neteq/decision_logic_fax.cc
Normal file
103
modules/audio_coding/neteq/decision_logic_fax.cc
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decision_logic_fax.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
Operations DecisionLogicFax::GetDecisionSpecialized(
|
||||
const SyncBuffer& sync_buffer,
|
||||
const Expand& expand,
|
||||
size_t decoder_frame_length,
|
||||
const Packet* next_packet,
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) {
|
||||
assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
|
||||
uint32_t target_timestamp = sync_buffer.end_timestamp();
|
||||
uint32_t available_timestamp = 0;
|
||||
int is_cng_packet = 0;
|
||||
if (next_packet) {
|
||||
available_timestamp = next_packet->timestamp;
|
||||
is_cng_packet =
|
||||
decoder_database_->IsComfortNoise(next_packet->payload_type);
|
||||
}
|
||||
if (is_cng_packet) {
|
||||
if (static_cast<int32_t>((generated_noise_samples + target_timestamp)
|
||||
- available_timestamp) >= 0) {
|
||||
// Time to play this packet now.
|
||||
return kRfc3389Cng;
|
||||
} else {
|
||||
// Wait before playing this packet.
|
||||
return kRfc3389CngNoPacket;
|
||||
}
|
||||
}
|
||||
if (!next_packet) {
|
||||
// No packet. If in CNG mode, play as usual. Otherwise, use other method to
|
||||
// generate data.
|
||||
if (cng_state_ == kCngRfc3389On) {
|
||||
// Continue playing comfort noise.
|
||||
return kRfc3389CngNoPacket;
|
||||
} else if (cng_state_ == kCngInternalOn) {
|
||||
// Continue playing codec-internal comfort noise.
|
||||
return kCodecInternalCng;
|
||||
} else {
|
||||
// Nothing to play. Generate some data to play out.
|
||||
switch (playout_mode_) {
|
||||
case kPlayoutOff:
|
||||
return kAlternativePlc;
|
||||
case kPlayoutFax:
|
||||
return kAudioRepetition;
|
||||
default:
|
||||
assert(false);
|
||||
return kUndefined;
|
||||
}
|
||||
}
|
||||
} else if (target_timestamp == available_timestamp) {
|
||||
return kNormal;
|
||||
} else {
|
||||
if (static_cast<int32_t>((generated_noise_samples + target_timestamp)
|
||||
- available_timestamp) >= 0) {
|
||||
return kNormal;
|
||||
} else {
|
||||
// If currently playing comfort noise, continue with that. Do not
|
||||
// increase the timestamp counter since generated_noise_stopwatch_ in
|
||||
// NetEqImpl will take care of the time-keeping.
|
||||
if (cng_state_ == kCngRfc3389On) {
|
||||
return kRfc3389CngNoPacket;
|
||||
} else if (cng_state_ == kCngInternalOn) {
|
||||
return kCodecInternalCng;
|
||||
} else {
|
||||
// Otherwise, do packet-loss concealment and increase the
|
||||
// timestamp while waiting for the time to play this packet.
|
||||
switch (playout_mode_) {
|
||||
case kPlayoutOff:
|
||||
return kAlternativePlcIncreaseTimestamp;
|
||||
case kPlayoutFax:
|
||||
return kAudioRepetitionIncreaseTimestamp;
|
||||
default:
|
||||
assert(0);
|
||||
return kUndefined;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace webrtc
|
||||
57
modules/audio_coding/neteq/decision_logic_fax.h
Normal file
57
modules/audio_coding/neteq/decision_logic_fax.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Implementation of the DecisionLogic class for playout modes kPlayoutFax and
|
||||
// kPlayoutOff.
|
||||
class DecisionLogicFax : public DecisionLogic {
|
||||
public:
|
||||
// Constructor.
|
||||
DecisionLogicFax(int fs_hz,
|
||||
size_t output_size_samples,
|
||||
NetEqPlayoutMode playout_mode,
|
||||
DecoderDatabase* decoder_database,
|
||||
const PacketBuffer& packet_buffer,
|
||||
DelayManager* delay_manager,
|
||||
BufferLevelFilter* buffer_level_filter,
|
||||
const TickTimer* tick_timer)
|
||||
: DecisionLogic(fs_hz,
|
||||
output_size_samples,
|
||||
playout_mode,
|
||||
decoder_database,
|
||||
packet_buffer,
|
||||
delay_manager,
|
||||
buffer_level_filter,
|
||||
tick_timer) {}
|
||||
|
||||
protected:
|
||||
Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
|
||||
const Expand& expand,
|
||||
size_t decoder_frame_length,
|
||||
const Packet* next_packet,
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) override;
|
||||
|
||||
private:
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_FAX_H_
|
||||
242
modules/audio_coding/neteq/decision_logic_normal.cc
Normal file
242
modules/audio_coding/neteq/decision_logic_normal.cc
Normal file
@ -0,0 +1,242 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decision_logic_normal.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
Operations DecisionLogicNormal::GetDecisionSpecialized(
|
||||
const SyncBuffer& sync_buffer,
|
||||
const Expand& expand,
|
||||
size_t decoder_frame_length,
|
||||
const Packet* next_packet,
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) {
|
||||
assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
|
||||
// Guard for errors, to avoid getting stuck in error mode.
|
||||
if (prev_mode == kModeError) {
|
||||
if (!next_packet) {
|
||||
return kExpand;
|
||||
} else {
|
||||
return kUndefined; // Use kUndefined to flag for a reset.
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t target_timestamp = sync_buffer.end_timestamp();
|
||||
uint32_t available_timestamp = 0;
|
||||
bool is_cng_packet = false;
|
||||
if (next_packet) {
|
||||
available_timestamp = next_packet->timestamp;
|
||||
is_cng_packet =
|
||||
decoder_database_->IsComfortNoise(next_packet->payload_type);
|
||||
}
|
||||
|
||||
if (is_cng_packet) {
|
||||
return CngOperation(prev_mode, target_timestamp, available_timestamp,
|
||||
generated_noise_samples);
|
||||
}
|
||||
|
||||
// Handle the case with no packet at all available (except maybe DTMF).
|
||||
if (!next_packet) {
|
||||
return NoPacket(play_dtmf);
|
||||
}
|
||||
|
||||
// If the expand period was very long, reset NetEQ since it is likely that the
|
||||
// sender was restarted.
|
||||
if (num_consecutive_expands_ > kReinitAfterExpands) {
|
||||
*reset_decoder = true;
|
||||
return kNormal;
|
||||
}
|
||||
|
||||
const uint32_t five_seconds_samples =
|
||||
static_cast<uint32_t>(5 * 8000 * fs_mult_);
|
||||
// Check if the required packet is available.
|
||||
if (target_timestamp == available_timestamp) {
|
||||
return ExpectedPacketAvailable(prev_mode, play_dtmf);
|
||||
} else if (!PacketBuffer::IsObsoleteTimestamp(
|
||||
available_timestamp, target_timestamp, five_seconds_samples)) {
|
||||
return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length,
|
||||
prev_mode, target_timestamp,
|
||||
available_timestamp, play_dtmf,
|
||||
generated_noise_samples);
|
||||
} else {
|
||||
// This implies that available_timestamp < target_timestamp, which can
|
||||
// happen when a new stream or codec is received. Signal for a reset.
|
||||
return kUndefined;
|
||||
}
|
||||
}
|
||||
|
||||
Operations DecisionLogicNormal::CngOperation(Modes prev_mode,
|
||||
uint32_t target_timestamp,
|
||||
uint32_t available_timestamp,
|
||||
size_t generated_noise_samples) {
|
||||
// Signed difference between target and available timestamp.
|
||||
int32_t timestamp_diff = static_cast<int32_t>(
|
||||
static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
|
||||
available_timestamp);
|
||||
int32_t optimal_level_samp = static_cast<int32_t>(
|
||||
(delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
|
||||
const int64_t excess_waiting_time_samp =
|
||||
-static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
|
||||
|
||||
if (excess_waiting_time_samp > optimal_level_samp / 2) {
|
||||
// The waiting time for this packet will be longer than 1.5
|
||||
// times the wanted buffer delay. Apply fast-forward to cut the
|
||||
// waiting time down to the optimal.
|
||||
noise_fast_forward_ = rtc::dchecked_cast<size_t>(noise_fast_forward_ +
|
||||
excess_waiting_time_samp);
|
||||
timestamp_diff =
|
||||
rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
|
||||
}
|
||||
|
||||
if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) {
|
||||
// Not time to play this packet yet. Wait another round before using this
|
||||
// packet. Keep on playing CNG from previous CNG parameters.
|
||||
return kRfc3389CngNoPacket;
|
||||
} else {
|
||||
// Otherwise, go for the CNG packet now.
|
||||
noise_fast_forward_ = 0;
|
||||
return kRfc3389Cng;
|
||||
}
|
||||
}
|
||||
|
||||
Operations DecisionLogicNormal::NoPacket(bool play_dtmf) {
|
||||
if (cng_state_ == kCngRfc3389On) {
|
||||
// Keep on playing comfort noise.
|
||||
return kRfc3389CngNoPacket;
|
||||
} else if (cng_state_ == kCngInternalOn) {
|
||||
// Keep on playing codec internal comfort noise.
|
||||
return kCodecInternalCng;
|
||||
} else if (play_dtmf) {
|
||||
return kDtmf;
|
||||
} else {
|
||||
// Nothing to play, do expand.
|
||||
return kExpand;
|
||||
}
|
||||
}
|
||||
|
||||
Operations DecisionLogicNormal::ExpectedPacketAvailable(Modes prev_mode,
|
||||
bool play_dtmf) {
|
||||
if (prev_mode != kModeExpand && !play_dtmf) {
|
||||
// Check criterion for time-stretching.
|
||||
int low_limit, high_limit;
|
||||
delay_manager_->BufferLimits(&low_limit, &high_limit);
|
||||
if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
|
||||
return kFastAccelerate;
|
||||
if (TimescaleAllowed()) {
|
||||
if (buffer_level_filter_->filtered_current_level() >= high_limit)
|
||||
return kAccelerate;
|
||||
if (buffer_level_filter_->filtered_current_level() < low_limit)
|
||||
return kPreemptiveExpand;
|
||||
}
|
||||
}
|
||||
return kNormal;
|
||||
}
|
||||
|
||||
Operations DecisionLogicNormal::FuturePacketAvailable(
|
||||
const SyncBuffer& sync_buffer,
|
||||
const Expand& expand,
|
||||
size_t decoder_frame_length,
|
||||
Modes prev_mode,
|
||||
uint32_t target_timestamp,
|
||||
uint32_t available_timestamp,
|
||||
bool play_dtmf,
|
||||
size_t generated_noise_samples) {
|
||||
// Required packet is not available, but a future packet is.
|
||||
// Check if we should continue with an ongoing expand because the new packet
|
||||
// is too far into the future.
|
||||
uint32_t timestamp_leap = available_timestamp - target_timestamp;
|
||||
if ((prev_mode == kModeExpand) &&
|
||||
!ReinitAfterExpands(timestamp_leap) &&
|
||||
!MaxWaitForPacket() &&
|
||||
PacketTooEarly(timestamp_leap) &&
|
||||
UnderTargetLevel()) {
|
||||
if (play_dtmf) {
|
||||
// Still have DTMF to play, so do not do expand.
|
||||
return kDtmf;
|
||||
} else {
|
||||
// Nothing to play.
|
||||
return kExpand;
|
||||
}
|
||||
}
|
||||
|
||||
const size_t samples_left =
|
||||
sync_buffer.FutureLength() - expand.overlap_length();
|
||||
const size_t cur_size_samples = samples_left +
|
||||
packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
|
||||
|
||||
// If previous was comfort noise, then no merge is needed.
|
||||
if (prev_mode == kModeRfc3389Cng ||
|
||||
prev_mode == kModeCodecInternalCng) {
|
||||
// Keep the same delay as before the CNG (or maximum 70 ms in buffer as
|
||||
// safety precaution), but make sure that the number of samples in buffer
|
||||
// is no higher than 4 times the optimal level. (Note that TargetLevel()
|
||||
// is in Q8.)
|
||||
if (static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
|
||||
available_timestamp ||
|
||||
cur_size_samples >
|
||||
((delay_manager_->TargetLevel() * packet_length_samples_) >> 8) *
|
||||
4) {
|
||||
// Time to play this new packet.
|
||||
return kNormal;
|
||||
} else {
|
||||
// Too early to play this new packet; keep on playing comfort noise.
|
||||
if (prev_mode == kModeRfc3389Cng) {
|
||||
return kRfc3389CngNoPacket;
|
||||
} else { // prevPlayMode == kModeCodecInternalCng.
|
||||
return kCodecInternalCng;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Do not merge unless we have done an expand before.
|
||||
if (prev_mode == kModeExpand) {
|
||||
return kMerge;
|
||||
} else if (play_dtmf) {
|
||||
// Play DTMF instead of expand.
|
||||
return kDtmf;
|
||||
} else {
|
||||
return kExpand;
|
||||
}
|
||||
}
|
||||
|
||||
bool DecisionLogicNormal::UnderTargetLevel() const {
|
||||
return buffer_level_filter_->filtered_current_level() <=
|
||||
delay_manager_->TargetLevel();
|
||||
}
|
||||
|
||||
bool DecisionLogicNormal::ReinitAfterExpands(uint32_t timestamp_leap) const {
|
||||
return timestamp_leap >=
|
||||
static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
|
||||
}
|
||||
|
||||
bool DecisionLogicNormal::PacketTooEarly(uint32_t timestamp_leap) const {
|
||||
return timestamp_leap >
|
||||
static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
|
||||
}
|
||||
|
||||
bool DecisionLogicNormal::MaxWaitForPacket() const {
|
||||
return num_consecutive_expands_ >= kMaxWaitForPacket;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
107
modules/audio_coding/neteq/decision_logic_normal.h
Normal file
107
modules/audio_coding/neteq/decision_logic_normal.h
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Implementation of the DecisionLogic class for playout modes kPlayoutOn and
|
||||
// kPlayoutStreaming.
|
||||
class DecisionLogicNormal : public DecisionLogic {
|
||||
public:
|
||||
// Constructor.
|
||||
DecisionLogicNormal(int fs_hz,
|
||||
size_t output_size_samples,
|
||||
NetEqPlayoutMode playout_mode,
|
||||
DecoderDatabase* decoder_database,
|
||||
const PacketBuffer& packet_buffer,
|
||||
DelayManager* delay_manager,
|
||||
BufferLevelFilter* buffer_level_filter,
|
||||
const TickTimer* tick_timer)
|
||||
: DecisionLogic(fs_hz,
|
||||
output_size_samples,
|
||||
playout_mode,
|
||||
decoder_database,
|
||||
packet_buffer,
|
||||
delay_manager,
|
||||
buffer_level_filter,
|
||||
tick_timer) {}
|
||||
|
||||
protected:
|
||||
static const int kReinitAfterExpands = 100;
|
||||
static const int kMaxWaitForPacket = 10;
|
||||
|
||||
Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
|
||||
const Expand& expand,
|
||||
size_t decoder_frame_length,
|
||||
const Packet* next_packet,
|
||||
Modes prev_mode,
|
||||
bool play_dtmf,
|
||||
bool* reset_decoder,
|
||||
size_t generated_noise_samples) override;
|
||||
|
||||
// Returns the operation to do given that the expected packet is not
|
||||
// available, but a packet further into the future is at hand.
|
||||
virtual Operations FuturePacketAvailable(
|
||||
const SyncBuffer& sync_buffer,
|
||||
const Expand& expand,
|
||||
size_t decoder_frame_length,
|
||||
Modes prev_mode,
|
||||
uint32_t target_timestamp,
|
||||
uint32_t available_timestamp,
|
||||
bool play_dtmf,
|
||||
size_t generated_noise_samples);
|
||||
|
||||
// Returns the operation to do given that the expected packet is available.
|
||||
virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
|
||||
|
||||
// Returns the operation given that no packets are available (except maybe
|
||||
// a DTMF event, flagged by setting |play_dtmf| true).
|
||||
virtual Operations NoPacket(bool play_dtmf);
|
||||
|
||||
private:
|
||||
// Returns the operation given that the next available packet is a comfort
|
||||
// noise payload (RFC 3389 only, not codec-internal).
|
||||
Operations CngOperation(Modes prev_mode,
|
||||
uint32_t target_timestamp,
|
||||
uint32_t available_timestamp,
|
||||
size_t generated_noise_samples);
|
||||
|
||||
// Checks if enough time has elapsed since the last successful timescale
|
||||
// operation was done (i.e., accelerate or preemptive expand).
|
||||
bool TimescaleAllowed() const {
|
||||
return !timescale_countdown_ || timescale_countdown_->Finished();
|
||||
}
|
||||
|
||||
// Checks if the current (filtered) buffer level is under the target level.
|
||||
bool UnderTargetLevel() const;
|
||||
|
||||
// Checks if |timestamp_leap| is so long into the future that a reset due
|
||||
// to exceeding kReinitAfterExpands will be done.
|
||||
bool ReinitAfterExpands(uint32_t timestamp_leap) const;
|
||||
|
||||
// Checks if we still have not done enough expands to cover the distance from
|
||||
// the last decoded packet to the next available packet, the distance beeing
|
||||
// conveyed in |timestamp_leap|.
|
||||
bool PacketTooEarly(uint32_t timestamp_leap) const;
|
||||
|
||||
// Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
|
||||
bool MaxWaitForPacket() const;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECISION_LOGIC_NORMAL_H_
|
||||
55
modules/audio_coding/neteq/decision_logic_unittest.cc
Normal file
55
modules/audio_coding/neteq/decision_logic_unittest.cc
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for DecisionLogic class and derived classes.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decision_logic.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/test/mock_audio_decoder_factory.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
TEST(DecisionLogic, CreateAndDestroy) {
|
||||
int fs_hz = 8000;
|
||||
int output_size_samples = fs_hz / 100; // Samples per 10 ms.
|
||||
DecoderDatabase decoder_database(
|
||||
new rtc::RefCountedObject<MockAudioDecoderFactory>);
|
||||
TickTimer tick_timer;
|
||||
PacketBuffer packet_buffer(10, &tick_timer);
|
||||
DelayPeakDetector delay_peak_detector(&tick_timer);
|
||||
DelayManager delay_manager(240, &delay_peak_detector, &tick_timer);
|
||||
BufferLevelFilter buffer_level_filter;
|
||||
DecisionLogic* logic = DecisionLogic::Create(
|
||||
fs_hz, output_size_samples, kPlayoutOn, &decoder_database, packet_buffer,
|
||||
&delay_manager, &buffer_level_filter, &tick_timer);
|
||||
delete logic;
|
||||
logic = DecisionLogic::Create(
|
||||
fs_hz, output_size_samples, kPlayoutStreaming, &decoder_database,
|
||||
packet_buffer, &delay_manager, &buffer_level_filter, &tick_timer);
|
||||
delete logic;
|
||||
logic = DecisionLogic::Create(
|
||||
fs_hz, output_size_samples, kPlayoutFax, &decoder_database, packet_buffer,
|
||||
&delay_manager, &buffer_level_filter, &tick_timer);
|
||||
delete logic;
|
||||
logic = DecisionLogic::Create(
|
||||
fs_hz, output_size_samples, kPlayoutOff, &decoder_database, packet_buffer,
|
||||
&delay_manager, &buffer_level_filter, &tick_timer);
|
||||
delete logic;
|
||||
}
|
||||
|
||||
// TODO(hlundin): Write more tests.
|
||||
|
||||
} // namespace webrtc
|
||||
355
modules/audio_coding/neteq/decoder_database.cc
Normal file
355
modules/audio_coding/neteq/decoder_database.cc
Normal file
@ -0,0 +1,355 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
|
||||
#include <utility> // pair
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder.h"
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
#include "webrtc/rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
DecoderDatabase::DecoderDatabase(
|
||||
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory)
|
||||
: active_decoder_type_(-1),
|
||||
active_cng_decoder_type_(-1),
|
||||
decoder_factory_(decoder_factory) {}
|
||||
|
||||
DecoderDatabase::~DecoderDatabase() = default;
|
||||
|
||||
DecoderDatabase::DecoderInfo::DecoderInfo(const SdpAudioFormat& audio_format,
|
||||
AudioDecoderFactory* factory,
|
||||
const std::string& codec_name)
|
||||
: name_(codec_name),
|
||||
audio_format_(audio_format),
|
||||
factory_(factory),
|
||||
external_decoder_(nullptr),
|
||||
cng_decoder_(CngDecoder::Create(audio_format)),
|
||||
subtype_(SubtypeFromFormat(audio_format)) {}
|
||||
|
||||
DecoderDatabase::DecoderInfo::DecoderInfo(const SdpAudioFormat& audio_format,
|
||||
AudioDecoderFactory* factory)
|
||||
: DecoderInfo(audio_format, factory, audio_format.name) {}
|
||||
|
||||
DecoderDatabase::DecoderInfo::DecoderInfo(NetEqDecoder ct,
|
||||
AudioDecoderFactory* factory)
|
||||
: DecoderInfo(*NetEqDecoderToSdpAudioFormat(ct), factory) {}
|
||||
|
||||
DecoderDatabase::DecoderInfo::DecoderInfo(const SdpAudioFormat& audio_format,
|
||||
AudioDecoder* ext_dec,
|
||||
const std::string& codec_name)
|
||||
: name_(codec_name),
|
||||
audio_format_(audio_format),
|
||||
factory_(nullptr),
|
||||
external_decoder_(ext_dec),
|
||||
subtype_(Subtype::kNormal) {
|
||||
RTC_CHECK(ext_dec);
|
||||
}
|
||||
|
||||
DecoderDatabase::DecoderInfo::DecoderInfo(DecoderInfo&&) = default;
|
||||
DecoderDatabase::DecoderInfo::~DecoderInfo() = default;
|
||||
|
||||
AudioDecoder* DecoderDatabase::DecoderInfo::GetDecoder() const {
|
||||
if (subtype_ != Subtype::kNormal) {
|
||||
// These are handled internally, so they have no AudioDecoder objects.
|
||||
return nullptr;
|
||||
}
|
||||
if (external_decoder_) {
|
||||
RTC_DCHECK(!decoder_);
|
||||
RTC_DCHECK(!cng_decoder_);
|
||||
return external_decoder_;
|
||||
}
|
||||
if (!decoder_) {
|
||||
// TODO(ossu): Keep a check here for now, since a number of tests create
|
||||
// DecoderInfos without factories.
|
||||
RTC_DCHECK(factory_);
|
||||
decoder_ = factory_->MakeAudioDecoder(audio_format_);
|
||||
}
|
||||
RTC_DCHECK(decoder_) << "Failed to create: " << audio_format_;
|
||||
return decoder_.get();
|
||||
}
|
||||
|
||||
bool DecoderDatabase::DecoderInfo::IsType(const char* name) const {
|
||||
return STR_CASE_CMP(audio_format_.name.c_str(), name) == 0;
|
||||
}
|
||||
|
||||
bool DecoderDatabase::DecoderInfo::IsType(const std::string& name) const {
|
||||
return IsType(name.c_str());
|
||||
}
|
||||
|
||||
// Builds a CngDecoder descriptor if |format| names comfort noise ("CN");
// otherwise returns an empty optional.
rtc::Optional<DecoderDatabase::DecoderInfo::CngDecoder>
DecoderDatabase::DecoderInfo::CngDecoder::Create(const SdpAudioFormat& format) {
  if (STR_CASE_CMP(format.name.c_str(), "CN") != 0) {
    return rtc::Optional<CngDecoder>();
  }
  // CN has a 1:1 RTP clock rate to sample rate ratio.
  const int sample_rate_hz = format.clockrate_hz;
  RTC_DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 ||
             sample_rate_hz == 32000 || sample_rate_hz == 48000);
  return rtc::Optional<DecoderDatabase::DecoderInfo::CngDecoder>(
      {sample_rate_hz});
}
|
||||
|
||||
// Maps an SDP format name onto the internal handling category. All names that
// are not one of the three special-cased ones are ordinary decoders.
DecoderDatabase::DecoderInfo::Subtype
DecoderDatabase::DecoderInfo::SubtypeFromFormat(const SdpAudioFormat& format) {
  const auto name_is = [&format](const char* name) {
    return STR_CASE_CMP(format.name.c_str(), name) == 0;
  };
  if (name_is("CN"))
    return Subtype::kComfortNoise;
  if (name_is("telephone-event"))
    return Subtype::kDtmf;
  if (name_is("red"))
    return Subtype::kRed;
  return Subtype::kNormal;
}
|
||||
|
||||
// Returns true when no payload types are registered.
bool DecoderDatabase::Empty() const { return decoders_.empty(); }

// Returns the number of registered payload types.
int DecoderDatabase::Size() const { return static_cast<int>(decoders_.size()); }
|
||||
|
||||
// Clears all registered decoders and resets both the active speech and active
// CNG decoder selections (-1 means "none"). Behaviorally the same as
// RemoveAll().
void DecoderDatabase::Reset() {
  decoders_.clear();
  active_decoder_type_ = -1;
  active_cng_decoder_type_ = -1;
}
|
||||
|
||||
std::vector<int> DecoderDatabase::SetCodecs(
|
||||
const std::map<int, SdpAudioFormat>& codecs) {
|
||||
// First collect all payload types that we'll remove or reassign, then remove
|
||||
// them from the database.
|
||||
std::vector<int> changed_payload_types;
|
||||
for (const std::pair<uint8_t, const DecoderInfo&> kv : decoders_) {
|
||||
auto i = codecs.find(kv.first);
|
||||
if (i == codecs.end() || i->second != kv.second.GetFormat()) {
|
||||
changed_payload_types.push_back(kv.first);
|
||||
}
|
||||
}
|
||||
for (int pl_type : changed_payload_types) {
|
||||
Remove(pl_type);
|
||||
}
|
||||
|
||||
// Enter the new and changed payload type mappings into the database.
|
||||
for (const auto& kv : codecs) {
|
||||
const int& rtp_payload_type = kv.first;
|
||||
const SdpAudioFormat& audio_format = kv.second;
|
||||
RTC_DCHECK_GE(rtp_payload_type, 0);
|
||||
RTC_DCHECK_LE(rtp_payload_type, 0x7f);
|
||||
if (decoders_.count(rtp_payload_type) == 0) {
|
||||
decoders_.insert(std::make_pair(
|
||||
rtp_payload_type, DecoderInfo(audio_format, decoder_factory_.get())));
|
||||
} else {
|
||||
// The mapping for this payload type hasn't changed.
|
||||
}
|
||||
}
|
||||
|
||||
return changed_payload_types;
|
||||
}
|
||||
|
||||
int DecoderDatabase::RegisterPayload(uint8_t rtp_payload_type,
|
||||
NetEqDecoder codec_type,
|
||||
const std::string& name) {
|
||||
if (rtp_payload_type > 0x7F) {
|
||||
return kInvalidRtpPayloadType;
|
||||
}
|
||||
// kCodecArbitrary is only supported through InsertExternal.
|
||||
if (codec_type == NetEqDecoder::kDecoderArbitrary ||
|
||||
!CodecSupported(codec_type)) {
|
||||
return kCodecNotSupported;
|
||||
}
|
||||
const auto opt_format = NetEqDecoderToSdpAudioFormat(codec_type);
|
||||
if (!opt_format) {
|
||||
return kCodecNotSupported;
|
||||
}
|
||||
DecoderInfo info(*opt_format, decoder_factory_, name);
|
||||
auto ret =
|
||||
decoders_.insert(std::make_pair(rtp_payload_type, std::move(info)));
|
||||
if (ret.second == false) {
|
||||
// Database already contains a decoder with type |rtp_payload_type|.
|
||||
return kDecoderExists;
|
||||
}
|
||||
return kOK;
|
||||
}
|
||||
|
||||
int DecoderDatabase::RegisterPayload(int rtp_payload_type,
|
||||
const SdpAudioFormat& audio_format) {
|
||||
if (rtp_payload_type < 0 || rtp_payload_type > 0x7f) {
|
||||
return kInvalidRtpPayloadType;
|
||||
}
|
||||
const auto ret = decoders_.insert(std::make_pair(
|
||||
rtp_payload_type, DecoderInfo(audio_format, decoder_factory_.get())));
|
||||
if (ret.second == false) {
|
||||
// Database already contains a decoder with type |rtp_payload_type|.
|
||||
return kDecoderExists;
|
||||
}
|
||||
return kOK;
|
||||
}
|
||||
|
||||
int DecoderDatabase::InsertExternal(uint8_t rtp_payload_type,
|
||||
NetEqDecoder codec_type,
|
||||
const std::string& codec_name,
|
||||
AudioDecoder* decoder) {
|
||||
if (rtp_payload_type > 0x7F) {
|
||||
return kInvalidRtpPayloadType;
|
||||
}
|
||||
if (!decoder) {
|
||||
return kInvalidPointer;
|
||||
}
|
||||
|
||||
const auto opt_db_format = NetEqDecoderToSdpAudioFormat(codec_type);
|
||||
const SdpAudioFormat format = opt_db_format.value_or({"arbitrary", 0, 0});
|
||||
|
||||
std::pair<DecoderMap::iterator, bool> ret;
|
||||
DecoderInfo info(format, decoder, codec_name);
|
||||
ret = decoders_.insert(std::make_pair(rtp_payload_type, std::move(info)));
|
||||
if (ret.second == false) {
|
||||
// Database already contains a decoder with type |rtp_payload_type|.
|
||||
return kDecoderExists;
|
||||
}
|
||||
return kOK;
|
||||
}
|
||||
|
||||
// Removes the entry for |rtp_payload_type|, clearing the corresponding active
// decoder selection if it pointed at the removed entry.
int DecoderDatabase::Remove(uint8_t rtp_payload_type) {
  const auto num_erased = decoders_.erase(rtp_payload_type);
  if (num_erased == 0) {
    // No decoder with that |rtp_payload_type|.
    return kDecoderNotFound;
  }
  if (active_decoder_type_ == rtp_payload_type) {
    active_decoder_type_ = -1;  // No active decoder.
  }
  if (active_cng_decoder_type_ == rtp_payload_type) {
    active_cng_decoder_type_ = -1;  // No active CNG decoder.
  }
  return kOK;
}
|
||||
|
||||
// Removes every registered decoder and clears both active selections.
void DecoderDatabase::RemoveAll() {
  decoders_.clear();
  active_decoder_type_ = -1;  // No active decoder.
  active_cng_decoder_type_ = -1;  // No active CNG decoder.
}
|
||||
|
||||
// Returns the DecoderInfo registered for |rtp_payload_type|, or nullptr if
// none exists. The returned pointer is owned by the database and becomes
// invalid when the entry is removed (Remove/RemoveAll/Reset/SetCodecs).
// (Modernized: nullptr instead of NULL, matching the rest of this file.)
const DecoderDatabase::DecoderInfo* DecoderDatabase::GetDecoderInfo(
    uint8_t rtp_payload_type) const {
  const auto it = decoders_.find(rtp_payload_type);
  if (it == decoders_.end()) {
    // Decoder not found.
    return nullptr;
  }
  return &it->second;
}
|
||||
|
||||
// Marks |rtp_payload_type| as the active speech decoder. Sets |*new_decoder|
// to true when this changes which decoder is active; in that case the previous
// active decoder's AudioDecoder object is dropped (it can be re-created on
// demand). Comfort noise types are rejected via RTC_CHECK; they are handled by
// SetActiveCngDecoder().
int DecoderDatabase::SetActiveDecoder(uint8_t rtp_payload_type,
                                      bool* new_decoder) {
  // Check that |rtp_payload_type| exists in the database.
  const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
  if (!info) {
    // Decoder not found.
    return kDecoderNotFound;
  }
  RTC_CHECK(!info->IsComfortNoise());
  RTC_DCHECK(new_decoder);
  *new_decoder = false;
  if (active_decoder_type_ < 0) {
    // This is the first active decoder.
    *new_decoder = true;
  } else if (active_decoder_type_ != rtp_payload_type) {
    // Moving from one active decoder to another. Delete the first one.
    const DecoderInfo *old_info = GetDecoderInfo(active_decoder_type_);
    RTC_DCHECK(old_info);
    old_info->DropDecoder();
    *new_decoder = true;
  }
  active_decoder_type_ = rtp_payload_type;
  return kOK;
}
|
||||
|
||||
// Returns the active speech decoder, or null when none has been selected.
AudioDecoder* DecoderDatabase::GetActiveDecoder() const {
  const bool has_active = active_decoder_type_ >= 0;
  return has_active ? GetDecoder(active_decoder_type_) : nullptr;
}
|
||||
|
||||
// Marks |rtp_payload_type| as the active comfort noise payload type. If this
// changes the selection, the previously created ComfortNoiseDecoder object is
// released; GetActiveCngDecoder() re-creates one on demand.
int DecoderDatabase::SetActiveCngDecoder(uint8_t rtp_payload_type) {
  // Check that |rtp_payload_type| exists in the database.
  const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
  if (!info) {
    // Decoder not found.
    return kDecoderNotFound;
  }
  if (active_cng_decoder_type_ >= 0 &&
      active_cng_decoder_type_ != rtp_payload_type) {
    // Moving from one active CNG decoder to another. Delete the first one.
    RTC_DCHECK(active_cng_decoder_);
    active_cng_decoder_.reset();
  }
  active_cng_decoder_type_ = rtp_payload_type;
  return kOK;
}
|
||||
|
||||
// Returns the active comfort noise decoder, creating it lazily. Returns null
// when no CNG payload type has been selected.
ComfortNoiseDecoder* DecoderDatabase::GetActiveCngDecoder() const {
  if (active_cng_decoder_type_ >= 0) {
    if (!active_cng_decoder_) {
      // Lazy creation; |active_cng_decoder_| is mutable for this purpose.
      active_cng_decoder_.reset(new ComfortNoiseDecoder);
    }
    return active_cng_decoder_.get();
  }
  // No active CNG decoder.
  return nullptr;
}
|
||||
|
||||
// Looks up |rtp_payload_type| and returns its AudioDecoder (created on
// demand), or null when the payload type is unknown.
AudioDecoder* DecoderDatabase::GetDecoder(uint8_t rtp_payload_type) const {
  if (const DecoderInfo* info = GetDecoderInfo(rtp_payload_type)) {
    return info->GetDecoder();
  }
  return nullptr;
}
|
||||
|
||||
// Returns true if |rtp_payload_type| is registered with a format named |name|.
bool DecoderDatabase::IsType(uint8_t rtp_payload_type, const char* name) const {
  const DecoderInfo* info = GetDecoderInfo(rtp_payload_type);
  return info && info->IsType(name);
}

// std::string convenience overload of the C-string version above.
bool DecoderDatabase::IsType(uint8_t rtp_payload_type,
                             const std::string& name) const {
  return IsType(rtp_payload_type, name.c_str());
}

// Returns true if |rtp_payload_type| is registered as comfort noise.
bool DecoderDatabase::IsComfortNoise(uint8_t rtp_payload_type) const {
  const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
  return info && info->IsComfortNoise();
}

// Returns true if |rtp_payload_type| is registered as DTMF.
bool DecoderDatabase::IsDtmf(uint8_t rtp_payload_type) const {
  const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
  return info && info->IsDtmf();
}

// Returns true if |rtp_payload_type| is registered as RED.
bool DecoderDatabase::IsRed(uint8_t rtp_payload_type) const {
  const DecoderInfo *info = GetDecoderInfo(rtp_payload_type);
  return info && info->IsRed();
}
|
||||
|
||||
int DecoderDatabase::CheckPayloadTypes(const PacketList& packet_list) const {
|
||||
PacketList::const_iterator it;
|
||||
for (it = packet_list.begin(); it != packet_list.end(); ++it) {
|
||||
if (!GetDecoderInfo(it->payload_type)) {
|
||||
// Payload type is not found.
|
||||
LOG(LS_WARNING) << "CheckPayloadTypes: unknown RTP payload type "
|
||||
<< static_cast<int>(it->payload_type);
|
||||
return kDecoderNotFound;
|
||||
}
|
||||
}
|
||||
return kOK;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
246
modules/audio_coding/neteq/decoder_database.h
Normal file
246
modules/audio_coding/neteq/decoder_database.h
Normal file
@ -0,0 +1,246 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder_factory.h"
|
||||
#include "webrtc/api/audio_codecs/audio_format.h"
|
||||
#include "webrtc/common_types.h" // NULL
|
||||
#include "webrtc/modules/audio_coding/codecs/cng/webrtc_cng.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/rtc_base/scoped_ref_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Database of the audio decoders known to NetEq, keyed by RTP payload type.
// Also tracks which speech decoder and which comfort noise (CNG) decoder are
// currently active.
class DecoderDatabase {
 public:
  enum DatabaseReturnCodes {
    kOK = 0,
    kInvalidRtpPayloadType = -1,
    kCodecNotSupported = -2,
    kInvalidSampleRate = -3,
    kDecoderExists = -4,
    kDecoderNotFound = -5,
    kInvalidPointer = -6
  };

  // Class that stores decoder info in the database.
  class DecoderInfo {
   public:
    DecoderInfo(const SdpAudioFormat& audio_format,
                AudioDecoderFactory* factory,
                const std::string& codec_name);
    explicit DecoderInfo(const SdpAudioFormat& audio_format,
                         AudioDecoderFactory* factory = nullptr);
    explicit DecoderInfo(NetEqDecoder ct,
                         AudioDecoderFactory* factory = nullptr);
    // Wraps an externally owned decoder; the database never deletes it.
    DecoderInfo(const SdpAudioFormat& audio_format,
                AudioDecoder* ext_dec,
                const std::string& codec_name);
    DecoderInfo(DecoderInfo&&);
    ~DecoderInfo();

    // Get the AudioDecoder object, creating it first if necessary.
    AudioDecoder* GetDecoder() const;

    // Delete the AudioDecoder object, unless it's external. (This means we can
    // always recreate it later if we need it.)
    void DropDecoder() const { decoder_.reset(); }

    // Returns the sample rate of this entry's decoder (or, for comfort noise
    // entries, of the stored CngDecoder descriptor).
    int SampleRateHz() const {
      if (IsDtmf()) {
        // DTMF has a 1:1 mapping between clock rate and sample rate.
        return audio_format_.clockrate_hz;
      }
      const AudioDecoder* decoder = GetDecoder();
      // Exactly one of the two sources must be available.
      RTC_DCHECK_EQ(1, !!decoder + !!cng_decoder_);
      return decoder ? decoder->SampleRateHz() : cng_decoder_->sample_rate_hz;
    }

    const SdpAudioFormat& GetFormat() const { return audio_format_; }

    // Returns true if the decoder's format is comfort noise.
    bool IsComfortNoise() const {
      RTC_DCHECK_EQ(!!cng_decoder_, subtype_ == Subtype::kComfortNoise);
      return subtype_ == Subtype::kComfortNoise;
    }

    // Returns true if the decoder's format is DTMF.
    bool IsDtmf() const {
      return subtype_ == Subtype::kDtmf;
    }

    // Returns true if the decoder's format is RED.
    bool IsRed() const {
      return subtype_ == Subtype::kRed;
    }

    // Returns true if the decoder's format is named |name|.
    bool IsType(const char* name) const;
    // Returns true if the decoder's format is named |name|.
    bool IsType(const std::string& name) const;

    const std::string& get_name() const { return name_; }

   private:
    // TODO(ossu): |name_| is kept here while we retain the old external
    // decoder interface. Remove this once using an
    // AudioDecoderFactory has supplanted the old functionality.
    const std::string name_;

    const SdpAudioFormat audio_format_;
    AudioDecoderFactory* const factory_;
    // Lazily created by GetDecoder(); mutable so creation works from const
    // accessors.
    mutable std::unique_ptr<AudioDecoder> decoder_;

    // Set iff this is an external decoder.
    AudioDecoder* const external_decoder_;

    // Set iff this is a comfort noise decoder.
    struct CngDecoder {
      static rtc::Optional<CngDecoder> Create(const SdpAudioFormat& format);
      int sample_rate_hz;
    };
    const rtc::Optional<CngDecoder> cng_decoder_;

    enum class Subtype : int8_t {
      kNormal,
      kComfortNoise,
      kDtmf,
      kRed
    };

    static Subtype SubtypeFromFormat(const SdpAudioFormat& format);

    const Subtype subtype_;
  };

  // Maximum value for 8 bits, and an invalid RTP payload type (since it is
  // only 7 bits).
  static const uint8_t kRtpPayloadTypeError = 0xFF;

  DecoderDatabase(
      const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);

  virtual ~DecoderDatabase();

  // Returns true if the database is empty.
  virtual bool Empty() const;

  // Returns the number of decoders registered in the database.
  virtual int Size() const;

  // Resets the database, erasing all registered payload types, and deleting
  // any AudioDecoder objects that were not externally created and inserted
  // using InsertExternal().
  virtual void Reset();

  // Replaces the existing set of decoders with the given set. Returns the
  // payload types that were reassigned or removed while doing so.
  virtual std::vector<int> SetCodecs(
      const std::map<int, SdpAudioFormat>& codecs);

  // Registers |rtp_payload_type| as a decoder of type |codec_type|. The |name|
  // is only used to populate the name field in the DecoderInfo struct in the
  // database, and can be arbitrary (including empty). Returns kOK on success;
  // otherwise an error code.
  virtual int RegisterPayload(uint8_t rtp_payload_type,
                              NetEqDecoder codec_type,
                              const std::string& name);

  // Registers a decoder for the given payload type. Returns kOK on success;
  // otherwise an error code.
  virtual int RegisterPayload(int rtp_payload_type,
                              const SdpAudioFormat& audio_format);

  // Registers an externally created AudioDecoder object, and associates it
  // as a decoder of type |codec_type| with |rtp_payload_type|.
  virtual int InsertExternal(uint8_t rtp_payload_type,
                             NetEqDecoder codec_type,
                             const std::string& codec_name,
                             AudioDecoder* decoder);

  // Removes the entry for |rtp_payload_type| from the database.
  // Returns kDecoderNotFound or kOK depending on the outcome of the operation.
  virtual int Remove(uint8_t rtp_payload_type);

  // Remove all entries.
  virtual void RemoveAll();

  // Returns a pointer to the DecoderInfo struct for |rtp_payload_type|. If
  // no decoder is registered with that |rtp_payload_type|, NULL is returned.
  virtual const DecoderInfo* GetDecoderInfo(uint8_t rtp_payload_type) const;

  // Sets the active decoder to be |rtp_payload_type|. If this call results in a
  // change of active decoder, |new_decoder| is set to true. The previous active
  // decoder's AudioDecoder object is deleted.
  virtual int SetActiveDecoder(uint8_t rtp_payload_type, bool* new_decoder);

  // Returns the current active decoder, or NULL if no active decoder exists.
  virtual AudioDecoder* GetActiveDecoder() const;

  // Sets the active comfort noise decoder to be |rtp_payload_type|. If this
  // call results in a change of active comfort noise decoder, the previous
  // active decoder's AudioDecoder object is deleted.
  virtual int SetActiveCngDecoder(uint8_t rtp_payload_type);

  // Returns the current active comfort noise decoder, or NULL if no active
  // comfort noise decoder exists.
  virtual ComfortNoiseDecoder* GetActiveCngDecoder() const;

  // The following are utility methods: they will look up DecoderInfo through
  // GetDecoderInfo and call the respective method on that info object, if it
  // exists.

  // Returns a pointer to the AudioDecoder object associated with
  // |rtp_payload_type|, or NULL if none is registered. If the AudioDecoder
  // object does not exist for that decoder, the object is created.
  AudioDecoder* GetDecoder(uint8_t rtp_payload_type) const;

  // Returns if |rtp_payload_type| is registered with a format named |name|.
  bool IsType(uint8_t rtp_payload_type, const char* name) const;

  // Returns if |rtp_payload_type| is registered with a format named |name|.
  bool IsType(uint8_t rtp_payload_type, const std::string& name) const;

  // Returns true if |rtp_payload_type| is registered as comfort noise.
  bool IsComfortNoise(uint8_t rtp_payload_type) const;

  // Returns true if |rtp_payload_type| is registered as DTMF.
  bool IsDtmf(uint8_t rtp_payload_type) const;

  // Returns true if |rtp_payload_type| is registered as RED.
  bool IsRed(uint8_t rtp_payload_type) const;

  // Returns kOK if all packets in |packet_list| carry payload types that are
  // registered in the database. Otherwise, returns kDecoderNotFound.
  int CheckPayloadTypes(const PacketList& packet_list) const;

 private:
  typedef std::map<uint8_t, DecoderInfo> DecoderMap;

  DecoderMap decoders_;
  // Payload type of the active speech decoder; -1 when none is active.
  int active_decoder_type_;
  // Payload type of the active CNG decoder; -1 when none is active.
  int active_cng_decoder_type_;
  // Lazily created by GetActiveCngDecoder(); hence mutable.
  mutable std::unique_ptr<ComfortNoiseDecoder> active_cng_decoder_;
  rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_;

  RTC_DISALLOW_COPY_AND_ASSIGN(DecoderDatabase);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DECODER_DATABASE_H_
|
||||
256
modules/audio_coding/neteq/decoder_database_unittest.cc
Normal file
256
modules/audio_coding/neteq/decoder_database_unittest.cc
Normal file
@ -0,0 +1,256 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
|
||||
#include "webrtc/test/gmock.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/test/mock_audio_decoder.h"
|
||||
#include "webrtc/test/mock_audio_decoder_factory.h"
|
||||
|
||||
using testing::_;
|
||||
using testing::Invoke;
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A newly constructed database must be empty.
TEST(DecoderDatabase, CreateAndDestroy) {
  DecoderDatabase db(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  EXPECT_EQ(0, db.Size());
  EXPECT_TRUE(db.Empty());
}
|
||||
|
||||
// Registering one payload and removing it again leaves the database empty.
TEST(DecoderDatabase, InsertAndRemove) {
  DecoderDatabase db(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  const uint8_t kPayloadType = 0;
  const std::string kCodecName = "Robert\'); DROP TABLE Students;";
  EXPECT_EQ(
      DecoderDatabase::kOK,
      db.RegisterPayload(kPayloadType, NetEqDecoder::kDecoderPCMu, kCodecName));
  EXPECT_EQ(1, db.Size());
  EXPECT_FALSE(db.Empty());
  EXPECT_EQ(DecoderDatabase::kOK, db.Remove(kPayloadType));
  EXPECT_EQ(0, db.Size());
  EXPECT_TRUE(db.Empty());
}
|
||||
|
||||
// RemoveAll() clears a database holding several registrations.
TEST(DecoderDatabase, InsertAndRemoveAll) {
  DecoderDatabase db(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  const std::string kCodecName1 = "Robert\'); DROP TABLE Students;";
  const std::string kCodecName2 = "https://xkcd.com/327/";
  EXPECT_EQ(DecoderDatabase::kOK,
            db.RegisterPayload(0, NetEqDecoder::kDecoderPCMu, kCodecName1));
  EXPECT_EQ(DecoderDatabase::kOK,
            db.RegisterPayload(1, NetEqDecoder::kDecoderPCMa, kCodecName2));
  EXPECT_EQ(2, db.Size());
  EXPECT_FALSE(db.Empty());
  db.RemoveAll();
  EXPECT_EQ(0, db.Size());
  EXPECT_TRUE(db.Empty());
}
|
||||
|
||||
// GetDecoderInfo() returns the stored metadata for a registered type and NULL
// for an unregistered one; the factory-created decoder is handed out by
// GetDecoder().
TEST(DecoderDatabase, GetDecoderInfo) {
  rtc::scoped_refptr<MockAudioDecoderFactory> factory(
      new rtc::RefCountedObject<MockAudioDecoderFactory>);
  // The mock factory hands out this specific decoder when asked for "pcmu".
  auto* decoder = new MockAudioDecoder;
  EXPECT_CALL(*factory, MakeAudioDecoderMock(_, _))
      .WillOnce(Invoke([decoder](const SdpAudioFormat& format,
                                 std::unique_ptr<AudioDecoder>* dec) {
        EXPECT_EQ("pcmu", format.name);
        dec->reset(decoder);
      }));
  DecoderDatabase db(factory);
  const uint8_t kPayloadType = 0;
  const std::string kCodecName = "Robert\'); DROP TABLE Students;";
  EXPECT_EQ(
      DecoderDatabase::kOK,
      db.RegisterPayload(kPayloadType, NetEqDecoder::kDecoderPCMu, kCodecName));
  const DecoderDatabase::DecoderInfo* info;
  info = db.GetDecoderInfo(kPayloadType);
  ASSERT_TRUE(info != NULL);
  EXPECT_TRUE(info->IsType("pcmu"));
  EXPECT_EQ(kCodecName, info->get_name());
  EXPECT_EQ(decoder, db.GetDecoder(kPayloadType));
  info = db.GetDecoderInfo(kPayloadType + 1);  // Other payload type.
  EXPECT_TRUE(info == NULL);  // Should not be found.
}
|
||||
|
||||
// GetDecoder() creates and returns a decoder for a registered payload type
// when backed by the real built-in factory.
TEST(DecoderDatabase, GetDecoder) {
  DecoderDatabase db(CreateBuiltinAudioDecoderFactory());
  const uint8_t kPayloadType = 0;
  const std::string kCodecName = "Robert\'); DROP TABLE Students;";
  EXPECT_EQ(DecoderDatabase::kOK,
            db.RegisterPayload(kPayloadType, NetEqDecoder::kDecoderPCM16B,
                               kCodecName));
  AudioDecoder* dec = db.GetDecoder(kPayloadType);
  ASSERT_TRUE(dec != NULL);
}
|
||||
|
||||
// Exercises the IsComfortNoise/IsDtmf/IsRed/IsType queries for registered,
// unregistered, and mismatching payload types.
TEST(DecoderDatabase, TypeTests) {
  DecoderDatabase db(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  const uint8_t kPayloadTypePcmU = 0;
  const uint8_t kPayloadTypeCng = 13;
  const uint8_t kPayloadTypeDtmf = 100;
  const uint8_t kPayloadTypeRed = 101;
  const uint8_t kPayloadNotUsed = 102;
  // Load into database.
  EXPECT_EQ(
      DecoderDatabase::kOK,
      db.RegisterPayload(kPayloadTypePcmU, NetEqDecoder::kDecoderPCMu, "pcmu"));
  EXPECT_EQ(DecoderDatabase::kOK,
            db.RegisterPayload(kPayloadTypeCng, NetEqDecoder::kDecoderCNGnb,
                               "cng-nb"));
  EXPECT_EQ(
      DecoderDatabase::kOK,
      db.RegisterPayload(kPayloadTypeDtmf, NetEqDecoder::kDecoderAVT, "avt"));
  EXPECT_EQ(
      DecoderDatabase::kOK,
      db.RegisterPayload(kPayloadTypeRed, NetEqDecoder::kDecoderRED, "red"));
  EXPECT_EQ(4, db.Size());
  // Test.
  EXPECT_FALSE(db.IsComfortNoise(kPayloadNotUsed));
  EXPECT_FALSE(db.IsDtmf(kPayloadNotUsed));
  EXPECT_FALSE(db.IsRed(kPayloadNotUsed));
  EXPECT_FALSE(db.IsComfortNoise(kPayloadTypePcmU));
  EXPECT_FALSE(db.IsDtmf(kPayloadTypePcmU));
  EXPECT_FALSE(db.IsRed(kPayloadTypePcmU));
  EXPECT_FALSE(db.IsType(kPayloadTypePcmU, "isac"));
  EXPECT_TRUE(db.IsType(kPayloadTypePcmU, "pcmu"));
  EXPECT_TRUE(db.IsComfortNoise(kPayloadTypeCng));
  EXPECT_TRUE(db.IsDtmf(kPayloadTypeDtmf));
  EXPECT_TRUE(db.IsRed(kPayloadTypeRed));
}
|
||||
|
||||
// Verifies that an externally owned decoder is handed out unchanged and is
// never deleted by the database. (Cleanup: a literally duplicated get_name()
// expectation was removed.)
TEST(DecoderDatabase, ExternalDecoder) {
  DecoderDatabase db(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  const uint8_t kPayloadType = 0;
  const std::string kCodecName = "Robert\'); DROP TABLE Students;";
  MockAudioDecoder decoder;
  // Load into database.
  EXPECT_EQ(DecoderDatabase::kOK,
            db.InsertExternal(kPayloadType, NetEqDecoder::kDecoderPCMu,
                              kCodecName, &decoder));
  EXPECT_EQ(1, db.Size());
  // Get decoder and make sure we get the external one.
  EXPECT_EQ(&decoder, db.GetDecoder(kPayloadType));
  // Get the decoder info struct and check it too.
  const DecoderDatabase::DecoderInfo* info;
  info = db.GetDecoderInfo(kPayloadType);
  ASSERT_TRUE(info != NULL);
  EXPECT_TRUE(info->IsType("pcmu"));
  EXPECT_EQ(kCodecName, info->get_name());
  // Expect not to delete the decoder when removing it from the database, since
  // it was declared externally.
  EXPECT_CALL(decoder, Die()).Times(0);
  EXPECT_EQ(DecoderDatabase::kOK, db.Remove(kPayloadType));
  EXPECT_TRUE(db.Empty());

  EXPECT_CALL(decoder, Die()).Times(1);  // Will be called when |db| is deleted.
}
|
||||
|
||||
// Registers payload types 0..9 and verifies that CheckPayloadTypes() accepts
// a packet list containing only known types and rejects one with an unknown
// type. (Idiom: the trailing manual erase-loop was replaced by clear().)
TEST(DecoderDatabase, CheckPayloadTypes) {
  DecoderDatabase db(new rtc::RefCountedObject<MockAudioDecoderFactory>);
  // Load a number of payloads into the database. Payload types are 0, 1, ...,
  // while the decoder type is the same for all payload types (this does not
  // matter for the test).
  const int kNumPayloads = 10;
  for (uint8_t payload_type = 0; payload_type < kNumPayloads; ++payload_type) {
    EXPECT_EQ(DecoderDatabase::kOK,
              db.RegisterPayload(payload_type, NetEqDecoder::kDecoderPCMu, ""));
  }
  PacketList packet_list;
  for (int i = 0; i < kNumPayloads + 1; ++i) {
    // Create packet with payload type |i|. The last packet will have a payload
    // type that is not registered in the decoder database.
    Packet packet;
    packet.payload_type = i;
    packet_list.push_back(std::move(packet));
  }

  // Expect to return false, since the last packet is of an unknown type.
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            db.CheckPayloadTypes(packet_list));

  packet_list.pop_back();  // Remove the unknown one.

  EXPECT_EQ(DecoderDatabase::kOK, db.CheckPayloadTypes(packet_list));

  // Delete all packets.
  packet_list.clear();
}
|
||||
|
||||
// Helper macro: prefixes a test name with DISABLED_ (gtest's disable marker)
// when neither the floating-point nor the fixed-point iSAC codec is compiled
// in, so iSAC-dependent tests are skipped on such builds.
#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)
#define IF_ISAC(x) x
#else
#define IF_ISAC(x) DISABLED_##x
#endif
||||
|
||||
// Test the methods for setting and getting active speech and CNG decoders.
|
||||
// Test the methods for setting and getting active speech and CNG decoders.
// Covers: first activation, re-activating the same type, switching types,
// removal of the active type, and activating unregistered types.
TEST(DecoderDatabase, IF_ISAC(ActiveDecoders)) {
  DecoderDatabase db(CreateBuiltinAudioDecoderFactory());
  // Load payload types.
  ASSERT_EQ(DecoderDatabase::kOK,
            db.RegisterPayload(0, NetEqDecoder::kDecoderPCMu, "pcmu"));
  ASSERT_EQ(DecoderDatabase::kOK,
            db.RegisterPayload(103, NetEqDecoder::kDecoderISAC, "isac"));
  ASSERT_EQ(DecoderDatabase::kOK,
            db.RegisterPayload(13, NetEqDecoder::kDecoderCNGnb, "cng-nb"));
  // Verify that no decoders are active from the start.
  EXPECT_EQ(NULL, db.GetActiveDecoder());
  EXPECT_EQ(NULL, db.GetActiveCngDecoder());

  // Set active speech codec.
  bool changed;  // Should be true when the active decoder changed.
  EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed));
  EXPECT_TRUE(changed);
  AudioDecoder* decoder = db.GetActiveDecoder();
  ASSERT_FALSE(decoder == NULL);  // Should get a decoder here.

  // Set the same again. Expect no change.
  EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed));
  EXPECT_FALSE(changed);
  decoder = db.GetActiveDecoder();
  ASSERT_FALSE(decoder == NULL);  // Should get a decoder here.

  // Change active decoder.
  EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(103, &changed));
  EXPECT_TRUE(changed);
  decoder = db.GetActiveDecoder();
  ASSERT_FALSE(decoder == NULL);  // Should get a decoder here.

  // Remove the active decoder, and verify that the active becomes NULL.
  EXPECT_EQ(DecoderDatabase::kOK, db.Remove(103));
  EXPECT_EQ(NULL, db.GetActiveDecoder());

  // Set active CNG codec.
  EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveCngDecoder(13));
  ComfortNoiseDecoder* cng = db.GetActiveCngDecoder();
  ASSERT_FALSE(cng == NULL);  // Should get a decoder here.

  // Remove the active CNG decoder, and verify that the active becomes NULL.
  EXPECT_EQ(DecoderDatabase::kOK, db.Remove(13));
  EXPECT_EQ(NULL, db.GetActiveCngDecoder());

  // Try to set non-existing codecs as active.
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            db.SetActiveDecoder(17, &changed));
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            db.SetActiveCngDecoder(17));
}
|
||||
} // namespace webrtc
|
||||
52
modules/audio_coding/neteq/defines.h
Normal file
52
modules/audio_coding/neteq/defines.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DEFINES_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DEFINES_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Operation decided by NetEq for one playout call. kUndefined means no
// decision has been made. NOTE(review): the per-value semantics are inferred
// from the names only — the decision logic using these lives outside this
// file; confirm against neteq_impl before relying on them.
enum Operations {
  kNormal = 0,
  kMerge,
  kExpand,
  kAccelerate,
  kFastAccelerate,
  kPreemptiveExpand,
  kRfc3389Cng,
  kRfc3389CngNoPacket,
  kCodecInternalCng,
  kDtmf,
  kAlternativePlc,
  kAlternativePlcIncreaseTimestamp,
  kAudioRepetition,
  kAudioRepetitionIncreaseTimestamp,
  kUndefined = -1
};
|
||||
|
||||
// Outcome reported after an operation has executed; several operations have
// distinct success / low-energy / fail variants. kModeUndefined means no
// operation has run yet. NOTE(review): semantics inferred from names —
// confirm at the reporting sites outside this file.
enum Modes {
  kModeNormal = 0,
  kModeExpand,
  kModeMerge,
  kModeAccelerateSuccess,
  kModeAccelerateLowEnergy,
  kModeAccelerateFail,
  kModePreemptiveExpandSuccess,
  kModePreemptiveExpandLowEnergy,
  kModePreemptiveExpandFail,
  kModeRfc3389Cng,
  kModeCodecInternalCng,
  kModeDtmf,
  kModeError,
  kModeUndefined = -1
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DEFINES_H_
|
||||
421
modules/audio_coding/neteq/delay_manager.cc
Normal file
421
modules/audio_coding/neteq/delay_manager.cc
Normal file
@ -0,0 +1,421 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm> // max, min
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
#include "webrtc/rtc_base/logging.h"
|
||||
#include "webrtc/rtc_base/safe_conversions.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs a DelayManager for a packet buffer holding at most
// |max_packets_in_buffer| packets. |peak_detector| and |tick_timer| are
// borrowed, not owned, and must outlive this object. Note that
// |least_required_delay_ms_| and |maximum_delay_ms_| are seeded from the Q8
// |target_level_| here, but Reset() (called at the end) re-initializes most
// state anyway.
DelayManager::DelayManager(size_t max_packets_in_buffer,
                           DelayPeakDetector* peak_detector,
                           const TickTimer* tick_timer)
    : first_packet_received_(false),
      max_packets_in_buffer_(max_packets_in_buffer),
      iat_vector_(kMaxIat + 1, 0),
      iat_factor_(0),
      tick_timer_(tick_timer),
      base_target_level_(4),  // In Q0 domain.
      target_level_(base_target_level_ << 8),  // In Q8 domain.
      packet_len_ms_(0),
      streaming_mode_(false),
      last_seq_no_(0),
      last_timestamp_(0),
      minimum_delay_ms_(0),
      least_required_delay_ms_(target_level_),
      maximum_delay_ms_(target_level_),
      iat_cumulative_sum_(0),
      max_iat_cumulative_sum_(0),
      peak_detector_(*peak_detector),
      last_pack_cng_or_dtmf_(1) {
  assert(peak_detector);  // Should never be NULL.
  Reset();
}

DelayManager::~DelayManager() {}

// Read-only access to the inter-arrival time histogram (Q30 probabilities);
// mainly for testing.
const DelayManager::IATVector& DelayManager::iat_vector() const {
  return iat_vector_;
}
|
||||
|
||||
// Set the histogram vector to an exponentially decaying distribution
// iat_vector_[i] = 0.5^(i+1), i = 0, 1, 2, ...
// iat_vector_ is in Q30.
void DelayManager::ResetHistogram() {
  // Set temp_prob to (slightly more than) 1 in Q14. This ensures that the sum
  // of iat_vector_ is 1.
  uint16_t temp_prob = 0x4002;  // 16384 + 2 = 100000000000010 binary.
  IATVector::iterator it = iat_vector_.begin();
  for (; it < iat_vector_.end(); it++) {
    // Halve the probability for each successive bin; shifting 16 steps
    // converts the Q14 value to the Q30 domain of |iat_vector_|.
    temp_prob >>= 1;
    (*it) = temp_prob << 16;
  }
  // Restore the default target levels (Q0 and Q8, respectively).
  base_target_level_ = 4;
  target_level_ = base_target_level_ << 8;
}
|
||||
|
||||
// Feeds one newly arrived packet (RTP |sequence_number| and |timestamp|) into
// the delay manager: estimates the packet length, updates the inter-arrival
// time (IAT) histogram and the peak detector, and recomputes |target_level_|.
// Returns 0 on success, -1 if |sample_rate_hz| is not strictly positive.
int DelayManager::Update(uint16_t sequence_number,
                         uint32_t timestamp,
                         int sample_rate_hz) {
  if (sample_rate_hz <= 0) {
    return -1;
  }

  if (!first_packet_received_) {
    // No previous packet to compare against; just record this one.
    // Prepare for next packet arrival.
    packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
    last_seq_no_ = sequence_number;
    last_timestamp_ = timestamp;
    first_packet_received_ = true;
    return 0;
  }

  // Try calculating packet length from current and previous timestamps.
  int packet_len_ms;
  if (!IsNewerTimestamp(timestamp, last_timestamp_) ||
      !IsNewerSequenceNumber(sequence_number, last_seq_no_)) {
    // Wrong timestamp or sequence order; use stored value.
    packet_len_ms = packet_len_ms_;
  } else {
    // Calculate timestamps per packet and derive packet length in ms.
    // The unsigned casts make the wrap-around differences well defined.
    int64_t packet_len_samp =
        static_cast<uint32_t>(timestamp - last_timestamp_) /
        static_cast<uint16_t>(sequence_number - last_seq_no_);
    packet_len_ms =
        rtc::saturated_cast<int>(1000 * packet_len_samp / sample_rate_hz);
  }

  if (packet_len_ms > 0) {
    // Cannot update statistics unless |packet_len_ms| is valid.
    // Calculate inter-arrival time (IAT) in integer "packet times"
    // (rounding down). This is the value used as index to the histogram
    // vector |iat_vector_|.
    int iat_packets = packet_iat_stopwatch_->ElapsedMs() / packet_len_ms;

    if (streaming_mode_) {
      UpdateCumulativeSums(packet_len_ms, sequence_number);
    }

    // Check for discontinuous packet sequence and re-ordering.
    if (IsNewerSequenceNumber(sequence_number, last_seq_no_ + 1)) {
      // Compensate for gap in the sequence numbers. Reduce IAT with the
      // expected extra time due to lost packets, but ensure that the IAT is
      // not negative.
      iat_packets -= static_cast<uint16_t>(sequence_number - last_seq_no_ - 1);
      iat_packets = std::max(iat_packets, 0);
    } else if (!IsNewerSequenceNumber(sequence_number, last_seq_no_)) {
      // Re-ordered packet: count the positions it arrived late.
      iat_packets += static_cast<uint16_t>(last_seq_no_ + 1 - sequence_number);
    }

    // Saturate IAT at maximum value.
    const int max_iat = kMaxIat;
    iat_packets = std::min(iat_packets, max_iat);
    UpdateHistogram(iat_packets);
    // Calculate new |target_level_| based on updated statistics.
    target_level_ = CalculateTargetLevel(iat_packets);
    if (streaming_mode_) {
      target_level_ = std::max(target_level_, max_iat_cumulative_sum_);
    }

    LimitTargetLevel();
  }  // End if (packet_len_ms > 0).

  // Prepare for next packet arrival.
  packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
  last_seq_no_ = sequence_number;
  last_timestamp_ = timestamp;
  return 0;
}
|
||||
|
||||
// Streaming-mode statistics: maintains |iat_cumulative_sum_| (Q8), a
// drift-compensated running sum of IAT deviations, and its observed maximum
// |max_iat_cumulative_sum_|. Called from Update() when |streaming_mode_| is
// set.
void DelayManager::UpdateCumulativeSums(int packet_len_ms,
                                        uint16_t sequence_number) {
  // Calculate IAT in Q8, including fractions of a packet (i.e., more
  // accurate than |iat_packets|.
  int iat_packets_q8 =
      (packet_iat_stopwatch_->ElapsedMs() << 8) / packet_len_ms;
  // Calculate cumulative sum IAT with sequence number compensation. The sum
  // is zero if there is no clock-drift.
  iat_cumulative_sum_ += (iat_packets_q8 -
      (static_cast<int>(sequence_number - last_seq_no_) << 8));
  // Subtract drift term.
  iat_cumulative_sum_ -= kCumulativeSumDrift;
  // Ensure not negative.
  iat_cumulative_sum_ = std::max(iat_cumulative_sum_, 0);
  if (iat_cumulative_sum_ > max_iat_cumulative_sum_) {
    // Found a new maximum.
    max_iat_cumulative_sum_ = iat_cumulative_sum_;
    max_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
  }
  if (max_iat_stopwatch_->ElapsedMs() > kMaxStreamingPeakPeriodMs) {
    // Too long since the last maximum was observed; decrease max value.
    max_iat_cumulative_sum_ -= kCumulativeSumDrift;
  }
}
|
||||
|
||||
// Each element in the vector is first multiplied by the forgetting factor
// |iat_factor_|. Then the vector element indicated by |iat_packets| is then
// increased (additive) by 1 - |iat_factor_|. This way, the probability of
// |iat_packets| is slightly increased, while the sum of the histogram remains
// constant (=1).
// Due to inaccuracies in the fixed-point arithmetic, the histogram may no
// longer sum up to 1 (in Q30) after the update. To correct this, a correction
// term is added or subtracted from the first element (or elements) of the
// vector.
// The forgetting factor |iat_factor_| is also updated. When the DelayManager
// is reset, the factor is set to 0 to facilitate rapid convergence in the
// beginning. With each update of the histogram, the factor is increased towards
// the steady-state value |kIatFactor_|.
void DelayManager::UpdateHistogram(size_t iat_packets) {
  assert(iat_packets < iat_vector_.size());
  int vector_sum = 0;  // Sum up the vector elements as they are processed.
  // Multiply each element in |iat_vector_| with |iat_factor_|.
  for (IATVector::iterator it = iat_vector_.begin();
       it != iat_vector_.end(); ++it) {
    // |iat_factor_| is Q15, so shift right 15 to stay in Q30.
    *it = (static_cast<int64_t>(*it) * iat_factor_) >> 15;
    vector_sum += *it;
  }

  // Increase the probability for the currently observed inter-arrival time
  // by 1 - |iat_factor_|. The factor is in Q15, |iat_vector_| in Q30.
  // Thus, left-shift 15 steps to obtain result in Q30.
  iat_vector_[iat_packets] += (32768 - iat_factor_) << 15;
  vector_sum += (32768 - iat_factor_) << 15;  // Add to vector sum.

  // |iat_vector_| should sum up to 1 (in Q30), but it may not due to
  // fixed-point rounding errors.
  vector_sum -= 1 << 30;  // Should be zero. Compensate if not.
  if (vector_sum != 0) {
    // Modify a few values early in |iat_vector_|.
    int flip_sign = vector_sum > 0 ? -1 : 1;
    IATVector::iterator it = iat_vector_.begin();
    while (it != iat_vector_.end() && abs(vector_sum) > 0) {
      // Add/subtract 1/16 of the element, but not more than |vector_sum|.
      int correction = flip_sign * std::min(abs(vector_sum), (*it) >> 4);
      *it += correction;
      vector_sum += correction;
      ++it;
    }
  }
  assert(vector_sum == 0);  // Verify that the above is correct.

  // Update |iat_factor_| (changes only during the first seconds after a reset).
  // The factor converges to |kIatFactor_|.
  iat_factor_ += (kIatFactor_ - iat_factor_ + 3) >> 2;
}
|
||||
|
||||
// Enforces upper and lower limits for |target_level_|. The upper limit is
// chosen to be minimum of i) 75% of |max_packets_in_buffer_|, to leave some
// headroom for natural fluctuations around the target, and ii) equivalent of
// |maximum_delay_ms_| in packets. Note that in practice, if no
// |maximum_delay_ms_| is specified, this does not have any impact, since the
// target level is far below the buffer capacity in all reasonable cases.
// The lower limit is equivalent of |minimum_delay_ms_| in packets. We update
// |least_required_level_| while the above limits are applied.
// TODO(hlundin): Move this check to the buffer logistics class.
void DelayManager::LimitTargetLevel() {
  // Record the unclamped requirement (converted from Q8 packets to ms).
  least_required_delay_ms_ = (target_level_ * packet_len_ms_) >> 8;

  if (packet_len_ms_ > 0 && minimum_delay_ms_ > 0) {
    // Raise the target to at least the externally set minimum delay,
    // expressed in Q8 packets.
    int minimum_delay_packet_q8 =  (minimum_delay_ms_ << 8) / packet_len_ms_;
    target_level_ = std::max(target_level_, minimum_delay_packet_q8);
  }

  if (maximum_delay_ms_ > 0 && packet_len_ms_ > 0) {
    // Cap the target at the externally set maximum delay, in Q8 packets.
    int maximum_delay_packet_q8 = (maximum_delay_ms_ << 8) / packet_len_ms_;
    target_level_ = std::min(target_level_, maximum_delay_packet_q8);
  }

  // Shift to Q8, then 75%.;
  int max_buffer_packets_q8 =
      static_cast<int>((3 * (max_packets_in_buffer_ << 8)) / 4);
  target_level_ = std::min(target_level_, max_buffer_packets_q8);

  // Sanity check, at least 1 packet (in Q8).
  target_level_ = std::max(target_level_, 1 << 8);
}
|
||||
|
||||
// Derives a new target buffer level from the IAT histogram and the delay
// peak detector. Updates |base_target_level_| (Q0, pre-peak-detection) and
// |target_level_| (Q8), returning the latter.
int DelayManager::CalculateTargetLevel(int iat_packets) {
  int limit_probability = kLimitProbability;
  if (streaming_mode_) {
    limit_probability = kLimitProbabilityStreaming;
  }

  // Calculate target buffer level from inter-arrival time histogram.
  // Find the |iat_index| for which the probability of observing an
  // inter-arrival time larger than or equal to |iat_index| is less than or
  // equal to |limit_probability|. The sought probability is estimated using
  // the histogram as the reverse cumulant PDF, i.e., the sum of elements from
  // the end up until |iat_index|. Now, since the sum of all elements is 1
  // (in Q30) by definition, and since the solution is often a low value for
  // |iat_index|, it is more efficient to start with |sum| = 1 and subtract
  // elements from the start of the histogram.
  size_t index = 0;  // Start from the beginning of |iat_vector_|.
  int sum = 1 << 30;  // Assign to 1 in Q30.
  sum -= iat_vector_[index];  // Ensure that target level is >= 1.

  do {
    // Subtract the probabilities one by one until the sum is no longer greater
    // than limit_probability.
    ++index;
    sum -= iat_vector_[index];
  } while ((sum > limit_probability) && (index < iat_vector_.size() - 1));

  // This is the base value for the target buffer level.
  int target_level = static_cast<int>(index);
  base_target_level_ = static_cast<int>(index);

  // Update detector for delay peaks.
  bool delay_peak_found = peak_detector_.Update(iat_packets, target_level);
  if (delay_peak_found) {
    // In peak mode, the target must cover the highest observed peak.
    target_level = std::max(target_level, peak_detector_.MaxPeakHeight());
  }

  // Sanity check. |target_level| must be strictly positive.
  target_level = std::max(target_level, 1);
  // Scale to Q8 and assign to member variable.
  target_level_ = target_level << 8;
  return target_level_;
}
|
||||
|
||||
int DelayManager::SetPacketAudioLength(int length_ms) {
|
||||
if (length_ms <= 0) {
|
||||
LOG_F(LS_ERROR) << "length_ms = " << length_ms;
|
||||
return -1;
|
||||
}
|
||||
packet_len_ms_ = length_ms;
|
||||
peak_detector_.SetPacketAudioLength(packet_len_ms_);
|
||||
packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
|
||||
last_pack_cng_or_dtmf_ = 1; // TODO(hlundin): Legacy. Remove?
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// Restores the DelayManager to its initial state: forgets the packet size and
// streaming mode, resets the peak detector and the IAT histogram (which also
// restores the default target levels), and restarts all timers and sums.
void DelayManager::Reset() {
  packet_len_ms_ = 0;  // Packet size unknown.
  streaming_mode_ = false;
  peak_detector_.Reset();
  ResetHistogram();  // Resets target levels too.
  iat_factor_ = 0;  // Adapt the histogram faster for the first few packets.
  packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
  max_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
  iat_cumulative_sum_ = 0;
  max_iat_cumulative_sum_ = 0;
  last_pack_cng_or_dtmf_ = 1;
}
|
||||
|
||||
// Estimates clock drift in parts-per-million from the expected value of the
// inter-arrival time histogram. Zero means the average IAT equals the nominal
// packet spacing; positive means packets arrive with larger spacing than
// nominal, negative with smaller.
double DelayManager::EstimatedClockDriftPpm() const {
  // Expected value of the histogram: probability (Q30) times bin index.
  double expected_iat = 0.0;
  size_t bin = 0;
  for (const int prob_q30 : iat_vector_) {
    expected_iat += static_cast<double>(prob_q30) * bin;
    ++bin;
  }
  // Convert Q30 -> Q0, subtract the nominal inter-arrival time (1 packet),
  // and scale to parts-per-million.
  return (expected_iat / (1 << 30) - 1) * 1e6;
}
|
||||
|
||||
// Delegates the "are we in peak mode?" question to the DelayPeakDetector.
bool DelayManager::PeakFound() const {
  return peak_detector_.peak_found();
}

// Restarts the inter-arrival time measurement for the next packet.
void DelayManager::ResetPacketIatCount() {
  packet_iat_stopwatch_ = tick_timer_->GetNewStopwatch();
}
|
||||
|
||||
// Note that |low_limit| and |higher_limit| are not assigned to
|
||||
// |minimum_delay_ms_| and |maximum_delay_ms_| defined by the client of this
|
||||
// class. They are computed from |target_level_| and used for decision making.
|
||||
void DelayManager::BufferLimits(int* lower_limit, int* higher_limit) const {
|
||||
if (!lower_limit || !higher_limit) {
|
||||
LOG_F(LS_ERROR) << "NULL pointers supplied as input";
|
||||
assert(false);
|
||||
return;
|
||||
}
|
||||
|
||||
int window_20ms = 0x7FFF; // Default large value for legacy bit-exactness.
|
||||
if (packet_len_ms_ > 0) {
|
||||
window_20ms = (20 << 8) / packet_len_ms_;
|
||||
}
|
||||
|
||||
// |target_level_| is in Q8 already.
|
||||
*lower_limit = (target_level_ * 3) / 4;
|
||||
// |higher_limit| is equal to |target_level_|, but should at
|
||||
// least be 20 ms higher than |lower_limit_|.
|
||||
*higher_limit = std::max(target_level_, *lower_limit + window_20ms);
|
||||
}
|
||||
|
||||
// Returns the current target buffer level in (fractions of) packets in Q8.
int DelayManager::TargetLevel() const {
  return target_level_;
}

// Tracks whether the last decoded packet was CNG or DTMF: 1 = last packet
// was CNG/DTMF; -1 = it was not, but the state had previously been non-zero.
// A stored 0 is sticky here and is only set via set_last_pack_cng_or_dtmf().
void DelayManager::LastDecodedWasCngOrDtmf(bool it_was) {
  if (it_was) {
    last_pack_cng_or_dtmf_ = 1;
  } else if (last_pack_cng_or_dtmf_ != 0) {
    last_pack_cng_or_dtmf_ = -1;
  }
}

// Empty packets still consume sequence numbers; advance the bookkeeping so
// the next real packet is not counted as a gap.
void DelayManager::RegisterEmptyPacket() {
  ++last_seq_no_;
}
|
||||
|
||||
bool DelayManager::SetMinimumDelay(int delay_ms) {
|
||||
// Minimum delay shouldn't be more than maximum delay, if any maximum is set.
|
||||
// Also, if possible check |delay| to less than 75% of
|
||||
// |max_packets_in_buffer_|.
|
||||
if ((maximum_delay_ms_ > 0 && delay_ms > maximum_delay_ms_) ||
|
||||
(packet_len_ms_ > 0 &&
|
||||
delay_ms >
|
||||
static_cast<int>(3 * max_packets_in_buffer_ * packet_len_ms_ / 4))) {
|
||||
return false;
|
||||
}
|
||||
minimum_delay_ms_ = delay_ms;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Sets the externally imposed maximum delay; a value of 0 removes the limit.
// Returns false, leaving the current value untouched, if |delay_ms| is
// smaller than the minimum delay or shorter than one packet.
bool DelayManager::SetMaximumDelay(int delay_ms) {
  if (delay_ms == 0) {
    // Zero input unsets the maximum delay.
    maximum_delay_ms_ = 0;
    return true;
  }
  if (delay_ms >= minimum_delay_ms_ && delay_ms >= packet_len_ms_) {
    maximum_delay_ms_ = delay_ms;
    return true;
  }
  // Maximum delay shouldn't be less than minimum delay or less than a packet.
  return false;
}
|
||||
|
||||
// Trivial accessors and mutators.

// Smallest required delay (ms) computed by LimitTargetLevel() before the
// external min/max limits were applied.
int DelayManager::least_required_delay_ms() const {
  return least_required_delay_ms_;
}

int DelayManager::base_target_level() const { return base_target_level_; }
void DelayManager::set_streaming_mode(bool value) { streaming_mode_ = value; }
int DelayManager::last_pack_cng_or_dtmf() const {
  return last_pack_cng_or_dtmf_;
}

void DelayManager::set_last_pack_cng_or_dtmf(int value) {
  last_pack_cng_or_dtmf_ = value;
}
|
||||
} // namespace webrtc
|
||||
174
modules/audio_coding/neteq/delay_manager.h
Normal file
174
modules/audio_coding/neteq/delay_manager.h
Normal file
@ -0,0 +1,174 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
|
||||
|
||||
#include <string.h> // Provide access to size_t.
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declaration.
|
||||
class DelayPeakDetector;
|
||||
|
||||
// Estimates the target jitter-buffer level for NetEq from the inter-arrival
// times of incoming packets, using a forgetting-factor histogram plus a
// DelayPeakDetector. All "level" quantities are in (fractions of) packets,
// in Q8 unless noted otherwise.
class DelayManager {
 public:
  typedef std::vector<int> IATVector;

  // Create a DelayManager object. Notify the delay manager that the packet
  // buffer can hold no more than |max_packets_in_buffer| packets (i.e., this
  // is the number of packet slots in the buffer). Supply a PeakDetector
  // object to the DelayManager.
  DelayManager(size_t max_packets_in_buffer,
               DelayPeakDetector* peak_detector,
               const TickTimer* tick_timer);

  virtual ~DelayManager();

  // Read the inter-arrival time histogram. Mainly for testing purposes.
  virtual const IATVector& iat_vector() const;

  // Updates the delay manager with a new incoming packet, with
  // |sequence_number| and |timestamp| from the RTP header. This updates the
  // inter-arrival time histogram and other statistics, as well as the
  // associated DelayPeakDetector. A new target buffer level is calculated.
  // Returns 0 on success, -1 on failure (invalid sample rate).
  virtual int Update(uint16_t sequence_number,
                     uint32_t timestamp,
                     int sample_rate_hz);

  // Calculates a new target buffer level. Called from the Update() method.
  // Sets target_level_ (in Q8) and returns the same value. Also calculates
  // and updates base_target_level_, which is the target buffer level before
  // taking delay peaks into account.
  virtual int CalculateTargetLevel(int iat_packets);

  // Notifies the DelayManager of how much audio data is carried in each packet.
  // The method updates the DelayPeakDetector too, and resets the inter-arrival
  // time counter. Returns 0 on success, -1 on failure.
  virtual int SetPacketAudioLength(int length_ms);

  // Resets the DelayManager and the associated DelayPeakDetector.
  virtual void Reset();

  // Calculates the average inter-arrival time deviation from the histogram.
  // The result is returned as parts-per-million deviation from the nominal
  // inter-arrival time. That is, if the average inter-arrival time is equal to
  // the nominal frame time, the return value is zero. A positive value
  // corresponds to packet spacing being too large, while a negative value means
  // that the packets arrive with less spacing than expected.
  virtual double EstimatedClockDriftPpm() const;

  // Returns true if peak-mode is active. That is, delay peaks were observed
  // recently. This method simply asks for the same information from the
  // DelayPeakDetector object.
  virtual bool PeakFound() const;

  // Reset the inter-arrival time counter to 0.
  virtual void ResetPacketIatCount();

  // Writes the lower and higher limits which the buffer level should stay
  // within to the corresponding pointers. The values are in (fractions of)
  // packets in Q8.
  virtual void BufferLimits(int* lower_limit, int* higher_limit) const;

  // Gets the target buffer level, in (fractions of) packets in Q8. This value
  // includes any extra delay set through the set_extra_delay_ms() method.
  virtual int TargetLevel() const;

  // Informs the delay manager whether or not the last decoded packet contained
  // speech.
  virtual void LastDecodedWasCngOrDtmf(bool it_was);

  // Notify the delay manager that empty packets have been received. These are
  // packets that are part of the sequence number series, so that an empty
  // packet will shift the sequence numbers for the following packets.
  virtual void RegisterEmptyPacket();

  // Accessors and mutators.
  // Assuming |delay| is in valid range.
  virtual bool SetMinimumDelay(int delay_ms);
  virtual bool SetMaximumDelay(int delay_ms);
  virtual int least_required_delay_ms() const;
  virtual int base_target_level() const;
  virtual void set_streaming_mode(bool value);
  virtual int last_pack_cng_or_dtmf() const;
  virtual void set_last_pack_cng_or_dtmf(int value);

 private:
  static const int kLimitProbability = 53687091;  // 1/20 in Q30.
  static const int kLimitProbabilityStreaming = 536871;  // 1/2000 in Q30.
  static const int kMaxStreamingPeakPeriodMs = 600000;  // 10 minutes in ms.
  static const int kCumulativeSumDrift = 2;  // Drift term for cumulative sum
                                             // |iat_cumulative_sum_|.
  // Steady-state forgetting factor for |iat_vector_|, 0.9993 in Q15.
  static const int kIatFactor_ = 32745;
  static const int kMaxIat = 64;  // Max inter-arrival time to register.

  // Sets |iat_vector_| to the default start distribution and sets the
  // |base_target_level_| and |target_level_| to the corresponding values.
  void ResetHistogram();

  // Updates |iat_cumulative_sum_| and |max_iat_cumulative_sum_|. (These are
  // used by the streaming mode.) This method is called by Update().
  void UpdateCumulativeSums(int packet_len_ms, uint16_t sequence_number);

  // Updates the histogram |iat_vector_|. The probability for inter-arrival time
  // equal to |iat_packets| (in integer packets) is increased slightly, while
  // all other entries are decreased. This method is called by Update().
  void UpdateHistogram(size_t iat_packets);

  // Makes sure that |target_level_| is not too large, taking
  // |max_packets_in_buffer_| and |extra_delay_ms_| into account. This method is
  // called by Update().
  void LimitTargetLevel();

  bool first_packet_received_;
  const size_t max_packets_in_buffer_;  // Capacity of the packet buffer.
  IATVector iat_vector_;  // Histogram of inter-arrival times.
  int iat_factor_;  // Forgetting factor for updating the IAT histogram (Q15).
  const TickTimer* tick_timer_;  // Not owned; must outlive this object.
  // Time elapsed since last packet.
  std::unique_ptr<TickTimer::Stopwatch> packet_iat_stopwatch_;
  int base_target_level_;  // Currently preferred buffer level before peak
                           // detection and streaming mode (Q0).
  // TODO(turajs) change the comment according to the implementation of
  // minimum-delay.
  int target_level_;  // Currently preferred buffer level in (fractions)
                      // of packets (Q8), before adding any extra delay.
  int packet_len_ms_;  // Length of audio in each incoming packet [ms].
  bool streaming_mode_;
  uint16_t last_seq_no_;  // Sequence number for last received packet.
  uint32_t last_timestamp_;  // Timestamp for the last received packet.
  int minimum_delay_ms_;  // Externally set minimum delay.
  int least_required_delay_ms_;  // Smallest preferred buffer level (same unit
                                 // as |target_level_|), before applying
                                 // |minimum_delay_ms_| and/or
                                 // |maximum_delay_ms_|.
  int maximum_delay_ms_;  // Externally set maximum allowed delay.
  int iat_cumulative_sum_;  // Cumulative sum of delta inter-arrival times.
  int max_iat_cumulative_sum_;  // Max of |iat_cumulative_sum_|.
  // Time elapsed since maximum was observed.
  std::unique_ptr<TickTimer::Stopwatch> max_iat_stopwatch_;
  DelayPeakDetector& peak_detector_;  // Not owned; must outlive this object.
  int last_pack_cng_or_dtmf_;

  RTC_DISALLOW_COPY_AND_ASSIGN(DelayManager);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_MANAGER_H_
|
||||
338
modules/audio_coding/neteq/delay_manager_unittest.cc
Normal file
338
modules/audio_coding/neteq/delay_manager_unittest.cc
Normal file
@ -0,0 +1,338 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for DelayManager class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/mock/mock_delay_peak_detector.h"
|
||||
#include "webrtc/test/gmock.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
using ::testing::Return;
|
||||
using ::testing::_;
|
||||
|
||||
// Test fixture for DelayManager. Owns a mock peak detector and a real
// TickTimer, and (re)creates the DelayManager under test in SetUp().
class DelayManagerTest : public ::testing::Test {
 protected:
  static const int kMaxNumberOfPackets = 240;
  static const int kTimeStepMs = 10;  // One TickTimer increment.
  static const int kFs = 8000;  // Sample rate [Hz].
  static const int kFrameSizeMs = 20;
  static const int kTsIncrement = kFrameSizeMs * kFs / 1000;

  DelayManagerTest();
  virtual void SetUp();
  virtual void TearDown();
  // NOTE(review): parameter name has a typo ("lengt_ms"); kept as-is since
  // renaming would touch the matching definition too.
  void SetPacketAudioLength(int lengt_ms);
  void InsertNextPacket();
  void IncreaseTime(int inc_ms);

  DelayManager* dm_;
  TickTimer tick_timer_;
  MockDelayPeakDetector detector_;
  uint16_t seq_no_;   // Sequence number of the next inserted packet.
  uint32_t ts_;       // RTP timestamp of the next inserted packet.
};
|
||||
|
||||
DelayManagerTest::DelayManagerTest()
    : dm_(NULL), detector_(&tick_timer_), seq_no_(0x1234), ts_(0x12345678) {}

// Creates the DelayManager under test; its constructor calls Reset() on the
// peak detector, hence the expectation.
void DelayManagerTest::SetUp() {
  EXPECT_CALL(detector_, Reset())
      .Times(1);
  dm_ = new DelayManager(kMaxNumberOfPackets, &detector_, &tick_timer_);
}

// Forwards a packet length to the DelayManager and expects it to be relayed
// to the peak detector.
void DelayManagerTest::SetPacketAudioLength(int lengt_ms) {
  EXPECT_CALL(detector_, SetPacketAudioLength(lengt_ms));
  dm_->SetPacketAudioLength(lengt_ms);
}

// Feeds the next packet (advancing sequence number and timestamp by one
// frame) into the DelayManager and verifies Update() succeeds.
void DelayManagerTest::InsertNextPacket() {
  EXPECT_EQ(0, dm_->Update(seq_no_, ts_, kFs));
  seq_no_ += 1;
  ts_ += kTsIncrement;
}

// Advances the tick timer in kTimeStepMs steps until |inc_ms| has elapsed.
void DelayManagerTest::IncreaseTime(int inc_ms) {
  for (int t = 0; t < inc_ms; t += kTimeStepMs) {
    tick_timer_.Increment();
  }
}
void DelayManagerTest::TearDown() {
  EXPECT_CALL(detector_, Die());
  delete dm_;
}
|
||||
|
||||
TEST_F(DelayManagerTest, CreateAndDestroy) {
  // Nothing to do here. The test fixture creates and destroys the DelayManager
  // object.
}

// Verifies that the IAT histogram starts as the exponentially decaying
// distribution 0.5^(i+1) in Q30, summing to exactly 1.
TEST_F(DelayManagerTest, VectorInitialization) {
  const DelayManager::IATVector& vec = dm_->iat_vector();
  double sum = 0.0;
  for (size_t i = 0; i < vec.size(); i++) {
    EXPECT_NEAR(ldexp(pow(0.5, static_cast<int>(i + 1)), 30), vec[i], 65537);
    // Tolerance 65537 in Q30 corresponds to a delta of approximately 0.00006.
    sum += vec[i];
  }
  EXPECT_EQ(1 << 30, static_cast<int>(sum));  // Should be 1 in Q30.
}

TEST_F(DelayManagerTest, SetPacketAudioLength) {
  const int kLengthMs = 30;
  // Expect DelayManager to pass on the new length to the detector object.
  EXPECT_CALL(detector_, SetPacketAudioLength(kLengthMs))
      .Times(1);
  EXPECT_EQ(0, dm_->SetPacketAudioLength(kLengthMs));
  EXPECT_EQ(-1, dm_->SetPacketAudioLength(-1));  // Illegal parameter value.
}

TEST_F(DelayManagerTest, PeakFound) {
  // Expect DelayManager to pass on the question to the detector.
  // Call twice, and let the detector return true the first time and false the
  // second time.
  EXPECT_CALL(detector_, peak_found())
      .WillOnce(Return(true))
      .WillOnce(Return(false));
  EXPECT_TRUE(dm_->PeakFound());
  EXPECT_FALSE(dm_->PeakFound());
}
|
||||
|
||||
// Two packets arriving exactly one frame apart: target level should settle at
// one packet, and the buffer limits should follow the 75%/headroom rules.
TEST_F(DelayManagerTest, UpdateNormal) {
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();
  // Advance time by one frame size.
  IncreaseTime(kFrameSizeMs);
  // Second packet arrival.
  // Expect detector update method to be called once with inter-arrival time
  // equal to 1 packet, and (base) target level equal to 1 as well.
  // Return false to indicate no peaks found.
  EXPECT_CALL(detector_, Update(1, 1))
      .WillOnce(Return(false));
  InsertNextPacket();
  EXPECT_EQ(1 << 8, dm_->TargetLevel());  // In Q8.
  EXPECT_EQ(1, dm_->base_target_level());
  int lower, higher;
  dm_->BufferLimits(&lower, &higher);
  // Expect |lower| to be 75% of target level, and |higher| to be target level,
  // but also at least 20 ms higher than |lower|, which is the limiting case
  // here.
  EXPECT_EQ((1 << 8) * 3 / 4, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
|
||||
|
||||
// Two packets arriving two frames apart: the detector should see an
// inter-arrival time of 2 packets and the target level should become 2.
TEST_F(DelayManagerTest, UpdateLongInterArrivalTime) {
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();
  // Advance time by two frame size.
  IncreaseTime(2 * kFrameSizeMs);
  // Second packet arrival.
  // Expect detector update method to be called once with inter-arrival time
  // equal to 2 packets, and (base) target level equal to 2 as well.
  // Return false to indicate no peaks found.
  EXPECT_CALL(detector_, Update(2, 2))
      .WillOnce(Return(false));
  InsertNextPacket();
  EXPECT_EQ(2 << 8, dm_->TargetLevel());  // In Q8.
  EXPECT_EQ(2, dm_->base_target_level());
  int lower, higher;
  dm_->BufferLimits(&lower, &higher);
  // Expect |lower| to be 75% of target level, and |higher| to be target level,
  // but also at least 20 ms higher than |lower|, which is the limiting case
  // here.
  EXPECT_EQ((2 << 8) * 3 / 4, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
|
||||
|
||||
// When the detector reports peak-mode, the target level should follow the
// detector's maximum peak height instead of the base target level.
TEST_F(DelayManagerTest, UpdatePeakFound) {
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();
  // Advance time by one frame size.
  IncreaseTime(kFrameSizeMs);
  // Second packet arrival.
  // Expect detector update method to be called once with inter-arrival time
  // equal to 1 packet, and (base) target level equal to 1 as well.
  // Return true to indicate that peaks are found. Let the peak height be 5.
  EXPECT_CALL(detector_, Update(1, 1))
      .WillOnce(Return(true));
  EXPECT_CALL(detector_, MaxPeakHeight())
      .WillOnce(Return(5));
  InsertNextPacket();
  EXPECT_EQ(5 << 8, dm_->TargetLevel());
  EXPECT_EQ(1, dm_->base_target_level());  // Base target level is w/o peaks.
  int lower, higher;
  dm_->BufferLimits(&lower, &higher);
  // Expect |lower| to be 75% of target level, and |higher| to be target level.
  EXPECT_EQ((5 << 8) * 3 / 4, lower);
  EXPECT_EQ(5 << 8, higher);
}
|
||||
|
||||
// Exercises the normal (no peak) target-delay path and checks the buffer
// limits derived from the base target level.
TEST_F(DelayManagerTest, TargetDelay) {
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();
  // Advance time by one frame size.
  IncreaseTime(kFrameSizeMs);
  // Second packet arrival.
  // Expect detector update method to be called once with inter-arrival time
  // equal to 1 packet, and (base) target level equal to 1 as well.
  // Return false to indicate no peaks found.
  EXPECT_CALL(detector_, Update(1, 1))
      .WillOnce(Return(false));
  InsertNextPacket();
  const int kExpectedTarget = 1;
  EXPECT_EQ(kExpectedTarget << 8, dm_->TargetLevel());  // In Q8.
  EXPECT_EQ(1, dm_->base_target_level());
  int lower, higher;
  dm_->BufferLimits(&lower, &higher);
  // Expect |lower| to be 75% of base target level, and |higher| to be
  // lower + 20 ms headroom.
  EXPECT_EQ((1 << 8) * 3 / 4, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
|
||||
|
||||
// Verifies that SetMaximumDelay caps the target level, that the least
// required delay is still reported, and that a maximum below one packet is
// rejected.
TEST_F(DelayManagerTest, MaxAndRequiredDelay) {
  const int kExpectedTarget = 5;
  const int kTimeIncrement = kExpectedTarget * kFrameSizeMs;
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();
  // Second packet arrival.
  // Expect detector update method to be called once with inter-arrival time
  // equal to |kExpectedTarget| packet. Return true to indicate peaks found.
  EXPECT_CALL(detector_, Update(kExpectedTarget, _))
      .WillRepeatedly(Return(true));
  EXPECT_CALL(detector_, MaxPeakHeight())
      .WillRepeatedly(Return(kExpectedTarget));
  IncreaseTime(kTimeIncrement);
  InsertNextPacket();

  // No limit is set.
  EXPECT_EQ(kExpectedTarget << 8, dm_->TargetLevel());

  int kMaxDelayPackets = kExpectedTarget - 2;
  int kMaxDelayMs = kMaxDelayPackets * kFrameSizeMs;
  EXPECT_TRUE(dm_->SetMaximumDelay(kMaxDelayMs));
  IncreaseTime(kTimeIncrement);
  InsertNextPacket();
  EXPECT_EQ(kExpectedTarget * kFrameSizeMs, dm_->least_required_delay_ms());
  EXPECT_EQ(kMaxDelayPackets << 8, dm_->TargetLevel());

  // Target level at least should be one packet.
  EXPECT_FALSE(dm_->SetMaximumDelay(kFrameSizeMs - 1));
}
|
||||
|
||||
// Verifies that SetMinimumDelay raises the target level above what the
// detector alone would produce, while the least required delay is unchanged.
TEST_F(DelayManagerTest, MinAndRequiredDelay) {
  const int kExpectedTarget = 5;
  const int kTimeIncrement = kExpectedTarget * kFrameSizeMs;
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();
  // Second packet arrival.
  // Expect detector update method to be called once with inter-arrival time
  // equal to |kExpectedTarget| packet. Return true to indicate peaks found.
  EXPECT_CALL(detector_, Update(kExpectedTarget, _))
      .WillRepeatedly(Return(true));
  EXPECT_CALL(detector_, MaxPeakHeight())
      .WillRepeatedly(Return(kExpectedTarget));
  IncreaseTime(kTimeIncrement);
  InsertNextPacket();

  // No limit is applied.
  EXPECT_EQ(kExpectedTarget << 8, dm_->TargetLevel());

  int kMinDelayPackets = kExpectedTarget + 2;
  int kMinDelayMs = kMinDelayPackets * kFrameSizeMs;
  dm_->SetMinimumDelay(kMinDelayMs);
  IncreaseTime(kTimeIncrement);
  InsertNextPacket();
  EXPECT_EQ(kExpectedTarget * kFrameSizeMs, dm_->least_required_delay_ms());
  EXPECT_EQ(kMinDelayPackets << 8, dm_->TargetLevel());
}
|
||||
|
||||
// Tests that skipped sequence numbers (simulating empty packets) are handled
|
||||
// correctly.
|
||||
TEST_F(DelayManagerTest, EmptyPacketsReported) {
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();

  // Advance time by one frame size.
  IncreaseTime(kFrameSizeMs);

  // Advance the sequence number by 10, simulating that 10 empty packets were
  // received, but never inserted.
  seq_no_ += 10;
  for (int j = 0; j < 10; ++j) {
    dm_->RegisterEmptyPacket();
  }

  // Second packet arrival.
  // Expect detector update method to be called once with inter-arrival time
  // equal to 1 packet, and (base) target level equal to 1 as well.
  // Return false to indicate no peaks found.
  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
  InsertNextPacket();

  EXPECT_EQ(1 << 8, dm_->TargetLevel());  // In Q8.
}
|
||||
|
||||
// Same as above, but do not call RegisterEmptyPacket. Observe the target level
|
||||
// increase dramatically.
|
||||
TEST_F(DelayManagerTest, EmptyPacketsNotReported) {
  SetPacketAudioLength(kFrameSizeMs);
  // First packet arrival.
  InsertNextPacket();

  // Advance time by one frame size.
  IncreaseTime(kFrameSizeMs);

  // Advance the sequence number by 10, simulating that 10 empty packets were
  // received, but never inserted (and, unlike the test above, never reported
  // via RegisterEmptyPacket).
  seq_no_ += 10;

  // Second packet arrival.
  // Expect detector update method to be called once with inter-arrival time
  // equal to 10 packets, and (base) target level equal to 10 as well.
  // Return false to indicate no peaks found.
  EXPECT_CALL(detector_, Update(10, 10)).WillOnce(Return(false));
  InsertNextPacket();

  // Note 10 times higher target value.
  EXPECT_EQ(10 * 1 << 8, dm_->TargetLevel());  // In Q8.
}
|
||||
|
||||
// Collects the error paths: invalid sample rate, invalid packet length, and
// inconsistent minimum/maximum delay combinations.
TEST_F(DelayManagerTest, Failures) {
  // Wrong sample rate.
  EXPECT_EQ(-1, dm_->Update(0, 0, -1));
  // Wrong packet size.
  EXPECT_EQ(-1, dm_->SetPacketAudioLength(0));
  EXPECT_EQ(-1, dm_->SetPacketAudioLength(-1));

  // Minimum delay higher than a maximum delay is not accepted.
  EXPECT_TRUE(dm_->SetMaximumDelay(10));
  EXPECT_FALSE(dm_->SetMinimumDelay(20));

  // Maximum delay less than minimum delay is not accepted.
  EXPECT_TRUE(dm_->SetMaximumDelay(100));
  EXPECT_TRUE(dm_->SetMinimumDelay(80));
  EXPECT_FALSE(dm_->SetMaximumDelay(60));
}
|
||||
|
||||
} // namespace webrtc
|
||||
118
modules/audio_coding/neteq/delay_peak_detector.cc
Normal file
118
modules/audio_coding/neteq/delay_peak_detector.cc
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
|
||||
|
||||
#include <algorithm> // max
|
||||
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
#include "webrtc/rtc_base/safe_conversions.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// The DelayPeakDetector keeps track of severe inter-arrival times, called
|
||||
// delay peaks. When a peak is observed, the "height" (the time elapsed since
|
||||
// the previous packet arrival) and the peak "period" (the time since the last
|
||||
// observed peak) is recorded in a vector. When enough peaks have been observed,
|
||||
// peak-mode is engaged and the DelayManager asks the DelayPeakDetector for
|
||||
// the worst peak height.
|
||||
|
||||
DelayPeakDetector::~DelayPeakDetector() = default;

// Constructs a detector reading time from |tick_timer| (not owned; must
// outlive this object). The peak-period stopwatch is deliberately left unset
// until the first peak is observed in Update().
DelayPeakDetector::DelayPeakDetector(const TickTimer* tick_timer)
    : peak_found_(false),
      peak_detection_threshold_(0),
      tick_timer_(tick_timer) {
  RTC_DCHECK(!peak_period_stopwatch_);
}
|
||||
|
||||
// Returns the detector to its initial state: no running period stopwatch, no
// recorded peaks, and peak-mode disengaged.
void DelayPeakDetector::Reset() {
  peak_period_stopwatch_.reset();
  peak_found_ = false;
  peak_history_.clear();
}
|
||||
|
||||
// Calculates the threshold in number of packets.
|
||||
void DelayPeakDetector::SetPacketAudioLength(int length_ms) {
|
||||
if (length_ms > 0) {
|
||||
peak_detection_threshold_ = kPeakHeightMs / length_ms;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true while peak-mode is engaged, i.e., while recently observed
// delay peaks are considered representative (see CheckPeakConditions()).
bool DelayPeakDetector::peak_found() {
  return peak_found_;
}
|
||||
|
||||
int DelayPeakDetector::MaxPeakHeight() const {
|
||||
int max_height = -1; // Returns -1 for an empty history.
|
||||
std::list<Peak>::const_iterator it;
|
||||
for (it = peak_history_.begin(); it != peak_history_.end(); ++it) {
|
||||
max_height = std::max(max_height, it->peak_height_packets);
|
||||
}
|
||||
return max_height;
|
||||
}
|
||||
|
||||
// Returns the longest recorded peak period in milliseconds (strictly positive
// for any recorded peak), or 0 when the history is empty.
uint64_t DelayPeakDetector::MaxPeakPeriod() const {
  auto max_period_element = std::max_element(
      peak_history_.begin(), peak_history_.end(),
      [](Peak a, Peak b) { return a.period_ms < b.period_ms; });
  if (max_period_element == peak_history_.end()) {
    return 0;  // |peak_history_| is empty.
  }
  RTC_DCHECK_GT(max_period_element->period_ms, 0);
  return max_period_element->period_ms;
}
|
||||
|
||||
// Classifies |inter_arrival_time| (in packets) against the current
// |target_level|: a peak is declared when it exceeds the target level by more
// than the detection threshold, or exceeds twice the target level. Valid
// peaks are recorded with the time since the previous peak; stale state is
// reset when peaks stop arriving. Returns the (possibly updated) peak-mode
// flag.
bool DelayPeakDetector::Update(int inter_arrival_time, int target_level) {
  if (inter_arrival_time > target_level + peak_detection_threshold_ ||
      inter_arrival_time > 2 * target_level) {
    // A delay peak is observed.
    if (!peak_period_stopwatch_) {
      // This is the first peak. Reset the period counter.
      peak_period_stopwatch_ = tick_timer_->GetNewStopwatch();
    } else if (peak_period_stopwatch_->ElapsedMs() > 0) {
      // Note: a peak arriving with 0 ms elapsed (e.g., reordered packets
      // updating back-to-back) is deliberately ignored here.
      if (peak_period_stopwatch_->ElapsedMs() <= kMaxPeakPeriodMs) {
        // This is not the first peak, and the period is valid.
        // Store peak data in the vector.
        Peak peak_data;
        peak_data.period_ms = peak_period_stopwatch_->ElapsedMs();
        peak_data.peak_height_packets = inter_arrival_time;
        peak_history_.push_back(peak_data);
        while (peak_history_.size() > kMaxNumPeaks) {
          // Delete the oldest data point.
          peak_history_.pop_front();
        }
        peak_period_stopwatch_ = tick_timer_->GetNewStopwatch();
      } else if (peak_period_stopwatch_->ElapsedMs() <= 2 * kMaxPeakPeriodMs) {
        // Invalid peak due to too long period. Reset period counter and start
        // looking for next peak.
        peak_period_stopwatch_ = tick_timer_->GetNewStopwatch();
      } else {
        // More than 2 times the maximum period has elapsed since the last peak
        // was registered. It seems that the network conditions have changed.
        // Reset the peak statistics.
        Reset();
      }
    }
  }
  return CheckPeakConditions();
}
|
||||
|
||||
bool DelayPeakDetector::CheckPeakConditions() {
|
||||
size_t s = peak_history_.size();
|
||||
if (s >= kMinPeaksToTrigger &&
|
||||
peak_period_stopwatch_->ElapsedMs() <= 2 * MaxPeakPeriod()) {
|
||||
peak_found_ = true;
|
||||
} else {
|
||||
peak_found_ = false;
|
||||
}
|
||||
return peak_found_;
|
||||
}
|
||||
} // namespace webrtc
|
||||
74
modules/audio_coding/neteq/delay_peak_detector.h
Normal file
74
modules/audio_coding/neteq/delay_peak_detector.h
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_PEAK_DETECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_PEAK_DETECTOR_H_
|
||||
|
||||
#include <string.h> // size_t
|
||||
|
||||
#include <list>
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Tracks severe inter-arrival times ("delay peaks"). When a peak is seen, its
// height (packets) and period (ms since the previous peak) are recorded; once
// enough recent peaks exist, peak-mode is engaged and MaxPeakHeight() drives
// the delay target. Not thread-safe; |tick_timer| must outlive this object.
class DelayPeakDetector {
 public:
  // NOTE(review): single-argument constructor is not explicit; consider
  // marking it explicit to avoid implicit TickTimer* conversions.
  DelayPeakDetector(const TickTimer* tick_timer);
  virtual ~DelayPeakDetector();
  virtual void Reset();

  // Notifies the DelayPeakDetector of how much audio data is carried in each
  // packet.
  virtual void SetPacketAudioLength(int length_ms);

  // Returns true if peak-mode is active. That is, delay peaks were observed
  // recently.
  virtual bool peak_found();

  // Calculates and returns the maximum delay peak height. Returns -1 if no
  // delay peaks have been observed recently. The unit is number of packets.
  virtual int MaxPeakHeight() const;

  // Calculates and returns the maximum delay peak distance in ms (strictly
  // larger than 0), or 0 if no delay peaks have been observed recently.
  virtual uint64_t MaxPeakPeriod() const;

  // Updates the DelayPeakDetector with a new inter-arrival time (in packets)
  // and the current target buffer level (needed to decide if a peak is observed
  // or not). Returns true if peak-mode is active, false if not.
  virtual bool Update(int inter_arrival_time, int target_level);

 private:
  static const size_t kMaxNumPeaks = 8;        // History keeps at most 8 peaks.
  static const size_t kMinPeaksToTrigger = 2;  // Peaks needed for peak-mode.
  static const int kPeakHeightMs = 78;  // Excess delay that counts as a peak.
  static const int kMaxPeakPeriodMs = 10000;  // Longest valid peak period.

  // One recorded delay peak: time since the previous peak, and its height.
  typedef struct {
    uint64_t period_ms;
    int peak_height_packets;
  } Peak;

  // Re-evaluates whether peak-mode should be engaged; updates |peak_found_|.
  bool CheckPeakConditions();

  std::list<Peak> peak_history_;  // Recent peaks, oldest first.
  bool peak_found_;               // True while peak-mode is engaged.
  int peak_detection_threshold_;  // kPeakHeightMs converted to packets.
  const TickTimer* tick_timer_;   // Not owned.
  // Time since the last observed peak; unset until the first peak.
  std::unique_ptr<TickTimer::Stopwatch> peak_period_stopwatch_;

  RTC_DISALLOW_COPY_AND_ASSIGN(DelayPeakDetector);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DELAY_PEAK_DETECTOR_H_
|
||||
143
modules/audio_coding/neteq/delay_peak_detector_unittest.cc
Normal file
143
modules/audio_coding/neteq/delay_peak_detector_unittest.cc
Normal file
@ -0,0 +1,143 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for DelayPeakDetector class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
|
||||
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Smoke test: heap construction and destruction, and the initial peak-mode
// state. Raw new/delete is deliberate here to exercise ctor/dtor directly.
TEST(DelayPeakDetector, CreateAndDestroy) {
  TickTimer tick_timer;
  DelayPeakDetector* detector = new DelayPeakDetector(&tick_timer);
  EXPECT_FALSE(detector->peak_found());
  delete detector;
}
|
||||
|
||||
// With no recorded peaks, MaxPeakHeight() returns -1 and MaxPeakPeriod()
// returns 0.
TEST(DelayPeakDetector, EmptyHistory) {
  TickTimer tick_timer;
  DelayPeakDetector detector(&tick_timer);
  EXPECT_EQ(-1, detector.MaxPeakHeight());
  EXPECT_EQ(0u, detector.MaxPeakPeriod());
}
|
||||
|
||||
// Inject a series of packet arrivals into the detector. Three of the packets
|
||||
// have suffered delays. After the third delay peak, peak-mode is expected to
|
||||
// start. This should then continue until it is disengaged due to lack of peaks.
|
||||
TEST(DelayPeakDetector, TriggerPeakMode) {
  TickTimer tick_timer;
  DelayPeakDetector detector(&tick_timer);
  const int kPacketSizeMs = 30;
  detector.SetPacketAudioLength(kPacketSizeMs);

  // Load up normal arrival times; 0 ms, 30 ms, 60 ms, 90 ms, ...
  const int kNumPackets = 1000;
  int arrival_times_ms[kNumPackets];
  for (int i = 0; i < kNumPackets; ++i) {
    arrival_times_ms[i] = i * kPacketSizeMs;
  }

  // Delay three packets.
  const int kPeakDelayMs = 100;
  // First delay peak.
  arrival_times_ms[100] += kPeakDelayMs;
  // Second delay peak.
  arrival_times_ms[200] += kPeakDelayMs;
  // Third delay peak. Trigger peak-mode after this packet.
  arrival_times_ms[400] += kPeakDelayMs;
  // The second peak period is the longest, 200 packets.
  const uint64_t kWorstPeakPeriod = 200 * kPacketSizeMs;
  int peak_mode_start_ms = arrival_times_ms[400];
  // Expect to disengage after no peaks are observed for two period times.
  int peak_mode_end_ms = peak_mode_start_ms + 2 * kWorstPeakPeriod;

  // Load into detector: walk wall-clock time in 10 ms ticks and feed every
  // packet whose arrival time has been reached.
  int time = 0;
  int next = 1;  // Start with the second packet to get a proper IAT.
  while (next < kNumPackets) {
    while (next < kNumPackets && arrival_times_ms[next] <= time) {
      int iat_packets = (arrival_times_ms[next] - arrival_times_ms[next - 1]) /
          kPacketSizeMs;
      const int kTargetBufferLevel = 1;  // Define peaks to be iat > 2.
      if (time < peak_mode_start_ms || time > peak_mode_end_ms) {
        EXPECT_FALSE(detector.Update(iat_packets, kTargetBufferLevel));
      } else {
        EXPECT_TRUE(detector.Update(iat_packets, kTargetBufferLevel));
        EXPECT_EQ(kWorstPeakPeriod, detector.MaxPeakPeriod());
        EXPECT_EQ(kPeakDelayMs / kPacketSizeMs + 1, detector.MaxPeakHeight());
      }
      ++next;
    }
    tick_timer.Increment();
    time += 10;  // Increase time 10 ms.
  }
}
|
||||
|
||||
// Same test as TriggerPeakMode, but with base target buffer level increased to
|
||||
// 2, in order to raise the bar for delay peaks to inter-arrival times > 4.
|
||||
// The delay pattern has peaks with delay = 3, thus should not trigger.
|
||||
TEST(DelayPeakDetector, DoNotTriggerPeakMode) {
  TickTimer tick_timer;
  DelayPeakDetector detector(&tick_timer);
  const int kPacketSizeMs = 30;
  detector.SetPacketAudioLength(kPacketSizeMs);

  // Load up normal arrival times; 0 ms, 30 ms, 60 ms, 90 ms, ...
  const int kNumPackets = 1000;
  int arrival_times_ms[kNumPackets];
  for (int i = 0; i < kNumPackets; ++i) {
    arrival_times_ms[i] = i * kPacketSizeMs;
  }

  // Delay three packets.
  const int kPeakDelayMs = 100;
  // First delay peak.
  arrival_times_ms[100] += kPeakDelayMs;
  // Second delay peak.
  arrival_times_ms[200] += kPeakDelayMs;
  // Third delay peak.
  arrival_times_ms[400] += kPeakDelayMs;

  // Load into detector. With target level 2, none of the delayed packets
  // should qualify as a peak, so Update() must always return false.
  int time = 0;
  int next = 1;  // Start with the second packet to get a proper IAT.
  while (next < kNumPackets) {
    while (next < kNumPackets && arrival_times_ms[next] <= time) {
      int iat_packets = (arrival_times_ms[next] - arrival_times_ms[next - 1]) /
          kPacketSizeMs;
      const int kTargetBufferLevel = 2;  // Define peaks to be iat > 4.
      EXPECT_FALSE(detector.Update(iat_packets, kTargetBufferLevel));
      ++next;
    }
    tick_timer.Increment();
    time += 10;  // Increase time 10 ms.
  }
}
|
||||
|
||||
// In situations with reordered packets, the DelayPeakDetector may be updated
|
||||
// back-to-back (i.e., without the tick_timer moving) but still with non-zero
|
||||
// inter-arrival time. This test is to make sure that this does not cause
|
||||
// problems.
|
||||
TEST(DelayPeakDetector, ZeroDistancePeaks) {
  TickTimer tick_timer;
  DelayPeakDetector detector(&tick_timer);
  const int kPacketSizeMs = 30;
  detector.SetPacketAudioLength(kPacketSizeMs);

  const int kTargetBufferLevel = 2;  // Define peaks to be iat > 4.
  const int kInterArrivalTime = 3 * kTargetBufferLevel;  // Will trigger a peak.
  // Three back-to-back peak updates without the clock moving must not engage
  // peak-mode (zero-period peaks are ignored) and must not crash.
  EXPECT_FALSE(detector.Update(kInterArrivalTime, kTargetBufferLevel));
  EXPECT_FALSE(detector.Update(kInterArrivalTime, kTargetBufferLevel));
  EXPECT_FALSE(detector.Update(kInterArrivalTime, kTargetBufferLevel));
}
|
||||
|
||||
} // namespace webrtc
|
||||
368
modules/audio_coding/neteq/dsp_helper.cc
Normal file
368
modules/audio_coding/neteq/dsp_helper.cc
Normal file
@ -0,0 +1,368 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h> // Access to memset.
|
||||
|
||||
#include <algorithm> // Access to min, max.
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Table of constants used in method DspHelper::ParabolicFit().
// Each row holds three fixed-point coefficients; rows are selected via the
// fit_index tables built per |fs_mult| in ParabolicFit().
const int16_t DspHelper::kParabolaCoefficients[17][3] = {
    { 120, 32, 64 },
    { 140, 44, 75 },
    { 150, 50, 80 },
    { 160, 57, 85 },
    { 180, 72, 96 },
    { 200, 89, 107 },
    { 210, 98, 112 },
    { 220, 108, 117 },
    { 240, 128, 128 },
    { 260, 150, 139 },
    { 270, 162, 144 },
    { 280, 174, 149 },
    { 300, 200, 160 },
    { 320, 228, 171 },
    { 330, 242, 176 },
    { 340, 257, 181 },
    { 360, 288, 192 } };
|
||||
|
||||
// Filter coefficients used when downsampling from the indicated sample rates
// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. The corresponding Q0
// values are provided in the comments before each array.

// Q0 values: {0.3, 0.4, 0.3}.
const int16_t DspHelper::kDownsample8kHzTbl[3] = { 1229, 1638, 1229 };

// Q0 values: {0.15, 0.2, 0.3, 0.2, 0.15}.
const int16_t DspHelper::kDownsample16kHzTbl[5] = { 614, 819, 1229, 819, 614 };

// Q0 values: {0.1425, 0.1251, 0.1525, 0.1628, 0.1525, 0.1251, 0.1425}.
const int16_t DspHelper::kDownsample32kHzTbl[7] = {
    584, 512, 625, 667, 625, 512, 584 };

// Q0 values: {0.2487, 0.0952, 0.1042, 0.1074, 0.1042, 0.0952, 0.2487}.
const int16_t DspHelper::kDownsample48kHzTbl[7] = {
    1019, 390, 427, 440, 427, 390, 1019 };
|
||||
|
||||
// Scales |input| into |output| with a gain that starts at |factor| (Q14) and
// changes by |increment| (Q20) per sample. The gain is clamped to
// [0, 16384], i.e., [0.0, 1.0] in Q14. Returns the gain after the last
// sample, in Q14, so a subsequent call can continue the ramp.
int DspHelper::RampSignal(const int16_t* input,
                          size_t length,
                          int factor,
                          int increment,
                          int16_t* output) {
  int factor_q20 = (factor << 6) + 32;
  // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14?
  for (size_t i = 0; i < length; ++i) {
    // Apply the current gain, round (+8192), and convert back from Q14.
    output[i] = (factor * input[i] + 8192) >> 14;
    factor_q20 += increment;
    factor_q20 = std::max(factor_q20, 0);  // Never go negative.
    factor = std::min(factor_q20 >> 6, 16384);
  }
  return factor;
}
|
||||
|
||||
// In-place variant: ramps |signal| and overwrites it with the scaled result.
// See the out-of-place overload for the gain semantics.
int DspHelper::RampSignal(int16_t* signal,
                          size_t length,
                          int factor,
                          int increment) {
  return RampSignal(signal, length, factor, increment, signal);
}
|
||||
|
||||
// AudioVector variant: ramps |length| samples of |signal| in place, starting
// at |start_index|. Same gain semantics as the pointer-based overloads;
// returns the final gain in Q14.
int DspHelper::RampSignal(AudioVector* signal,
                          size_t start_index,
                          size_t length,
                          int factor,
                          int increment) {
  int factor_q20 = (factor << 6) + 32;
  // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14?
  for (size_t i = start_index; i < start_index + length; ++i) {
    (*signal)[i] = (factor * (*signal)[i] + 8192) >> 14;
    factor_q20 += increment;
    factor_q20 = std::max(factor_q20, 0);  // Never go negative.
    factor = std::min(factor_q20 >> 6, 16384);
  }
  return factor;
}
|
||||
|
||||
// Multi-channel variant: applies the same ramp independently to every channel
// of |signal|, each starting from |factor|. Returns the final gain of the
// last channel processed (all channels end at the same gain). If the
// requested range exceeds the signal size, nothing is done and |factor| is
// returned unchanged.
int DspHelper::RampSignal(AudioMultiVector* signal,
                          size_t start_index,
                          size_t length,
                          int factor,
                          int increment) {
  assert(start_index + length <= signal->Size());
  if (start_index + length > signal->Size()) {
    // Wrong parameters. Do nothing and return the scale factor unaltered.
    return factor;
  }
  int end_factor = 0;
  // Loop over the channels, starting at the same |factor| each time.
  for (size_t channel = 0; channel < signal->Channels(); ++channel) {
    end_factor =
        RampSignal(&(*signal)[channel], start_index, length, factor, increment);
  }
  return end_factor;
}
|
||||
|
||||
// Finds |num_peaks| successive maxima in |data| (length |data_length|).
// After each maximum is found, its refined position and value are written to
// peak_index[i]/peak_value[i] (positions are scaled by 2 * fs_mult via
// ParabolicFit), and the neighborhood around the maximum is zeroed so the
// next iteration finds the next-highest peak. Note: |data| is modified.
void DspHelper::PeakDetection(int16_t* data, size_t data_length,
                              size_t num_peaks, int fs_mult,
                              size_t* peak_index, int16_t* peak_value) {
  size_t min_index = 0;
  size_t max_index = 0;

  for (size_t i = 0; i <= num_peaks - 1; i++) {
    if (num_peaks == 1) {
      // Single peak. The parabola fit assumes that an extra point is
      // available; worst case it gets a zero on the high end of the signal.
      // TODO(hlundin): This can potentially get much worse. It breaks the
      // API contract, that the length of |data| is |data_length|.
      data_length++;
    }

    peak_index[i] = WebRtcSpl_MaxIndexW16(data, data_length - 1);

    if (i != num_peaks - 1) {
      // Remember the neighborhood [min_index, max_index] around this peak so
      // it can be zeroed out before searching for the next one.
      min_index = (peak_index[i] > 2) ? (peak_index[i] - 2) : 0;
      max_index = std::min(data_length - 1, peak_index[i] + 2);
    }

    if ((peak_index[i] != 0) && (peak_index[i] != (data_length - 2))) {
      // Interior peak: refine with a parabolic fit around the maximum.
      ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i],
                   &peak_value[i]);
    } else {
      if (peak_index[i] == data_length - 2) {
        if (data[peak_index[i]] > data[peak_index[i] + 1]) {
          ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i],
                       &peak_value[i]);
        } else if (data[peak_index[i]] <= data[peak_index[i] + 1]) {
          // Linear approximation.
          peak_value[i] = (data[peak_index[i]] + data[peak_index[i] + 1]) >> 1;
          peak_index[i] = (peak_index[i] * 2 + 1) * fs_mult;
        }
      } else {
        // Peak at index 0: no refinement possible.
        peak_value[i] = data[peak_index[i]];
        peak_index[i] = peak_index[i] * 2 * fs_mult;
      }
    }

    if (i != num_peaks - 1) {
      // Erase this peak's neighborhood before searching for the next peak.
      memset(&data[min_index], 0,
             sizeof(data[0]) * (max_index - min_index + 1));
    }
  }
}
|
||||
|
||||
// Fits a parabola through three consecutive samples (|signal_points|[0..2])
// around a detected maximum and refines the peak position and value at a
// resolution of 2 * fs_mult sub-sample steps, using the fixed-point
// coefficient table kParabolaCoefficients. |peak_index| enters as the coarse
// sample index and leaves as the refined, upsampled index; |peak_value| is
// the interpolated peak amplitude.
void DspHelper::ParabolicFit(int16_t* signal_points, int fs_mult,
                             size_t* peak_index, int16_t* peak_value) {
  // fit_index maps sub-sample offsets to rows of kParabolaCoefficients;
  // which offsets are available depends on the sample-rate multiplier.
  uint16_t fit_index[13];
  if (fs_mult == 1) {
    fit_index[0] = 0;
    fit_index[1] = 8;
    fit_index[2] = 16;
  } else if (fs_mult == 2) {
    fit_index[0] = 0;
    fit_index[1] = 4;
    fit_index[2] = 8;
    fit_index[3] = 12;
    fit_index[4] = 16;
  } else if (fs_mult == 4) {
    fit_index[0] = 0;
    fit_index[1] = 2;
    fit_index[2] = 4;
    fit_index[3] = 6;
    fit_index[4] = 8;
    fit_index[5] = 10;
    fit_index[6] = 12;
    fit_index[7] = 14;
    fit_index[8] = 16;
  } else {
    fit_index[0] = 0;
    fit_index[1] = 1;
    fit_index[2] = 3;
    fit_index[3] = 4;
    fit_index[4] = 5;
    fit_index[5] = 7;
    fit_index[6] = 8;
    fit_index[7] = 9;
    fit_index[8] = 11;
    fit_index[9] = 12;
    fit_index[10] = 13;
    fit_index[11] = 15;
    fit_index[12] = 16;
  }

  // num = -3 * signal_points[0] + 4 * signal_points[1] - signal_points[2];
  // den = signal_points[0] - 2 * signal_points[1] + signal_points[2];
  int32_t num = (signal_points[0] * -3) + (signal_points[1] * 4)
      - signal_points[2];
  int32_t den = signal_points[0] + (signal_points[1] * -2) + signal_points[2];
  int32_t temp = num * 120;
  int flag = 1;
  int16_t stp = kParabolaCoefficients[fit_index[fs_mult]][0]
      - kParabolaCoefficients[fit_index[fs_mult - 1]][0];
  int16_t strt = (kParabolaCoefficients[fit_index[fs_mult]][0]
      + kParabolaCoefficients[fit_index[fs_mult - 1]][0]) / 2;
  int16_t lmt;
  if (temp < -den * strt) {
    // Parabola vertex lies to the left of the center sample: step the
    // sub-sample offset downwards until the vertex is bracketed.
    lmt = strt - stp;
    while (flag) {
      if ((flag == fs_mult) || (temp > -den * lmt)) {
        *peak_value = (den * kParabolaCoefficients[fit_index[fs_mult - flag]][1]
            + num * kParabolaCoefficients[fit_index[fs_mult - flag]][2]
            + signal_points[0] * 256) / 256;
        *peak_index = *peak_index * 2 * fs_mult - flag;
        flag = 0;
      } else {
        flag++;
        lmt -= stp;
      }
    }
  } else if (temp > -den * (strt + stp)) {
    // Vertex lies to the right of the center sample: step upwards.
    lmt = strt + 2 * stp;
    while (flag) {
      if ((flag == fs_mult) || (temp < -den * lmt)) {
        int32_t temp_term_1 =
            den * kParabolaCoefficients[fit_index[fs_mult + flag]][1];
        int32_t temp_term_2 =
            num * kParabolaCoefficients[fit_index[fs_mult + flag]][2];
        int32_t temp_term_3 = signal_points[0] * 256;
        *peak_value = (temp_term_1 + temp_term_2 + temp_term_3) / 256;
        *peak_index = *peak_index * 2 * fs_mult + flag;
        flag = 0;
      } else {
        flag++;
        lmt += stp;
      }
    }
  } else {
    // Vertex is at the center sample: keep its value, just upsample the index.
    *peak_value = signal_points[1];
    *peak_index = *peak_index * 2 * fs_mult;
  }
}
|
||||
|
||||
size_t DspHelper::MinDistortion(const int16_t* signal, size_t min_lag,
|
||||
size_t max_lag, size_t length,
|
||||
int32_t* distortion_value) {
|
||||
size_t best_index = 0;
|
||||
int32_t min_distortion = WEBRTC_SPL_WORD32_MAX;
|
||||
for (size_t i = min_lag; i <= max_lag; i++) {
|
||||
int32_t sum_diff = 0;
|
||||
const int16_t* data1 = signal;
|
||||
const int16_t* data2 = signal - i;
|
||||
for (size_t j = 0; j < length; j++) {
|
||||
sum_diff += WEBRTC_SPL_ABS_W32(data1[j] - data2[j]);
|
||||
}
|
||||
// Compare with previous minimum.
|
||||
if (sum_diff < min_distortion) {
|
||||
min_distortion = sum_diff;
|
||||
best_index = i;
|
||||
}
|
||||
}
|
||||
*distortion_value = min_distortion;
|
||||
return best_index;
|
||||
}
|
||||
|
||||
void DspHelper::CrossFade(const int16_t* input1, const int16_t* input2,
|
||||
size_t length, int16_t* mix_factor,
|
||||
int16_t factor_decrement, int16_t* output) {
|
||||
int16_t factor = *mix_factor;
|
||||
int16_t complement_factor = 16384 - factor;
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
output[i] =
|
||||
(factor * input1[i] + complement_factor * input2[i] + 8192) >> 14;
|
||||
factor -= factor_decrement;
|
||||
complement_factor += factor_decrement;
|
||||
}
|
||||
*mix_factor = factor;
|
||||
}
|
||||
|
||||
void DspHelper::UnmuteSignal(const int16_t* input, size_t length,
|
||||
int16_t* factor, int increment,
|
||||
int16_t* output) {
|
||||
uint16_t factor_16b = *factor;
|
||||
int32_t factor_32b = (static_cast<int32_t>(factor_16b) << 6) + 32;
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
output[i] = (factor_16b * input[i] + 8192) >> 14;
|
||||
factor_32b = std::max(factor_32b + increment, 0);
|
||||
factor_16b = std::min(16384, factor_32b >> 6);
|
||||
}
|
||||
*factor = factor_16b;
|
||||
}
|
||||
|
||||
void DspHelper::MuteSignal(int16_t* signal, int mute_slope, size_t length) {
|
||||
int32_t factor = (16384 << 6) + 32;
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
signal[i] = ((factor >> 6) * signal[i] + 8192) >> 14;
|
||||
factor -= mute_slope;
|
||||
}
|
||||
}
|
||||
|
||||
int DspHelper::DownsampleTo4kHz(const int16_t* input, size_t input_length,
|
||||
size_t output_length, int input_rate_hz,
|
||||
bool compensate_delay, int16_t* output) {
|
||||
// Set filter parameters depending on input frequency.
|
||||
// NOTE: The phase delay values are wrong compared to the true phase delay
|
||||
// of the filters. However, the error is preserved (through the +1 term) for
|
||||
// consistency.
|
||||
const int16_t* filter_coefficients; // Filter coefficients.
|
||||
size_t filter_length; // Number of coefficients.
|
||||
size_t filter_delay; // Phase delay in samples.
|
||||
int16_t factor; // Conversion rate (inFsHz / 8000).
|
||||
switch (input_rate_hz) {
|
||||
case 8000: {
|
||||
filter_length = 3;
|
||||
factor = 2;
|
||||
filter_coefficients = kDownsample8kHzTbl;
|
||||
filter_delay = 1 + 1;
|
||||
break;
|
||||
}
|
||||
case 16000: {
|
||||
filter_length = 5;
|
||||
factor = 4;
|
||||
filter_coefficients = kDownsample16kHzTbl;
|
||||
filter_delay = 2 + 1;
|
||||
break;
|
||||
}
|
||||
case 32000: {
|
||||
filter_length = 7;
|
||||
factor = 8;
|
||||
filter_coefficients = kDownsample32kHzTbl;
|
||||
filter_delay = 3 + 1;
|
||||
break;
|
||||
}
|
||||
case 48000: {
|
||||
filter_length = 7;
|
||||
factor = 12;
|
||||
filter_coefficients = kDownsample48kHzTbl;
|
||||
filter_delay = 3 + 1;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
assert(false);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!compensate_delay) {
|
||||
// Disregard delay compensation.
|
||||
filter_delay = 0;
|
||||
}
|
||||
|
||||
// Returns -1 if input signal is too short; 0 otherwise.
|
||||
return WebRtcSpl_DownsampleFast(
|
||||
&input[filter_length - 1], input_length - filter_length + 1, output,
|
||||
output_length, filter_coefficients, filter_length, factor, filter_delay);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
144
modules/audio_coding/neteq/dsp_helper.h
Normal file
144
modules/audio_coding/neteq/dsp_helper.h
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
|
||||
|
||||
#include <string.h> // Access to size_t.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This class contains various signal processing functions, all implemented as
|
||||
// static methods.
|
||||
class DspHelper {
|
||||
public:
|
||||
// Filter coefficients used when downsampling from the indicated sample rates
|
||||
// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12.
|
||||
static const int16_t kDownsample8kHzTbl[3];
|
||||
static const int16_t kDownsample16kHzTbl[5];
|
||||
static const int16_t kDownsample32kHzTbl[7];
|
||||
static const int16_t kDownsample48kHzTbl[7];
|
||||
|
||||
// Constants used to mute and unmute over 5 samples. The coefficients are
|
||||
// in Q15.
|
||||
static const int kMuteFactorStart8kHz = 27307;
|
||||
static const int kMuteFactorIncrement8kHz = -5461;
|
||||
static const int kUnmuteFactorStart8kHz = 5461;
|
||||
static const int kUnmuteFactorIncrement8kHz = 5461;
|
||||
static const int kMuteFactorStart16kHz = 29789;
|
||||
static const int kMuteFactorIncrement16kHz = -2979;
|
||||
static const int kUnmuteFactorStart16kHz = 2979;
|
||||
static const int kUnmuteFactorIncrement16kHz = 2979;
|
||||
static const int kMuteFactorStart32kHz = 31208;
|
||||
static const int kMuteFactorIncrement32kHz = -1560;
|
||||
static const int kUnmuteFactorStart32kHz = 1560;
|
||||
static const int kUnmuteFactorIncrement32kHz = 1560;
|
||||
static const int kMuteFactorStart48kHz = 31711;
|
||||
static const int kMuteFactorIncrement48kHz = -1057;
|
||||
static const int kUnmuteFactorStart48kHz = 1057;
|
||||
static const int kUnmuteFactorIncrement48kHz = 1057;
|
||||
|
||||
// Multiplies the signal with a gradually changing factor.
|
||||
// The first sample is multiplied with |factor| (in Q14). For each sample,
|
||||
// |factor| is increased (additive) by the |increment| (in Q20), which can
|
||||
// be negative. Returns the scale factor after the last increment.
|
||||
static int RampSignal(const int16_t* input,
|
||||
size_t length,
|
||||
int factor,
|
||||
int increment,
|
||||
int16_t* output);
|
||||
|
||||
// Same as above, but with the samples of |signal| being modified in-place.
|
||||
static int RampSignal(int16_t* signal,
|
||||
size_t length,
|
||||
int factor,
|
||||
int increment);
|
||||
|
||||
// Same as above, but processes |length| samples from |signal|, starting at
|
||||
// |start_index|.
|
||||
static int RampSignal(AudioVector* signal,
|
||||
size_t start_index,
|
||||
size_t length,
|
||||
int factor,
|
||||
int increment);
|
||||
|
||||
// Same as above, but for an AudioMultiVector.
|
||||
static int RampSignal(AudioMultiVector* signal,
|
||||
size_t start_index,
|
||||
size_t length,
|
||||
int factor,
|
||||
int increment);
|
||||
|
||||
// Peak detection with parabolic fit. Looks for |num_peaks| maxima in |data|,
|
||||
// having length |data_length| and sample rate multiplier |fs_mult|. The peak
|
||||
// locations and values are written to the arrays |peak_index| and
|
||||
// |peak_value|, respectively. Both arrays must hold at least |num_peaks|
|
||||
// elements.
|
||||
static void PeakDetection(int16_t* data, size_t data_length,
|
||||
size_t num_peaks, int fs_mult,
|
||||
size_t* peak_index, int16_t* peak_value);
|
||||
|
||||
// Estimates the height and location of a maximum. The three values in the
|
||||
// array |signal_points| are used as basis for a parabolic fit, which is then
|
||||
// used to find the maximum in an interpolated signal. The |signal_points| are
|
||||
// assumed to be from a 4 kHz signal, while the maximum, written to
|
||||
// |peak_index| and |peak_value| is given in the full sample rate, as
|
||||
// indicated by the sample rate multiplier |fs_mult|.
|
||||
static void ParabolicFit(int16_t* signal_points, int fs_mult,
|
||||
size_t* peak_index, int16_t* peak_value);
|
||||
|
||||
// Calculates the sum-abs-diff for |signal| when compared to a displaced
|
||||
// version of itself. Returns the displacement lag that results in the minimum
|
||||
// distortion. The resulting distortion is written to |distortion_value|.
|
||||
// The values of |min_lag| and |max_lag| are boundaries for the search.
|
||||
static size_t MinDistortion(const int16_t* signal, size_t min_lag,
|
||||
size_t max_lag, size_t length,
|
||||
int32_t* distortion_value);
|
||||
|
||||
// Mixes |length| samples from |input1| and |input2| together and writes the
|
||||
// result to |output|. The gain for |input1| starts at |mix_factor| (Q14) and
|
||||
// is decreased by |factor_decrement| (Q14) for each sample. The gain for
|
||||
// |input2| is the complement 16384 - mix_factor.
|
||||
static void CrossFade(const int16_t* input1, const int16_t* input2,
|
||||
size_t length, int16_t* mix_factor,
|
||||
int16_t factor_decrement, int16_t* output);
|
||||
|
||||
// Scales |input| with an increasing gain. Applies |factor| (Q14) to the first
|
||||
// sample and increases the gain by |increment| (Q20) for each sample. The
|
||||
// result is written to |output|. |length| samples are processed.
|
||||
static void UnmuteSignal(const int16_t* input, size_t length, int16_t* factor,
|
||||
int increment, int16_t* output);
|
||||
|
||||
// Starts at unity gain and gradually fades out |signal|. For each sample,
|
||||
// the gain is reduced by |mute_slope| (Q14). |length| samples are processed.
|
||||
static void MuteSignal(int16_t* signal, int mute_slope, size_t length);
|
||||
|
||||
// Downsamples |input| from |sample_rate_hz| to 4 kHz sample rate. The input
|
||||
// has |input_length| samples, and the method will write |output_length|
|
||||
// samples to |output|. Compensates for the phase delay of the downsampling
|
||||
// filters if |compensate_delay| is true. Returns -1 if the input is too short
|
||||
// to produce |output_length| samples, otherwise 0.
|
||||
static int DownsampleTo4kHz(const int16_t* input, size_t input_length,
|
||||
size_t output_length, int input_rate_hz,
|
||||
bool compensate_delay, int16_t* output);
|
||||
|
||||
private:
|
||||
// Table of constants used in method DspHelper::ParabolicFit().
|
||||
static const int16_t kParabolaCoefficients[17][3];
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DspHelper);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DSP_HELPER_H_
|
||||
89
modules/audio_coding/neteq/dsp_helper_unittest.cc
Normal file
89
modules/audio_coding/neteq/dsp_helper_unittest.cc
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
TEST(DspHelper, RampSignalArray) {
|
||||
static const int kLen = 100;
|
||||
int16_t input[kLen];
|
||||
int16_t output[kLen];
|
||||
// Fill input with 1000.
|
||||
for (int i = 0; i < kLen; ++i) {
|
||||
input[i] = 1000;
|
||||
}
|
||||
int start_factor = 0;
|
||||
// Ramp from 0 to 1 (in Q14) over the array. Note that |increment| is in Q20,
|
||||
// while the factor is in Q14, hence the shift by 6.
|
||||
int increment = (16384 << 6) / kLen;
|
||||
|
||||
// Test first method.
|
||||
int stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment,
|
||||
output);
|
||||
EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14.
|
||||
for (int i = 0; i < kLen; ++i) {
|
||||
EXPECT_EQ(1000 * i / kLen, output[i]);
|
||||
}
|
||||
|
||||
// Test second method. (Note that this modifies |input|.)
|
||||
stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment);
|
||||
EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14.
|
||||
for (int i = 0; i < kLen; ++i) {
|
||||
EXPECT_EQ(1000 * i / kLen, input[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(DspHelper, RampSignalAudioMultiVector) {
|
||||
static const int kLen = 100;
|
||||
static const int kChannels = 5;
|
||||
AudioMultiVector input(kChannels, kLen * 3);
|
||||
// Fill input with 1000.
|
||||
for (int i = 0; i < kLen * 3; ++i) {
|
||||
for (int channel = 0; channel < kChannels; ++channel) {
|
||||
input[channel][i] = 1000;
|
||||
}
|
||||
}
|
||||
// We want to start ramping at |start_index| and keep ramping for |kLen|
|
||||
// samples.
|
||||
int start_index = kLen;
|
||||
int start_factor = 0;
|
||||
// Ramp from 0 to 1 (in Q14) in |kLen| samples. Note that |increment| is in
|
||||
// Q20, while the factor is in Q14, hence the shift by 6.
|
||||
int increment = (16384 << 6) / kLen;
|
||||
|
||||
int stop_factor = DspHelper::RampSignal(&input, start_index, kLen,
|
||||
start_factor, increment);
|
||||
EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14.
|
||||
// Verify that the first |kLen| samples are left untouched.
|
||||
int i;
|
||||
for (i = 0; i < kLen; ++i) {
|
||||
for (int channel = 0; channel < kChannels; ++channel) {
|
||||
EXPECT_EQ(1000, input[channel][i]);
|
||||
}
|
||||
}
|
||||
// Verify that the next block of |kLen| samples are ramped.
|
||||
for (; i < 2 * kLen; ++i) {
|
||||
for (int channel = 0; channel < kChannels; ++channel) {
|
||||
EXPECT_EQ(1000 * (i - kLen) / kLen, input[channel][i]);
|
||||
}
|
||||
}
|
||||
// Verify the last |kLen| samples are left untouched.
|
||||
for (; i < 3 * kLen; ++i) {
|
||||
for (int channel = 0; channel < kChannels; ++channel) {
|
||||
EXPECT_EQ(1000, input[channel][i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace webrtc
|
||||
248
modules/audio_coding/neteq/dtmf_buffer.cc
Normal file
248
modules/audio_coding/neteq/dtmf_buffer.cc
Normal file
@ -0,0 +1,248 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <algorithm> // max
|
||||
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
#include "webrtc/rtc_base/logging.h"
|
||||
|
||||
// Modify the code to obtain backwards bit-exactness. Once bit-exactness is no
|
||||
// longer required, this #define should be removed (and the code that it
|
||||
// enables).
|
||||
#define LEGACY_BITEXACT
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
DtmfBuffer::DtmfBuffer(int fs_hz) {
|
||||
SetSampleRate(fs_hz);
|
||||
}
|
||||
|
||||
DtmfBuffer::~DtmfBuffer() = default;
|
||||
|
||||
void DtmfBuffer::Flush() {
|
||||
buffer_.clear();
|
||||
}
|
||||
|
||||
// The ParseEvent method parses 4 bytes from |payload| according to this format
|
||||
// from RFC 4733:
|
||||
//
|
||||
// 0 1 2 3
|
||||
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// | event |E|R| volume | duration |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
//
|
||||
// Legend (adapted from RFC 4733)
|
||||
// - event: The event field is a number between 0 and 255 identifying a
|
||||
// specific telephony event. The buffer will not accept any event
|
||||
// numbers larger than 15.
|
||||
// - E: If set to a value of one, the "end" bit indicates that this
|
||||
// packet contains the end of the event. For long-lasting events
|
||||
// that have to be split into segments, only the final packet for
|
||||
// the final segment will have the E bit set.
|
||||
// - R: Reserved.
|
||||
// - volume: For DTMF digits and other events representable as tones, this
|
||||
// field describes the power level of the tone, expressed in dBm0
|
||||
// after dropping the sign. Power levels range from 0 to -63 dBm0.
|
||||
// Thus, larger values denote lower volume. The buffer discards
|
||||
// values larger than 36 (i.e., lower than -36 dBm0).
|
||||
// - duration: The duration field indicates the duration of the event or segment
|
||||
// being reported, in timestamp units, expressed as an unsigned
|
||||
// integer in network byte order. For a non-zero value, the event
|
||||
// or segment began at the instant identified by the RTP timestamp
|
||||
// and has so far lasted as long as indicated by this parameter.
|
||||
// The event may or may not have ended. If the event duration
|
||||
// exceeds the maximum representable by the duration field, the
|
||||
// event is split into several contiguous segments. The buffer will
|
||||
// discard zero-duration events.
|
||||
//
|
||||
int DtmfBuffer::ParseEvent(uint32_t rtp_timestamp,
|
||||
const uint8_t* payload,
|
||||
size_t payload_length_bytes,
|
||||
DtmfEvent* event) {
|
||||
RTC_CHECK(payload);
|
||||
RTC_CHECK(event);
|
||||
if (payload_length_bytes < 4) {
|
||||
LOG(LS_WARNING) << "ParseEvent payload too short";
|
||||
return kPayloadTooShort;
|
||||
}
|
||||
|
||||
event->event_no = payload[0];
|
||||
event->end_bit = ((payload[1] & 0x80) != 0);
|
||||
event->volume = (payload[1] & 0x3F);
|
||||
event->duration = payload[2] << 8 | payload[3];
|
||||
event->timestamp = rtp_timestamp;
|
||||
return kOK;
|
||||
}
|
||||
|
||||
// Inserts a DTMF event into the buffer. The event should be parsed from the
|
||||
// bit stream using the ParseEvent method above before inserting it in the
|
||||
// buffer.
|
||||
// DTMF events can be quite long, and in most cases the duration of the event
|
||||
// is not known when the first packet describing it is sent. To deal with that,
|
||||
// the RFC 4733 specifies that multiple packets are sent for one and the same
|
||||
// event as it is being created (typically, as the user is pressing the key).
|
||||
// These packets will all share the same start timestamp and event number,
|
||||
// while the duration will be the cumulative duration from the start. When
|
||||
// inserting a new event, the InsertEvent method tries to find a matching event
|
||||
// already in the buffer. If so, the new event is simply merged with the
|
||||
// existing one.
|
||||
int DtmfBuffer::InsertEvent(const DtmfEvent& event) {
|
||||
if (event.event_no < 0 || event.event_no > 15 ||
|
||||
event.volume < 0 || event.volume > 63 ||
|
||||
event.duration <= 0 || event.duration > 65535) {
|
||||
LOG(LS_WARNING) << "InsertEvent invalid parameters";
|
||||
return kInvalidEventParameters;
|
||||
}
|
||||
DtmfList::iterator it = buffer_.begin();
|
||||
while (it != buffer_.end()) {
|
||||
if (MergeEvents(it, event)) {
|
||||
// A matching event was found and the new event was merged.
|
||||
return kOK;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
buffer_.push_back(event);
|
||||
// Sort the buffer using CompareEvents to rank the events.
|
||||
buffer_.sort(CompareEvents);
|
||||
return kOK;
|
||||
}
|
||||
|
||||
bool DtmfBuffer::GetEvent(uint32_t current_timestamp, DtmfEvent* event) {
|
||||
DtmfList::iterator it = buffer_.begin();
|
||||
while (it != buffer_.end()) {
|
||||
// |event_end| is an estimate of where the current event ends. If the end
|
||||
// bit is set, we know that the event ends at |timestamp| + |duration|.
|
||||
uint32_t event_end = it->timestamp + it->duration;
|
||||
#ifdef LEGACY_BITEXACT
|
||||
bool next_available = false;
|
||||
#endif
|
||||
if (!it->end_bit) {
|
||||
// If the end bit is not set, we allow extrapolation of the event for
|
||||
// some time.
|
||||
event_end += max_extrapolation_samples_;
|
||||
DtmfList::iterator next = it;
|
||||
++next;
|
||||
if (next != buffer_.end()) {
|
||||
// If there is a next event in the buffer, we will not extrapolate over
|
||||
// the start of that new event.
|
||||
event_end = std::min(event_end, next->timestamp);
|
||||
#ifdef LEGACY_BITEXACT
|
||||
next_available = true;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (current_timestamp >= it->timestamp
|
||||
&& current_timestamp <= event_end) { // TODO(hlundin): Change to <.
|
||||
// Found a matching event.
|
||||
if (event) {
|
||||
event->event_no = it->event_no;
|
||||
event->end_bit = it->end_bit;
|
||||
event->volume = it->volume;
|
||||
event->duration = it->duration;
|
||||
event->timestamp = it->timestamp;
|
||||
}
|
||||
#ifdef LEGACY_BITEXACT
|
||||
if (it->end_bit &&
|
||||
current_timestamp + frame_len_samples_ >= event_end) {
|
||||
// We are done playing this. Erase the event.
|
||||
buffer_.erase(it);
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
} else if (current_timestamp > event_end) { // TODO(hlundin): Change to >=.
|
||||
// Erase old event. Operation returns a valid pointer to the next element
|
||||
// in the list.
|
||||
#ifdef LEGACY_BITEXACT
|
||||
if (!next_available) {
|
||||
if (event) {
|
||||
event->event_no = it->event_no;
|
||||
event->end_bit = it->end_bit;
|
||||
event->volume = it->volume;
|
||||
event->duration = it->duration;
|
||||
event->timestamp = it->timestamp;
|
||||
}
|
||||
it = buffer_.erase(it);
|
||||
return true;
|
||||
} else {
|
||||
it = buffer_.erase(it);
|
||||
}
|
||||
#else
|
||||
it = buffer_.erase(it);
|
||||
#endif
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t DtmfBuffer::Length() const {
|
||||
return buffer_.size();
|
||||
}
|
||||
|
||||
bool DtmfBuffer::Empty() const {
|
||||
return buffer_.empty();
|
||||
}
|
||||
|
||||
int DtmfBuffer::SetSampleRate(int fs_hz) {
|
||||
if (fs_hz != 8000 &&
|
||||
fs_hz != 16000 &&
|
||||
fs_hz != 32000 &&
|
||||
fs_hz != 48000) {
|
||||
return kInvalidSampleRate;
|
||||
}
|
||||
max_extrapolation_samples_ = 7 * fs_hz / 100;
|
||||
frame_len_samples_ = fs_hz / 100;
|
||||
return kOK;
|
||||
}
|
||||
|
||||
// The method returns true if the two events are considered to be the same.
|
||||
// The are defined as equal if they share the same timestamp and event number.
|
||||
// The special case with long-lasting events that have to be split into segments
|
||||
// is not handled in this method. These will be treated as separate events in
|
||||
// the buffer.
|
||||
bool DtmfBuffer::SameEvent(const DtmfEvent& a, const DtmfEvent& b) {
|
||||
return (a.event_no == b.event_no) && (a.timestamp == b.timestamp);
|
||||
}
|
||||
|
||||
bool DtmfBuffer::MergeEvents(DtmfList::iterator it, const DtmfEvent& event) {
|
||||
if (SameEvent(*it, event)) {
|
||||
if (!it->end_bit) {
|
||||
// Do not extend the duration of an event for which the end bit was
|
||||
// already received.
|
||||
it->duration = std::max(event.duration, it->duration);
|
||||
}
|
||||
if (event.end_bit) {
|
||||
it->end_bit = true;
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if |a| goes before |b| in the sorting order ("|a| < |b|").
|
||||
// The events are ranked using their start timestamp (taking wrap-around into
|
||||
// account). In the unlikely situation that two events share the same start
|
||||
// timestamp, the event number is used to rank the two. Note that packets
|
||||
// that belong to the same events, and therefore sharing the same start
|
||||
// timestamp, have already been merged before the sort method is called.
|
||||
bool DtmfBuffer::CompareEvents(const DtmfEvent& a, const DtmfEvent& b) {
|
||||
if (a.timestamp == b.timestamp) {
|
||||
return a.event_no < b.event_no;
|
||||
}
|
||||
// Take wrap-around into account.
|
||||
return (static_cast<uint32_t>(b.timestamp - a.timestamp) < 0xFFFFFFFF / 2);
|
||||
}
|
||||
} // namespace webrtc
|
||||
114
modules/audio_coding/neteq/dtmf_buffer.h
Normal file
114
modules/audio_coding/neteq/dtmf_buffer.h
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
|
||||
|
||||
#include <list>
|
||||
#include <string> // size_t
|
||||
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
struct DtmfEvent {
|
||||
uint32_t timestamp;
|
||||
int event_no;
|
||||
int volume;
|
||||
int duration;
|
||||
bool end_bit;
|
||||
|
||||
// Constructors
|
||||
DtmfEvent()
|
||||
: timestamp(0),
|
||||
event_no(0),
|
||||
volume(0),
|
||||
duration(0),
|
||||
end_bit(false) {
|
||||
}
|
||||
DtmfEvent(uint32_t ts, int ev, int vol, int dur, bool end)
|
||||
: timestamp(ts),
|
||||
event_no(ev),
|
||||
volume(vol),
|
||||
duration(dur),
|
||||
end_bit(end) {
|
||||
}
|
||||
};
|
||||
|
||||
// This is the buffer holding DTMF events while waiting for them to be played.
|
||||
class DtmfBuffer {
|
||||
public:
|
||||
enum BufferReturnCodes {
|
||||
kOK = 0,
|
||||
kInvalidPointer,
|
||||
kPayloadTooShort,
|
||||
kInvalidEventParameters,
|
||||
kInvalidSampleRate
|
||||
};
|
||||
|
||||
// Set up the buffer for use at sample rate |fs_hz|.
|
||||
explicit DtmfBuffer(int fs_hz);
|
||||
|
||||
virtual ~DtmfBuffer();
|
||||
|
||||
// Flushes the buffer.
|
||||
virtual void Flush();
|
||||
|
||||
// Static method to parse 4 bytes from |payload| as a DTMF event (RFC 4733)
|
||||
// and write the parsed information into the struct |event|. Input variable
|
||||
// |rtp_timestamp| is simply copied into the struct.
|
||||
static int ParseEvent(uint32_t rtp_timestamp,
|
||||
const uint8_t* payload,
|
||||
size_t payload_length_bytes,
|
||||
DtmfEvent* event);
|
||||
|
||||
// Inserts |event| into the buffer. The method looks for a matching event and
|
||||
// merges the two if a match is found.
|
||||
virtual int InsertEvent(const DtmfEvent& event);
|
||||
|
||||
// Checks if a DTMF event should be played at time |current_timestamp|. If so,
|
||||
// the method returns true; otherwise false. The parameters of the event to
|
||||
// play will be written to |event|.
|
||||
virtual bool GetEvent(uint32_t current_timestamp, DtmfEvent* event);
|
||||
|
||||
// Number of events in the buffer.
|
||||
virtual size_t Length() const;
|
||||
|
||||
virtual bool Empty() const;
|
||||
|
||||
// Set a new sample rate.
|
||||
virtual int SetSampleRate(int fs_hz);
|
||||
|
||||
private:
|
||||
typedef std::list<DtmfEvent> DtmfList;
|
||||
|
||||
int max_extrapolation_samples_;
|
||||
int frame_len_samples_; // TODO(hlundin): Remove this later.
|
||||
|
||||
// Compares two events and returns true if they are the same.
|
||||
static bool SameEvent(const DtmfEvent& a, const DtmfEvent& b);
|
||||
|
||||
// Merges |event| to the event pointed out by |it|. The method checks that
|
||||
// the two events are the same (using the SameEvent method), and merges them
|
||||
// if that was the case, returning true. If the events are not the same, false
|
||||
// is returned.
|
||||
bool MergeEvents(DtmfList::iterator it, const DtmfEvent& event);
|
||||
|
||||
// Method used by the sort algorithm to rank events in the buffer.
|
||||
static bool CompareEvents(const DtmfEvent& a, const DtmfEvent& b);
|
||||
|
||||
DtmfList buffer_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DtmfBuffer);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_BUFFER_H_
|
||||
301
modules/audio_coding/neteq/dtmf_buffer_unittest.cc
Normal file
301
modules/audio_coding/neteq/dtmf_buffer_unittest.cc
Normal file
@ -0,0 +1,301 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h"
|
||||
|
||||
#ifdef WIN32
|
||||
#include <winsock2.h> // ntohl()
|
||||
#else
|
||||
#include <arpa/inet.h> // ntohl()
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
// Modify the tests so that they pass with the modifications done to DtmfBuffer
|
||||
// for backwards bit-exactness. Once bit-exactness is no longer required, this
|
||||
// #define should be removed (and the code that it enables).
|
||||
#define LEGACY_BITEXACT
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static int sample_rate_hz = 8000;
|
||||
|
||||
static uint32_t MakeDtmfPayload(int event, bool end, int volume, int duration) {
|
||||
uint32_t payload = 0;
|
||||
// 0 1 2 3
|
||||
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// | event |E|R| volume | duration |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
payload |= (event & 0x00FF) << 24;
|
||||
payload |= (end ? 0x00800000 : 0x00000000);
|
||||
payload |= (volume & 0x003F) << 16;
|
||||
payload |= (duration & 0xFFFF);
|
||||
payload = ntohl(payload);
|
||||
return payload;
|
||||
}
|
||||
|
||||
static bool EqualEvents(const DtmfEvent& a,
|
||||
const DtmfEvent& b) {
|
||||
return (a.duration == b.duration
|
||||
&& a.end_bit == b.end_bit
|
||||
&& a.event_no == b.event_no
|
||||
&& a.timestamp == b.timestamp
|
||||
&& a.volume == b.volume);
|
||||
}
|
||||
|
||||
TEST(DtmfBuffer, CreateAndDestroy) {
|
||||
DtmfBuffer* buffer = new DtmfBuffer(sample_rate_hz);
|
||||
delete buffer;
|
||||
}
|
||||
|
||||
// Test the event parser.
|
||||
TEST(DtmfBuffer, ParseEvent) {
|
||||
int event_no = 7;
|
||||
bool end_bit = true;
|
||||
int volume = 17;
|
||||
int duration = 4711;
|
||||
uint32_t timestamp = 0x12345678;
|
||||
uint32_t payload = MakeDtmfPayload(event_no, end_bit, volume, duration);
|
||||
uint8_t* payload_ptr = reinterpret_cast<uint8_t*>(&payload);
|
||||
DtmfEvent event;
|
||||
EXPECT_EQ(DtmfBuffer::kOK,
|
||||
DtmfBuffer::ParseEvent(timestamp, payload_ptr, sizeof(payload),
|
||||
&event));
|
||||
EXPECT_EQ(duration, event.duration);
|
||||
EXPECT_EQ(end_bit, event.end_bit);
|
||||
EXPECT_EQ(event_no, event.event_no);
|
||||
EXPECT_EQ(timestamp, event.timestamp);
|
||||
EXPECT_EQ(volume, event.volume);
|
||||
|
||||
EXPECT_EQ(DtmfBuffer::kPayloadTooShort,
|
||||
DtmfBuffer::ParseEvent(timestamp, payload_ptr, 3, &event));
|
||||
}
|
||||
|
||||
// Insert a single event and fetch it at various "current" timestamps:
// before it starts (not returned), at its start and mid-event (returned,
// stays in the buffer), and after it ends (not returned, erased).
TEST(DtmfBuffer, SimpleInsertAndGet) {
  int event_no = 7;
  bool end_bit = true;
  int volume = 17;
  int duration = 4711;
  uint32_t timestamp = 0x12345678;
  DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
  DtmfBuffer buffer(sample_rate_hz);
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
  EXPECT_EQ(1u, buffer.Length());
  EXPECT_FALSE(buffer.Empty());
  DtmfEvent out_event;
  // Too early to get event.
  EXPECT_FALSE(buffer.GetEvent(timestamp - 10, &out_event));
  EXPECT_EQ(1u, buffer.Length());
  EXPECT_FALSE(buffer.Empty());
  // Get the event at its starting timestamp.
  EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
  EXPECT_TRUE(EqualEvents(event, out_event));
  EXPECT_EQ(1u, buffer.Length());
  EXPECT_FALSE(buffer.Empty());
  // Get the event some time into the event.
  EXPECT_TRUE(buffer.GetEvent(timestamp + duration / 2, &out_event));
  EXPECT_TRUE(EqualEvents(event, out_event));
  EXPECT_EQ(1u, buffer.Length());
  EXPECT_FALSE(buffer.Empty());
  // Give a "current" timestamp after the event has ended.
#ifdef LEGACY_BITEXACT
  // The legacy behavior returned the ended event one extra time.
  EXPECT_TRUE(buffer.GetEvent(timestamp + duration + 10, &out_event));
#endif
  EXPECT_FALSE(buffer.GetEvent(timestamp + duration + 10, &out_event));
  // The expired event must have been removed from the buffer.
  EXPECT_EQ(0u, buffer.Length());
  EXPECT_TRUE(buffer.Empty());
}
|
||||
|
||||
// Insert three RTP retransmissions/updates of the same event (same start
// timestamp, growing duration, end bit set on the last). The buffer must
// merge them into one event carrying the final duration and end bit.
TEST(DtmfBuffer, MergingPackets) {
  int event_no = 0;
  bool end_bit = false;
  int volume = 17;
  int duration = 80;
  uint32_t timestamp = 0x12345678;
  DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
  DtmfBuffer buffer(sample_rate_hz);
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));

  // Update packet: same event, longer duration.
  event.duration += 80;
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));

  // Final packet: longer duration again, and the end bit set.
  event.duration += 80;
  event.end_bit = true;
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));

  // All three inserts must have been merged into a single buffered event.
  EXPECT_EQ(1u, buffer.Length());

  DtmfEvent out_event;
  EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
  EXPECT_TRUE(EqualEvents(event, out_event));
}
|
||||
|
||||
// This test case inserts one shorter event completely overlapped by one longer
// event. The expected outcome is that only the longer event is played.
TEST(DtmfBuffer, OverlappingEvents) {
  int event_no = 0;
  bool end_bit = true;
  int volume = 1;
  int duration = 80;
  // The short event starts 80 samples into the long event below.
  uint32_t timestamp = 0x12345678 + 80;
  DtmfEvent short_event(timestamp, event_no, volume, duration, end_bit);
  DtmfBuffer buffer(sample_rate_hz);
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(short_event));

  // Insert the long event as three merged packets (see MergingPackets),
  // starting 80 samples before the short event and ending after it.
  event_no = 10;
  end_bit = false;
  timestamp = 0x12345678;
  DtmfEvent long_event(timestamp, event_no, volume, duration, end_bit);
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));

  long_event.duration += 80;
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));

  long_event.duration += 80;
  long_event.end_bit = true;
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));

  // Both distinct events remain stored.
  EXPECT_EQ(2u, buffer.Length());

  DtmfEvent out_event;
  // Expect to get the long event.
  EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
  EXPECT_TRUE(EqualEvents(long_event, out_event));
  // Expect no more events.
#ifdef LEGACY_BITEXACT
  // The legacy behavior also played out the overlapped short event.
  EXPECT_TRUE(buffer.GetEvent(timestamp + long_event.duration + 10,
                              &out_event));
  EXPECT_TRUE(EqualEvents(long_event, out_event));
  EXPECT_TRUE(buffer.GetEvent(timestamp + long_event.duration + 10,
                              &out_event));
  EXPECT_TRUE(EqualEvents(short_event, out_event));
#else
  EXPECT_FALSE(buffer.GetEvent(timestamp + long_event.duration + 10,
                               &out_event));
#endif
  EXPECT_TRUE(buffer.Empty());
}
|
||||
|
||||
// An event whose end bit was never received is extrapolated: it keeps being
// returned for a while past its nominal end, until either a newer event takes
// over or the extrapolation window (560 samples here) runs out.
TEST(DtmfBuffer, ExtrapolationTime) {
  int event_no = 0;
  bool end_bit = false;  // No end bit => event is eligible for extrapolation.
  int volume = 1;
  int duration = 80;
  uint32_t timestamp = 0x12345678;
  DtmfEvent event1(timestamp, event_no, volume, duration, end_bit);
  DtmfBuffer buffer(sample_rate_hz);
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
  EXPECT_EQ(1u, buffer.Length());

  DtmfEvent out_event;
  // Get the event at the start.
  EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
  EXPECT_TRUE(EqualEvents(event1, out_event));
  // Also get the event 100 samples after the end of the event (since we're
  // missing the end bit).
  uint32_t timestamp_now = timestamp + duration + 100;
  EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
  EXPECT_TRUE(EqualEvents(event1, out_event));
  // Insert another event starting back-to-back with the previous event.
  timestamp += duration;
  event_no = 1;
  DtmfEvent event2(timestamp, event_no, volume, duration, end_bit);
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
  EXPECT_EQ(2u, buffer.Length());
  // Now we expect to get the new event when supplying |timestamp_now|.
  EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
  EXPECT_TRUE(EqualEvents(event2, out_event));
  // Expect the the first event to be erased now.
  EXPECT_EQ(1u, buffer.Length());
  // Move |timestamp_now| to more than 560 samples after the end of the second
  // event. Expect that event to be erased.
  timestamp_now = timestamp + duration + 600;
#ifdef LEGACY_BITEXACT
  // The legacy behavior returned the expired event one extra time.
  EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
#endif
  EXPECT_FALSE(buffer.GetEvent(timestamp_now, &out_event));
  EXPECT_TRUE(buffer.Empty());
}
|
||||
|
||||
// Two events straddling the 32-bit RTP timestamp wraparound (one ending at
// 0xFFFFFFFF, one starting at 0) must be stored and retrieved correctly,
// regardless of insertion order.
TEST(DtmfBuffer, TimestampWraparound) {
  int event_no = 0;
  bool end_bit = true;
  int volume = 1;
  int duration = 80;
  // event1 ends exactly at the timestamp wrap point.
  uint32_t timestamp1 = 0xFFFFFFFF - duration;
  DtmfEvent event1(timestamp1, event_no, volume, duration, end_bit);
  // event2 starts right after the wrap.
  uint32_t timestamp2 = 0;
  DtmfEvent event2(timestamp2, event_no, volume, duration, end_bit);
  DtmfBuffer buffer(sample_rate_hz);
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
  EXPECT_EQ(2u, buffer.Length());
  DtmfEvent out_event;
  // The pre-wrap event must be the one returned at |timestamp1|.
  EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
  EXPECT_TRUE(EqualEvents(event1, out_event));
#ifdef LEGACY_BITEXACT
  EXPECT_EQ(1u, buffer.Length());
#else
  EXPECT_EQ(2u, buffer.Length());
#endif

  buffer.Flush();
  // Reverse the insert order. Expect same results.
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
  EXPECT_EQ(2u, buffer.Length());
  EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
  EXPECT_TRUE(EqualEvents(event1, out_event));
#ifdef LEGACY_BITEXACT
  EXPECT_EQ(1u, buffer.Length());
#else
  EXPECT_EQ(2u, buffer.Length());
#endif
}
|
||||
|
||||
// InsertEvent must reject out-of-range event numbers (valid: 0-15), volumes
// (valid: 0-63) and durations (valid: 1-0xFFFF) with
// kInvalidEventParameters, and accept a valid event afterwards.
TEST(DtmfBuffer, InvalidEvents) {
  int event_no = 0;
  bool end_bit = true;
  int volume = 1;
  int duration = 80;
  uint32_t timestamp = 0x12345678;
  DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
  DtmfBuffer buffer(sample_rate_hz);

  // Invalid event number.
  event.event_no = -1;
  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.event_no = 16;
  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.event_no = 0;  // Valid value;

  // Invalid volume.
  event.volume = -1;
  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.volume = 64;
  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.volume = 0;  // Valid value;

  // Invalid duration.
  event.duration = -1;
  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.duration = 0;
  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.duration = 0xFFFF + 1;
  EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  event.duration = 1;  // Valid value;

  // Finish with a valid event, just to verify that all is ok.
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
}
|
||||
} // namespace webrtc
|
||||
218
modules/audio_coding/neteq/dtmf_tone_generator.cc
Normal file
218
modules/audio_coding/neteq/dtmf_tone_generator.cc
Normal file
@ -0,0 +1,218 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// This class provides a generator for DTMF tones. The tone generation is based
|
||||
// on a sinusoid recursion. Each sinusoid is generated using a recursion
|
||||
// formula; x[n] = a * x[n-1] - x[n-2], where the coefficient
|
||||
// a = 2*cos(2*pi*f/fs). The recursion is started with x[-1] = 0 and
|
||||
// x[-2] = sin(2*pi*f/fs). (Note that with this initialization, the resulting
|
||||
// sinusoid gets a "negative" rotation; x[n] = sin(-2*pi*f/fs * n + phi), but
|
||||
// kept this way due to historical reasons.)
|
||||
// TODO(hlundin): Change to positive rotation?
|
||||
//
|
||||
// Each key on the telephone keypad corresponds to an "event", 0-15. Each event
|
||||
// is mapped to a tone pair, with a low and a high frequency. There are four
|
||||
// low and four high frequencies, each corresponding to a row and column,
|
||||
// respectively, on the keypad as illustrated below.
|
||||
//
|
||||
// 1209 Hz 1336 Hz 1477 Hz 1633 Hz
|
||||
// 697 Hz 1 2 3 12
|
||||
// 770 Hz 4 5 6 13
|
||||
// 852 Hz 7 8 9 14
|
||||
// 941 Hz 10 0 11 15
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h"
|
||||
|
||||
#include "webrtc/rtc_base/arraysize.h"
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// The filter coefficient a = 2*cos(2*pi*f/fs) for the low frequency tone, for
// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
// Rows are indexed by sample rate, columns by event number. Values are in Q14.
const int DtmfToneGenerator::kCoeff1[4][16] = {
    { 24219, 27980, 27980, 27980, 26956, 26956, 26956, 25701, 25701, 25701,
      24219, 24219, 27980, 26956, 25701, 24219 },
    { 30556, 31548, 31548, 31548, 31281, 31281, 31281, 30951, 30951, 30951,
      30556, 30556, 31548, 31281, 30951, 30556 },
    { 32210, 32462, 32462, 32462, 32394, 32394, 32394, 32311, 32311, 32311,
      32210, 32210, 32462, 32394, 32311, 32210 },
    { 32520, 32632, 32632, 32632, 32602, 32602, 32602, 32564, 32564, 32564,
      32520, 32520, 32632, 32602, 32564, 32520 } };

// The filter coefficient a = 2*cos(2*pi*f/fs) for the high frequency tone, for
// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
// Values are in Q14.
const int DtmfToneGenerator::kCoeff2[4][16] = {
    { 16325, 19073, 16325, 13085, 19073, 16325, 13085, 19073, 16325, 13085,
      19073, 13085, 9315, 9315, 9315, 9315},
    { 28361, 29144, 28361, 27409, 29144, 28361, 27409, 29144, 28361, 27409,
      29144, 27409, 26258, 26258, 26258, 26258},
    { 31647, 31849, 31647, 31400, 31849, 31647, 31400, 31849, 31647, 31400,
      31849, 31400, 31098, 31098, 31098, 31098},
    { 32268, 32359, 32268, 32157, 32359, 32268, 32157, 32359, 32268, 32157,
      32359, 32157, 32022, 32022, 32022, 32022} };

// The initialization value x[-2] = sin(2*pi*f/fs) for the low frequency tone,
// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
// Values are in Q14.
const int DtmfToneGenerator::kInitValue1[4][16] = {
    { 11036, 8528, 8528, 8528, 9315, 9315, 9315, 10163, 10163, 10163, 11036,
      11036, 8528, 9315, 10163, 11036},
    { 5918, 4429, 4429, 4429, 4879, 4879, 4879, 5380, 5380, 5380, 5918, 5918,
      4429, 4879, 5380, 5918},
    { 3010, 2235, 2235, 2235, 2468, 2468, 2468, 2728, 2728, 2728, 3010, 3010,
      2235, 2468, 2728, 3010},
    { 2013, 1493, 1493, 1493, 1649, 1649, 1649, 1823, 1823, 1823, 2013, 2013,
      1493, 1649, 1823, 2013 } };

// The initialization value x[-2] = sin(2*pi*f/fs) for the high frequency tone,
// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
// Values are in Q14.
const int DtmfToneGenerator::kInitValue2[4][16] = {
    { 14206, 13323, 14206, 15021, 13323, 14206, 15021, 13323, 14206, 15021,
      13323, 15021, 15708, 15708, 15708, 15708},
    { 8207, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8979,
      9801, 9801, 9801, 9801},
    { 4249, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4685,
      5164, 5164, 5164, 5164},
    { 2851, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 3148,
      3476, 3476, 3476, 3476} };

// Amplitude multipliers for volume values 0 through 63, corresponding to
// 0 dBm0 through -63 dBm0. Values are in Q14, generated by:
// for a in range(0, 64):
//   print round(16141.0 * 10**(-float(a)/20))
const int DtmfToneGenerator::kAmplitude[64] = {
    16141, 14386, 12821, 11427, 10184, 9077, 8090, 7210, 6426, 5727, 5104, 4549,
    4054, 3614, 3221, 2870, 2558, 2280, 2032, 1811, 1614, 1439, 1282, 1143,
    1018, 908, 809, 721, 643, 573, 510, 455, 405, 361, 322, 287, 256, 228, 203,
    181, 161, 144, 128, 114, 102, 91, 81, 72, 64, 57, 51, 45, 41, 36, 32, 29,
    26, 23, 20, 18, 16, 14, 13, 11 };
|
||||
|
||||
// Constructor.
|
||||
DtmfToneGenerator::DtmfToneGenerator()
|
||||
: initialized_(false),
|
||||
coeff1_(0),
|
||||
coeff2_(0),
|
||||
amplitude_(0) {
|
||||
}
|
||||
|
||||
// Initialize the DTMF generator with sample rate fs Hz (8000, 16000, 32000,
// 48000), event (0-15) and attenuation (0-63 dB; one entry per kAmplitude
// value).
// Returns 0 on success, otherwise an error code (kParameterError).
int DtmfToneGenerator::Init(int fs, int event, int attenuation) {
  initialized_ = false;
  // Map the sample rate to a row index in the coefficient tables, which are
  // ordered {8000, 16000, 32000, 48000} Hz.
  size_t fs_index;
  if (fs == 8000) {
    fs_index = 0;
  } else if (fs == 16000) {
    fs_index = 1;
  } else if (fs == 32000) {
    fs_index = 2;
  } else if (fs == 48000) {
    fs_index = 3;
  } else {
    RTC_NOTREACHED();
    fs_index = 1;  // Fall back to table row 1, i.e. 16000 Hz.
  }

  if (event < 0 || event > 15) {
    return kParameterError;  // Invalid event number.
  }

  if (attenuation < 0 || attenuation > 63) {
    return kParameterError;  // Invalid attenuation.
  }

  // Look up oscillator coefficient for low and high frequencies.
  // The DCHECKs guard the table lookups against out-of-bounds indices.
  RTC_DCHECK_LE(0, fs_index);
  RTC_DCHECK_GT(arraysize(kCoeff1), fs_index);
  RTC_DCHECK_GT(arraysize(kCoeff2), fs_index);
  RTC_DCHECK_LE(0, event);
  RTC_DCHECK_GT(arraysize(kCoeff1[fs_index]), event);
  RTC_DCHECK_GT(arraysize(kCoeff2[fs_index]), event);
  coeff1_ = kCoeff1[fs_index][event];
  coeff2_ = kCoeff2[fs_index][event];

  // Look up amplitude multiplier.
  RTC_DCHECK_LE(0, attenuation);
  RTC_DCHECK_GT(arraysize(kAmplitude), attenuation);
  amplitude_ = kAmplitude[attenuation];

  // Initialize sample history. Element [0] is seeded with x[-2] =
  // sin(2*pi*f/fs) and [1] with x[-1] = 0, which starts the sinusoid
  // recursion used in Generate().
  RTC_DCHECK_LE(0, fs_index);
  RTC_DCHECK_GT(arraysize(kInitValue1), fs_index);
  RTC_DCHECK_GT(arraysize(kInitValue2), fs_index);
  RTC_DCHECK_LE(0, event);
  RTC_DCHECK_GT(arraysize(kInitValue1[fs_index]), event);
  RTC_DCHECK_GT(arraysize(kInitValue2[fs_index]), event);
  sample_history1_[0] = kInitValue1[fs_index][event];
  sample_history1_[1] = 0;
  sample_history2_[0] = kInitValue2[fs_index][event];
  sample_history2_[1] = 0;

  initialized_ = true;
  return 0;
}
|
||||
|
||||
// Reset tone generator to uninitialized state.
|
||||
void DtmfToneGenerator::Reset() {
|
||||
initialized_ = false;
|
||||
}
|
||||
|
||||
// Generate |num_samples| of DTMF signal and write to |output|.
// Returns the number of samples generated, or kNotInitialized /
// kParameterError on failure. The first channel is synthesized and then
// copied to all remaining channels of |output|.
int DtmfToneGenerator::Generate(size_t num_samples,
                                AudioMultiVector* output) {
  if (!initialized_) {
    return kNotInitialized;
  }

  if (!output) {
    return kParameterError;
  }

  output->AssertSize(num_samples);
  for (size_t i = 0; i < num_samples; ++i) {
    // Use recursion formula y[n] = a * y[n - 1] - y[n - 2].
    // coeff*_ are in Q14, so adding 8192 and shifting right by 14 performs
    // a rounding Q14 -> Q0 conversion.
    int16_t temp_val_low = ((coeff1_ * sample_history1_[1] + 8192) >> 14)
        - sample_history1_[0];
    int16_t temp_val_high = ((coeff2_ * sample_history2_[1] + 8192) >> 14)
        - sample_history2_[0];

    // Update recursion memory.
    sample_history1_[0] = sample_history1_[1];
    sample_history1_[1] = temp_val_low;
    sample_history2_[0] = sample_history2_[1];
    sample_history2_[1] = temp_val_high;

    // Attenuate the low frequency tone 3 dB (kAmpMultiplier is Q15), while
    // the high tone is scaled by 1 << 15 to match.
    int32_t temp_val =
        kAmpMultiplier * temp_val_low + temp_val_high * (1 << 15);
    // Normalize the signal to Q14 with proper rounding.
    temp_val = (temp_val + 16384) >> 15;
    // Scale the signal to correct volume (amplitude_ is Q14, with rounding).
    (*output)[0][i] =
        static_cast<int16_t>((temp_val * amplitude_ + 8192) >> 14);
  }
  // Copy first channel to all other channels.
  for (size_t channel = 1; channel < output->Channels(); ++channel) {
    output->CopyChannel(0, channel);
  }

  return static_cast<int>(num_samples);
}
|
||||
|
||||
bool DtmfToneGenerator::initialized() const {
|
||||
return initialized_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
54
modules/audio_coding/neteq/dtmf_tone_generator.h
Normal file
54
modules/audio_coding/neteq/dtmf_tone_generator.h
Normal file
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This class provides a generator for DTMF tones. Call Init() with a sample
// rate, event number and attenuation, then Generate() to synthesize samples.
class DtmfToneGenerator {
 public:
  // Error codes returned by Init() and Generate().
  enum ReturnCodes {
    kNotInitialized = -1,
    kParameterError = -2,
  };

  DtmfToneGenerator();
  virtual ~DtmfToneGenerator() {}
  // Prepares the generator for sample rate |fs| Hz, DTMF |event| and
  // |attenuation| dB. Returns 0 on success, otherwise a ReturnCodes value.
  virtual int Init(int fs, int event, int attenuation);
  // Returns the generator to the uninitialized state.
  virtual void Reset();
  // Writes |num_samples| samples of DTMF tone into |output|. Returns the
  // number of samples produced, or a (negative) ReturnCodes value on error.
  virtual int Generate(size_t num_samples, AudioMultiVector* output);
  // True if Init() has succeeded since construction or the last Reset().
  virtual bool initialized() const;

 private:
  static const int kCoeff1[4][16];  // 1st oscillator model coefficient table.
  static const int kCoeff2[4][16];  // 2nd oscillator model coefficient table.
  static const int kInitValue1[4][16];  // Initialization for 1st oscillator.
  static const int kInitValue2[4][16];  // Initialization for 2nd oscillator.
  static const int kAmplitude[64];  // Amplitude for 0 through -63 dBm0.
  static const int16_t kAmpMultiplier = 23171;  // 3 dB attenuation (in Q15).

  bool initialized_;            // True if generator is initialized properly.
  int coeff1_;                  // 1st oscillator coefficient for this event.
  int coeff2_;                  // 2nd oscillator coefficient for this event.
  int amplitude_;               // Amplitude for this event.
  int16_t sample_history1_[2];  // Last 2 samples for the 1st oscillator.
  int16_t sample_history2_[2];  // Last 2 samples for the 2nd oscillator.

  RTC_DISALLOW_COPY_AND_ASSIGN(DtmfToneGenerator);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_DTMF_TONE_GENERATOR_H_
|
||||
179
modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc
Normal file
179
modules/audio_coding/neteq/dtmf_tone_generator_unittest.cc
Normal file
@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for DtmfToneGenerator class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Test fixture for DtmfToneGenerator. Provides helpers that check the
// generated waveform against a floating-point sinusoid reference
// (TestAllTones) and verify the attenuation scaling (TestAmplitudes).
class DtmfToneGeneratorTest : public ::testing::Test {
 protected:
  static const double kLowFreqHz[16];
  static const double kHighFreqHz[16];
  // This is the attenuation applied to all cases.
  const double kBaseAttenuation = 16141.0 / 16384.0;
  // Extra 3 dB attenuation applied to the low-frequency tone.
  const double k3dbAttenuation = 23171.0 / 32768;
  const int kNumSamples = 10;

  // Generates every event (0-15) at |fs_hz| with |channels| channels and
  // compares each sample against the closed-form two-sinusoid reference.
  void TestAllTones(int fs_hz, int channels) {
    AudioMultiVector signal(channels);

    for (int event = 0; event <= 15; ++event) {
      std::ostringstream ss;
      ss << "Checking event " << event << " at sample rate " << fs_hz;
      SCOPED_TRACE(ss.str());
      const int kAttenuation = 0;
      ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, kAttenuation));
      EXPECT_TRUE(tone_gen_.initialized());
      EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));

      double f1 = kLowFreqHz[event];
      double f2 = kHighFreqHz[event];
      const double pi = 3.14159265358979323846;

      for (int n = 0; n < kNumSamples; ++n) {
        // The generator's recursion produces a negative-rotation sinusoid,
        // hence the (-n - 1) phase argument.
        double x = k3dbAttenuation * sin(2.0 * pi * f1 / fs_hz * (-n - 1)) +
            sin(2.0 * pi * f2 / fs_hz * (-n - 1));
        x *= kBaseAttenuation;
        x = ldexp(x, 14);  // Scale to Q14.
        for (int channel = 0; channel < channels; ++channel) {
          EXPECT_NEAR(x, static_cast<double>(signal[channel][n]), 25);
        }
      }

      tone_gen_.Reset();
      EXPECT_FALSE(tone_gen_.initialized());
    }
  }

  // Generates a 0 dB reference signal for a few events and verifies that
  // every attenuation setting scales it by the expected factor.
  void TestAmplitudes(int fs_hz, int channels) {
    AudioMultiVector signal(channels);
    AudioMultiVector ref_signal(channels);

    const int event_vec[] = {0, 4, 9, 13};  // Test a few events.
    for (int e = 0; e < 4; ++e) {
      int event = event_vec[e];
      // Create full-scale reference.
      ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, 0));  // 0 attenuation.
      EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &ref_signal));
      // Test every 5 steps (to save time).
      for (int attenuation = 1; attenuation <= 63; attenuation += 5) {
        std::ostringstream ss;
        ss << "Checking event " << event << " at sample rate " << fs_hz;
        ss << "; attenuation " << attenuation;
        SCOPED_TRACE(ss.str());
        ASSERT_EQ(0, tone_gen_.Init(fs_hz, event, attenuation));
        EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));
        for (int n = 0; n < kNumSamples; ++n) {
          double attenuation_factor =
              pow(10, -static_cast<double>(attenuation) / 20);
          // Verify that the attenuation is correct.
          for (int channel = 0; channel < channels; ++channel) {
            EXPECT_NEAR(attenuation_factor * ref_signal[channel][n],
                        signal[channel][n],
                        2);
          }
        }

        tone_gen_.Reset();
      }
    }
  }

  DtmfToneGenerator tone_gen_;
};
|
||||
|
||||
// Low and high frequencies for events 0 through 15, following the standard
// DTMF keypad row/column frequency assignment.
const double DtmfToneGeneratorTest::kLowFreqHz[16] = {
    941.0, 697.0, 697.0, 697.0, 770.0, 770.0, 770.0, 852.0,
    852.0, 852.0, 941.0, 941.0, 697.0, 770.0, 852.0, 941.0};
const double DtmfToneGeneratorTest::kHighFreqHz[16] = {
    1336.0, 1209.0, 1336.0, 1477.0, 1209.0, 1336.0, 1477.0, 1209.0,
    1336.0, 1477.0, 1209.0, 1477.0, 1633.0, 1633.0, 1633.0, 1633.0};
|
||||
|
||||
// Run the tone and amplitude checks for every supported sample rate, in both
// mono and stereo configurations.
TEST_F(DtmfToneGeneratorTest, Test8000Mono) {
  TestAllTones(8000, 1);
  TestAmplitudes(8000, 1);
}

TEST_F(DtmfToneGeneratorTest, Test16000Mono) {
  TestAllTones(16000, 1);
  TestAmplitudes(16000, 1);
}

TEST_F(DtmfToneGeneratorTest, Test32000Mono) {
  TestAllTones(32000, 1);
  TestAmplitudes(32000, 1);
}

TEST_F(DtmfToneGeneratorTest, Test48000Mono) {
  TestAllTones(48000, 1);
  TestAmplitudes(48000, 1);
}

TEST_F(DtmfToneGeneratorTest, Test8000Stereo) {
  TestAllTones(8000, 2);
  TestAmplitudes(8000, 2);
}

TEST_F(DtmfToneGeneratorTest, Test16000Stereo) {
  TestAllTones(16000, 2);
  TestAmplitudes(16000, 2);
}

TEST_F(DtmfToneGeneratorTest, Test32000Stereo) {
  TestAllTones(32000, 2);
  TestAmplitudes(32000, 2);
}

TEST_F(DtmfToneGeneratorTest, Test48000Stereo) {
  TestAllTones(48000, 2);
  TestAmplitudes(48000, 2);
}
|
||||
|
||||
// Verify all documented error returns: Generate() before Init(), Init() with
// out-of-range event/attenuation, and Generate() with a NULL output pointer.
TEST(DtmfToneGenerator, TestErrors) {
  DtmfToneGenerator tone_gen;
  const int kNumSamples = 10;
  AudioMultiVector signal(1);  // One channel.

  // Try to generate tones without initializing.
  EXPECT_EQ(DtmfToneGenerator::kNotInitialized,
            tone_gen.Generate(kNumSamples, &signal));

  const int fs = 16000;       // Valid sample rate.
  const int event = 7;        // Valid event.
  const int attenuation = 0;  // Valid attenuation.
  // Initialize with invalid event -1.
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            tone_gen.Init(fs, -1, attenuation));
  // Initialize with invalid event 16.
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            tone_gen.Init(fs, 16, attenuation));
  // Initialize with invalid attenuation -1.
  EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, -1));
  // Initialize with invalid attenuation 64.
  EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, 64));
  EXPECT_FALSE(tone_gen.initialized());  // Should still be uninitialized.

  // Initialize with valid parameters.
  ASSERT_EQ(0, tone_gen.Init(fs, event, attenuation));
  EXPECT_TRUE(tone_gen.initialized());
  // NULL pointer to destination.
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            tone_gen.Generate(kNumSamples, NULL));
}
|
||||
|
||||
} // namespace webrtc
|
||||
978
modules/audio_coding/neteq/expand.cc
Normal file
978
modules/audio_coding/neteq/expand.cc
Normal file
@ -0,0 +1,978 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h> // memset
|
||||
|
||||
#include <algorithm> // min, max
|
||||
#include <limits> // numeric_limits<T>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
#include "webrtc/rtc_base/safe_conversions.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs an Expand operation for |num_channels| channels at sample rate
// |fs| Hz. The pointer arguments are stored as collaborators (the per-channel
// parameter array is the only allocation made here). Calls Reset() so the
// first Process() call performs a full signal analysis.
// NOTE(review): the raw pointers appear to be non-owning — confirm the
// lifetime contract with the caller (NeteqImpl).
Expand::Expand(BackgroundNoise* background_noise,
               SyncBuffer* sync_buffer,
               RandomVector* random_vector,
               StatisticsCalculator* statistics,
               int fs,
               size_t num_channels)
    : random_vector_(random_vector),
      sync_buffer_(sync_buffer),
      first_expand_(true),
      fs_hz_(fs),
      num_channels_(num_channels),
      consecutive_expands_(0),
      background_noise_(background_noise),
      statistics_(statistics),
      // 5 samples of overlap per 8 kHz of sample rate.
      overlap_length_(5 * fs / 8000),
      lag_index_direction_(0),
      current_lag_index_(0),
      stop_muting_(false),
      expand_duration_samples_(0),
      channel_parameters_(new ChannelParameters[num_channels_]) {
  assert(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000);
  assert(fs <= static_cast<int>(kMaxSampleRate));  // Should not be possible.
  assert(num_channels_ > 0);
  memset(expand_lags_, 0, sizeof(expand_lags_));
  Reset();
}
|
||||
|
||||
Expand::~Expand() = default;

// Resets the expand state: clears the per-channel expansion vectors and sets
// first_expand_ so the next Process() call re-runs the signal analysis
// (AnalyzeSignal) before generating output.
void Expand::Reset() {
  first_expand_ = true;
  consecutive_expands_ = 0;
  max_lag_ = 0;
  for (size_t ix = 0; ix < num_channels_; ++ix) {
    channel_parameters_[ix].expand_vector0.Clear();
    channel_parameters_[ix].expand_vector1.Clear();
  }
}
|
||||
|
||||
// Produces one lag worth of concealment audio per channel into |output| by
// mixing three contributions: a voiced part (weighted repetitions of the last
// pitch period(s) stored in expand_vector0/1), an unvoiced part (random noise
// shaped by the per-channel AR filter), and background noise. Consecutive
// calls gradually mute the signal via |mute_factor|/|mute_slope|. All gain
// arithmetic is fixed-point (mostly Q13/Q14/Q15/Q20). Returns 0.
int Expand::Process(AudioMultiVector* output) {
  int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30];
  int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
  static const int kTempDataSize = 3600;
  int16_t temp_data[kTempDataSize];  // TODO(hlundin) Remove this.
  int16_t* voiced_vector_storage = temp_data;
  int16_t* voiced_vector = &voiced_vector_storage[overlap_length_];
  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
  int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
  int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
  int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder;

  int fs_mult = fs_hz_ / 8000;

  if (first_expand_) {
    // Perform initial setup if this is the first expansion since last reset.
    AnalyzeSignal(random_vector);
    first_expand_ = false;
    expand_duration_samples_ = 0;
  } else {
    // This is not the first expansion, parameters are already estimated.
    // Extract a noise segment.
    size_t rand_length = max_lag_;
    // This only applies to SWB where length could be larger than 256.
    assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30);
    GenerateRandomVector(2, rand_length, random_vector);
  }

  // Generate signal.
  UpdateLagIndex();

  // Voiced part.
  // Generate a weighted vector with the current lag.
  size_t expansion_vector_length = max_lag_ + overlap_length_;
  size_t current_lag = expand_lags_[current_lag_index_];
  // Copy lag+overlap data.
  size_t expansion_vector_position = expansion_vector_length - current_lag -
      overlap_length_;
  size_t temp_length = current_lag + overlap_length_;
  for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
    ChannelParameters& parameters = channel_parameters_[channel_ix];
    // The blend of the two stored pitch vectors depends on which of the three
    // alternative lags is active (set by UpdateLagIndex()).
    if (current_lag_index_ == 0) {
      // Use only expand_vector0.
      assert(expansion_vector_position + temp_length <=
             parameters.expand_vector0.Size());
      parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
                                       voiced_vector_storage);
    } else if (current_lag_index_ == 1) {
      std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]);
      parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
                                       temp_0.get());
      std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]);
      parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position,
                                       temp_1.get());
      // Mix 3/4 of expand_vector0 with 1/4 of expand_vector1.
      WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 3, temp_1.get(), 1, 2,
                                            voiced_vector_storage, temp_length);
    } else if (current_lag_index_ == 2) {
      // Mix 1/2 of expand_vector0 with 1/2 of expand_vector1.
      assert(expansion_vector_position + temp_length <=
             parameters.expand_vector0.Size());
      assert(expansion_vector_position + temp_length <=
             parameters.expand_vector1.Size());

      std::unique_ptr<int16_t[]> temp_0(new int16_t[temp_length]);
      parameters.expand_vector0.CopyTo(temp_length, expansion_vector_position,
                                       temp_0.get());
      std::unique_ptr<int16_t[]> temp_1(new int16_t[temp_length]);
      parameters.expand_vector1.CopyTo(temp_length, expansion_vector_position,
                                       temp_1.get());
      WebRtcSpl_ScaleAndAddVectorsWithRound(temp_0.get(), 1, temp_1.get(), 1, 1,
                                            voiced_vector_storage, temp_length);
    }

    // Get tapering window parameters. Values are in Q15.
    int16_t muting_window, muting_window_increment;
    int16_t unmuting_window, unmuting_window_increment;
    if (fs_hz_ == 8000) {
      muting_window = DspHelper::kMuteFactorStart8kHz;
      muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
      unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
    } else if (fs_hz_ == 16000) {
      muting_window = DspHelper::kMuteFactorStart16kHz;
      muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
      unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
    } else if (fs_hz_ == 32000) {
      muting_window = DspHelper::kMuteFactorStart32kHz;
      muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
      unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
    } else {  // fs_ == 48000
      muting_window = DspHelper::kMuteFactorStart48kHz;
      muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
      unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
    }

    // Smooth the expanded if it has not been muted to a low amplitude and
    // |current_voice_mix_factor| is larger than 0.5.
    // 819 is 0.05 in Q14; 8192 is 0.5 in Q14.
    if ((parameters.mute_factor > 819) &&
        (parameters.current_voice_mix_factor > 8192)) {
      size_t start_ix = sync_buffer_->Size() - overlap_length_;
      for (size_t i = 0; i < overlap_length_; i++) {
        // Do overlap add between new vector and overlap.
        (*sync_buffer_)[channel_ix][start_ix + i] =
            (((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) +
                (((parameters.mute_factor * voiced_vector_storage[i]) >> 14) *
                    unmuting_window) + 16384) >> 15;
        muting_window += muting_window_increment;
        unmuting_window += unmuting_window_increment;
      }
    } else if (parameters.mute_factor == 0) {
      // The expanded signal will consist of only comfort noise if
      // mute_factor = 0. Set the output length to 15 ms for best noise
      // production.
      // TODO(hlundin): This has been disabled since the length of
      // parameters.expand_vector0 and parameters.expand_vector1 no longer
      // match with expand_lags_, causing invalid reads and writes. Is it a good
      // idea to enable this again, and solve the vector size problem?
      // max_lag_ = fs_mult * 120;
      // expand_lags_[0] = fs_mult * 120;
      // expand_lags_[1] = fs_mult * 120;
      // expand_lags_[2] = fs_mult * 120;
    }

    // Unvoiced part.
    // Filter |scaled_random_vector| through |ar_filter_|.
    memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state,
           sizeof(int16_t) * kUnvoicedLpcOrder);
    // Rounding constant for the gain scaling below.
    int32_t add_constant = 0;
    if (parameters.ar_gain_scale > 0) {
      add_constant = 1 << (parameters.ar_gain_scale - 1);
    }
    WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector,
                                    parameters.ar_gain, add_constant,
                                    parameters.ar_gain_scale,
                                    current_lag);
    WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector,
                              parameters.ar_filter, kUnvoicedLpcOrder + 1,
                              current_lag);
    // Save the filter tail as state for the next call.
    memcpy(parameters.ar_filter_state,
           &(unvoiced_vector[current_lag - kUnvoicedLpcOrder]),
           sizeof(int16_t) * kUnvoicedLpcOrder);

    // Combine voiced and unvoiced contributions.

    // Set a suitable cross-fading slope.
    // For lag =
    //   <= 31 * fs_mult            => go from 1 to 0 in about 8 ms;
    //  (>= 31 .. <= 63) * fs_mult  => go from 1 to 0 in about 16 ms;
    //   >= 64 * fs_mult            => go from 1 to 0 in about 32 ms.
    // temp_shift = getbits(max_lag_) - 5.
    int temp_shift =
        (31 - WebRtcSpl_NormW32(rtc::dchecked_cast<int32_t>(max_lag_))) - 5;
    int16_t mix_factor_increment = 256 >> temp_shift;
    if (stop_muting_) {
      mix_factor_increment = 0;
    }

    // Create combined signal by shifting in more and more of unvoiced part.
    temp_shift = 8 - temp_shift;  // = getbits(mix_factor_increment).
    // Number of samples needed to ramp from the current mix factor down to the
    // target |voice_mix_factor| (shadows the outer |temp_length|).
    size_t temp_length = (parameters.current_voice_mix_factor -
        parameters.voice_mix_factor) >> temp_shift;
    temp_length = std::min(temp_length, current_lag);
    DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_length,
                         &parameters.current_voice_mix_factor,
                         mix_factor_increment, temp_data);

    // End of cross-fading period was reached before end of expanded signal
    // path. Mix the rest with a fixed mixing factor.
    if (temp_length < current_lag) {
      if (mix_factor_increment != 0) {
        parameters.current_voice_mix_factor = parameters.voice_mix_factor;
      }
      int16_t temp_scale = 16384 - parameters.current_voice_mix_factor;
      WebRtcSpl_ScaleAndAddVectorsWithRound(
          voiced_vector + temp_length, parameters.current_voice_mix_factor,
          unvoiced_vector + temp_length, temp_scale, 14,
          temp_data + temp_length, current_lag - temp_length);
    }

    // Select muting slope depending on how many consecutive expands we have
    // done.
    if (consecutive_expands_ == 3) {
      // Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms.
      // mute_slope = 0.0010 / fs_mult in Q20.
      parameters.mute_slope = std::max(parameters.mute_slope, 1049 / fs_mult);
    }
    if (consecutive_expands_ == 7) {
      // Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms.
      // mute_slope = 0.0020 / fs_mult in Q20.
      parameters.mute_slope = std::max(parameters.mute_slope, 2097 / fs_mult);
    }

    // Mute segment according to slope value.
    if ((consecutive_expands_ != 0) || !parameters.onset) {
      // Mute to the previous level, then continue with the muting.
      WebRtcSpl_AffineTransformVector(temp_data, temp_data,
                                      parameters.mute_factor, 8192,
                                      14, current_lag);

      if (!stop_muting_) {
        DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag);

        // Shift by 6 to go from Q20 to Q14.
        // TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong.
        // Legacy.
        int16_t gain = static_cast<int16_t>(16384 -
            (((current_lag * parameters.mute_slope) + 8192) >> 6));
        gain = ((gain * parameters.mute_factor) + 8192) >> 14;

        // Guard against getting stuck with very small (but sometimes audible)
        // gain.
        if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) {
          parameters.mute_factor = 0;
        } else {
          parameters.mute_factor = gain;
        }
      }
    }

    // Background noise part.
    GenerateBackgroundNoise(random_vector,
                            channel_ix,
                            channel_parameters_[channel_ix].mute_slope,
                            TooManyExpands(),
                            current_lag,
                            unvoiced_array_memory);

    // Add background noise to the combined voiced-unvoiced signal.
    for (size_t i = 0; i < current_lag; i++) {
      temp_data[i] = temp_data[i] + noise_vector[i];
    }
    if (channel_ix == 0) {
      output->AssertSize(current_lag);
    } else {
      assert(output->Size() == current_lag);
    }
    (*output)[channel_ix].OverwriteAt(temp_data, current_lag, 0);
  }

  // Increase call number and cap it.
  consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands ?
      kMaxConsecutiveExpands : consecutive_expands_ + 1;
  expand_duration_samples_ += output->Size();
  // Clamp the duration counter at 2 seconds.
  expand_duration_samples_ = std::min(expand_duration_samples_,
                                      rtc::dchecked_cast<size_t>(fs_hz_ * 2));
  return 0;
}
|
||||
|
||||
// Prepares the state for returning to normal playback after an expand period:
// locks the lag selection, stops further muting, and reports the length of the
// finished outage (in milliseconds) to the statistics collector.
void Expand::SetParametersForNormalAfterExpand() {
  current_lag_index_ = 0;
  lag_index_direction_ = 0;
  stop_muting_ = true;  // Do not mute signal any more.
  const int outage_duration_ms =
      rtc::dchecked_cast<int>(expand_duration_samples_) / (fs_hz_ / 1000);
  statistics_->LogDelayedPacketOutageEvent(outage_duration_ms);
}
|
||||
|
||||
// Prepares the state for merging with true decoded audio after an expand
// period; the lag index is rewound so the next UpdateLagIndex() lands on the
// first (best) lag, and muting is stopped.
void Expand::SetParametersForMergeAfterExpand() {
  current_lag_index_ = -1; /* out of the 3 possible ones */
  lag_index_direction_ = 1; /* make sure we get the "optimal" lag */
  stop_muting_ = true;
}
|
||||
|
||||
bool Expand::Muted() const {
|
||||
if (first_expand_ || stop_muting_)
|
||||
return false;
|
||||
RTC_DCHECK(channel_parameters_);
|
||||
for (size_t ch = 0; ch < num_channels_; ++ch) {
|
||||
if (channel_parameters_[ch].mute_factor != 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Accessor for the overlap-add window length (in samples) used when stitching
// expanded audio onto the sync buffer.
size_t Expand::overlap_length() const {
  return overlap_length_;
}
|
||||
|
||||
void Expand::InitializeForAnExpandPeriod() {
|
||||
lag_index_direction_ = 1;
|
||||
current_lag_index_ = -1;
|
||||
stop_muting_ = false;
|
||||
random_vector_->set_seed_increment(1);
|
||||
consecutive_expands_ = 0;
|
||||
for (size_t ix = 0; ix < num_channels_; ++ix) {
|
||||
channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14.
|
||||
channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14.
|
||||
// Start with 0 gain for background noise.
|
||||
background_noise_->SetMuteFactor(ix, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// True once the consecutive-expand counter has reached its cap; used to switch
// the output over to pure background noise.
bool Expand::TooManyExpands() {
  return consecutive_expands_ >= kMaxConsecutiveExpands;
}
|
||||
|
||||
// One-time analysis at the start of an expand period. Estimates the pitch lag
// from the most recent audio in the sync buffer (coarse correlation in the
// 4 kHz downsampled domain, refined by a distortion measure and a full-rate
// cross-correlation), fills expand_vector0/1 with energy-matched pitch
// periods, fits the per-channel unvoiced AR (LPC) filter and its gain, derives
// the voiced/unvoiced mix factor from the correlation coefficient, and sets
// the muting slope. Also writes an initial noise segment into
// |random_vector| (caller-provided, at least kMaxSampleRate/8000*120+30
// samples). All arithmetic is fixed-point with explicit Q-format scaling.
void Expand::AnalyzeSignal(int16_t* random_vector) {
  int32_t auto_correlation[kUnvoicedLpcOrder + 1];
  int16_t reflection_coeff[kUnvoicedLpcOrder];
  int16_t correlation_vector[kMaxSampleRate / 8000 * 102];
  size_t best_correlation_index[kNumCorrelationCandidates];
  int16_t best_correlation[kNumCorrelationCandidates];
  size_t best_distortion_index[kNumCorrelationCandidates];
  int16_t best_distortion[kNumCorrelationCandidates];
  int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1];
  int32_t best_distortion_w32[kNumCorrelationCandidates];
  static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
  int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
  int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;

  int fs_mult = fs_hz_ / 8000;

  // Pre-calculate common multiplications with fs_mult.
  size_t fs_mult_4 = static_cast<size_t>(fs_mult * 4);
  size_t fs_mult_20 = static_cast<size_t>(fs_mult * 20);
  size_t fs_mult_120 = static_cast<size_t>(fs_mult * 120);
  size_t fs_mult_dist_len = fs_mult * kDistortionLength;
  size_t fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;

  // Analyze the last 32 ms (256 samples at 8 kHz) of audio history.
  const size_t signal_length = static_cast<size_t>(256 * fs_mult);

  const size_t audio_history_position = sync_buffer_->Size() - signal_length;
  std::unique_ptr<int16_t[]> audio_history(new int16_t[signal_length]);
  (*sync_buffer_)[0].CopyTo(signal_length, audio_history_position,
                            audio_history.get());

  // Initialize.
  InitializeForAnExpandPeriod();

  // Calculate correlation in downsampled domain (4 kHz sample rate).
  size_t correlation_length = 51;  // TODO(hlundin): Legacy bit-exactness.
  // If it is decided to break bit-exactness |correlation_length| should be
  // initialized to the return value of Correlation().
  Correlation(audio_history.get(), signal_length, correlation_vector);

  // Find peaks in correlation vector.
  DspHelper::PeakDetection(correlation_vector, correlation_length,
                           kNumCorrelationCandidates, fs_mult,
                           best_correlation_index, best_correlation);

  // Adjust peak locations; cross-correlation lags start at 2.5 ms
  // (20 * fs_mult samples).
  best_correlation_index[0] += fs_mult_20;
  best_correlation_index[1] += fs_mult_20;
  best_correlation_index[2] += fs_mult_20;

  // Calculate distortion around the |kNumCorrelationCandidates| best lags.
  int distortion_scale = 0;
  for (size_t i = 0; i < kNumCorrelationCandidates; i++) {
    size_t min_index = std::max(fs_mult_20,
                                best_correlation_index[i] - fs_mult_4);
    size_t max_index = std::min(fs_mult_120 - 1,
                                best_correlation_index[i] + fs_mult_4);
    best_distortion_index[i] = DspHelper::MinDistortion(
        &(audio_history[signal_length - fs_mult_dist_len]), min_index,
        max_index, fs_mult_dist_len, &best_distortion_w32[i]);
    distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]),
                                distortion_scale);
  }
  // Shift the distortion values to fit in 16 bits.
  WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates,
                                   best_distortion_w32, distortion_scale);

  // Find the maximizing index |i| of the cost function
  // f[i] = best_correlation[i] / best_distortion[i].
  int32_t best_ratio = std::numeric_limits<int32_t>::min();
  size_t best_index = std::numeric_limits<size_t>::max();
  for (size_t i = 0; i < kNumCorrelationCandidates; ++i) {
    int32_t ratio;
    if (best_distortion[i] > 0) {
      ratio = (best_correlation[i] * (1 << 16)) / best_distortion[i];
    } else if (best_correlation[i] == 0) {
      ratio = 0;  // No correlation set result to zero.
    } else {
      ratio = std::numeric_limits<int32_t>::max();  // Denominator is zero.
    }
    if (ratio > best_ratio) {
      best_index = i;
      best_ratio = ratio;
    }
  }

  size_t distortion_lag = best_distortion_index[best_index];
  size_t correlation_lag = best_correlation_index[best_index];
  max_lag_ = std::max(distortion_lag, correlation_lag);

  // Calculate the exact best correlation in the range between
  // |correlation_lag| and |distortion_lag|.
  correlation_length =
      std::max(std::min(distortion_lag + 10, fs_mult_120),
               static_cast<size_t>(60 * fs_mult));

  size_t start_index = std::min(distortion_lag, correlation_lag);
  size_t correlation_lags = static_cast<size_t>(
      WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + 1);
  assert(correlation_lags <= static_cast<size_t>(99 * fs_mult + 1));

  for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
    ChannelParameters& parameters = channel_parameters_[channel_ix];
    // Calculate suitable scaling.
    int16_t signal_max = WebRtcSpl_MaxAbsValueW16(
        &audio_history[signal_length - correlation_length - start_index
                       - correlation_lags],
        correlation_length + start_index + correlation_lags - 1);
    int correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +
        (31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;
    correlation_scale = std::max(0, correlation_scale);

    // Calculate the correlation, store in |correlation_vector2|.
    WebRtcSpl_CrossCorrelation(
        correlation_vector2,
        &(audio_history[signal_length - correlation_length]),
        &(audio_history[signal_length - correlation_length - start_index]),
        correlation_length, correlation_lags, correlation_scale, -1);

    // Find maximizing index.
    best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
    int32_t max_correlation = correlation_vector2[best_index];
    // Compensate index with start offset.
    best_index = best_index + start_index;

    // Calculate energies.
    int32_t energy1 = WebRtcSpl_DotProductWithScale(
        &(audio_history[signal_length - correlation_length]),
        &(audio_history[signal_length - correlation_length]),
        correlation_length, correlation_scale);
    int32_t energy2 = WebRtcSpl_DotProductWithScale(
        &(audio_history[signal_length - correlation_length - best_index]),
        &(audio_history[signal_length - correlation_length - best_index]),
        correlation_length, correlation_scale);

    // Calculate the correlation coefficient between the two portions of the
    // signal.
    int32_t corr_coefficient;
    if ((energy1 > 0) && (energy2 > 0)) {
      int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0);
      int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
      // Make sure total scaling is even (to simplify scale factor after sqrt).
      if ((energy1_scale + energy2_scale) & 1) {
        // If sum is odd, add 1 to make it even.
        energy1_scale += 1;
      }
      int32_t scaled_energy1 = energy1 >> energy1_scale;
      int32_t scaled_energy2 = energy2 >> energy2_scale;
      int16_t sqrt_energy_product = static_cast<int16_t>(
          WebRtcSpl_SqrtFloor(scaled_energy1 * scaled_energy2));
      // Calculate max_correlation / sqrt(energy1 * energy2) in Q14.
      int cc_shift = 14 - (energy1_scale + energy2_scale) / 2;
      max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift);
      corr_coefficient = WebRtcSpl_DivW32W16(max_correlation,
                                             sqrt_energy_product);
      // Cap at 1.0 in Q14.
      corr_coefficient = std::min(16384, corr_coefficient);
    } else {
      corr_coefficient = 0;
    }

    // Extract the two vectors expand_vector0 and expand_vector1 from
    // |audio_history|.
    size_t expansion_length = max_lag_ + overlap_length_;
    const int16_t* vector1 = &(audio_history[signal_length - expansion_length]);
    const int16_t* vector2 = vector1 - distortion_lag;
    // Normalize the second vector to the same energy as the first.
    energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length,
                                            correlation_scale);
    energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length,
                                            correlation_scale);
    // Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 - 2.0,
    // i.e., energy1 / energy2 is within 0.25 - 4.
    int16_t amplitude_ratio;
    if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) {
      // Energy constraint fulfilled. Use both vectors and scale them
      // accordingly.
      int32_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
      int32_t scaled_energy1 = scaled_energy2 - 13;
      // Calculate scaled_energy1 / scaled_energy2 in Q13.
      int32_t energy_ratio = WebRtcSpl_DivW32W16(
          WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1),
          static_cast<int16_t>(energy2 >> scaled_energy2));
      // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26).
      amplitude_ratio =
          static_cast<int16_t>(WebRtcSpl_SqrtFloor(energy_ratio << 13));
      // Copy the two vectors and give them the same energy.
      parameters.expand_vector0.Clear();
      parameters.expand_vector0.PushBack(vector1, expansion_length);
      parameters.expand_vector1.Clear();
      if (parameters.expand_vector1.Size() < expansion_length) {
        parameters.expand_vector1.Extend(
            expansion_length - parameters.expand_vector1.Size());
      }
      std::unique_ptr<int16_t[]> temp_1(new int16_t[expansion_length]);
      WebRtcSpl_AffineTransformVector(temp_1.get(),
                                      const_cast<int16_t*>(vector2),
                                      amplitude_ratio,
                                      4096,
                                      13,
                                      expansion_length);
      parameters.expand_vector1.OverwriteAt(temp_1.get(), expansion_length, 0);
    } else {
      // Energy change constraint not fulfilled. Only use last vector.
      parameters.expand_vector0.Clear();
      parameters.expand_vector0.PushBack(vector1, expansion_length);
      // Copy from expand_vector0 to expand_vector1.
      parameters.expand_vector0.CopyTo(&parameters.expand_vector1);
      // Set the energy_ratio since it is used by muting slope.
      if ((energy1 / 4 < energy2) || (energy2 == 0)) {
        amplitude_ratio = 4096;  // 0.5 in Q13.
      } else {
        amplitude_ratio = 16384;  // 2.0 in Q13.
      }
    }

    // Set the 3 lag values.
    if (distortion_lag == correlation_lag) {
      expand_lags_[0] = distortion_lag;
      expand_lags_[1] = distortion_lag;
      expand_lags_[2] = distortion_lag;
    } else {
      // |distortion_lag| and |correlation_lag| are not equal; use different
      // combinations of the two.
      // First lag is |distortion_lag| only.
      expand_lags_[0] = distortion_lag;
      // Second lag is the average of the two.
      expand_lags_[1] = (distortion_lag + correlation_lag) / 2;
      // Third lag is the average again, but rounding towards |correlation_lag|.
      if (distortion_lag > correlation_lag) {
        expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;
      } else {
        expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;
      }
    }

    // Calculate the LPC and the gain of the filters.

    // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.
    size_t temp_index = signal_length - fs_mult_lpc_analysis_len -
        kUnvoicedLpcOrder;
    // Copy signal to temporary vector to be able to pad with leading zeros.
    int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len
                                       + kUnvoicedLpcOrder];
    memset(temp_signal, 0,
           sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));
    memcpy(&temp_signal[kUnvoicedLpcOrder],
           &audio_history[temp_index + kUnvoicedLpcOrder],
           sizeof(int16_t) * fs_mult_lpc_analysis_len);
    CrossCorrelationWithAutoShift(
        &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder],
        fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation);
    delete [] temp_signal;

    // Verify that variance is positive.
    if (auto_correlation[0] > 0) {
      // Estimate AR filter parameters using Levinson-Durbin algorithm;
      // kUnvoicedLpcOrder + 1 filter coefficients.
      int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,
                                                   parameters.ar_filter,
                                                   reflection_coeff,
                                                   kUnvoicedLpcOrder);

      // Keep filter parameters only if filter is stable.
      if (stability != 1) {
        // Set first coefficient to 4096 (1.0 in Q12).
        parameters.ar_filter[0] = 4096;
        // Set remaining |kUnvoicedLpcOrder| coefficients to zero.
        WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder);
      }
    }

    if (channel_ix == 0) {
      // Extract a noise segment.
      size_t noise_length;
      if (distortion_lag < 40) {
        noise_length = 2 * distortion_lag + 30;
      } else {
        noise_length = distortion_lag + 30;
      }
      if (noise_length <= RandomVector::kRandomTableSize) {
        memcpy(random_vector, RandomVector::kRandomTable,
               sizeof(int16_t) * noise_length);
      } else {
        // Only applies to SWB where length could be larger than
        // |kRandomTableSize|.
        memcpy(random_vector, RandomVector::kRandomTable,
               sizeof(int16_t) * RandomVector::kRandomTableSize);
        assert(noise_length <= kMaxSampleRate / 8000 * 120 + 30);
        random_vector_->IncreaseSeedIncrement(2);
        random_vector_->Generate(
            noise_length - RandomVector::kRandomTableSize,
            &random_vector[RandomVector::kRandomTableSize]);
      }
    }

    // Set up state vector and calculate scale factor for unvoiced filtering.
    memcpy(parameters.ar_filter_state,
           &(audio_history[signal_length - kUnvoicedLpcOrder]),
           sizeof(int16_t) * kUnvoicedLpcOrder);
    memcpy(unvoiced_vector - kUnvoicedLpcOrder,
           &(audio_history[signal_length - 128 - kUnvoicedLpcOrder]),
           sizeof(int16_t) * kUnvoicedLpcOrder);
    WebRtcSpl_FilterMAFastQ12(&audio_history[signal_length - 128],
                              unvoiced_vector,
                              parameters.ar_filter,
                              kUnvoicedLpcOrder + 1,
                              128);
    const int unvoiced_max_abs = [&] {
      const int16_t max_abs = WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128);
      // Since WebRtcSpl_MaxAbsValueW16 returns 2^15 - 1 when the input contains
      // -2^15, we have to conservatively bump the return value by 1
      // if it is 2^15 - 1.
      return max_abs == WEBRTC_SPL_WORD16_MAX ? max_abs + 1 : max_abs;
    }();
    // Pick the smallest n such that 2^n > unvoiced_max_abs; then the maximum
    // value of the dot product is less than 2^7 * 2^(2*n) = 2^(2*n + 7), so to
    // prevent overflows we want 2n + 7 <= 31, which means we should shift by
    // 2n + 7 - 31 bits, if this value is greater than zero.
    int unvoiced_prescale =
        std::max(0, 2 * WebRtcSpl_GetSizeInBits(unvoiced_max_abs) - 24);

    int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale(unvoiced_vector,
                                                            unvoiced_vector,
                                                            128,
                                                            unvoiced_prescale);

    // Normalize |unvoiced_energy| to 28 or 29 bits to preserve sqrt() accuracy.
    int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3;
    // Make sure we do an odd number of shifts since we already have 7 shifts
    // from dividing with 128 earlier. This will make the total scale factor
    // even, which is suitable for the sqrt.
    unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1);
    unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale);
    int16_t unvoiced_gain =
        static_cast<int16_t>(WebRtcSpl_SqrtFloor(unvoiced_energy));
    parameters.ar_gain_scale = 13
        + (unvoiced_scale + 7 - unvoiced_prescale) / 2;
    parameters.ar_gain = unvoiced_gain;

    // Calculate voice_mix_factor from corr_coefficient.
    // Let x = corr_coefficient. Then, we compute:
    // if (x > 0.48)
    //   voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096;
    // else
    //   voice_mix_factor = 0;
    if (corr_coefficient > 7875) {
      int16_t x1, x2, x3;
      // |corr_coefficient| is in Q14.
      x1 = static_cast<int16_t>(corr_coefficient);
      x2 = (x1 * x1) >> 14;  // Shift 14 to keep result in Q14.
      x3 = (x1 * x2) >> 14;
      static const int kCoefficients[4] = { -5179, 19931, -16422, 5776 };
      int32_t temp_sum = kCoefficients[0] * 16384;
      temp_sum += kCoefficients[1] * x1;
      temp_sum += kCoefficients[2] * x2;
      temp_sum += kCoefficients[3] * x3;
      parameters.voice_mix_factor =
          static_cast<int16_t>(std::min(temp_sum / 4096, 16384));
      parameters.voice_mix_factor = std::max(parameters.voice_mix_factor,
                                             static_cast<int16_t>(0));
    } else {
      parameters.voice_mix_factor = 0;
    }

    // Calculate muting slope. Reuse value from earlier scaling of
    // |expand_vector0| and |expand_vector1|.
    int16_t slope = amplitude_ratio;
    if (slope > 12288) {
      // slope > 1.5.
      // Calculate (1 - (1 / slope)) / distortion_lag =
      // (slope - 1) / (distortion_lag * slope).
      // |slope| is in Q13, so 1 corresponds to 8192. Shift up to Q25 before
      // the division.
      // Shift the denominator from Q13 to Q5 before the division. The result of
      // the division will then be in Q20.
      int temp_ratio = WebRtcSpl_DivW32W16(
          (slope - 8192) << 12,
          static_cast<int16_t>((distortion_lag * slope) >> 8));
      if (slope > 14746) {
        // slope > 1.8.
        // Divide by 2, with proper rounding.
        parameters.mute_slope = (temp_ratio + 1) / 2;
      } else {
        // Divide by 8, with proper rounding.
        parameters.mute_slope = (temp_ratio + 4) / 8;
      }
      parameters.onset = true;
    } else {
      // Calculate (1 - slope) / distortion_lag.
      // Shift |slope| by 7 to Q20 before the division. The result is in Q20.
      parameters.mute_slope = WebRtcSpl_DivW32W16(
          (8192 - slope) * 128, static_cast<int16_t>(distortion_lag));
      if (parameters.voice_mix_factor <= 13107) {
        // Make sure the mute factor decreases from 1.0 to 0.9 in no more than
        // 6.25 ms.
        // mute_slope >= 0.005 / fs_mult in Q20.
        parameters.mute_slope = std::max(5243 / fs_mult, parameters.mute_slope);
      } else if (slope > 8028) {
        parameters.mute_slope = 0;
      }
      parameters.onset = false;
    }
  }
}
|
||||
|
||||
// Initializes one channel's expansion state: fully unmuted (1.0 in Q14), no
// AR gain, purely unvoiced mix, and zeroed filter coefficients/state.
Expand::ChannelParameters::ChannelParameters()
    : mute_factor(16384),  // 1.0 in Q14.
      ar_gain(0),
      ar_gain_scale(0),
      voice_mix_factor(0),
      current_voice_mix_factor(0),
      onset(false),
      mute_slope(0) {
  memset(ar_filter, 0, sizeof(ar_filter));
  memset(ar_filter_state, 0, sizeof(ar_filter_state));
}
|
||||
|
||||
// Computes an auto-correlation of the tail of |input| in the 4 kHz downsampled
// domain, for lags 10..63 (2.5-16 ms at 4 kHz). The result is normalized and
// written as kNumCorrelationLags Q-scaled 16-bit values to |output| (caller
// must provide at least 54 elements).
void Expand::Correlation(const int16_t* input,
                         size_t input_length,
                         int16_t* output) const {
  // Set parameters depending on sample rate.
  const int16_t* filter_coefficients;
  size_t num_coefficients;
  int16_t downsampling_factor;
  if (fs_hz_ == 8000) {
    num_coefficients = 3;
    downsampling_factor = 2;
    filter_coefficients = DspHelper::kDownsample8kHzTbl;
  } else if (fs_hz_ == 16000) {
    num_coefficients = 5;
    downsampling_factor = 4;
    filter_coefficients = DspHelper::kDownsample16kHzTbl;
  } else if (fs_hz_ == 32000) {
    num_coefficients = 7;
    downsampling_factor = 8;
    filter_coefficients = DspHelper::kDownsample32kHzTbl;
  } else {  // fs_hz_ == 48000.
    num_coefficients = 7;
    downsampling_factor = 12;
    filter_coefficients = DspHelper::kDownsample48kHzTbl;
  }

  // Correlate from lag 10 to lag 60 in downsampled domain.
  // (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.)
  static const size_t kCorrelationStartLag = 10;
  static const size_t kNumCorrelationLags = 54;
  static const size_t kCorrelationLength = 60;
  // Downsample to 4 kHz sample rate.
  static const size_t kDownsampledLength = kCorrelationStartLag
      + kNumCorrelationLags + kCorrelationLength;
  int16_t downsampled_input[kDownsampledLength];
  static const size_t kFilterDelay = 0;
  WebRtcSpl_DownsampleFast(
      input + input_length - kDownsampledLength * downsampling_factor,
      kDownsampledLength * downsampling_factor, downsampled_input,
      kDownsampledLength, filter_coefficients, num_coefficients,
      downsampling_factor, kFilterDelay);

  // Normalize |downsampled_input| to using all 16 bits.
  int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,
                                               kDownsampledLength);
  int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);
  WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,
                              downsampled_input, norm_shift);

  int32_t correlation[kNumCorrelationLags];
  CrossCorrelationWithAutoShift(
      &downsampled_input[kDownsampledLength - kCorrelationLength],
      &downsampled_input[kDownsampledLength - kCorrelationLength
          - kCorrelationStartLag],
      kCorrelationLength, kNumCorrelationLags, -1, correlation);

  // Normalize and move data from 32-bit to 16-bit vector.
  int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
                                                     kNumCorrelationLags);
  int16_t norm_shift2 = static_cast<int16_t>(
      std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));
  WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,
                                   norm_shift2);
}
|
||||
|
||||
void Expand::UpdateLagIndex() {
|
||||
current_lag_index_ = current_lag_index_ + lag_index_direction_;
|
||||
// Change direction if needed.
|
||||
if (current_lag_index_ <= 0) {
|
||||
lag_index_direction_ = 1;
|
||||
}
|
||||
if (current_lag_index_ >= kNumLags - 1) {
|
||||
lag_index_direction_ = -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Default factory implementation: heap-allocates a plain Expand instance.
// Ownership of the returned object is transferred to the caller. The pointer
// arguments are borrowed — NOTE(review): presumably they must outlive the
// returned Expand; confirm against Expand's constructor.
Expand* ExpandFactory::Create(BackgroundNoise* background_noise,
                              SyncBuffer* sync_buffer,
                              RandomVector* random_vector,
                              StatisticsCalculator* statistics,
                              int fs,
                              size_t num_channels) const {
  return new Expand(background_noise, sync_buffer, random_vector, statistics,
                    fs, num_channels);
}
|
||||
|
||||
// TODO(turajs): This can be moved to BackgroundNoise class.
|
||||
void Expand::GenerateBackgroundNoise(int16_t* random_vector,
|
||||
size_t channel,
|
||||
int mute_slope,
|
||||
bool too_many_expands,
|
||||
size_t num_noise_samples,
|
||||
int16_t* buffer) {
|
||||
static const size_t kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
|
||||
int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
|
||||
assert(num_noise_samples <= (kMaxSampleRate / 8000 * 125));
|
||||
int16_t* noise_samples = &buffer[kNoiseLpcOrder];
|
||||
if (background_noise_->initialized()) {
|
||||
// Use background noise parameters.
|
||||
memcpy(noise_samples - kNoiseLpcOrder,
|
||||
background_noise_->FilterState(channel),
|
||||
sizeof(int16_t) * kNoiseLpcOrder);
|
||||
|
||||
int dc_offset = 0;
|
||||
if (background_noise_->ScaleShift(channel) > 1) {
|
||||
dc_offset = 1 << (background_noise_->ScaleShift(channel) - 1);
|
||||
}
|
||||
|
||||
// Scale random vector to correct energy level.
|
||||
WebRtcSpl_AffineTransformVector(
|
||||
scaled_random_vector, random_vector,
|
||||
background_noise_->Scale(channel), dc_offset,
|
||||
background_noise_->ScaleShift(channel),
|
||||
num_noise_samples);
|
||||
|
||||
WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples,
|
||||
background_noise_->Filter(channel),
|
||||
kNoiseLpcOrder + 1,
|
||||
num_noise_samples);
|
||||
|
||||
background_noise_->SetFilterState(
|
||||
channel,
|
||||
&(noise_samples[num_noise_samples - kNoiseLpcOrder]),
|
||||
kNoiseLpcOrder);
|
||||
|
||||
// Unmute the background noise.
|
||||
int16_t bgn_mute_factor = background_noise_->MuteFactor(channel);
|
||||
NetEq::BackgroundNoiseMode bgn_mode = background_noise_->mode();
|
||||
if (bgn_mode == NetEq::kBgnFade && too_many_expands &&
|
||||
bgn_mute_factor > 0) {
|
||||
// Fade BGN to zero.
|
||||
// Calculate muting slope, approximately -2^18 / fs_hz.
|
||||
int mute_slope;
|
||||
if (fs_hz_ == 8000) {
|
||||
mute_slope = -32;
|
||||
} else if (fs_hz_ == 16000) {
|
||||
mute_slope = -16;
|
||||
} else if (fs_hz_ == 32000) {
|
||||
mute_slope = -8;
|
||||
} else {
|
||||
mute_slope = -5;
|
||||
}
|
||||
// Use UnmuteSignal function with negative slope.
|
||||
// |bgn_mute_factor| is in Q14. |mute_slope| is in Q20.
|
||||
DspHelper::UnmuteSignal(noise_samples,
|
||||
num_noise_samples,
|
||||
&bgn_mute_factor,
|
||||
mute_slope,
|
||||
noise_samples);
|
||||
} else if (bgn_mute_factor < 16384) {
|
||||
// If mode is kBgnOn, or if kBgnFade has started fading,
|
||||
// use regular |mute_slope|.
|
||||
if (!stop_muting_ && bgn_mode != NetEq::kBgnOff &&
|
||||
!(bgn_mode == NetEq::kBgnFade && too_many_expands)) {
|
||||
DspHelper::UnmuteSignal(noise_samples,
|
||||
static_cast<int>(num_noise_samples),
|
||||
&bgn_mute_factor,
|
||||
mute_slope,
|
||||
noise_samples);
|
||||
} else {
|
||||
// kBgnOn and stop muting, or
|
||||
// kBgnOff (mute factor is always 0), or
|
||||
// kBgnFade has reached 0.
|
||||
WebRtcSpl_AffineTransformVector(noise_samples, noise_samples,
|
||||
bgn_mute_factor, 8192, 14,
|
||||
num_noise_samples);
|
||||
}
|
||||
}
|
||||
// Update mute_factor in BackgroundNoise class.
|
||||
background_noise_->SetMuteFactor(channel, bgn_mute_factor);
|
||||
} else {
|
||||
// BGN parameters have not been initialized; use zero noise.
|
||||
memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples);
|
||||
}
|
||||
}
|
||||
|
||||
void Expand::GenerateRandomVector(int16_t seed_increment,
|
||||
size_t length,
|
||||
int16_t* random_vector) {
|
||||
// TODO(turajs): According to hlundin The loop should not be needed. Should be
|
||||
// just as good to generate all of the vector in one call.
|
||||
size_t samples_generated = 0;
|
||||
const size_t kMaxRandSamples = RandomVector::kRandomTableSize;
|
||||
while (samples_generated < length) {
|
||||
size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);
|
||||
random_vector_->IncreaseSeedIncrement(seed_increment);
|
||||
random_vector_->Generate(rand_length, &random_vector[samples_generated]);
|
||||
samples_generated += rand_length;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
161
modules/audio_coding/neteq/expand.h
Normal file
161
modules/audio_coding/neteq/expand.h
Normal file
@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class BackgroundNoise;
|
||||
class RandomVector;
|
||||
class StatisticsCalculator;
|
||||
class SyncBuffer;
|
||||
|
||||
// This class handles extrapolation of audio data from the sync_buffer to
// produce packet-loss concealment.
// TODO(hlundin): Refactor this class to divide the long methods into shorter
// ones.
class Expand {
 public:
  // Constructs an Expand operating at |fs| Hz on |num_channels| channels.
  // All pointer arguments are borrowed, not owned. NOTE(review): the
  // pointees are presumably required to outlive this object — confirm
  // against the call sites.
  Expand(BackgroundNoise* background_noise,
         SyncBuffer* sync_buffer,
         RandomVector* random_vector,
         StatisticsCalculator* statistics,
         int fs,
         size_t num_channels);

  virtual ~Expand();

  // Resets the object.
  virtual void Reset();

  // The main method to produce concealment data. The data is appended to the
  // end of |output|.
  virtual int Process(AudioMultiVector* output);

  // Prepare the object to do extra expansion during normal operation following
  // a period of expands.
  virtual void SetParametersForNormalAfterExpand();

  // Prepare the object to do extra expansion during merge operation following
  // a period of expands.
  virtual void SetParametersForMergeAfterExpand();

  // Returns the mute factor for |channel| (16384 == 1.0 in Q14).
  int16_t MuteFactor(size_t channel) {
    assert(channel < num_channels_);
    return channel_parameters_[channel].mute_factor;
  }

  // Returns true if expansion has been faded down to zero amplitude (for all
  // channels); false otherwise.
  bool Muted() const;

  // Accessors and mutators.
  virtual size_t overlap_length() const;
  size_t max_lag() const { return max_lag_; }

 protected:
  static const int kMaxConsecutiveExpands = 200;

  // Writes |length| pseudo-random samples to |random_vector|.
  void GenerateRandomVector(int16_t seed_increment,
                            size_t length,
                            int16_t* random_vector);

  // Generates |num_noise_samples| samples of background noise for |channel|
  // into |buffer|, using |random_vector| as excitation.
  void GenerateBackgroundNoise(int16_t* random_vector,
                               size_t channel,
                               int mute_slope,
                               bool too_many_expands,
                               size_t num_noise_samples,
                               int16_t* buffer);

  // Initializes member variables at the beginning of an expand period.
  void InitializeForAnExpandPeriod();

  // Returns true when the expand period has lasted too long (compared
  // against kMaxConsecutiveExpands).
  bool TooManyExpands();

  // Analyzes the signal history in |sync_buffer_|, and set up all parameters
  // necessary to produce concealment data.
  void AnalyzeSignal(int16_t* random_vector);

  RandomVector* const random_vector_;
  SyncBuffer* const sync_buffer_;
  bool first_expand_;
  const int fs_hz_;
  const size_t num_channels_;
  int consecutive_expands_;

 private:
  static const size_t kUnvoicedLpcOrder = 6;
  static const size_t kNumCorrelationCandidates = 3;
  static const size_t kDistortionLength = 20;
  static const size_t kLpcAnalysisLength = 160;
  static const size_t kMaxSampleRate = 48000;
  static const int kNumLags = 3;

  // Per-channel expansion state (filters, gains, mute/mix factors).
  struct ChannelParameters {
    ChannelParameters();
    int16_t mute_factor;
    int16_t ar_filter[kUnvoicedLpcOrder + 1];
    int16_t ar_filter_state[kUnvoicedLpcOrder];
    int16_t ar_gain;
    int16_t ar_gain_scale;
    int16_t voice_mix_factor; /* Q14 */
    int16_t current_voice_mix_factor; /* Q14 */
    AudioVector expand_vector0;
    AudioVector expand_vector1;
    bool onset;
    int mute_slope; /* Q20 */
  };

  // Calculate the auto-correlation of |input|, with length |input_length|
  // samples. The correlation is calculated from a downsampled version of
  // |input|, and is written to |output|.
  void Correlation(const int16_t* input,
                   size_t input_length,
                   int16_t* output) const;

  // Steps |current_lag_index_| and reverses |lag_index_direction_| at the
  // ends of the lag table.
  void UpdateLagIndex();

  BackgroundNoise* const background_noise_;
  StatisticsCalculator* const statistics_;
  const size_t overlap_length_;
  size_t max_lag_;
  size_t expand_lags_[kNumLags];
  int lag_index_direction_;
  int current_lag_index_;
  bool stop_muting_;
  size_t expand_duration_samples_;
  std::unique_ptr<ChannelParameters[]> channel_parameters_;

  RTC_DISALLOW_COPY_AND_ASSIGN(Expand);
};
|
||||
|
||||
// Factory for Expand objects. Having a virtual Create() allows callers
// (e.g. tests) to substitute alternative Expand implementations.
struct ExpandFactory {
  ExpandFactory() {}
  virtual ~ExpandFactory() {}

  // Returns a new, heap-allocated Expand. The caller takes ownership of the
  // returned object; the pointer arguments are borrowed.
  virtual Expand* Create(BackgroundNoise* background_noise,
                         SyncBuffer* sync_buffer,
                         RandomVector* random_vector,
                         StatisticsCalculator* statistics,
                         int fs,
                         size_t num_channels) const;
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_EXPAND_H_
|
||||
206
modules/audio_coding/neteq/expand_unittest.cc
Normal file
206
modules/audio_coding/neteq/expand_unittest.cc
Normal file
@ -0,0 +1,206 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for Expand class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h"
|
||||
#include "webrtc/rtc_base/safe_conversions.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Sanity check: an Expand instance can be constructed and destroyed on the
// stack without processing any audio.
TEST(Expand, CreateAndDestroy) {
  int fs = 8000;
  size_t channels = 1;
  BackgroundNoise bgn(channels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
}
|
||||
|
||||
// Verifies that ExpandFactory::Create returns a valid (non-null) Expand
// object. The raw new/delete pair is replaced with std::unique_ptr so the
// object is released even if an expectation fails mid-test.
TEST(Expand, CreateUsingFactory) {
  int fs = 8000;
  size_t channels = 1;
  BackgroundNoise bgn(channels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;
  StatisticsCalculator statistics;
  ExpandFactory expand_factory;
  std::unique_ptr<Expand> expand(expand_factory.Create(
      &bgn, &sync_buffer, &random_vector, &statistics, fs, channels));
  EXPECT_TRUE(expand != nullptr);
}
|
||||
|
||||
namespace {
// Test double that records the duration passed to the last
// LogDelayedPacketOutageEvent() call, so tests can inspect what Expand
// reported to the statistics collector.
class FakeStatisticsCalculator : public StatisticsCalculator {
 public:
  void LogDelayedPacketOutageEvent(int outage_duration_ms) override {
    last_outage_duration_ms_ = outage_duration_ms;
  }

  // Returns the most recently logged outage duration, or 0 if none.
  int last_outage_duration_ms() const { return last_outage_duration_ms_; }

 private:
  int last_outage_duration_ms_ = 0;
};

// This is the same size that is given to the SyncBuffer object in NetEq.
const size_t kNetEqSyncBufferLengthMs = 720;
}  // namespace
|
||||
|
||||
// Fixture that pre-loads the sync buffer with 32 kHz speech from a resource
// file, so the Expand object under test has realistic signal history to
// extrapolate from.
class ExpandTest : public ::testing::Test {
 protected:
  ExpandTest()
      : input_file_(test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
                    32000),
        test_sample_rate_hz_(32000),
        num_channels_(1),
        background_noise_(num_channels_),
        sync_buffer_(num_channels_,
                     kNetEqSyncBufferLengthMs * test_sample_rate_hz_ / 1000),
        expand_(&background_noise_,
                &sync_buffer_,
                &random_vector_,
                &statistics_,
                test_sample_rate_hz_,
                num_channels_) {
    WebRtcSpl_Init();
    input_file_.set_output_rate_hz(test_sample_rate_hz_);
  }

  void SetUp() override {
    // Fast-forward the input file until there is speech (about 1.1 second into
    // the file).
    const size_t speech_start_samples =
        static_cast<size_t>(test_sample_rate_hz_ * 1.1f);
    ASSERT_TRUE(input_file_.Seek(speech_start_samples));

    // Pre-load the sync buffer with speech data.
    std::unique_ptr<int16_t[]> temp(new int16_t[sync_buffer_.Size()]);
    ASSERT_TRUE(input_file_.Read(sync_buffer_.Size(), temp.get()));
    sync_buffer_.Channel(0).OverwriteAt(temp.get(), sync_buffer_.Size(), 0);
    ASSERT_EQ(1u, num_channels_) << "Fix: Must populate all channels.";
  }

  test::ResampleInputAudioFile input_file_;
  int test_sample_rate_hz_;
  size_t num_channels_;
  BackgroundNoise background_noise_;
  SyncBuffer sync_buffer_;
  RandomVector random_vector_;
  FakeStatisticsCalculator statistics_;  // Captures reported outage events.
  Expand expand_;  // Object under test.
};
|
||||
|
||||
// This test calls the expand object to produce concealment data a few times,
|
||||
// and then ends by calling SetParametersForNormalAfterExpand. This simulates
|
||||
// the situation where the packet next up for decoding was just delayed, not
|
||||
// lost.
|
||||
TEST_F(ExpandTest, DelayedPacketOutage) {
  AudioMultiVector output(num_channels_);
  size_t sum_output_len_samples = 0;
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(0, expand_.Process(&output));
    EXPECT_GT(output.Size(), 0u);
    sum_output_len_samples += output.Size();
    // No outage should be reported while the expand period is ongoing.
    EXPECT_EQ(0, statistics_.last_outage_duration_ms());
  }
  expand_.SetParametersForNormalAfterExpand();
  // Convert |sum_output_len_samples| to milliseconds.
  EXPECT_EQ(rtc::checked_cast<int>(sum_output_len_samples /
                                   (test_sample_rate_hz_ / 1000)),
            statistics_.last_outage_duration_ms());
}
|
||||
|
||||
// This test is similar to DelayedPacketOutage, but ends by calling
|
||||
// SetParametersForMergeAfterExpand. This simulates the situation where the
|
||||
// packet next up for decoding was actually lost (or at least a later packet
|
||||
// arrived before it).
|
||||
TEST_F(ExpandTest, LostPacketOutage) {
  AudioMultiVector output(num_channels_);
  size_t sum_output_len_samples = 0;
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(0, expand_.Process(&output));
    EXPECT_GT(output.Size(), 0u);
    sum_output_len_samples += output.Size();
    EXPECT_EQ(0, statistics_.last_outage_duration_ms());
  }
  expand_.SetParametersForMergeAfterExpand();
  // A merge ending means the packet was lost, not delayed, so no
  // delayed-packet outage event should have been logged.
  EXPECT_EQ(0, statistics_.last_outage_duration_ms());
}
|
||||
|
||||
// This test is similar to the DelayedPacketOutage test above, but with the
|
||||
// difference that Expand::Reset() is called after 5 calls to Expand::Process().
|
||||
// This should reset the statistics, and will in the end lead to an outage of
|
||||
// 5 periods instead of 10.
|
||||
TEST_F(ExpandTest, CheckOutageStatsAfterReset) {
  AudioMultiVector output(num_channels_);
  size_t sum_output_len_samples = 0;
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(0, expand_.Process(&output));
    EXPECT_GT(output.Size(), 0u);
    sum_output_len_samples += output.Size();
    if (i == 5) {
      // Reset part-way through; only data generated after the reset should
      // count towards the reported outage, so zero the local tally too.
      expand_.Reset();
      sum_output_len_samples = 0;
    }
    EXPECT_EQ(0, statistics_.last_outage_duration_ms());
  }
  expand_.SetParametersForNormalAfterExpand();
  // Convert |sum_output_len_samples| to milliseconds.
  EXPECT_EQ(rtc::checked_cast<int>(sum_output_len_samples /
                                   (test_sample_rate_hz_ / 1000)),
            statistics_.last_outage_duration_ms());
}
|
||||
|
||||
namespace {
// Runs expand until Muted() returns true. Times out after 1000 calls.
void ExpandUntilMuted(size_t num_channels, Expand* expand) {
  EXPECT_FALSE(expand->Muted()) << "Instance is muted from the start";
  AudioMultiVector output(num_channels);
  int num_calls = 0;
  while (!expand->Muted()) {
    ASSERT_LT(num_calls++, 1000) << "Test timed out";
    EXPECT_EQ(0, expand->Process(&output));
  }
}
}  // namespace
|
||||
|
||||
// Verifies that Muted() returns true after a long expand period. Also verifies
|
||||
// that Muted() is reset to false after calling Reset(),
|
||||
// SetParametersForMergeAfterExpand() and SetParametersForNormalAfterExpand().
|
||||
TEST_F(ExpandTest, Muted) {
  ExpandUntilMuted(num_channels_, &expand_);
  expand_.Reset();
  EXPECT_FALSE(expand_.Muted());  // Should be back to unmuted.

  ExpandUntilMuted(num_channels_, &expand_);
  expand_.SetParametersForMergeAfterExpand();
  EXPECT_FALSE(expand_.Muted());  // Should be back to unmuted.

  expand_.Reset();  // Must reset in order to start a new expand period.
  ExpandUntilMuted(num_channels_, &expand_);
  expand_.SetParametersForNormalAfterExpand();
  EXPECT_FALSE(expand_.Muted());  // Should be back to unmuted.
}
|
||||
|
||||
// TODO(hlundin): Write more tests.
|
||||
|
||||
} // namespace webrtc
|
||||
314
modules/audio_coding/neteq/include/neteq.h
Normal file
314
modules/audio_coding/neteq/include/neteq.h
Normal file
@ -0,0 +1,314 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
|
||||
|
||||
#include <string.h> // Provide access to size_t.
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/api/optional.h"
|
||||
#include "webrtc/common_types.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_decoder_impl.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/rtc_base/scoped_ref_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class AudioFrame;
|
||||
class AudioDecoderFactory;
|
||||
|
||||
// Snapshot of NetEq's network-related statistics, filled in by
// NetEq::NetworkStatistics(). Q14 fractions are relative to 16384 == 100%.
struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;  // Current jitter buffer size in ms.
  uint16_t preferred_buffer_size_ms;  // Target buffer size in ms.
  uint16_t jitter_peaks_found;  // 1 if adding extra delay due to peaky
                                // jitter; 0 otherwise.
  uint16_t packet_loss_rate;  // Loss rate (network + late) in Q14.
  uint16_t expand_rate;  // Fraction (of original stream) of synthesized
                         // audio inserted through expansion (in Q14).
  uint16_t speech_expand_rate;  // Fraction (of original stream) of synthesized
                                // speech inserted through expansion (in Q14).
  uint16_t preemptive_rate;  // Fraction of data inserted through pre-emptive
                             // expansion (in Q14).
  uint16_t accelerate_rate;  // Fraction of data removed through acceleration
                             // (in Q14).
  uint16_t secondary_decoded_rate;  // Fraction of data coming from FEC/RED
                                    // decoding (in Q14).
  uint16_t secondary_discarded_rate;  // Fraction of discarded FEC/RED data (in
                                      // Q14).
  int32_t clockdrift_ppm;  // Average clock-drift in parts-per-million
                           // (positive or negative).
  size_t added_zero_samples;  // Number of zero samples added in "off" mode.
  // Statistics for packet waiting times, i.e., the time between a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms;
  int median_waiting_time_ms;
  int min_waiting_time_ms;
  int max_waiting_time_ms;
};
|
||||
|
||||
// NetEq statistics that persist over the lifetime of the class.
// These metrics are never reset.
struct NetEqLifetimeStatistics {
  // Total number of audio samples received, including synthesized samples.
  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalsamplesreceived
  uint64_t total_samples_received = 0;
  // Total number of inbound audio samples that are based on synthesized data to
  // conceal packet loss.
  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-concealedsamples
  uint64_t concealed_samples = 0;
};
|
||||
|
||||
// Playout modes accepted by NetEq (see NetEq::Config::playout_mode).
// NOTE(review): the behavioral differences between these modes are defined
// by the NetEq implementation, not visible here — confirm against NetEqImpl
// before documenting them further.
enum NetEqPlayoutMode {
  kPlayoutOn,
  kPlayoutOff,
  kPlayoutFax,
  kPlayoutStreaming
};
|
||||
|
||||
// This is the interface class for NetEq.
|
||||
class NetEq {
|
||||
public:
|
||||
enum BackgroundNoiseMode {
|
||||
kBgnOn, // Default behavior with eternal noise.
|
||||
kBgnFade, // Noise fades to zero after some time.
|
||||
kBgnOff // Background noise is always zero.
|
||||
};
|
||||
|
||||
struct Config {
|
||||
Config()
|
||||
: sample_rate_hz(16000),
|
||||
enable_post_decode_vad(false),
|
||||
max_packets_in_buffer(50),
|
||||
// |max_delay_ms| has the same effect as calling SetMaximumDelay().
|
||||
max_delay_ms(2000),
|
||||
background_noise_mode(kBgnOff),
|
||||
playout_mode(kPlayoutOn),
|
||||
enable_fast_accelerate(false) {}
|
||||
|
||||
std::string ToString() const;
|
||||
|
||||
int sample_rate_hz; // Initial value. Will change with input data.
|
||||
bool enable_post_decode_vad;
|
||||
size_t max_packets_in_buffer;
|
||||
int max_delay_ms;
|
||||
BackgroundNoiseMode background_noise_mode;
|
||||
NetEqPlayoutMode playout_mode;
|
||||
bool enable_fast_accelerate;
|
||||
bool enable_muted_state = false;
|
||||
};
|
||||
|
||||
enum ReturnCodes {
|
||||
kOK = 0,
|
||||
kFail = -1,
|
||||
kNotImplemented = -2
|
||||
};
|
||||
|
||||
// Creates a new NetEq object, with parameters set in |config|. The |config|
|
||||
// object will only have to be valid for the duration of the call to this
|
||||
// method.
|
||||
static NetEq* Create(
|
||||
const NetEq::Config& config,
|
||||
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
|
||||
|
||||
virtual ~NetEq() {}
|
||||
|
||||
// Inserts a new packet into NetEq. The |receive_timestamp| is an indication
|
||||
// of the time when the packet was received, and should be measured with
|
||||
// the same tick rate as the RTP timestamp of the current payload.
|
||||
// Returns 0 on success, -1 on failure.
|
||||
virtual int InsertPacket(const RTPHeader& rtp_header,
|
||||
rtc::ArrayView<const uint8_t> payload,
|
||||
uint32_t receive_timestamp) = 0;
|
||||
|
||||
// Lets NetEq know that a packet arrived with an empty payload. This typically
|
||||
// happens when empty packets are used for probing the network channel, and
|
||||
// these packets use RTP sequence numbers from the same series as the actual
|
||||
// audio packets.
|
||||
virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;
|
||||
|
||||
// Instructs NetEq to deliver 10 ms of audio data. The data is written to
|
||||
// |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
|
||||
// |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
|
||||
// |vad_activity_| are updated upon success. If an error is returned, some
|
||||
// fields may not have been updated, or may contain inconsistent values.
|
||||
// If muted state is enabled (through Config::enable_muted_state), |muted|
|
||||
// may be set to true after a prolonged expand period. When this happens, the
|
||||
// |data_| in |audio_frame| is not written, but should be interpreted as being
|
||||
// all zeros.
|
||||
// Returns kOK on success, or kFail in case of an error.
|
||||
virtual int GetAudio(AudioFrame* audio_frame, bool* muted) = 0;
|
||||
|
||||
// Replaces the current set of decoders with the given one.
|
||||
virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0;
|
||||
|
||||
// Associates |rtp_payload_type| with |codec| and |codec_name|, and stores the
|
||||
// information in the codec database. Returns 0 on success, -1 on failure.
|
||||
// The name is only used to provide information back to the caller about the
|
||||
// decoders. Hence, the name is arbitrary, and may be empty.
|
||||
virtual int RegisterPayloadType(NetEqDecoder codec,
|
||||
const std::string& codec_name,
|
||||
uint8_t rtp_payload_type) = 0;
|
||||
|
||||
// Provides an externally created decoder object |decoder| to insert in the
|
||||
// decoder database. The decoder implements a decoder of type |codec| and
|
||||
// associates it with |rtp_payload_type| and |codec_name|. Returns kOK on
|
||||
// success, kFail on failure. The name is only used to provide information
|
||||
// back to the caller about the decoders. Hence, the name is arbitrary, and
|
||||
// may be empty.
|
||||
virtual int RegisterExternalDecoder(AudioDecoder* decoder,
|
||||
NetEqDecoder codec,
|
||||
const std::string& codec_name,
|
||||
uint8_t rtp_payload_type) = 0;
|
||||
|
||||
// Associates |rtp_payload_type| with the given codec, which NetEq will
|
||||
// instantiate when it needs it. Returns true iff successful.
|
||||
virtual bool RegisterPayloadType(int rtp_payload_type,
|
||||
const SdpAudioFormat& audio_format) = 0;
|
||||
|
||||
// Removes |rtp_payload_type| from the codec database. Returns 0 on success,
|
||||
// -1 on failure. Removing a payload type that is not registered is ok and
|
||||
// will not result in an error.
|
||||
virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
|
||||
|
||||
// Removes all payload types from the codec database.
|
||||
virtual void RemoveAllPayloadTypes() = 0;
|
||||
|
||||
// Sets a minimum delay in millisecond for packet buffer. The minimum is
|
||||
// maintained unless a higher latency is dictated by channel condition.
|
||||
// Returns true if the minimum is successfully applied, otherwise false is
|
||||
// returned.
|
||||
virtual bool SetMinimumDelay(int delay_ms) = 0;
|
||||
|
||||
// Sets a maximum delay in milliseconds for packet buffer. The latency will
|
||||
// not exceed the given value, even required delay (given the channel
|
||||
// conditions) is higher. Calling this method has the same effect as setting
|
||||
// the |max_delay_ms| value in the NetEq::Config struct.
|
||||
virtual bool SetMaximumDelay(int delay_ms) = 0;
|
||||
|
||||
// The smallest latency required. This is computed bases on inter-arrival
|
||||
// time and internal NetEq logic. Note that in computing this latency none of
|
||||
// the user defined limits (applied by calling setMinimumDelay() and/or
|
||||
// SetMaximumDelay()) are applied.
|
||||
virtual int LeastRequiredDelayMs() const = 0;
|
||||
|
||||
// Not implemented.
|
||||
virtual int SetTargetDelay() = 0;
|
||||
|
||||
// Returns the current target delay in ms. This includes any extra delay
|
||||
// requested through SetMinimumDelay.
|
||||
virtual int TargetDelayMs() = 0;
|
||||
|
||||
// Returns the current total delay (packet buffer and sync buffer) in ms.
|
||||
virtual int CurrentDelayMs() const = 0;
|
||||
|
||||
// Returns the current total delay (packet buffer and sync buffer) in ms,
|
||||
// with smoothing applied to even out short-time fluctuations due to jitter.
|
||||
// The packet buffer part of the delay is not updated during DTX/CNG periods.
|
||||
virtual int FilteredCurrentDelayMs() const = 0;
|
||||
|
||||
// Sets the playout mode to |mode|.
|
||||
// Deprecated. Set the mode in the Config struct passed to the constructor.
|
||||
// TODO(henrik.lundin) Delete.
|
||||
virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
|
||||
|
||||
// Returns the current playout mode.
|
||||
// Deprecated.
|
||||
// TODO(henrik.lundin) Delete.
|
||||
virtual NetEqPlayoutMode PlayoutMode() const = 0;
|
||||
|
||||
// Writes the current network statistics to |stats|. The statistics are reset
|
||||
// after the call.
|
||||
virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
|
||||
|
||||
// Returns a copy of this class's lifetime statistics. These statistics are
|
||||
// never reset.
|
||||
virtual NetEqLifetimeStatistics GetLifetimeStatistics() const = 0;
|
||||
|
||||
// Writes the current RTCP statistics to |stats|. The statistics are reset
|
||||
// and a new report period is started with the call.
|
||||
virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
|
||||
|
||||
// Same as RtcpStatistics(), but does not reset anything.
|
||||
virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
|
||||
|
||||
// Enables post-decode VAD. When enabled, GetAudio() will return
|
||||
// kOutputVADPassive when the signal contains no speech.
|
||||
virtual void EnableVad() = 0;
|
||||
|
||||
// Disables post-decode VAD.
|
||||
virtual void DisableVad() = 0;
|
||||
|
||||
// Returns the RTP timestamp for the last sample delivered by GetAudio().
|
||||
// The return value will be empty if no valid timestamp is available.
|
||||
virtual rtc::Optional<uint32_t> GetPlayoutTimestamp() const = 0;
|
||||
|
||||
// Returns the sample rate in Hz of the audio produced in the last GetAudio
|
||||
// call. If GetAudio has not been called yet, the configured sample rate
|
||||
// (Config::sample_rate_hz) is returned.
|
||||
virtual int last_output_sample_rate_hz() const = 0;
|
||||
|
||||
// Returns info about the decoder for the given payload type, or an empty
|
||||
// value if we have no decoder for that payload type.
|
||||
virtual rtc::Optional<CodecInst> GetDecoder(int payload_type) const = 0;
|
||||
|
||||
// Returns the decoder format for the given payload type. Returns empty if no
|
||||
// such payload type was registered.
|
||||
virtual rtc::Optional<SdpAudioFormat> GetDecoderFormat(
|
||||
int payload_type) const = 0;
|
||||
|
||||
// Not implemented.
|
||||
virtual int SetTargetNumberOfChannels() = 0;
|
||||
|
||||
// Not implemented.
|
||||
virtual int SetTargetSampleRate() = 0;
|
||||
|
||||
// Flushes both the packet buffer and the sync buffer.
|
||||
virtual void FlushBuffers() = 0;
|
||||
|
||||
// Current usage of packet-buffer and it's limits.
|
||||
virtual void PacketBufferStatistics(int* current_num_packets,
|
||||
int* max_num_packets) const = 0;
|
||||
|
||||
// Enables NACK and sets the maximum size of the NACK list, which should be
|
||||
// positive and no larger than Nack::kNackListSizeLimit. If NACK is already
|
||||
// enabled then the maximum NACK list size is modified accordingly.
|
||||
virtual void EnableNack(size_t max_nack_list_size) = 0;
|
||||
|
||||
virtual void DisableNack() = 0;
|
||||
|
||||
// Returns a list of RTP sequence numbers corresponding to packets to be
|
||||
// retransmitted, given an estimate of the round-trip time in milliseconds.
|
||||
virtual std::vector<uint16_t> GetNackList(
|
||||
int64_t round_trip_time_ms) const = 0;
|
||||
|
||||
// Returns a vector containing the timestamps of the packets that were decoded
|
||||
// in the last GetAudio call. If no packets were decoded in the last call, the
|
||||
// vector is empty.
|
||||
// Mainly intended for testing.
|
||||
virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0;
|
||||
|
||||
// Returns the length of the audio yet to play in the sync buffer.
|
||||
// Mainly intended for testing.
|
||||
virtual int SyncBufferSizeMs() const = 0;
|
||||
|
||||
protected:
|
||||
NetEq() {}
|
||||
|
||||
private:
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(NetEq);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_NETEQ_H_
|
||||
381
modules/audio_coding/neteq/merge.cc
Normal file
381
modules/audio_coding/neteq/merge.cc
Normal file
@ -0,0 +1,381 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/merge.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h> // memmove, memcpy, memset, size_t
|
||||
|
||||
#include <algorithm> // min, max
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
#include "webrtc/rtc_base/safe_conversions.h"
|
||||
#include "webrtc/rtc_base/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
Merge::Merge(int fs_hz,
|
||||
size_t num_channels,
|
||||
Expand* expand,
|
||||
SyncBuffer* sync_buffer)
|
||||
: fs_hz_(fs_hz),
|
||||
num_channels_(num_channels),
|
||||
fs_mult_(fs_hz_ / 8000),
|
||||
timestamps_per_call_(static_cast<size_t>(fs_hz_ / 100)),
|
||||
expand_(expand),
|
||||
sync_buffer_(sync_buffer),
|
||||
expanded_(num_channels_) {
|
||||
assert(num_channels_ > 0);
|
||||
}
|
||||
|
||||
Merge::~Merge() = default;
|
||||
|
||||
size_t Merge::Process(int16_t* input, size_t input_length,
|
||||
int16_t* external_mute_factor_array,
|
||||
AudioMultiVector* output) {
|
||||
// TODO(hlundin): Change to an enumerator and skip assert.
|
||||
assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
|
||||
fs_hz_ == 48000);
|
||||
assert(fs_hz_ <= kMaxSampleRate); // Should not be possible.
|
||||
|
||||
size_t old_length;
|
||||
size_t expand_period;
|
||||
// Get expansion data to overlap and mix with.
|
||||
size_t expanded_length = GetExpandedSignal(&old_length, &expand_period);
|
||||
|
||||
// Transfer input signal to an AudioMultiVector.
|
||||
AudioMultiVector input_vector(num_channels_);
|
||||
input_vector.PushBackInterleaved(input, input_length);
|
||||
size_t input_length_per_channel = input_vector.Size();
|
||||
assert(input_length_per_channel == input_length / num_channels_);
|
||||
|
||||
size_t best_correlation_index = 0;
|
||||
size_t output_length = 0;
|
||||
|
||||
std::unique_ptr<int16_t[]> input_channel(
|
||||
new int16_t[input_length_per_channel]);
|
||||
std::unique_ptr<int16_t[]> expanded_channel(new int16_t[expanded_length]);
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
input_vector[channel].CopyTo(
|
||||
input_length_per_channel, 0, input_channel.get());
|
||||
expanded_[channel].CopyTo(expanded_length, 0, expanded_channel.get());
|
||||
|
||||
int16_t new_mute_factor = SignalScaling(
|
||||
input_channel.get(), input_length_per_channel, expanded_channel.get());
|
||||
|
||||
// Adjust muting factor (product of "main" muting factor and expand muting
|
||||
// factor).
|
||||
int16_t* external_mute_factor = &external_mute_factor_array[channel];
|
||||
*external_mute_factor =
|
||||
(*external_mute_factor * expand_->MuteFactor(channel)) >> 14;
|
||||
|
||||
// Update |external_mute_factor| if it is lower than |new_mute_factor|.
|
||||
if (new_mute_factor > *external_mute_factor) {
|
||||
*external_mute_factor = std::min(new_mute_factor,
|
||||
static_cast<int16_t>(16384));
|
||||
}
|
||||
|
||||
if (channel == 0) {
|
||||
// Downsample, correlate, and find strongest correlation period for the
|
||||
// master (i.e., first) channel only.
|
||||
// Downsample to 4kHz sample rate.
|
||||
Downsample(input_channel.get(), input_length_per_channel,
|
||||
expanded_channel.get(), expanded_length);
|
||||
|
||||
// Calculate the lag of the strongest correlation period.
|
||||
best_correlation_index = CorrelateAndPeakSearch(
|
||||
old_length, input_length_per_channel, expand_period);
|
||||
}
|
||||
|
||||
temp_data_.resize(input_length_per_channel + best_correlation_index);
|
||||
int16_t* decoded_output = temp_data_.data() + best_correlation_index;
|
||||
|
||||
// Mute the new decoded data if needed (and unmute it linearly).
|
||||
// This is the overlapping part of expanded_signal.
|
||||
size_t interpolation_length = std::min(
|
||||
kMaxCorrelationLength * fs_mult_,
|
||||
expanded_length - best_correlation_index);
|
||||
interpolation_length = std::min(interpolation_length,
|
||||
input_length_per_channel);
|
||||
if (*external_mute_factor < 16384) {
|
||||
// Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB,
|
||||
// and so on.
|
||||
int increment = 4194 / fs_mult_;
|
||||
*external_mute_factor =
|
||||
static_cast<int16_t>(DspHelper::RampSignal(input_channel.get(),
|
||||
interpolation_length,
|
||||
*external_mute_factor,
|
||||
increment));
|
||||
DspHelper::UnmuteSignal(&input_channel[interpolation_length],
|
||||
input_length_per_channel - interpolation_length,
|
||||
external_mute_factor, increment,
|
||||
&decoded_output[interpolation_length]);
|
||||
} else {
|
||||
// No muting needed.
|
||||
memmove(
|
||||
&decoded_output[interpolation_length],
|
||||
&input_channel[interpolation_length],
|
||||
sizeof(int16_t) * (input_length_per_channel - interpolation_length));
|
||||
}
|
||||
|
||||
// Do overlap and mix linearly.
|
||||
int16_t increment =
|
||||
static_cast<int16_t>(16384 / (interpolation_length + 1)); // In Q14.
|
||||
int16_t mute_factor = 16384 - increment;
|
||||
memmove(temp_data_.data(), expanded_channel.get(),
|
||||
sizeof(int16_t) * best_correlation_index);
|
||||
DspHelper::CrossFade(&expanded_channel[best_correlation_index],
|
||||
input_channel.get(), interpolation_length,
|
||||
&mute_factor, increment, decoded_output);
|
||||
|
||||
output_length = best_correlation_index + input_length_per_channel;
|
||||
if (channel == 0) {
|
||||
assert(output->Empty()); // Output should be empty at this point.
|
||||
output->AssertSize(output_length);
|
||||
} else {
|
||||
assert(output->Size() == output_length);
|
||||
}
|
||||
(*output)[channel].OverwriteAt(temp_data_.data(), output_length, 0);
|
||||
}
|
||||
|
||||
// Copy back the first part of the data to |sync_buffer_| and remove it from
|
||||
// |output|.
|
||||
sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index());
|
||||
output->PopFront(old_length);
|
||||
|
||||
// Return new added length. |old_length| samples were borrowed from
|
||||
// |sync_buffer_|.
|
||||
RTC_DCHECK_GE(output_length, old_length);
|
||||
return output_length - old_length;
|
||||
}
|
||||
|
||||
size_t Merge::GetExpandedSignal(size_t* old_length, size_t* expand_period) {
|
||||
// Check how much data that is left since earlier.
|
||||
*old_length = sync_buffer_->FutureLength();
|
||||
// Should never be less than overlap_length.
|
||||
assert(*old_length >= expand_->overlap_length());
|
||||
// Generate data to merge the overlap with using expand.
|
||||
expand_->SetParametersForMergeAfterExpand();
|
||||
|
||||
if (*old_length >= 210 * kMaxSampleRate / 8000) {
|
||||
// TODO(hlundin): Write test case for this.
|
||||
// The number of samples available in the sync buffer is more than what fits
|
||||
// in expanded_signal. Keep the first 210 * kMaxSampleRate / 8000 samples,
|
||||
// but shift them towards the end of the buffer. This is ok, since all of
|
||||
// the buffer will be expand data anyway, so as long as the beginning is
|
||||
// left untouched, we're fine.
|
||||
size_t length_diff = *old_length - 210 * kMaxSampleRate / 8000;
|
||||
sync_buffer_->InsertZerosAtIndex(length_diff, sync_buffer_->next_index());
|
||||
*old_length = 210 * kMaxSampleRate / 8000;
|
||||
// This is the truncated length.
|
||||
}
|
||||
// This assert should always be true thanks to the if statement above.
|
||||
assert(210 * kMaxSampleRate / 8000 >= *old_length);
|
||||
|
||||
AudioMultiVector expanded_temp(num_channels_);
|
||||
expand_->Process(&expanded_temp);
|
||||
*expand_period = expanded_temp.Size(); // Samples per channel.
|
||||
|
||||
expanded_.Clear();
|
||||
// Copy what is left since earlier into the expanded vector.
|
||||
expanded_.PushBackFromIndex(*sync_buffer_, sync_buffer_->next_index());
|
||||
assert(expanded_.Size() == *old_length);
|
||||
assert(expanded_temp.Size() > 0);
|
||||
// Do "ugly" copy and paste from the expanded in order to generate more data
|
||||
// to correlate (but not interpolate) with.
|
||||
const size_t required_length = static_cast<size_t>((120 + 80 + 2) * fs_mult_);
|
||||
if (expanded_.Size() < required_length) {
|
||||
while (expanded_.Size() < required_length) {
|
||||
// Append one more pitch period each time.
|
||||
expanded_.PushBack(expanded_temp);
|
||||
}
|
||||
// Trim the length to exactly |required_length|.
|
||||
expanded_.PopBack(expanded_.Size() - required_length);
|
||||
}
|
||||
assert(expanded_.Size() >= required_length);
|
||||
return required_length;
|
||||
}
|
||||
|
||||
int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,
|
||||
const int16_t* expanded_signal) const {
|
||||
// Adjust muting factor if new vector is more or less of the BGN energy.
|
||||
const auto mod_input_length = rtc::SafeMin<size_t>(
|
||||
64 * rtc::dchecked_cast<size_t>(fs_mult_), input_length);
|
||||
const int16_t expanded_max =
|
||||
WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);
|
||||
int32_t factor = (expanded_max * expanded_max) /
|
||||
(std::numeric_limits<int32_t>::max() /
|
||||
static_cast<int32_t>(mod_input_length));
|
||||
const int expanded_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
|
||||
int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,
|
||||
expanded_signal,
|
||||
mod_input_length,
|
||||
expanded_shift);
|
||||
|
||||
// Calculate energy of input signal.
|
||||
const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);
|
||||
factor = (input_max * input_max) / (std::numeric_limits<int32_t>::max() /
|
||||
static_cast<int32_t>(mod_input_length));
|
||||
const int input_shift = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
|
||||
int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,
|
||||
mod_input_length,
|
||||
input_shift);
|
||||
|
||||
// Align to the same Q-domain.
|
||||
if (input_shift > expanded_shift) {
|
||||
energy_expanded = energy_expanded >> (input_shift - expanded_shift);
|
||||
} else {
|
||||
energy_input = energy_input >> (expanded_shift - input_shift);
|
||||
}
|
||||
|
||||
// Calculate muting factor to use for new frame.
|
||||
int16_t mute_factor;
|
||||
if (energy_input > energy_expanded) {
|
||||
// Normalize |energy_input| to 14 bits.
|
||||
int16_t temp_shift = WebRtcSpl_NormW32(energy_input) - 17;
|
||||
energy_input = WEBRTC_SPL_SHIFT_W32(energy_input, temp_shift);
|
||||
// Put |energy_expanded| in a domain 14 higher, so that
|
||||
// energy_expanded / energy_input is in Q14.
|
||||
energy_expanded = WEBRTC_SPL_SHIFT_W32(energy_expanded, temp_shift + 14);
|
||||
// Calculate sqrt(energy_expanded / energy_input) in Q14.
|
||||
mute_factor = static_cast<int16_t>(
|
||||
WebRtcSpl_SqrtFloor((energy_expanded / energy_input) << 14));
|
||||
} else {
|
||||
// Set to 1 (in Q14) when |expanded| has higher energy than |input|.
|
||||
mute_factor = 16384;
|
||||
}
|
||||
|
||||
return mute_factor;
|
||||
}
|
||||
|
||||
// TODO(hlundin): There are some parameter values in this method that seem
|
||||
// strange. Compare with Expand::Correlation.
|
||||
void Merge::Downsample(const int16_t* input, size_t input_length,
|
||||
const int16_t* expanded_signal, size_t expanded_length) {
|
||||
const int16_t* filter_coefficients;
|
||||
size_t num_coefficients;
|
||||
int decimation_factor = fs_hz_ / 4000;
|
||||
static const size_t kCompensateDelay = 0;
|
||||
size_t length_limit = static_cast<size_t>(fs_hz_ / 100); // 10 ms in samples.
|
||||
if (fs_hz_ == 8000) {
|
||||
filter_coefficients = DspHelper::kDownsample8kHzTbl;
|
||||
num_coefficients = 3;
|
||||
} else if (fs_hz_ == 16000) {
|
||||
filter_coefficients = DspHelper::kDownsample16kHzTbl;
|
||||
num_coefficients = 5;
|
||||
} else if (fs_hz_ == 32000) {
|
||||
filter_coefficients = DspHelper::kDownsample32kHzTbl;
|
||||
num_coefficients = 7;
|
||||
} else { // fs_hz_ == 48000
|
||||
filter_coefficients = DspHelper::kDownsample48kHzTbl;
|
||||
num_coefficients = 7;
|
||||
}
|
||||
size_t signal_offset = num_coefficients - 1;
|
||||
WebRtcSpl_DownsampleFast(&expanded_signal[signal_offset],
|
||||
expanded_length - signal_offset,
|
||||
expanded_downsampled_, kExpandDownsampLength,
|
||||
filter_coefficients, num_coefficients,
|
||||
decimation_factor, kCompensateDelay);
|
||||
if (input_length <= length_limit) {
|
||||
// Not quite long enough, so we have to cheat a bit.
|
||||
size_t temp_len = input_length - signal_offset;
|
||||
// TODO(hlundin): Should |downsamp_temp_len| be corrected for round-off
|
||||
// errors? I.e., (temp_len + decimation_factor - 1) / decimation_factor?
|
||||
size_t downsamp_temp_len = temp_len / decimation_factor;
|
||||
WebRtcSpl_DownsampleFast(&input[signal_offset], temp_len,
|
||||
input_downsampled_, downsamp_temp_len,
|
||||
filter_coefficients, num_coefficients,
|
||||
decimation_factor, kCompensateDelay);
|
||||
memset(&input_downsampled_[downsamp_temp_len], 0,
|
||||
sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len));
|
||||
} else {
|
||||
WebRtcSpl_DownsampleFast(&input[signal_offset],
|
||||
input_length - signal_offset, input_downsampled_,
|
||||
kInputDownsampLength, filter_coefficients,
|
||||
num_coefficients, decimation_factor,
|
||||
kCompensateDelay);
|
||||
}
|
||||
}
|
||||
|
||||
size_t Merge::CorrelateAndPeakSearch(size_t start_position, size_t input_length,
|
||||
size_t expand_period) const {
|
||||
// Calculate correlation without any normalization.
|
||||
const size_t max_corr_length = kMaxCorrelationLength;
|
||||
size_t stop_position_downsamp =
|
||||
std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1);
|
||||
|
||||
int32_t correlation[kMaxCorrelationLength];
|
||||
CrossCorrelationWithAutoShift(input_downsampled_, expanded_downsampled_,
|
||||
kInputDownsampLength, stop_position_downsamp, 1,
|
||||
correlation);
|
||||
|
||||
// Normalize correlation to 14 bits and copy to a 16-bit array.
|
||||
const size_t pad_length = expand_->overlap_length() - 1;
|
||||
const size_t correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;
|
||||
std::unique_ptr<int16_t[]> correlation16(
|
||||
new int16_t[correlation_buffer_size]);
|
||||
memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t));
|
||||
int16_t* correlation_ptr = &correlation16[pad_length];
|
||||
int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
|
||||
stop_position_downsamp);
|
||||
int norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation));
|
||||
WebRtcSpl_VectorBitShiftW32ToW16(correlation_ptr, stop_position_downsamp,
|
||||
correlation, norm_shift);
|
||||
|
||||
// Calculate allowed starting point for peak finding.
|
||||
// The peak location bestIndex must fulfill two criteria:
|
||||
// (1) w16_bestIndex + input_length <
|
||||
// timestamps_per_call_ + expand_->overlap_length();
|
||||
// (2) w16_bestIndex + input_length < start_position.
|
||||
size_t start_index = timestamps_per_call_ + expand_->overlap_length();
|
||||
start_index = std::max(start_position, start_index);
|
||||
start_index = (input_length > start_index) ? 0 : (start_index - input_length);
|
||||
// Downscale starting index to 4kHz domain. (fs_mult_ * 2 = fs_hz_ / 4000.)
|
||||
size_t start_index_downsamp = start_index / (fs_mult_ * 2);
|
||||
|
||||
// Calculate a modified |stop_position_downsamp| to account for the increased
|
||||
// start index |start_index_downsamp| and the effective array length.
|
||||
size_t modified_stop_pos =
|
||||
std::min(stop_position_downsamp,
|
||||
kMaxCorrelationLength + pad_length - start_index_downsamp);
|
||||
size_t best_correlation_index;
|
||||
int16_t best_correlation;
|
||||
static const size_t kNumCorrelationCandidates = 1;
|
||||
DspHelper::PeakDetection(&correlation_ptr[start_index_downsamp],
|
||||
modified_stop_pos, kNumCorrelationCandidates,
|
||||
fs_mult_, &best_correlation_index,
|
||||
&best_correlation);
|
||||
// Compensate for modified start index.
|
||||
best_correlation_index += start_index;
|
||||
|
||||
// Ensure that underrun does not occur for 10ms case => we have to get at
|
||||
// least 10ms + overlap . (This should never happen thanks to the above
|
||||
// modification of peak-finding starting point.)
|
||||
while (((best_correlation_index + input_length) <
|
||||
(timestamps_per_call_ + expand_->overlap_length())) ||
|
||||
((best_correlation_index + input_length) < start_position)) {
|
||||
assert(false); // Should never happen.
|
||||
best_correlation_index += expand_period; // Jump one lag ahead.
|
||||
}
|
||||
return best_correlation_index;
|
||||
}
|
||||
|
||||
size_t Merge::RequiredFutureSamples() {
|
||||
return fs_hz_ / 100 * num_channels_; // 10 ms.
|
||||
}
|
||||
|
||||
|
||||
} // namespace webrtc
|
||||
102
modules/audio_coding/neteq/merge.h
Normal file
102
modules/audio_coding/neteq/merge.h
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MERGE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MERGE_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class Expand;
|
||||
class SyncBuffer;
|
||||
|
||||
// This class handles the transition from expansion to normal operation.
|
||||
// When a packet is not available for decoding when needed, the expand operation
|
||||
// is called to generate extrapolation data. If the missing packet arrives,
|
||||
// i.e., it was just delayed, it can be decoded and appended directly to the
|
||||
// end of the expanded data (thanks to how the Expand class operates). However,
|
||||
// if a later packet arrives instead, the loss is a fact, and the new data must
|
||||
// be stitched together with the end of the expanded data. This stitching is
|
||||
// what the Merge class does.
|
||||
class Merge {
|
||||
public:
|
||||
Merge(int fs_hz,
|
||||
size_t num_channels,
|
||||
Expand* expand,
|
||||
SyncBuffer* sync_buffer);
|
||||
virtual ~Merge();
|
||||
|
||||
// The main method to produce the audio data. The decoded data is supplied in
|
||||
// |input|, having |input_length| samples in total for all channels
|
||||
// (interleaved). The result is written to |output|. The number of channels
|
||||
// allocated in |output| defines the number of channels that will be used when
|
||||
// de-interleaving |input|. The values in |external_mute_factor_array| (Q14)
|
||||
// will be used to scale the audio, and is updated in the process. The array
|
||||
// must have |num_channels_| elements.
|
||||
virtual size_t Process(int16_t* input, size_t input_length,
|
||||
int16_t* external_mute_factor_array,
|
||||
AudioMultiVector* output);
|
||||
|
||||
virtual size_t RequiredFutureSamples();
|
||||
|
||||
protected:
|
||||
const int fs_hz_;
|
||||
const size_t num_channels_;
|
||||
|
||||
private:
|
||||
static const int kMaxSampleRate = 48000;
|
||||
static const size_t kExpandDownsampLength = 100;
|
||||
static const size_t kInputDownsampLength = 40;
|
||||
static const size_t kMaxCorrelationLength = 60;
|
||||
|
||||
// Calls |expand_| to get more expansion data to merge with. The data is
|
||||
// written to |expanded_signal_|. Returns the length of the expanded data,
|
||||
// while |expand_period| will be the number of samples in one expansion period
|
||||
// (typically one pitch period). The value of |old_length| will be the number
|
||||
// of samples that were taken from the |sync_buffer_|.
|
||||
size_t GetExpandedSignal(size_t* old_length, size_t* expand_period);
|
||||
|
||||
// Analyzes |input| and |expanded_signal| and returns muting factor (Q14) to
|
||||
// be used on the new data.
|
||||
int16_t SignalScaling(const int16_t* input, size_t input_length,
|
||||
const int16_t* expanded_signal) const;
|
||||
|
||||
// Downsamples |input| (|input_length| samples) and |expanded_signal| to
|
||||
// 4 kHz sample rate. The downsampled signals are written to
|
||||
// |input_downsampled_| and |expanded_downsampled_|, respectively.
|
||||
void Downsample(const int16_t* input, size_t input_length,
|
||||
const int16_t* expanded_signal, size_t expanded_length);
|
||||
|
||||
// Calculates cross-correlation between |input_downsampled_| and
|
||||
// |expanded_downsampled_|, and finds the correlation maximum. The maximizing
|
||||
// lag is returned.
|
||||
size_t CorrelateAndPeakSearch(size_t start_position, size_t input_length,
|
||||
size_t expand_period) const;
|
||||
|
||||
const int fs_mult_; // fs_hz_ / 8000.
|
||||
const size_t timestamps_per_call_;
|
||||
Expand* expand_;
|
||||
SyncBuffer* sync_buffer_;
|
||||
int16_t expanded_downsampled_[kExpandDownsampLength];
|
||||
int16_t input_downsampled_[kInputDownsampLength];
|
||||
AudioMultiVector expanded_;
|
||||
std::vector<int16_t> temp_data_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(Merge);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MERGE_H_
|
||||
39
modules/audio_coding/neteq/merge_unittest.cc
Normal file
39
modules/audio_coding/neteq/merge_unittest.cc
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for Merge class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/merge.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
TEST(Merge, CreateAndDestroy) {
|
||||
int fs = 8000;
|
||||
size_t channels = 1;
|
||||
BackgroundNoise bgn(channels);
|
||||
SyncBuffer sync_buffer(1, 1000);
|
||||
RandomVector random_vector;
|
||||
StatisticsCalculator statistics;
|
||||
Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
|
||||
Merge merge(fs, channels, &expand, &sync_buffer);
|
||||
}
|
||||
|
||||
// TODO(hlundin): Write more tests.
|
||||
|
||||
} // namespace webrtc
|
||||
37
modules/audio_coding/neteq/mock/mock_buffer_level_filter.h
Normal file
37
modules/audio_coding/neteq/mock/mock_buffer_level_filter.h
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/buffer_level_filter.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockBufferLevelFilter : public BufferLevelFilter {
|
||||
public:
|
||||
virtual ~MockBufferLevelFilter() { Die(); }
|
||||
MOCK_METHOD0(Die,
|
||||
void());
|
||||
MOCK_METHOD0(Reset,
|
||||
void());
|
||||
MOCK_METHOD3(Update,
|
||||
void(size_t buffer_size_packets, int time_stretched_samples,
|
||||
size_t packet_len_samples));
|
||||
MOCK_METHOD1(SetTargetBufferLevel,
|
||||
void(int target_buffer_level));
|
||||
MOCK_CONST_METHOD0(filtered_current_level,
|
||||
int());
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
|
||||
61
modules/audio_coding/neteq/mock/mock_decoder_database.h
Normal file
61
modules/audio_coding/neteq/mock/mock_decoder_database.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockDecoderDatabase : public DecoderDatabase {
|
||||
public:
|
||||
explicit MockDecoderDatabase(
|
||||
rtc::scoped_refptr<AudioDecoderFactory> factory = nullptr)
|
||||
: DecoderDatabase(factory) {}
|
||||
virtual ~MockDecoderDatabase() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_CONST_METHOD0(Empty,
|
||||
bool());
|
||||
MOCK_CONST_METHOD0(Size,
|
||||
int());
|
||||
MOCK_METHOD0(Reset,
|
||||
void());
|
||||
MOCK_METHOD3(RegisterPayload,
|
||||
int(uint8_t rtp_payload_type, NetEqDecoder codec_type,
|
||||
const std::string& name));
|
||||
MOCK_METHOD2(RegisterPayload,
|
||||
int(int rtp_payload_type, const SdpAudioFormat& audio_format));
|
||||
MOCK_METHOD4(InsertExternal,
|
||||
int(uint8_t rtp_payload_type,
|
||||
NetEqDecoder codec_type,
|
||||
const std::string& codec_name,
|
||||
AudioDecoder* decoder));
|
||||
MOCK_METHOD1(Remove,
|
||||
int(uint8_t rtp_payload_type));
|
||||
MOCK_METHOD0(RemoveAll, void());
|
||||
MOCK_CONST_METHOD1(GetDecoderInfo,
|
||||
const DecoderInfo*(uint8_t rtp_payload_type));
|
||||
MOCK_METHOD2(SetActiveDecoder,
|
||||
int(uint8_t rtp_payload_type, bool* new_decoder));
|
||||
MOCK_CONST_METHOD0(GetActiveDecoder,
|
||||
AudioDecoder*());
|
||||
MOCK_METHOD1(SetActiveCngDecoder,
|
||||
int(uint8_t rtp_payload_type));
|
||||
MOCK_CONST_METHOD0(GetActiveCngDecoder,
|
||||
ComfortNoiseDecoder*());
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DECODER_DATABASE_H_
|
||||
62
modules/audio_coding/neteq/mock/mock_delay_manager.h
Normal file
62
modules/audio_coding/neteq/mock/mock_delay_manager.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_manager.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockDelayManager : public DelayManager {
|
||||
public:
|
||||
MockDelayManager(size_t max_packets_in_buffer,
|
||||
DelayPeakDetector* peak_detector,
|
||||
const TickTimer* tick_timer)
|
||||
: DelayManager(max_packets_in_buffer, peak_detector, tick_timer) {}
|
||||
virtual ~MockDelayManager() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_CONST_METHOD0(iat_vector,
|
||||
const IATVector&());
|
||||
MOCK_METHOD3(Update,
|
||||
int(uint16_t sequence_number, uint32_t timestamp, int sample_rate_hz));
|
||||
MOCK_METHOD1(CalculateTargetLevel,
|
||||
int(int iat_packets));
|
||||
MOCK_METHOD1(SetPacketAudioLength,
|
||||
int(int length_ms));
|
||||
MOCK_METHOD0(Reset,
|
||||
void());
|
||||
MOCK_CONST_METHOD0(PeakFound,
|
||||
bool());
|
||||
MOCK_METHOD1(UpdateCounters,
|
||||
void(int elapsed_time_ms));
|
||||
MOCK_METHOD0(ResetPacketIatCount,
|
||||
void());
|
||||
MOCK_CONST_METHOD2(BufferLimits,
|
||||
void(int* lower_limit, int* higher_limit));
|
||||
MOCK_CONST_METHOD0(TargetLevel,
|
||||
int());
|
||||
MOCK_METHOD0(RegisterEmptyPacket, void());
|
||||
MOCK_METHOD1(set_extra_delay_ms,
|
||||
void(int16_t delay));
|
||||
MOCK_CONST_METHOD0(base_target_level,
|
||||
int());
|
||||
MOCK_METHOD1(set_streaming_mode,
|
||||
void(bool value));
|
||||
MOCK_CONST_METHOD0(last_pack_cng_or_dtmf,
|
||||
int());
|
||||
MOCK_METHOD1(set_last_pack_cng_or_dtmf,
|
||||
void(int value));
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_MANAGER_H_
|
||||
35
modules/audio_coding/neteq/mock/mock_delay_peak_detector.h
Normal file
35
modules/audio_coding/neteq/mock/mock_delay_peak_detector.h
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/delay_peak_detector.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockDelayPeakDetector : public DelayPeakDetector {
|
||||
public:
|
||||
MockDelayPeakDetector(const TickTimer* tick_timer)
|
||||
: DelayPeakDetector(tick_timer) {}
|
||||
virtual ~MockDelayPeakDetector() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_METHOD0(Reset, void());
|
||||
MOCK_METHOD1(SetPacketAudioLength, void(int length_ms));
|
||||
MOCK_METHOD0(peak_found, bool());
|
||||
MOCK_CONST_METHOD0(MaxPeakHeight, int());
|
||||
MOCK_CONST_METHOD0(MaxPeakPeriod, uint64_t());
|
||||
MOCK_METHOD2(Update, bool(int inter_arrival_time, int target_level));
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_
|
||||
38
modules/audio_coding/neteq/mock/mock_dtmf_buffer.h
Normal file
38
modules/audio_coding/neteq/mock/mock_dtmf_buffer.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/dtmf_buffer.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockDtmfBuffer : public DtmfBuffer {
|
||||
public:
|
||||
MockDtmfBuffer(int fs) : DtmfBuffer(fs) {}
|
||||
virtual ~MockDtmfBuffer() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_METHOD0(Flush,
|
||||
void());
|
||||
MOCK_METHOD1(InsertEvent,
|
||||
int(const DtmfEvent& event));
|
||||
MOCK_METHOD2(GetEvent,
|
||||
bool(uint32_t current_timestamp, DtmfEvent* event));
|
||||
MOCK_CONST_METHOD0(Length,
|
||||
size_t());
|
||||
MOCK_CONST_METHOD0(Empty,
|
||||
bool());
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_BUFFER_H_
|
||||
35
modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h
Normal file
35
modules/audio_coding/neteq/mock/mock_dtmf_tone_generator.h
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/dtmf_tone_generator.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockDtmfToneGenerator : public DtmfToneGenerator {
|
||||
public:
|
||||
virtual ~MockDtmfToneGenerator() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_METHOD3(Init,
|
||||
int(int fs, int event, int attenuation));
|
||||
MOCK_METHOD0(Reset,
|
||||
void());
|
||||
MOCK_METHOD2(Generate,
|
||||
int(size_t num_samples, AudioMultiVector* output));
|
||||
MOCK_CONST_METHOD0(initialized,
|
||||
bool());
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
|
||||
64
modules/audio_coding/neteq/mock/mock_expand.h
Normal file
64
modules/audio_coding/neteq/mock/mock_expand.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockExpand : public Expand {
|
||||
public:
|
||||
MockExpand(BackgroundNoise* background_noise,
|
||||
SyncBuffer* sync_buffer,
|
||||
RandomVector* random_vector,
|
||||
StatisticsCalculator* statistics,
|
||||
int fs,
|
||||
size_t num_channels)
|
||||
: Expand(background_noise,
|
||||
sync_buffer,
|
||||
random_vector,
|
||||
statistics,
|
||||
fs,
|
||||
num_channels) {}
|
||||
virtual ~MockExpand() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_METHOD0(Reset,
|
||||
void());
|
||||
MOCK_METHOD1(Process,
|
||||
int(AudioMultiVector* output));
|
||||
MOCK_METHOD0(SetParametersForNormalAfterExpand,
|
||||
void());
|
||||
MOCK_METHOD0(SetParametersForMergeAfterExpand,
|
||||
void());
|
||||
MOCK_CONST_METHOD0(overlap_length,
|
||||
size_t());
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockExpandFactory : public ExpandFactory {
|
||||
public:
|
||||
MOCK_CONST_METHOD6(Create,
|
||||
Expand*(BackgroundNoise* background_noise,
|
||||
SyncBuffer* sync_buffer,
|
||||
RandomVector* random_vector,
|
||||
StatisticsCalculator* statistics,
|
||||
int fs,
|
||||
size_t num_channels));
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXPAND_H_
|
||||
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/test/gmock.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
using ::testing::_;
|
||||
using ::testing::Invoke;
|
||||
|
||||
// Implement an external version of the PCM16b decoder.
|
||||
class ExternalPcm16B : public AudioDecoder {
|
||||
public:
|
||||
explicit ExternalPcm16B(int sample_rate_hz)
|
||||
: sample_rate_hz_(sample_rate_hz) {}
|
||||
void Reset() override {}
|
||||
|
||||
int DecodeInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) override {
|
||||
EXPECT_EQ(sample_rate_hz_, sample_rate_hz);
|
||||
size_t ret = WebRtcPcm16b_Decode(encoded, encoded_len, decoded);
|
||||
*speech_type = ConvertSpeechType(1);
|
||||
return static_cast<int>(ret);
|
||||
}
|
||||
int SampleRateHz() const override { return sample_rate_hz_; }
|
||||
size_t Channels() const override { return 1; }
|
||||
|
||||
private:
|
||||
const int sample_rate_hz_;
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(ExternalPcm16B);
|
||||
};
|
||||
|
||||
// Create a mock of ExternalPcm16B which delegates all calls to the real object.
|
||||
// The reason is that we can then track that the correct calls are being made.
|
||||
class MockExternalPcm16B : public AudioDecoder {
|
||||
public:
|
||||
explicit MockExternalPcm16B(int sample_rate_hz) : real_(sample_rate_hz) {
|
||||
// By default, all calls are delegated to the real object.
|
||||
ON_CALL(*this, DecodeInternal(_, _, _, _, _))
|
||||
.WillByDefault(Invoke(&real_, &ExternalPcm16B::DecodeInternal));
|
||||
ON_CALL(*this, HasDecodePlc())
|
||||
.WillByDefault(Invoke(&real_, &ExternalPcm16B::HasDecodePlc));
|
||||
ON_CALL(*this, DecodePlc(_, _))
|
||||
.WillByDefault(Invoke(&real_, &ExternalPcm16B::DecodePlc));
|
||||
ON_CALL(*this, Reset())
|
||||
.WillByDefault(Invoke(&real_, &ExternalPcm16B::Reset));
|
||||
ON_CALL(*this, IncomingPacket(_, _, _, _, _))
|
||||
.WillByDefault(Invoke(&real_, &ExternalPcm16B::IncomingPacket));
|
||||
ON_CALL(*this, ErrorCode())
|
||||
.WillByDefault(Invoke(&real_, &ExternalPcm16B::ErrorCode));
|
||||
}
|
||||
virtual ~MockExternalPcm16B() { Die(); }
|
||||
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_METHOD5(DecodeInternal,
|
||||
int(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type));
|
||||
MOCK_CONST_METHOD0(HasDecodePlc,
|
||||
bool());
|
||||
MOCK_METHOD2(DecodePlc,
|
||||
size_t(size_t num_frames, int16_t* decoded));
|
||||
MOCK_METHOD0(Reset, void());
|
||||
MOCK_METHOD5(IncomingPacket,
|
||||
int(const uint8_t* payload, size_t payload_len,
|
||||
uint16_t rtp_sequence_number, uint32_t rtp_timestamp,
|
||||
uint32_t arrival_timestamp));
|
||||
MOCK_METHOD0(ErrorCode,
|
||||
int());
|
||||
|
||||
int SampleRateHz() const /* override */ { return real_.SampleRateHz(); }
|
||||
size_t Channels() const /* override */ { return real_.Channels(); }
|
||||
|
||||
private:
|
||||
ExternalPcm16B real_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_
|
||||
68
modules/audio_coding/neteq/mock/mock_packet_buffer.h
Normal file
68
modules/audio_coding/neteq/mock/mock_packet_buffer.h
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockPacketBuffer : public PacketBuffer {
|
||||
public:
|
||||
MockPacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer)
|
||||
: PacketBuffer(max_number_of_packets, tick_timer) {}
|
||||
virtual ~MockPacketBuffer() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_METHOD0(Flush,
|
||||
void());
|
||||
MOCK_CONST_METHOD0(Empty,
|
||||
bool());
|
||||
int InsertPacket(Packet&& packet, StatisticsCalculator* stats) {
|
||||
return InsertPacketWrapped(&packet, stats);
|
||||
}
|
||||
// Since gtest does not properly support move-only types, InsertPacket is
|
||||
// implemented as a wrapper. You'll have to implement InsertPacketWrapped
|
||||
// instead and move from |*packet|.
|
||||
MOCK_METHOD2(InsertPacketWrapped,
|
||||
int(Packet* packet, StatisticsCalculator* stats));
|
||||
MOCK_METHOD5(InsertPacketList,
|
||||
int(PacketList* packet_list,
|
||||
const DecoderDatabase& decoder_database,
|
||||
rtc::Optional<uint8_t>* current_rtp_payload_type,
|
||||
rtc::Optional<uint8_t>* current_cng_rtp_payload_type,
|
||||
StatisticsCalculator* stats));
|
||||
MOCK_CONST_METHOD1(NextTimestamp,
|
||||
int(uint32_t* next_timestamp));
|
||||
MOCK_CONST_METHOD2(NextHigherTimestamp,
|
||||
int(uint32_t timestamp, uint32_t* next_timestamp));
|
||||
MOCK_CONST_METHOD0(PeekNextPacket,
|
||||
const Packet*());
|
||||
MOCK_METHOD0(GetNextPacket,
|
||||
rtc::Optional<Packet>());
|
||||
MOCK_METHOD1(DiscardNextPacket, int(StatisticsCalculator* stats));
|
||||
MOCK_METHOD3(DiscardOldPackets,
|
||||
void(uint32_t timestamp_limit,
|
||||
uint32_t horizon_samples,
|
||||
StatisticsCalculator* stats));
|
||||
MOCK_METHOD2(DiscardAllOldPackets,
|
||||
void(uint32_t timestamp_limit, StatisticsCalculator* stats));
|
||||
MOCK_CONST_METHOD0(NumPacketsInBuffer,
|
||||
size_t());
|
||||
MOCK_METHOD1(IncrementWaitingTimes,
|
||||
void(int));
|
||||
MOCK_CONST_METHOD0(current_memory_bytes,
|
||||
int());
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_PACKET_BUFFER_H_
|
||||
29
modules/audio_coding/neteq/mock/mock_red_payload_splitter.h
Normal file
29
modules/audio_coding/neteq/mock/mock_red_payload_splitter.h
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/red_payload_splitter.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockRedPayloadSplitter : public RedPayloadSplitter {
|
||||
public:
|
||||
MOCK_METHOD1(SplitRed, bool(PacketList* packet_list));
|
||||
MOCK_METHOD2(CheckRedPayloads,
|
||||
int(PacketList* packet_list,
|
||||
const DecoderDatabase& decoder_database));
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_RED_PAYLOAD_SPLITTER_H_
|
||||
27
modules/audio_coding/neteq/mock/mock_statistics_calculator.h
Normal file
27
modules/audio_coding/neteq/mock/mock_statistics_calculator.h
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
|
||||
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockStatisticsCalculator : public StatisticsCalculator {
|
||||
public:
|
||||
MOCK_METHOD1(PacketsDiscarded, void(size_t num_packets));
|
||||
MOCK_METHOD1(SecondaryPacketsDiscarded, void(size_t num_packets));
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_MOCK_MOCK_STATISTICS_CALCULATOR_H_
|
||||
232
modules/audio_coding/neteq/nack_tracker.cc
Normal file
232
modules/audio_coding/neteq/nack_tracker.cc
Normal file
@ -0,0 +1,232 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/nack_tracker.h"
|
||||
|
||||
#include <assert.h> // For assert.
|
||||
|
||||
#include <algorithm> // For std::max.
|
||||
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
const int kDefaultSampleRateKhz = 48;
|
||||
const int kDefaultPacketSizeMs = 20;
|
||||
|
||||
} // namespace
|
||||
|
||||
NackTracker::NackTracker(int nack_threshold_packets)
|
||||
: nack_threshold_packets_(nack_threshold_packets),
|
||||
sequence_num_last_received_rtp_(0),
|
||||
timestamp_last_received_rtp_(0),
|
||||
any_rtp_received_(false),
|
||||
sequence_num_last_decoded_rtp_(0),
|
||||
timestamp_last_decoded_rtp_(0),
|
||||
any_rtp_decoded_(false),
|
||||
sample_rate_khz_(kDefaultSampleRateKhz),
|
||||
samples_per_packet_(sample_rate_khz_ * kDefaultPacketSizeMs),
|
||||
max_nack_list_size_(kNackListSizeLimit) {}
|
||||
|
||||
NackTracker::~NackTracker() = default;
|
||||
|
||||
NackTracker* NackTracker::Create(int nack_threshold_packets) {
|
||||
return new NackTracker(nack_threshold_packets);
|
||||
}
|
||||
|
||||
void NackTracker::UpdateSampleRate(int sample_rate_hz) {
|
||||
assert(sample_rate_hz > 0);
|
||||
sample_rate_khz_ = sample_rate_hz / 1000;
|
||||
}
|
||||
|
||||
void NackTracker::UpdateLastReceivedPacket(uint16_t sequence_number,
|
||||
uint32_t timestamp) {
|
||||
// Just record the value of sequence number and timestamp if this is the
|
||||
// first packet.
|
||||
if (!any_rtp_received_) {
|
||||
sequence_num_last_received_rtp_ = sequence_number;
|
||||
timestamp_last_received_rtp_ = timestamp;
|
||||
any_rtp_received_ = true;
|
||||
// If no packet is decoded, to have a reasonable estimate of time-to-play
|
||||
// use the given values.
|
||||
if (!any_rtp_decoded_) {
|
||||
sequence_num_last_decoded_rtp_ = sequence_number;
|
||||
timestamp_last_decoded_rtp_ = timestamp;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (sequence_number == sequence_num_last_received_rtp_)
|
||||
return;
|
||||
|
||||
// Received RTP should not be in the list.
|
||||
nack_list_.erase(sequence_number);
|
||||
|
||||
// If this is an old sequence number, no more action is required, return.
|
||||
if (IsNewerSequenceNumber(sequence_num_last_received_rtp_, sequence_number))
|
||||
return;
|
||||
|
||||
UpdateSamplesPerPacket(sequence_number, timestamp);
|
||||
|
||||
UpdateList(sequence_number);
|
||||
|
||||
sequence_num_last_received_rtp_ = sequence_number;
|
||||
timestamp_last_received_rtp_ = timestamp;
|
||||
LimitNackListSize();
|
||||
}
|
||||
|
||||
void NackTracker::UpdateSamplesPerPacket(
|
||||
uint16_t sequence_number_current_received_rtp,
|
||||
uint32_t timestamp_current_received_rtp) {
|
||||
uint32_t timestamp_increase =
|
||||
timestamp_current_received_rtp - timestamp_last_received_rtp_;
|
||||
uint16_t sequence_num_increase =
|
||||
sequence_number_current_received_rtp - sequence_num_last_received_rtp_;
|
||||
|
||||
samples_per_packet_ = timestamp_increase / sequence_num_increase;
|
||||
}
|
||||
|
||||
void NackTracker::UpdateList(uint16_t sequence_number_current_received_rtp) {
|
||||
// Some of the packets which were considered late, now are considered missing.
|
||||
ChangeFromLateToMissing(sequence_number_current_received_rtp);
|
||||
|
||||
if (IsNewerSequenceNumber(sequence_number_current_received_rtp,
|
||||
sequence_num_last_received_rtp_ + 1))
|
||||
AddToList(sequence_number_current_received_rtp);
|
||||
}
|
||||
|
||||
void NackTracker::ChangeFromLateToMissing(
|
||||
uint16_t sequence_number_current_received_rtp) {
|
||||
NackList::const_iterator lower_bound =
|
||||
nack_list_.lower_bound(static_cast<uint16_t>(
|
||||
sequence_number_current_received_rtp - nack_threshold_packets_));
|
||||
|
||||
for (NackList::iterator it = nack_list_.begin(); it != lower_bound; ++it)
|
||||
it->second.is_missing = true;
|
||||
}
|
||||
|
||||
uint32_t NackTracker::EstimateTimestamp(uint16_t sequence_num) {
|
||||
uint16_t sequence_num_diff = sequence_num - sequence_num_last_received_rtp_;
|
||||
return sequence_num_diff * samples_per_packet_ + timestamp_last_received_rtp_;
|
||||
}
|
||||
|
||||
void NackTracker::AddToList(uint16_t sequence_number_current_received_rtp) {
|
||||
assert(!any_rtp_decoded_ ||
|
||||
IsNewerSequenceNumber(sequence_number_current_received_rtp,
|
||||
sequence_num_last_decoded_rtp_));
|
||||
|
||||
// Packets with sequence numbers older than |upper_bound_missing| are
|
||||
// considered missing, and the rest are considered late.
|
||||
uint16_t upper_bound_missing =
|
||||
sequence_number_current_received_rtp - nack_threshold_packets_;
|
||||
|
||||
for (uint16_t n = sequence_num_last_received_rtp_ + 1;
|
||||
IsNewerSequenceNumber(sequence_number_current_received_rtp, n); ++n) {
|
||||
bool is_missing = IsNewerSequenceNumber(upper_bound_missing, n);
|
||||
uint32_t timestamp = EstimateTimestamp(n);
|
||||
NackElement nack_element(TimeToPlay(timestamp), timestamp, is_missing);
|
||||
nack_list_.insert(nack_list_.end(), std::make_pair(n, nack_element));
|
||||
}
|
||||
}
|
||||
|
||||
void NackTracker::UpdateEstimatedPlayoutTimeBy10ms() {
|
||||
while (!nack_list_.empty() &&
|
||||
nack_list_.begin()->second.time_to_play_ms <= 10)
|
||||
nack_list_.erase(nack_list_.begin());
|
||||
|
||||
for (NackList::iterator it = nack_list_.begin(); it != nack_list_.end(); ++it)
|
||||
it->second.time_to_play_ms -= 10;
|
||||
}
|
||||
|
||||
void NackTracker::UpdateLastDecodedPacket(uint16_t sequence_number,
|
||||
uint32_t timestamp) {
|
||||
if (IsNewerSequenceNumber(sequence_number, sequence_num_last_decoded_rtp_) ||
|
||||
!any_rtp_decoded_) {
|
||||
sequence_num_last_decoded_rtp_ = sequence_number;
|
||||
timestamp_last_decoded_rtp_ = timestamp;
|
||||
// Packets in the list with sequence numbers less than the
|
||||
// sequence number of the decoded RTP should be removed from the lists.
|
||||
// They will be discarded by the jitter buffer if they arrive.
|
||||
nack_list_.erase(nack_list_.begin(),
|
||||
nack_list_.upper_bound(sequence_num_last_decoded_rtp_));
|
||||
|
||||
// Update estimated time-to-play.
|
||||
for (NackList::iterator it = nack_list_.begin(); it != nack_list_.end();
|
||||
++it)
|
||||
it->second.time_to_play_ms = TimeToPlay(it->second.estimated_timestamp);
|
||||
} else {
|
||||
assert(sequence_number == sequence_num_last_decoded_rtp_);
|
||||
|
||||
// Same sequence number as before. 10 ms is elapsed, update estimations for
|
||||
// time-to-play.
|
||||
UpdateEstimatedPlayoutTimeBy10ms();
|
||||
|
||||
// Update timestamp for better estimate of time-to-play, for packets which
|
||||
// are added to NACK list later on.
|
||||
timestamp_last_decoded_rtp_ += sample_rate_khz_ * 10;
|
||||
}
|
||||
any_rtp_decoded_ = true;
|
||||
}
|
||||
|
||||
NackTracker::NackList NackTracker::GetNackList() const {
|
||||
return nack_list_;
|
||||
}
|
||||
|
||||
void NackTracker::Reset() {
|
||||
nack_list_.clear();
|
||||
|
||||
sequence_num_last_received_rtp_ = 0;
|
||||
timestamp_last_received_rtp_ = 0;
|
||||
any_rtp_received_ = false;
|
||||
sequence_num_last_decoded_rtp_ = 0;
|
||||
timestamp_last_decoded_rtp_ = 0;
|
||||
any_rtp_decoded_ = false;
|
||||
sample_rate_khz_ = kDefaultSampleRateKhz;
|
||||
samples_per_packet_ = sample_rate_khz_ * kDefaultPacketSizeMs;
|
||||
}
|
||||
|
||||
void NackTracker::SetMaxNackListSize(size_t max_nack_list_size) {
|
||||
RTC_CHECK_GT(max_nack_list_size, 0);
|
||||
// Ugly hack to get around the problem of passing static consts by reference.
|
||||
const size_t kNackListSizeLimitLocal = NackTracker::kNackListSizeLimit;
|
||||
RTC_CHECK_LE(max_nack_list_size, kNackListSizeLimitLocal);
|
||||
|
||||
max_nack_list_size_ = max_nack_list_size;
|
||||
LimitNackListSize();
|
||||
}
|
||||
|
||||
void NackTracker::LimitNackListSize() {
|
||||
uint16_t limit = sequence_num_last_received_rtp_ -
|
||||
static_cast<uint16_t>(max_nack_list_size_) - 1;
|
||||
nack_list_.erase(nack_list_.begin(), nack_list_.upper_bound(limit));
|
||||
}
|
||||
|
||||
int64_t NackTracker::TimeToPlay(uint32_t timestamp) const {
|
||||
uint32_t timestamp_increase = timestamp - timestamp_last_decoded_rtp_;
|
||||
return timestamp_increase / sample_rate_khz_;
|
||||
}
|
||||
|
||||
// We don't erase elements with time-to-play shorter than round-trip-time.
|
||||
std::vector<uint16_t> NackTracker::GetNackList(
|
||||
int64_t round_trip_time_ms) const {
|
||||
RTC_DCHECK_GE(round_trip_time_ms, 0);
|
||||
std::vector<uint16_t> sequence_numbers;
|
||||
for (NackList::const_iterator it = nack_list_.begin(); it != nack_list_.end();
|
||||
++it) {
|
||||
if (it->second.is_missing &&
|
||||
it->second.time_to_play_ms > round_trip_time_ms)
|
||||
sequence_numbers.push_back(it->first);
|
||||
}
|
||||
return sequence_numbers;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
208
modules/audio_coding/neteq/nack_tracker.h
Normal file
208
modules/audio_coding/neteq/nack_tracker.h
Normal file
@ -0,0 +1,208 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
#include "webrtc/modules/audio_coding/include/audio_coding_module_typedefs.h"
|
||||
#include "webrtc/rtc_base/gtest_prod_util.h"
|
||||
|
||||
//
|
||||
// The NackTracker class keeps track of the lost packets, an estimate of
|
||||
// time-to-play for each packet is also given.
|
||||
//
|
||||
// Every time a packet is pushed into NetEq, LastReceivedPacket() has to be
|
||||
// called to update the NACK list.
|
||||
//
|
||||
// Every time 10ms audio is pulled from NetEq LastDecodedPacket() should be
|
||||
// called, and time-to-play is updated at that moment.
|
||||
//
|
||||
// If packet N is received, any packet prior to |N - NackThreshold| which is not
|
||||
// arrived is considered lost, and should be labeled as "missing" (the size of
|
||||
// the list might be limited and older packet eliminated from the list). Packets
|
||||
// |N - NackThreshold|, |N - NackThreshold + 1|, ..., |N - 1| are considered
|
||||
// "late." A "late" packet with sequence number K is changed to "missing" any
|
||||
// time a packet with sequence number newer than |K + NackList| is arrived.
|
||||
//
|
||||
// The NackTracker class has to know about the sample rate of the packets to
|
||||
// compute time-to-play. So sample rate should be set as soon as the first
|
||||
// packet is received. If there is a change in the receive codec (sender changes
|
||||
// codec) then NackTracker should be reset. This is because NetEQ would flush
|
||||
// its buffer and re-transmission is meaning less for old packet. Therefore, in
|
||||
// that case, after reset the sampling rate has to be updated.
|
||||
//
|
||||
// Thread Safety
|
||||
// =============
|
||||
// Please note that this class in not thread safe. The class must be protected
|
||||
// if different APIs are called from different threads.
|
||||
//
|
||||
namespace webrtc {
|
||||
|
||||
class NackTracker {
 public:
  // A limit for the size of the NACK list.
  static const size_t kNackListSizeLimit = 500;  // 10 seconds for 20 ms frame
                                                 // packets.
  // Factory method.
  static NackTracker* Create(int nack_threshold_packets);

  ~NackTracker();

  // Set a maximum for the size of the NACK list. If the last received packet
  // has sequence number of N, then NACK list will not contain any element
  // with sequence number earlier than N - |max_nack_list_size|.
  //
  // The largest maximum size is defined by |kNackListSizeLimit|.
  void SetMaxNackListSize(size_t max_nack_list_size);

  // Set the sampling rate.
  //
  // If associated sampling rate of the received packets is changed, call this
  // function to update sampling rate. Note that if there is any change in
  // received codec then NetEq will flush its buffer and NACK has to be reset.
  // After Reset() is called sampling rate has to be set.
  void UpdateSampleRate(int sample_rate_hz);

  // Update the sequence number and the timestamp of the last decoded RTP. This
  // API should be called every time 10 ms audio is pulled from NetEq.
  void UpdateLastDecodedPacket(uint16_t sequence_number, uint32_t timestamp);

  // Update the sequence number and the timestamp of the last received RTP.
  // This API should be called every time a packet is pushed into ACM.
  void UpdateLastReceivedPacket(uint16_t sequence_number, uint32_t timestamp);

  // Get a list of "missing" packets which have expected time-to-play larger
  // than the given round-trip-time (in milliseconds).
  // Note: Late packets are not included.
  std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const;

  // Reset to default values. The NACK list is cleared.
  // |nack_threshold_packets_| & |max_nack_list_size_| preserve their values.
  void Reset();

 private:
  // This test needs to access the private method GetNackList().
  FRIEND_TEST_ALL_PREFIXES(NackTrackerTest, EstimateTimestampAndTimeToPlay);

  // Bookkeeping for one missing (or late) packet tracked by the NACK list.
  struct NackElement {
    NackElement(int64_t initial_time_to_play_ms,
                uint32_t initial_timestamp,
                bool missing)
        : time_to_play_ms(initial_time_to_play_ms),
          estimated_timestamp(initial_timestamp),
          is_missing(missing) {}

    // Estimated time (ms) left for this packet to be decoded. This estimate is
    // updated every time jitter buffer decodes a packet.
    int64_t time_to_play_ms;

    // A guess about the timestamp of the missing packet, it is used for
    // estimation of |time_to_play_ms|. The estimate might be slightly wrong if
    // there has been frame-size change since the last received packet and the
    // missing packet. However, the risk of this is low, and in case of such
    // errors, there will be a minor misestimation in time-to-play of missing
    // packets. This will have a very minor effect on NACK performance.
    uint32_t estimated_timestamp;

    // True if the packet is considered missing. Otherwise indicates packet is
    // late.
    bool is_missing;
  };

  // Map comparator: orders sequence numbers oldest-first, using
  // IsNewerSequenceNumber() so that wrap-around is handled.
  class NackListCompare {
   public:
    bool operator()(uint16_t sequence_number_old,
                    uint16_t sequence_number_new) const {
      return IsNewerSequenceNumber(sequence_number_new, sequence_number_old);
    }
  };

  typedef std::map<uint16_t, NackElement, NackListCompare> NackList;

  // Constructor.
  explicit NackTracker(int nack_threshold_packets);

  // This API is used only for testing to assess whether time-to-play is
  // computed correctly.
  NackList GetNackList() const;

  // Given the |sequence_number_current_received_rtp| of currently received
  // RTP, recognize packets which have not arrived and add them to the list.
  void AddToList(uint16_t sequence_number_current_received_rtp);

  // This function subtracts 10 ms of time-to-play for all packets in NACK
  // list. This is called when 10 ms elapsed with no new RTP packet decoded.
  void UpdateEstimatedPlayoutTimeBy10ms();

  // Given the |sequence_number_current_received_rtp| and
  // |timestamp_current_received_rtp| of currently received RTP update number
  // of samples per packet.
  void UpdateSamplesPerPacket(uint16_t sequence_number_current_received_rtp,
                              uint32_t timestamp_current_received_rtp);

  // Given the |sequence_number_current_received_rtp| of currently received RTP
  // update the list. That is; some packets will change from late to missing,
  // some packets are inserted as missing and some inserted as late.
  void UpdateList(uint16_t sequence_number_current_received_rtp);

  // Packets which are considered late for too long (according to
  // |nack_threshold_packets_|) are flagged as missing.
  void ChangeFromLateToMissing(uint16_t sequence_number_current_received_rtp);

  // Packets which have sequence number older than
  // |sequence_num_last_received_rtp_| - |max_nack_list_size_| are removed
  // from the NACK list.
  void LimitNackListSize();

  // Estimate timestamp of a missing packet given its sequence number.
  uint32_t EstimateTimestamp(uint16_t sequence_number);

  // Compute time-to-play given a timestamp.
  int64_t TimeToPlay(uint32_t timestamp) const;

  // If packet N has arrived, any packet prior to
  // N - |nack_threshold_packets_| which has not arrived is considered missing,
  // and should be in NACK list. Also any packet in the range of N-1 and
  // N - |nack_threshold_packets_|, exclusive, which has not arrived is
  // considered late, and should be in the list of late packets.
  const int nack_threshold_packets_;

  // Valid if a packet is received.
  uint16_t sequence_num_last_received_rtp_;
  uint32_t timestamp_last_received_rtp_;
  bool any_rtp_received_;  // If any packet received.

  // Valid if a packet is decoded.
  uint16_t sequence_num_last_decoded_rtp_;
  uint32_t timestamp_last_decoded_rtp_;
  bool any_rtp_decoded_;  // If any packet decoded.

  int sample_rate_khz_;  // Sample rate in kHz.

  // Number of samples per packet. We update this every time we receive a
  // packet, not only for consecutive packets.
  int samples_per_packet_;

  // A list of missing packets to be retransmitted. Components of the list
  // contain the sequence number of missing packets and the estimated time that
  // each pack is going to be played out.
  NackList nack_list_;

  // NACK list will not keep track of missing packets prior to
  // |sequence_num_last_received_rtp_| - |max_nack_list_size_|.
  size_t max_nack_list_size_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NACK_TRACKER_H_
|
||||
483
modules/audio_coding/neteq/nack_tracker_unittest.cc
Normal file
483
modules/audio_coding/neteq/nack_tracker_unittest.cc
Normal file
@ -0,0 +1,483 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/nack_tracker.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/modules/audio_coding/include/audio_coding_module_typedefs.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
const int kNackThreshold = 3;
|
||||
const int kSampleRateHz = 16000;
|
||||
const int kPacketSizeMs = 30;
|
||||
const uint32_t kTimestampIncrement = 480; // 30 ms.
|
||||
const int64_t kShortRoundTripTimeMs = 1;
|
||||
|
||||
// Returns true iff |nack_list| has exactly |num_lost_packets| entries and
// every entry occurs somewhere in |lost_sequence_numbers|. Order is ignored;
// entries are checked for membership only.
bool IsNackListCorrect(const std::vector<uint16_t>& nack_list,
                       const uint16_t* lost_sequence_numbers,
                       size_t num_lost_packets) {
  if (nack_list.size() != num_lost_packets)
    return false;

  const uint16_t* const lost_end = lost_sequence_numbers + num_lost_packets;
  for (uint16_t sequence_number : nack_list) {
    // Membership scan over the expected lost packets.
    if (std::find(lost_sequence_numbers, lost_end, sequence_number) ==
        lost_end) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// With an uninterrupted, in-order packet stream the NACK list must stay empty.
TEST(NackTrackerTest, EmptyListWhenNoPacketLoss) {
  std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
  nack->UpdateSampleRate(kSampleRateHz);

  int seq_num = 1;
  uint32_t timestamp = 0;

  std::vector<uint16_t> nack_list;
  for (int n = 0; n < 100; n++) {
    nack->UpdateLastReceivedPacket(seq_num, timestamp);
    seq_num++;
    timestamp += kTimestampIncrement;
    // GetNackList() is const; a single query per received packet suffices
    // (the original queried twice and discarded the first result).
    nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(nack_list.empty());
  }
}
|
||||
|
||||
// Packets arriving out of order, but within |kNackThreshold| of the highest
// sequence number seen so far, must not produce any NACK entries.
TEST(NackTrackerTest, NoNackIfReorderWithinNackThreshold) {
  std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
  nack->UpdateSampleRate(kSampleRateHz);

  int seq_num = 1;
  uint32_t timestamp = 0;
  std::vector<uint16_t> nack_list;

  nack->UpdateLastReceivedPacket(seq_num, timestamp);
  nack_list = nack->GetNackList(kShortRoundTripTimeMs);
  EXPECT_TRUE(nack_list.empty());
  int num_late_packets = kNackThreshold + 1;

  // Push in reverse order: highest sequence number first, then fill the gap
  // backwards. Each reordered packet is still within the threshold.
  while (num_late_packets > 0) {
    nack->UpdateLastReceivedPacket(
        seq_num + num_late_packets,
        timestamp + num_late_packets * kTimestampIncrement);
    nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(nack_list.empty());
    num_late_packets--;
  }
}
|
||||
|
||||
// A burst of lost packets first shows up partially (the newest ones counted as
// "late"), then the late ones migrate into the NACK list as more packets
// arrive; afterwards the list stays stable. Exercised with and without
// sequence-number wrap-around.
TEST(NackTrackerTest, LatePacketsMovedToNackThenNackListDoesNotChange) {
  const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9};
  static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) /
                                        sizeof(kSequenceNumberLostPackets[0]);

  for (int k = 0; k < 2; k++) {  // Two iterations with/without wrap around.
    std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
    nack->UpdateSampleRate(kSampleRateHz);

    uint16_t sequence_num_lost_packets[kNumAllLostPackets];
    for (int n = 0; n < kNumAllLostPackets; n++) {
      sequence_num_lost_packets[n] =
          kSequenceNumberLostPackets[n] +
          k * 65531;  // Have wrap around in sequence numbers for |k == 1|.
    }
    uint16_t seq_num = sequence_num_lost_packets[0] - 1;

    uint32_t timestamp = 0;
    std::vector<uint16_t> nack_list;

    nack->UpdateLastReceivedPacket(seq_num, timestamp);
    nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(nack_list.empty());

    // Jump past the whole lost burst.
    seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1;
    timestamp += kTimestampIncrement * (kNumAllLostPackets + 1);
    int num_lost_packets = std::max(0, kNumAllLostPackets - kNackThreshold);

    // Each new packet promotes one more "late" packet to "missing".
    for (int n = 0; n < kNackThreshold + 1; ++n) {
      nack->UpdateLastReceivedPacket(seq_num, timestamp);
      nack_list = nack->GetNackList(kShortRoundTripTimeMs);
      EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets,
                                    num_lost_packets));
      seq_num++;
      timestamp += kTimestampIncrement;
      num_lost_packets++;
    }

    // From here on the NACK list must not change.
    for (int n = 0; n < 100; ++n) {
      nack->UpdateLastReceivedPacket(seq_num, timestamp);
      nack_list = nack->GetNackList(kShortRoundTripTimeMs);
      EXPECT_TRUE(IsNackListCorrect(nack_list, sequence_num_lost_packets,
                                    kNumAllLostPackets));
      seq_num++;
      timestamp += kTimestampIncrement;
    }
  }
}
|
||||
|
||||
// When a previously-lost packet finally arrives (retransmission), it must be
// removed from the NACK list. Exercised with and without wrap-around.
TEST(NackTrackerTest, ArrivedPacketsAreRemovedFromNackList) {
  const uint16_t kSequenceNumberLostPackets[] = {2, 3, 4, 5, 6, 7, 8, 9};
  static const int kNumAllLostPackets = sizeof(kSequenceNumberLostPackets) /
                                        sizeof(kSequenceNumberLostPackets[0]);

  for (int k = 0; k < 2; ++k) {  // Two iterations with/without wrap around.
    std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
    nack->UpdateSampleRate(kSampleRateHz);

    uint16_t sequence_num_lost_packets[kNumAllLostPackets];
    for (int n = 0; n < kNumAllLostPackets; ++n) {
      sequence_num_lost_packets[n] = kSequenceNumberLostPackets[n] +
                                     k * 65531;  // Wrap around for |k == 1|.
    }

    uint16_t seq_num = sequence_num_lost_packets[0] - 1;
    uint32_t timestamp = 0;

    nack->UpdateLastReceivedPacket(seq_num, timestamp);
    std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(nack_list.empty());

    size_t index_retransmitted_rtp = 0;
    uint32_t timestamp_retransmitted_rtp = timestamp + kTimestampIncrement;

    seq_num = sequence_num_lost_packets[kNumAllLostPackets - 1] + 1;
    timestamp += kTimestampIncrement * (kNumAllLostPackets + 1);
    size_t num_lost_packets = std::max(0, kNumAllLostPackets - kNackThreshold);
    for (int n = 0; n < kNumAllLostPackets; ++n) {
      // Number of lost packets does not change for the first
      // |kNackThreshold + 1| packets, one is added to the list and one is
      // removed. Thereafter, the list shrinks every iteration.
      if (n >= kNackThreshold + 1)
        num_lost_packets--;

      nack->UpdateLastReceivedPacket(seq_num, timestamp);
      nack_list = nack->GetNackList(kShortRoundTripTimeMs);
      EXPECT_TRUE(IsNackListCorrect(
          nack_list, &sequence_num_lost_packets[index_retransmitted_rtp],
          num_lost_packets));
      seq_num++;
      timestamp += kTimestampIncrement;

      // Retransmission of a lost RTP.
      nack->UpdateLastReceivedPacket(
          sequence_num_lost_packets[index_retransmitted_rtp],
          timestamp_retransmitted_rtp);
      index_retransmitted_rtp++;
      timestamp_retransmitted_rtp += kTimestampIncrement;

      nack_list = nack->GetNackList(kShortRoundTripTimeMs);
      EXPECT_TRUE(IsNackListCorrect(
          nack_list, &sequence_num_lost_packets[index_retransmitted_rtp],
          num_lost_packets - 1));  // One less lost packet in the list.
    }
    ASSERT_TRUE(nack_list.empty());
  }
}
|
||||
|
||||
// Assess if estimation of timestamps and time-to-play is correct. Introduce all
|
||||
// combinations that timestamps and sequence numbers might have wrap around.
|
||||
TEST(NackTrackerTest, EstimateTimestampAndTimeToPlay) {
  const uint16_t kLostPackets[] = {2, 3, 4, 5, 6, 7, 8,
                                   9, 10, 11, 12, 13, 14, 15};
  static const int kNumAllLostPackets =
      sizeof(kLostPackets) / sizeof(kLostPackets[0]);

  // Four iterations cover every combination of sequence-number and timestamp
  // wrap-around.
  for (int k = 0; k < 4; ++k) {
    std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
    nack->UpdateSampleRate(kSampleRateHz);

    // Sequence number wrap around if |k| is 2 or 3.
    int seq_num_offset = (k < 2) ? 0 : 65531;

    // Timestamp wrap around if |k| is 1 or 3.
    uint32_t timestamp_offset =
        (k & 0x1) ? static_cast<uint32_t>(0xffffffff) - 6 : 0;

    uint32_t timestamp_lost_packets[kNumAllLostPackets];
    uint16_t seq_num_lost_packets[kNumAllLostPackets];
    for (int n = 0; n < kNumAllLostPackets; ++n) {
      timestamp_lost_packets[n] =
          timestamp_offset + kLostPackets[n] * kTimestampIncrement;
      seq_num_lost_packets[n] = seq_num_offset + kLostPackets[n];
    }

    // We want to push two packets before the lost burst starts.
    uint16_t seq_num = seq_num_lost_packets[0] - 2;
    uint32_t timestamp = timestamp_lost_packets[0] - 2 * kTimestampIncrement;

    const uint16_t first_seq_num = seq_num;
    const uint32_t first_timestamp = timestamp;

    // Two consecutive packets to have a correct estimate of timestamp
    // increase.
    nack->UpdateLastReceivedPacket(seq_num, timestamp);
    seq_num++;
    timestamp += kTimestampIncrement;
    nack->UpdateLastReceivedPacket(seq_num, timestamp);

    // A packet after the last one which is supposed to be lost.
    seq_num = seq_num_lost_packets[kNumAllLostPackets - 1] + 1;
    timestamp =
        timestamp_lost_packets[kNumAllLostPackets - 1] + kTimestampIncrement;
    nack->UpdateLastReceivedPacket(seq_num, timestamp);

    NackTracker::NackList nack_list = nack->GetNackList();
    EXPECT_EQ(static_cast<size_t>(kNumAllLostPackets), nack_list.size());

    // Pretend the first packet is decoded.
    nack->UpdateLastDecodedPacket(first_seq_num, first_timestamp);
    nack_list = nack->GetNackList();

    // Each missing packet's estimated timestamp and time-to-play must match
    // the values it would have had if it had arrived.
    NackTracker::NackList::iterator it = nack_list.begin();
    while (it != nack_list.end()) {
      seq_num = it->first - seq_num_offset;
      int index = seq_num - kLostPackets[0];
      EXPECT_EQ(timestamp_lost_packets[index], it->second.estimated_timestamp);
      EXPECT_EQ((index + 2) * kPacketSizeMs, it->second.time_to_play_ms);
      ++it;
    }

    // Pretend 10 ms is passed, and we had pulled audio from NetEq, it still
    // reports the same sequence number as decoded, time-to-play should be
    // updated by 10 ms.
    nack->UpdateLastDecodedPacket(first_seq_num, first_timestamp);
    nack_list = nack->GetNackList();
    it = nack_list.begin();
    while (it != nack_list.end()) {
      seq_num = it->first - seq_num_offset;
      int index = seq_num - kLostPackets[0];
      EXPECT_EQ((index + 2) * kPacketSizeMs - 10, it->second.time_to_play_ms);
      ++it;
    }
  }
}
|
||||
|
||||
TEST(NackTrackerTest,
     MissingPacketsPriorToLastDecodedRtpShouldNotBeInNackList) {
  for (int m = 0; m < 2; ++m) {
    uint16_t seq_num_offset = (m == 0) ? 0 : 65531;  // Wrap around if |m| is 1.
    std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
    nack->UpdateSampleRate(kSampleRateHz);

    // Two consecutive packets to have a correct estimate of timestamp
    // increase.
    uint16_t seq_num = 0;
    nack->UpdateLastReceivedPacket(seq_num_offset + seq_num,
                                   seq_num * kTimestampIncrement);
    seq_num++;
    nack->UpdateLastReceivedPacket(seq_num_offset + seq_num,
                                   seq_num * kTimestampIncrement);

    // Skip 10 packets (larger than NACK threshold).
    const int kNumLostPackets = 10;
    seq_num += kNumLostPackets + 1;
    nack->UpdateLastReceivedPacket(seq_num_offset + seq_num,
                                   seq_num * kTimestampIncrement);

    const size_t kExpectedListSize = kNumLostPackets - kNackThreshold;
    std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_EQ(kExpectedListSize, nack_list.size());

    for (int k = 0; k < 2; ++k) {
      // Decoding of the first and the second arrived packets. Decoding packets
      // that precede the lost burst must not alter the NACK list.
      for (int n = 0; n < kPacketSizeMs / 10; ++n) {
        nack->UpdateLastDecodedPacket(seq_num_offset + k,
                                      k * kTimestampIncrement);
        nack_list = nack->GetNackList(kShortRoundTripTimeMs);
        EXPECT_EQ(kExpectedListSize, nack_list.size());
      }
    }

    // Decoding of the last received packet: everything older than it must be
    // dropped from the NACK list.
    nack->UpdateLastDecodedPacket(seq_num + seq_num_offset,
                                  seq_num * kTimestampIncrement);
    nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(nack_list.empty());

    // Make sure list of late packets is also empty. To check that, push few
    // packets, if the late list is not empty its content will pop up in NACK
    // list.
    for (int n = 0; n < kNackThreshold + 10; ++n) {
      seq_num++;
      nack->UpdateLastReceivedPacket(seq_num_offset + seq_num,
                                     seq_num * kTimestampIncrement);
      nack_list = nack->GetNackList(kShortRoundTripTimeMs);
      EXPECT_TRUE(nack_list.empty());
    }
  }
}
|
||||
|
||||
// Reset() must clear the NACK list.
TEST(NackTrackerTest, Reset) {
  std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
  nack->UpdateSampleRate(kSampleRateHz);

  // Two consecutive packets to have a correct estimate of timestamp increase.
  uint16_t seq_num = 0;
  nack->UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);
  seq_num++;
  nack->UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);

  // Skip 10 packets (larger than NACK threshold).
  const int kNumLostPackets = 10;
  seq_num += kNumLostPackets + 1;
  nack->UpdateLastReceivedPacket(seq_num, seq_num * kTimestampIncrement);

  const size_t kExpectedListSize = kNumLostPackets - kNackThreshold;
  std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
  EXPECT_EQ(kExpectedListSize, nack_list.size());

  nack->Reset();
  nack_list = nack->GetNackList(kShortRoundTripTimeMs);
  EXPECT_TRUE(nack_list.empty());
}
|
||||
|
||||
// A size limit set before any loss occurs must cap the NACK list from the
// start. Exercised with and without sequence-number wrap-around.
TEST(NackTrackerTest, ListSizeAppliedFromBeginning) {
  const size_t kNackListSize = 10;
  for (int m = 0; m < 2; ++m) {
    uint16_t seq_num_offset = (m == 0) ? 0 : 65525;  // Wrap around if |m| is 1.
    std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
    nack->UpdateSampleRate(kSampleRateHz);
    nack->SetMaxNackListSize(kNackListSize);

    uint16_t seq_num = seq_num_offset;
    uint32_t timestamp = 0x12345678;
    nack->UpdateLastReceivedPacket(seq_num, timestamp);

    // Packet lost more than NACK-list size limit.
    uint16_t num_lost_packets = kNackThreshold + kNackListSize + 5;

    seq_num += num_lost_packets + 1;
    timestamp += (num_lost_packets + 1) * kTimestampIncrement;
    nack->UpdateLastReceivedPacket(seq_num, timestamp);

    std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_EQ(kNackListSize - kNackThreshold, nack_list.size());
  }
}
|
||||
|
||||
// Shrinking the list size after loss has occurred must evict the oldest
// elements, and the list must keep sliding as newer packets arrive.
TEST(NackTrackerTest, ChangeOfListSizeAppliedAndOldElementsRemoved) {
  const size_t kNackListSize = 10;
  for (int m = 0; m < 2; ++m) {
    uint16_t seq_num_offset = (m == 0) ? 0 : 65525;  // Wrap around if |m| is 1.
    std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
    nack->UpdateSampleRate(kSampleRateHz);

    uint16_t seq_num = seq_num_offset;
    uint32_t timestamp = 0x87654321;
    nack->UpdateLastReceivedPacket(seq_num, timestamp);

    // Packet lost more than NACK-list size limit.
    uint16_t num_lost_packets = kNackThreshold + kNackListSize + 5;

    std::unique_ptr<uint16_t[]> seq_num_lost(new uint16_t[num_lost_packets]);
    for (int n = 0; n < num_lost_packets; ++n) {
      seq_num_lost[n] = ++seq_num;
    }

    ++seq_num;
    timestamp += (num_lost_packets + 1) * kTimestampIncrement;
    nack->UpdateLastReceivedPacket(seq_num, timestamp);
    size_t expected_size = num_lost_packets - kNackThreshold;

    std::vector<uint16_t> nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_EQ(expected_size, nack_list.size());

    // Apply the smaller limit: only the newest |kNackListSize| lost packets
    // may remain tracked.
    nack->SetMaxNackListSize(kNackListSize);
    expected_size = kNackListSize - kNackThreshold;
    nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(IsNackListCorrect(
        nack_list, &seq_num_lost[num_lost_packets - kNackListSize],
        expected_size));

    // NACK list does not change size but the content is changing. The oldest
    // element is removed and one from late list is inserted.
    size_t n;
    for (n = 1; n <= static_cast<size_t>(kNackThreshold); ++n) {
      ++seq_num;
      timestamp += kTimestampIncrement;
      nack->UpdateLastReceivedPacket(seq_num, timestamp);
      nack_list = nack->GetNackList(kShortRoundTripTimeMs);
      EXPECT_TRUE(IsNackListCorrect(
          nack_list, &seq_num_lost[num_lost_packets - kNackListSize + n],
          expected_size));
    }

    // NACK list should shrink.
    for (; n < kNackListSize; ++n) {
      ++seq_num;
      timestamp += kTimestampIncrement;
      nack->UpdateLastReceivedPacket(seq_num, timestamp);
      --expected_size;
      nack_list = nack->GetNackList(kShortRoundTripTimeMs);
      EXPECT_TRUE(IsNackListCorrect(
          nack_list, &seq_num_lost[num_lost_packets - kNackListSize + n],
          expected_size));
    }

    // After this packet, NACK list should be empty.
    ++seq_num;
    timestamp += kTimestampIncrement;
    nack->UpdateLastReceivedPacket(seq_num, timestamp);
    nack_list = nack->GetNackList(kShortRoundTripTimeMs);
    EXPECT_TRUE(nack_list.empty());
  }
}
|
||||
|
||||
// The round-trip time passed to GetNackList() must filter out missing packets
// whose time-to-play is not larger than the RTT: a retransmission of those
// could not arrive in time anyway.
// (Test name fixed: was misspelled "RoudTripTimeIsApplied".)
TEST(NackTrackerTest, RoundTripTimeIsApplied) {
  const int kNackListSize = 200;
  std::unique_ptr<NackTracker> nack(NackTracker::Create(kNackThreshold));
  nack->UpdateSampleRate(kSampleRateHz);
  nack->SetMaxNackListSize(kNackListSize);

  uint16_t seq_num = 0;
  uint32_t timestamp = 0x87654321;
  nack->UpdateLastReceivedPacket(seq_num, timestamp);

  // Packet lost more than NACK-list size limit.
  uint16_t kNumLostPackets = kNackThreshold + 5;

  seq_num += (1 + kNumLostPackets);
  timestamp += (1 + kNumLostPackets) * kTimestampIncrement;
  nack->UpdateLastReceivedPacket(seq_num, timestamp);

  // Expected time-to-play are:
  // kPacketSizeMs - 10, 2*kPacketSizeMs - 10, 3*kPacketSizeMs - 10, ...
  //
  // sequence number:  1,  2,  3,   4,   5
  // time-to-play:    20, 50, 80, 110, 140
  //
  // With RTT = 100 ms only sequence numbers 4 and 5 are worth NACKing.
  std::vector<uint16_t> nack_list = nack->GetNackList(100);
  ASSERT_EQ(2u, nack_list.size());
  EXPECT_EQ(4, nack_list[0]);
  EXPECT_EQ(5, nack_list[1]);
}
|
||||
|
||||
} // namespace webrtc
|
||||
43
modules/audio_coding/neteq/neteq.cc
Normal file
43
modules/audio_coding/neteq/neteq.cc
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
|
||||
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/neteq_impl.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Renders the configuration as a single human-readable key=value string,
// intended for logging.
std::string NetEq::Config::ToString() const {
  std::stringstream ss;
  // Note: "true"/"false" are emitted without the stray leading space that an
  // earlier version printed for enable_fast_accelerate/enable_muted_state.
  ss << "sample_rate_hz=" << sample_rate_hz
     << ", enable_post_decode_vad="
     << (enable_post_decode_vad ? "true" : "false")
     << ", max_packets_in_buffer=" << max_packets_in_buffer
     << ", background_noise_mode=" << background_noise_mode
     << ", playout_mode=" << playout_mode
     << ", enable_fast_accelerate="
     << (enable_fast_accelerate ? "true" : "false")
     << ", enable_muted_state=" << (enable_muted_state ? "true" : "false");
  return ss.str();
}
|
||||
|
||||
// Creates all classes needed and inject them into a new NetEqImpl object.
|
||||
// Return the new object.
|
||||
NetEq* NetEq::Create(
    const NetEq::Config& config,
    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
  // Builds the dependency bundle from |config| and |decoder_factory| and hands
  // it to NetEqImpl. NOTE(review): returns a raw owning pointer (naked new);
  // the caller is expected to take ownership.
  return new NetEqImpl(config,
                       NetEqImpl::Dependencies(config, decoder_factory));
}
|
||||
|
||||
} // namespace webrtc
|
||||
89
modules/audio_coding/neteq/neteq_decoder_enum.cc
Normal file
89
modules/audio_coding/neteq/neteq_decoder_enum.cc
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/neteq_decoder_enum.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Maps a NetEqDecoder enumerator to its SDP audio format (name, clock rate,
// number of channels). Returns an empty Optional for enumerators with no SDP
// mapping (e.g. kDecoderArbitrary falls through to the default branch).
rtc::Optional<SdpAudioFormat> NetEqDecoderToSdpAudioFormat(NetEqDecoder nd) {
  switch (nd) {
    case NetEqDecoder::kDecoderPCMu:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("pcmu", 8000, 1));
    case NetEqDecoder::kDecoderPCMa:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("pcma", 8000, 1));
    case NetEqDecoder::kDecoderPCMu_2ch:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("pcmu", 8000, 2));
    case NetEqDecoder::kDecoderPCMa_2ch:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("pcma", 8000, 2));
    case NetEqDecoder::kDecoderILBC:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("ilbc", 8000, 1));
    case NetEqDecoder::kDecoderISAC:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("isac", 16000, 1));
    case NetEqDecoder::kDecoderISACswb:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("isac", 32000, 1));
    case NetEqDecoder::kDecoderPCM16B:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 8000, 1));
    case NetEqDecoder::kDecoderPCM16Bwb:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 16000, 1));
    case NetEqDecoder::kDecoderPCM16Bswb32kHz:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 32000, 1));
    case NetEqDecoder::kDecoderPCM16Bswb48kHz:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 48000, 1));
    case NetEqDecoder::kDecoderPCM16B_2ch:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 8000, 2));
    case NetEqDecoder::kDecoderPCM16Bwb_2ch:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 16000, 2));
    case NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 32000, 2));
    case NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 48000, 2));
    case NetEqDecoder::kDecoderPCM16B_5ch:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("l16", 8000, 5));
    case NetEqDecoder::kDecoderG722:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("g722", 8000, 1));
    case NetEqDecoder::kDecoderG722_2ch:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("g722", 8000, 2));
    case NetEqDecoder::kDecoderOpus:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("opus", 48000, 2));
    case NetEqDecoder::kDecoderOpus_2ch:
      // Stereo Opus is signaled via the "stereo=1" fmtp parameter.
      return rtc::Optional<SdpAudioFormat>(
          SdpAudioFormat("opus", 48000, 2,
                         std::map<std::string, std::string>{{"stereo", "1"}}));
    case NetEqDecoder::kDecoderRED:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("red", 8000, 1));
    case NetEqDecoder::kDecoderAVT:
      return rtc::Optional<SdpAudioFormat>(
          SdpAudioFormat("telephone-event", 8000, 1));
    case NetEqDecoder::kDecoderAVT16kHz:
      return rtc::Optional<SdpAudioFormat>(
          SdpAudioFormat("telephone-event", 16000, 1));
    case NetEqDecoder::kDecoderAVT32kHz:
      return rtc::Optional<SdpAudioFormat>(
          SdpAudioFormat("telephone-event", 32000, 1));
    case NetEqDecoder::kDecoderAVT48kHz:
      return rtc::Optional<SdpAudioFormat>(
          SdpAudioFormat("telephone-event", 48000, 1));
    case NetEqDecoder::kDecoderCNGnb:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("cn", 8000, 1));
    case NetEqDecoder::kDecoderCNGwb:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("cn", 16000, 1));
    case NetEqDecoder::kDecoderCNGswb32kHz:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("cn", 32000, 1));
    case NetEqDecoder::kDecoderCNGswb48kHz:
      return rtc::Optional<SdpAudioFormat>(SdpAudioFormat("cn", 48000, 1));
    default:
      return rtc::Optional<SdpAudioFormat>();
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
56
modules/audio_coding/neteq/neteq_decoder_enum.h
Normal file
56
modules/audio_coding/neteq/neteq_decoder_enum.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_DECODER_ENUM_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_DECODER_ENUM_H_
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_format.h"
|
||||
#include "webrtc/api/optional.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
enum class NetEqDecoder {
|
||||
kDecoderPCMu,
|
||||
kDecoderPCMa,
|
||||
kDecoderPCMu_2ch,
|
||||
kDecoderPCMa_2ch,
|
||||
kDecoderILBC,
|
||||
kDecoderISAC,
|
||||
kDecoderISACswb,
|
||||
kDecoderPCM16B,
|
||||
kDecoderPCM16Bwb,
|
||||
kDecoderPCM16Bswb32kHz,
|
||||
kDecoderPCM16Bswb48kHz,
|
||||
kDecoderPCM16B_2ch,
|
||||
kDecoderPCM16Bwb_2ch,
|
||||
kDecoderPCM16Bswb32kHz_2ch,
|
||||
kDecoderPCM16Bswb48kHz_2ch,
|
||||
kDecoderPCM16B_5ch,
|
||||
kDecoderG722,
|
||||
kDecoderG722_2ch,
|
||||
kDecoderRED,
|
||||
kDecoderAVT,
|
||||
kDecoderAVT16kHz,
|
||||
kDecoderAVT32kHz,
|
||||
kDecoderAVT48kHz,
|
||||
kDecoderCNGnb,
|
||||
kDecoderCNGwb,
|
||||
kDecoderCNGswb32kHz,
|
||||
kDecoderCNGswb48kHz,
|
||||
kDecoderArbitrary,
|
||||
kDecoderOpus,
|
||||
kDecoderOpus_2ch,
|
||||
};
|
||||
|
||||
rtc::Optional<SdpAudioFormat> NetEqDecoderToSdpAudioFormat(NetEqDecoder nd);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_DECODER_ENUM_H_
|
||||
457
modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
Normal file
457
modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
Normal file
@ -0,0 +1,457 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Test to verify correct operation for externally created decoders.
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
|
||||
#include "webrtc/common_types.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/mock/mock_external_decoder_pcm16b.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/input_audio_file.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/neteq_external_decoder_test.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/rtp_generator.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
#include "webrtc/test/gmock.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
using ::testing::_;
|
||||
using ::testing::Return;
|
||||
|
||||
class NetEqExternalDecoderUnitTest : public test::NetEqExternalDecoderTest {
|
||||
protected:
|
||||
static const int kFrameSizeMs = 10; // Frame size of Pcm16B.
|
||||
|
||||
NetEqExternalDecoderUnitTest(NetEqDecoder codec,
|
||||
int sample_rate_hz,
|
||||
MockExternalPcm16B* decoder)
|
||||
: NetEqExternalDecoderTest(codec, sample_rate_hz, decoder),
|
||||
external_decoder_(decoder),
|
||||
samples_per_ms_(sample_rate_hz / 1000),
|
||||
frame_size_samples_(kFrameSizeMs * samples_per_ms_),
|
||||
rtp_generator_(new test::RtpGenerator(samples_per_ms_)),
|
||||
input_(new int16_t[frame_size_samples_]),
|
||||
// Payload should be no larger than input.
|
||||
encoded_(new uint8_t[2 * frame_size_samples_]),
|
||||
payload_size_bytes_(0),
|
||||
last_send_time_(0),
|
||||
last_arrival_time_(0) {
|
||||
// NetEq is not allowed to delete the external decoder (hence Times(0)).
|
||||
EXPECT_CALL(*external_decoder_, Die()).Times(0);
|
||||
Init();
|
||||
|
||||
const std::string file_name =
|
||||
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
|
||||
input_file_.reset(new test::InputAudioFile(file_name));
|
||||
}
|
||||
|
||||
virtual ~NetEqExternalDecoderUnitTest() {
|
||||
delete [] input_;
|
||||
delete [] encoded_;
|
||||
// ~NetEqExternalDecoderTest() will delete |external_decoder_|, so expecting
|
||||
// Die() to be called.
|
||||
EXPECT_CALL(*external_decoder_, Die()).Times(1);
|
||||
}
|
||||
|
||||
// Method to draw kFrameSizeMs audio and verify the output.
|
||||
// Use gTest methods. e.g. ASSERT_EQ() inside to trigger errors.
|
||||
virtual void GetAndVerifyOutput() = 0;
|
||||
|
||||
// Method to get the number of calls to the Decode() method of the external
|
||||
// decoder.
|
||||
virtual int NumExpectedDecodeCalls(int num_loops) = 0;
|
||||
|
||||
// Method to generate packets and return the send time of the packet.
|
||||
int GetNewPacket() {
|
||||
if (!input_file_->Read(frame_size_samples_, input_)) {
|
||||
return -1;
|
||||
}
|
||||
payload_size_bytes_ = WebRtcPcm16b_Encode(input_, frame_size_samples_,
|
||||
encoded_);
|
||||
|
||||
int next_send_time = rtp_generator_->GetRtpHeader(
|
||||
kPayloadType, frame_size_samples_, &rtp_header_);
|
||||
return next_send_time;
|
||||
}
|
||||
|
||||
// Method to decide packet losses.
|
||||
virtual bool Lost() { return false; }
|
||||
|
||||
// Method to calculate packet arrival time.
|
||||
int GetArrivalTime(int send_time) {
|
||||
int arrival_time = last_arrival_time_ + (send_time - last_send_time_);
|
||||
last_send_time_ = send_time;
|
||||
last_arrival_time_ = arrival_time;
|
||||
return arrival_time;
|
||||
}
|
||||
|
||||
void RunTest(int num_loops) {
|
||||
// Get next input packets (mono and multi-channel).
|
||||
uint32_t next_send_time;
|
||||
uint32_t next_arrival_time;
|
||||
do {
|
||||
next_send_time = GetNewPacket();
|
||||
next_arrival_time = GetArrivalTime(next_send_time);
|
||||
} while (Lost()); // If lost, immediately read the next packet.
|
||||
|
||||
EXPECT_CALL(
|
||||
*external_decoder_,
|
||||
DecodeInternal(_, payload_size_bytes_, 1000 * samples_per_ms_, _, _))
|
||||
.Times(NumExpectedDecodeCalls(num_loops));
|
||||
|
||||
uint32_t time_now = 0;
|
||||
for (int k = 0; k < num_loops; ++k) {
|
||||
while (time_now >= next_arrival_time) {
|
||||
InsertPacket(rtp_header_, rtc::ArrayView<const uint8_t>(
|
||||
encoded_, payload_size_bytes_),
|
||||
next_arrival_time);
|
||||
// Get next input packet.
|
||||
do {
|
||||
next_send_time = GetNewPacket();
|
||||
next_arrival_time = GetArrivalTime(next_send_time);
|
||||
} while (Lost()); // If lost, immediately read the next packet.
|
||||
}
|
||||
|
||||
std::ostringstream ss;
|
||||
ss << "Lap number " << k << ".";
|
||||
SCOPED_TRACE(ss.str()); // Print out the parameter values on failure.
|
||||
// Compare mono and multi-channel.
|
||||
ASSERT_NO_FATAL_FAILURE(GetAndVerifyOutput());
|
||||
|
||||
time_now += kOutputLengthMs;
|
||||
}
|
||||
}
|
||||
|
||||
void InsertPacket(RTPHeader rtp_header,
|
||||
rtc::ArrayView<const uint8_t> payload,
|
||||
uint32_t receive_timestamp) override {
|
||||
EXPECT_CALL(*external_decoder_,
|
||||
IncomingPacket(_, payload.size(), rtp_header.sequenceNumber,
|
||||
rtp_header.timestamp, receive_timestamp));
|
||||
NetEqExternalDecoderTest::InsertPacket(rtp_header, payload,
|
||||
receive_timestamp);
|
||||
}
|
||||
|
||||
MockExternalPcm16B* external_decoder() { return external_decoder_.get(); }
|
||||
|
||||
void ResetRtpGenerator(test::RtpGenerator* rtp_generator) {
|
||||
rtp_generator_.reset(rtp_generator);
|
||||
}
|
||||
|
||||
int samples_per_ms() const { return samples_per_ms_; }
|
||||
private:
|
||||
std::unique_ptr<MockExternalPcm16B> external_decoder_;
|
||||
int samples_per_ms_;
|
||||
size_t frame_size_samples_;
|
||||
std::unique_ptr<test::RtpGenerator> rtp_generator_;
|
||||
int16_t* input_;
|
||||
uint8_t* encoded_;
|
||||
size_t payload_size_bytes_;
|
||||
uint32_t last_send_time_;
|
||||
uint32_t last_arrival_time_;
|
||||
std::unique_ptr<test::InputAudioFile> input_file_;
|
||||
RTPHeader rtp_header_;
|
||||
};
|
||||
|
||||
// This test encodes a few packets of PCM16b 32 kHz data and inserts it into two
|
||||
// different NetEq instances. The first instance uses the internal version of
|
||||
// the decoder object, while the second one uses an externally created decoder
|
||||
// object (ExternalPcm16B wrapped in MockExternalPcm16B, both defined above).
|
||||
// The test verifies that the output from both instances match.
|
||||
class NetEqExternalVsInternalDecoderTest : public NetEqExternalDecoderUnitTest,
|
||||
public ::testing::Test {
|
||||
protected:
|
||||
static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz.
|
||||
|
||||
NetEqExternalVsInternalDecoderTest()
|
||||
: NetEqExternalDecoderUnitTest(NetEqDecoder::kDecoderPCM16Bswb32kHz,
|
||||
32000,
|
||||
new MockExternalPcm16B(32000)),
|
||||
sample_rate_hz_(32000) {
|
||||
NetEq::Config config;
|
||||
config.sample_rate_hz = sample_rate_hz_;
|
||||
neteq_internal_.reset(
|
||||
NetEq::Create(config, CreateBuiltinAudioDecoderFactory()));
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
ASSERT_EQ(true, neteq_internal_->RegisterPayloadType(
|
||||
kPayloadType, SdpAudioFormat("L16", 32000, 1)));
|
||||
}
|
||||
|
||||
void GetAndVerifyOutput() override {
|
||||
// Get audio from internal decoder instance.
|
||||
bool muted;
|
||||
EXPECT_EQ(NetEq::kOK, neteq_internal_->GetAudio(&output_internal_, &muted));
|
||||
ASSERT_FALSE(muted);
|
||||
EXPECT_EQ(1u, output_internal_.num_channels_);
|
||||
EXPECT_EQ(static_cast<size_t>(kOutputLengthMs * sample_rate_hz_ / 1000),
|
||||
output_internal_.samples_per_channel_);
|
||||
|
||||
// Get audio from external decoder instance.
|
||||
GetOutputAudio(&output_);
|
||||
|
||||
const int16_t* output_data = output_.data();
|
||||
const int16_t* output_internal_data = output_internal_.data();
|
||||
for (size_t i = 0; i < output_.samples_per_channel_; ++i) {
|
||||
ASSERT_EQ(output_data[i], output_internal_data[i])
|
||||
<< "Diff in sample " << i << ".";
|
||||
}
|
||||
}
|
||||
|
||||
void InsertPacket(RTPHeader rtp_header,
|
||||
rtc::ArrayView<const uint8_t> payload,
|
||||
uint32_t receive_timestamp) override {
|
||||
// Insert packet in internal decoder.
|
||||
ASSERT_EQ(NetEq::kOK, neteq_internal_->InsertPacket(rtp_header, payload,
|
||||
receive_timestamp));
|
||||
|
||||
// Insert packet in external decoder instance.
|
||||
NetEqExternalDecoderUnitTest::InsertPacket(rtp_header, payload,
|
||||
receive_timestamp);
|
||||
}
|
||||
|
||||
int NumExpectedDecodeCalls(int num_loops) override { return num_loops; }
|
||||
|
||||
private:
|
||||
int sample_rate_hz_;
|
||||
std::unique_ptr<NetEq> neteq_internal_;
|
||||
AudioFrame output_internal_;
|
||||
AudioFrame output_;
|
||||
};
|
||||
|
||||
TEST_F(NetEqExternalVsInternalDecoderTest, RunTest) {
|
||||
RunTest(100); // Run 100 laps @ 10 ms each in the test loop.
|
||||
}
|
||||
|
||||
class LargeTimestampJumpTest : public NetEqExternalDecoderUnitTest,
|
||||
public ::testing::Test {
|
||||
protected:
|
||||
static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz.
|
||||
|
||||
enum TestStates {
|
||||
kInitialPhase,
|
||||
kNormalPhase,
|
||||
kExpandPhase,
|
||||
kFadedExpandPhase,
|
||||
kRecovered
|
||||
};
|
||||
|
||||
LargeTimestampJumpTest()
|
||||
: NetEqExternalDecoderUnitTest(NetEqDecoder::kDecoderPCM16B,
|
||||
8000,
|
||||
new MockExternalPcm16B(8000)),
|
||||
test_state_(kInitialPhase) {
|
||||
EXPECT_CALL(*external_decoder(), HasDecodePlc())
|
||||
.WillRepeatedly(Return(false));
|
||||
}
|
||||
|
||||
virtual void UpdateState(AudioFrame::SpeechType output_type) {
|
||||
switch (test_state_) {
|
||||
case kInitialPhase: {
|
||||
if (output_type == AudioFrame::kNormalSpeech) {
|
||||
test_state_ = kNormalPhase;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kNormalPhase: {
|
||||
if (output_type == AudioFrame::kPLC) {
|
||||
test_state_ = kExpandPhase;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kExpandPhase: {
|
||||
if (output_type == AudioFrame::kPLCCNG) {
|
||||
test_state_ = kFadedExpandPhase;
|
||||
} else if (output_type == AudioFrame::kNormalSpeech) {
|
||||
test_state_ = kRecovered;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kFadedExpandPhase: {
|
||||
if (output_type == AudioFrame::kNormalSpeech) {
|
||||
test_state_ = kRecovered;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kRecovered: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GetAndVerifyOutput() override {
|
||||
AudioFrame output;
|
||||
GetOutputAudio(&output);
|
||||
UpdateState(output.speech_type_);
|
||||
|
||||
if (test_state_ == kExpandPhase || test_state_ == kFadedExpandPhase) {
|
||||
// Don't verify the output in this phase of the test.
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT_EQ(1u, output.num_channels_);
|
||||
const int16_t* output_data = output.data();
|
||||
for (size_t i = 0; i < output.samples_per_channel_; ++i) {
|
||||
if (output_data[i] != 0)
|
||||
return;
|
||||
}
|
||||
EXPECT_TRUE(false)
|
||||
<< "Expected at least one non-zero sample in each output block.";
|
||||
}
|
||||
|
||||
int NumExpectedDecodeCalls(int num_loops) override {
|
||||
// Some packets at the end of the stream won't be decoded. When the jump in
|
||||
// timestamp happens, NetEq will do Expand during one GetAudio call. In the
|
||||
// next call it will decode the packet after the jump, but the net result is
|
||||
// that the delay increased by 1 packet. In another call, a Pre-emptive
|
||||
// Expand operation is performed, leading to delay increase by 1 packet. In
|
||||
// total, the test will end with a 2-packet delay, which results in the 2
|
||||
// last packets not being decoded.
|
||||
return num_loops - 2;
|
||||
}
|
||||
|
||||
TestStates test_state_;
|
||||
};
|
||||
|
||||
TEST_F(LargeTimestampJumpTest, JumpLongerThanHalfRange) {
|
||||
// Set the timestamp series to start at 2880, increase to 7200, then jump to
|
||||
// 2869342376. The sequence numbers start at 42076 and increase by 1 for each
|
||||
// packet, also when the timestamp jumps.
|
||||
static const uint16_t kStartSeqeunceNumber = 42076;
|
||||
static const uint32_t kStartTimestamp = 2880;
|
||||
static const uint32_t kJumpFromTimestamp = 7200;
|
||||
static const uint32_t kJumpToTimestamp = 2869342376;
|
||||
static_assert(kJumpFromTimestamp < kJumpToTimestamp,
|
||||
"timestamp jump should not result in wrap");
|
||||
static_assert(
|
||||
static_cast<uint32_t>(kJumpToTimestamp - kJumpFromTimestamp) > 0x7FFFFFFF,
|
||||
"jump should be larger than half range");
|
||||
// Replace the default RTP generator with one that jumps in timestamp.
|
||||
ResetRtpGenerator(new test::TimestampJumpRtpGenerator(samples_per_ms(),
|
||||
kStartSeqeunceNumber,
|
||||
kStartTimestamp,
|
||||
kJumpFromTimestamp,
|
||||
kJumpToTimestamp));
|
||||
|
||||
RunTest(130); // Run 130 laps @ 10 ms each in the test loop.
|
||||
EXPECT_EQ(kRecovered, test_state_);
|
||||
}
|
||||
|
||||
TEST_F(LargeTimestampJumpTest, JumpLongerThanHalfRangeAndWrap) {
|
||||
// Make a jump larger than half the 32-bit timestamp range. Set the start
|
||||
// timestamp such that the jump will result in a wrap around.
|
||||
static const uint16_t kStartSeqeunceNumber = 42076;
|
||||
// Set the jump length slightly larger than 2^31.
|
||||
static const uint32_t kStartTimestamp = 3221223116;
|
||||
static const uint32_t kJumpFromTimestamp = 3221223216;
|
||||
static const uint32_t kJumpToTimestamp = 1073744278;
|
||||
static_assert(kJumpToTimestamp < kJumpFromTimestamp,
|
||||
"timestamp jump should result in wrap");
|
||||
static_assert(
|
||||
static_cast<uint32_t>(kJumpToTimestamp - kJumpFromTimestamp) > 0x7FFFFFFF,
|
||||
"jump should be larger than half range");
|
||||
// Replace the default RTP generator with one that jumps in timestamp.
|
||||
ResetRtpGenerator(new test::TimestampJumpRtpGenerator(samples_per_ms(),
|
||||
kStartSeqeunceNumber,
|
||||
kStartTimestamp,
|
||||
kJumpFromTimestamp,
|
||||
kJumpToTimestamp));
|
||||
|
||||
RunTest(130); // Run 130 laps @ 10 ms each in the test loop.
|
||||
EXPECT_EQ(kRecovered, test_state_);
|
||||
}
|
||||
|
||||
class ShortTimestampJumpTest : public LargeTimestampJumpTest {
|
||||
protected:
|
||||
void UpdateState(AudioFrame::SpeechType output_type) override {
|
||||
switch (test_state_) {
|
||||
case kInitialPhase: {
|
||||
if (output_type == AudioFrame::kNormalSpeech) {
|
||||
test_state_ = kNormalPhase;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kNormalPhase: {
|
||||
if (output_type == AudioFrame::kPLC) {
|
||||
test_state_ = kExpandPhase;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kExpandPhase: {
|
||||
if (output_type == AudioFrame::kNormalSpeech) {
|
||||
test_state_ = kRecovered;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case kRecovered: {
|
||||
break;
|
||||
}
|
||||
default: { FAIL(); }
|
||||
}
|
||||
}
|
||||
|
||||
int NumExpectedDecodeCalls(int num_loops) override {
|
||||
// Some packets won't be decoded because of the timestamp jump.
|
||||
return num_loops - 2;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(ShortTimestampJumpTest, JumpShorterThanHalfRange) {
|
||||
// Make a jump shorter than half the 32-bit timestamp range. Set the start
|
||||
// timestamp such that the jump will not result in a wrap around.
|
||||
static const uint16_t kStartSeqeunceNumber = 42076;
|
||||
// Set the jump length slightly smaller than 2^31.
|
||||
static const uint32_t kStartTimestamp = 4711;
|
||||
static const uint32_t kJumpFromTimestamp = 4811;
|
||||
static const uint32_t kJumpToTimestamp = 2147483747;
|
||||
static_assert(kJumpFromTimestamp < kJumpToTimestamp,
|
||||
"timestamp jump should not result in wrap");
|
||||
static_assert(
|
||||
static_cast<uint32_t>(kJumpToTimestamp - kJumpFromTimestamp) < 0x7FFFFFFF,
|
||||
"jump should be smaller than half range");
|
||||
// Replace the default RTP generator with one that jumps in timestamp.
|
||||
ResetRtpGenerator(new test::TimestampJumpRtpGenerator(samples_per_ms(),
|
||||
kStartSeqeunceNumber,
|
||||
kStartTimestamp,
|
||||
kJumpFromTimestamp,
|
||||
kJumpToTimestamp));
|
||||
|
||||
RunTest(130); // Run 130 laps @ 10 ms each in the test loop.
|
||||
EXPECT_EQ(kRecovered, test_state_);
|
||||
}
|
||||
|
||||
TEST_F(ShortTimestampJumpTest, JumpShorterThanHalfRangeAndWrap) {
|
||||
// Make a jump shorter than half the 32-bit timestamp range. Set the start
|
||||
// timestamp such that the jump will result in a wrap around.
|
||||
static const uint16_t kStartSeqeunceNumber = 42076;
|
||||
// Set the jump length slightly smaller than 2^31.
|
||||
static const uint32_t kStartTimestamp = 3221227827;
|
||||
static const uint32_t kJumpFromTimestamp = 3221227927;
|
||||
static const uint32_t kJumpToTimestamp = 1073739567;
|
||||
static_assert(kJumpToTimestamp < kJumpFromTimestamp,
|
||||
"timestamp jump should result in wrap");
|
||||
static_assert(
|
||||
static_cast<uint32_t>(kJumpToTimestamp - kJumpFromTimestamp) < 0x7FFFFFFF,
|
||||
"jump should be smaller than half range");
|
||||
// Replace the default RTP generator with one that jumps in timestamp.
|
||||
ResetRtpGenerator(new test::TimestampJumpRtpGenerator(samples_per_ms(),
|
||||
kStartSeqeunceNumber,
|
||||
kStartTimestamp,
|
||||
kJumpFromTimestamp,
|
||||
kJumpToTimestamp));
|
||||
|
||||
RunTest(130); // Run 130 laps @ 10 ms each in the test loop.
|
||||
EXPECT_EQ(kRecovered, test_state_);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
2124
modules/audio_coding/neteq/neteq_impl.cc
Normal file
2124
modules/audio_coding/neteq/neteq_impl.cc
Normal file
File diff suppressed because it is too large
Load Diff
449
modules/audio_coding/neteq/neteq_impl.h
Normal file
449
modules/audio_coding/neteq/neteq_impl.h
Normal file
@ -0,0 +1,449 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/api/optional.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/defines.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet.h" // Declare PacketList.
|
||||
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/rtcp.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/rtc_base/criticalsection.h"
|
||||
#include "webrtc/rtc_base/thread_annotations.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class Accelerate;
|
||||
class BackgroundNoise;
|
||||
class BufferLevelFilter;
|
||||
class ComfortNoise;
|
||||
class DecisionLogic;
|
||||
class DecoderDatabase;
|
||||
class DelayManager;
|
||||
class DelayPeakDetector;
|
||||
class DtmfBuffer;
|
||||
class DtmfToneGenerator;
|
||||
class Expand;
|
||||
class Merge;
|
||||
class NackTracker;
|
||||
class Normal;
|
||||
class PacketBuffer;
|
||||
class RedPayloadSplitter;
|
||||
class PostDecodeVad;
|
||||
class PreemptiveExpand;
|
||||
class RandomVector;
|
||||
class SyncBuffer;
|
||||
class TimestampScaler;
|
||||
struct AccelerateFactory;
|
||||
struct DtmfEvent;
|
||||
struct ExpandFactory;
|
||||
struct PreemptiveExpandFactory;
|
||||
|
||||
class NetEqImpl : public webrtc::NetEq {
|
||||
public:
|
||||
enum class OutputType {
|
||||
kNormalSpeech,
|
||||
kPLC,
|
||||
kCNG,
|
||||
kPLCCNG,
|
||||
kVadPassive
|
||||
};
|
||||
|
||||
enum ErrorCodes {
|
||||
kNoError = 0,
|
||||
kOtherError,
|
||||
kUnknownRtpPayloadType,
|
||||
kDecoderNotFound,
|
||||
kInvalidPointer,
|
||||
kAccelerateError,
|
||||
kPreemptiveExpandError,
|
||||
kComfortNoiseErrorCode,
|
||||
kDecoderErrorCode,
|
||||
kOtherDecoderError,
|
||||
kInvalidOperation,
|
||||
kDtmfParsingError,
|
||||
kDtmfInsertError,
|
||||
kSampleUnderrun,
|
||||
kDecodedTooMuch,
|
||||
kRedundancySplitError,
|
||||
kPacketBufferCorruption
|
||||
};
|
||||
|
||||
struct Dependencies {
|
||||
// The constructor populates the Dependencies struct with the default
|
||||
// implementations of the objects. They can all be replaced by the user
|
||||
// before sending the struct to the NetEqImpl constructor. However, there
|
||||
// are dependencies between some of the classes inside the struct, so
|
||||
// swapping out one may make it necessary to re-create another one.
|
||||
explicit Dependencies(
|
||||
const NetEq::Config& config,
|
||||
const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
|
||||
~Dependencies();
|
||||
|
||||
std::unique_ptr<TickTimer> tick_timer;
|
||||
std::unique_ptr<BufferLevelFilter> buffer_level_filter;
|
||||
std::unique_ptr<DecoderDatabase> decoder_database;
|
||||
std::unique_ptr<DelayPeakDetector> delay_peak_detector;
|
||||
std::unique_ptr<DelayManager> delay_manager;
|
||||
std::unique_ptr<DtmfBuffer> dtmf_buffer;
|
||||
std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
|
||||
std::unique_ptr<PacketBuffer> packet_buffer;
|
||||
std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
|
||||
std::unique_ptr<TimestampScaler> timestamp_scaler;
|
||||
std::unique_ptr<AccelerateFactory> accelerate_factory;
|
||||
std::unique_ptr<ExpandFactory> expand_factory;
|
||||
std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
|
||||
};
|
||||
|
||||
// Creates a new NetEqImpl object.
|
||||
NetEqImpl(const NetEq::Config& config,
|
||||
Dependencies&& deps,
|
||||
bool create_components = true);
|
||||
|
||||
~NetEqImpl() override;
|
||||
|
||||
// Inserts a new packet into NetEq. The |receive_timestamp| is an indication
|
||||
// of the time when the packet was received, and should be measured with
|
||||
// the same tick rate as the RTP timestamp of the current payload.
|
||||
// Returns 0 on success, -1 on failure.
|
||||
int InsertPacket(const RTPHeader& rtp_header,
|
||||
rtc::ArrayView<const uint8_t> payload,
|
||||
uint32_t receive_timestamp) override;
|
||||
|
||||
void InsertEmptyPacket(const RTPHeader& rtp_header) override;
|
||||
|
||||
int GetAudio(AudioFrame* audio_frame, bool* muted) override;
|
||||
|
||||
void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
|
||||
|
||||
int RegisterPayloadType(NetEqDecoder codec,
|
||||
const std::string& codec_name,
|
||||
uint8_t rtp_payload_type) override;
|
||||
|
||||
int RegisterExternalDecoder(AudioDecoder* decoder,
|
||||
NetEqDecoder codec,
|
||||
const std::string& codec_name,
|
||||
uint8_t rtp_payload_type) override;
|
||||
|
||||
bool RegisterPayloadType(int rtp_payload_type,
|
||||
const SdpAudioFormat& audio_format) override;
|
||||
|
||||
// Removes |rtp_payload_type| from the codec database. Returns 0 on success,
|
||||
// -1 on failure.
|
||||
int RemovePayloadType(uint8_t rtp_payload_type) override;
|
||||
|
||||
void RemoveAllPayloadTypes() override;
|
||||
|
||||
bool SetMinimumDelay(int delay_ms) override;
|
||||
|
||||
bool SetMaximumDelay(int delay_ms) override;
|
||||
|
||||
int LeastRequiredDelayMs() const override;
|
||||
|
||||
int SetTargetDelay() override;
|
||||
|
||||
int TargetDelayMs() override;
|
||||
|
||||
int CurrentDelayMs() const override;
|
||||
|
||||
int FilteredCurrentDelayMs() const override;
|
||||
|
||||
// Sets the playout mode to |mode|.
|
||||
// Deprecated.
|
||||
// TODO(henrik.lundin) Delete.
|
||||
void SetPlayoutMode(NetEqPlayoutMode mode) override;
|
||||
|
||||
// Returns the current playout mode.
|
||||
// Deprecated.
|
||||
// TODO(henrik.lundin) Delete.
|
||||
NetEqPlayoutMode PlayoutMode() const override;
|
||||
|
||||
// Writes the current network statistics to |stats|. The statistics are reset
|
||||
// after the call.
|
||||
int NetworkStatistics(NetEqNetworkStatistics* stats) override;
|
||||
|
||||
// Writes the current RTCP statistics to |stats|. The statistics are reset
|
||||
// and a new report period is started with the call.
|
||||
void GetRtcpStatistics(RtcpStatistics* stats) override;
|
||||
|
||||
NetEqLifetimeStatistics GetLifetimeStatistics() const override;
|
||||
|
||||
// Same as RtcpStatistics(), but does not reset anything.
|
||||
void GetRtcpStatisticsNoReset(RtcpStatistics* stats) override;
|
||||
|
||||
// Enables post-decode VAD. When enabled, GetAudio() will return
|
||||
// kOutputVADPassive when the signal contains no speech.
|
||||
void EnableVad() override;
|
||||
|
||||
// Disables post-decode VAD.
|
||||
void DisableVad() override;
|
||||
|
||||
rtc::Optional<uint32_t> GetPlayoutTimestamp() const override;
|
||||
|
||||
int last_output_sample_rate_hz() const override;
|
||||
|
||||
rtc::Optional<CodecInst> GetDecoder(int payload_type) const override;
|
||||
|
||||
rtc::Optional<SdpAudioFormat> GetDecoderFormat(
|
||||
int payload_type) const override;
|
||||
|
||||
int SetTargetNumberOfChannels() override;
|
||||
|
||||
int SetTargetSampleRate() override;
|
||||
|
||||
// Flushes both the packet buffer and the sync buffer.
|
||||
void FlushBuffers() override;
|
||||
|
||||
void PacketBufferStatistics(int* current_num_packets,
|
||||
int* max_num_packets) const override;
|
||||
|
||||
void EnableNack(size_t max_nack_list_size) override;
|
||||
|
||||
void DisableNack() override;
|
||||
|
||||
std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
|
||||
|
||||
std::vector<uint32_t> LastDecodedTimestamps() const override;
|
||||
|
||||
int SyncBufferSizeMs() const override;
|
||||
|
||||
// This accessor method is only intended for testing purposes.
|
||||
const SyncBuffer* sync_buffer_for_test() const;
|
||||
Operations last_operation_for_test() const;
|
||||
|
||||
protected:
|
||||
static const int kOutputSizeMs = 10;
|
||||
static const size_t kMaxFrameSize = 5760; // 120 ms @ 48 kHz.
|
||||
// TODO(hlundin): Provide a better value for kSyncBufferSize.
|
||||
// Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for
|
||||
// calculating correlations of current frame against history.
|
||||
static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;
|
||||
|
||||
// Inserts a new packet into NetEq. This is used by the InsertPacket method
|
||||
// above. Returns 0 on success, otherwise an error code.
|
||||
// TODO(hlundin): Merge this with InsertPacket above?
|
||||
int InsertPacketInternal(const RTPHeader& rtp_header,
|
||||
rtc::ArrayView<const uint8_t> payload,
|
||||
uint32_t receive_timestamp)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Delivers 10 ms of audio data. The data is written to |audio_frame|.
|
||||
// Returns 0 on success, otherwise an error code.
|
||||
int GetAudioInternal(AudioFrame* audio_frame, bool* muted)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Provides a decision to the GetAudioInternal method. The decision what to
|
||||
// do is written to |operation|. Packets to decode are written to
|
||||
// |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
|
||||
// DTMF should be played, |play_dtmf| is set to true by the method.
|
||||
// Returns 0 on success, otherwise an error code.
|
||||
int GetDecision(Operations* operation,
|
||||
PacketList* packet_list,
|
||||
DtmfEvent* dtmf_event,
|
||||
bool* play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Decodes the speech packets in |packet_list|, and writes the results to
|
||||
// |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
|
||||
// elements. The length of the decoded data is written to |decoded_length|.
|
||||
// The speech type -- speech or (codec-internal) comfort noise -- is written
|
||||
// to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
|
||||
// comfort noise, those are not decoded.
|
||||
int Decode(PacketList* packet_list,
|
||||
Operations* operation,
|
||||
int* decoded_length,
|
||||
AudioDecoder::SpeechType* speech_type)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Sub-method to Decode(). Performs codec internal CNG.
|
||||
int DecodeCng(AudioDecoder* decoder,
|
||||
int* decoded_length,
|
||||
AudioDecoder::SpeechType* speech_type)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Sub-method to Decode(). Performs the actual decoding.
|
||||
int DecodeLoop(PacketList* packet_list,
|
||||
const Operations& operation,
|
||||
AudioDecoder* decoder,
|
||||
int* decoded_length,
|
||||
AudioDecoder::SpeechType* speech_type)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Sub-method which calls the Normal class to perform the normal operation.
|
||||
void DoNormal(const int16_t* decoded_buffer,
|
||||
size_t decoded_length,
|
||||
AudioDecoder::SpeechType speech_type,
|
||||
bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Sub-method which calls the Merge class to perform the merge operation.
|
||||
void DoMerge(int16_t* decoded_buffer,
|
||||
size_t decoded_length,
|
||||
AudioDecoder::SpeechType speech_type,
|
||||
bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Sub-method which calls the Expand class to perform the expand operation.
|
||||
int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Sub-method which calls the Accelerate class to perform the accelerate
|
||||
// operation.
|
||||
int DoAccelerate(int16_t* decoded_buffer,
|
||||
size_t decoded_length,
|
||||
AudioDecoder::SpeechType speech_type,
|
||||
bool play_dtmf,
|
||||
bool fast_accelerate)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Sub-method which calls the PreemptiveExpand class to perform the
|
||||
// preemtive expand operation.
|
||||
int DoPreemptiveExpand(int16_t* decoded_buffer,
|
||||
size_t decoded_length,
|
||||
AudioDecoder::SpeechType speech_type,
|
||||
bool play_dtmf)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
|
||||
// noise. |packet_list| can either contain one SID frame to update the
|
||||
// noise parameters, or no payload at all, in which case the previously
|
||||
// received parameters are used.
|
||||
int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Calls the audio decoder to generate codec-internal comfort noise when
|
||||
// no packet was received.
|
||||
void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Calls the DtmfToneGenerator class to generate DTMF tones.
|
||||
int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Produces packet-loss concealment using alternative methods. If the codec
|
||||
// has an internal PLC, it is called to generate samples. Otherwise, the
|
||||
// method performs zero-stuffing.
|
||||
void DoAlternativePlc(bool increase_timestamp)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Overdub DTMF on top of |output|.
|
||||
int DtmfOverdub(const DtmfEvent& dtmf_event,
|
||||
size_t num_channels,
|
||||
int16_t* output) const
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Extracts packets from |packet_buffer_| to produce at least
|
||||
// |required_samples| samples. The packets are inserted into |packet_list|.
|
||||
// Returns the number of samples that the packets in the list will produce, or
|
||||
// -1 in case of an error.
|
||||
int ExtractPackets(size_t required_samples, PacketList* packet_list)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Resets various variables and objects to new values based on the sample rate
|
||||
// |fs_hz| and |channels| number audio channels.
|
||||
void SetSampleRateAndChannels(int fs_hz, size_t channels)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Returns the output type for the audio produced by the latest call to
|
||||
// GetAudio().
|
||||
OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Updates Expand and Merge.
|
||||
virtual void UpdatePlcComponents(int fs_hz, size_t channels)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
// Creates DecisionLogic object with the mode given by |playout_mode_|.
|
||||
virtual void CreateDecisionLogic() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
|
||||
|
||||
rtc::CriticalSection crit_sect_;
|
||||
const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<BufferLevelFilter> buffer_level_filter_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<DecoderDatabase> decoder_database_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<DelayManager> delay_manager_ RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<DelayPeakDetector> delay_peak_detector_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<TimestampScaler> timestamp_scaler_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<ExpandFactory> expand_factory_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<AccelerateFactory> accelerate_factory_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
|
||||
std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<DecisionLogic> decision_logic_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<AudioMultiVector> algorithm_buffer_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<PreemptiveExpand> preemptive_expand_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
RandomVector random_vector_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(crit_sect_);
|
||||
Rtcp rtcp_ RTC_GUARDED_BY(crit_sect_);
|
||||
StatisticsCalculator stats_ RTC_GUARDED_BY(crit_sect_);
|
||||
int fs_hz_ RTC_GUARDED_BY(crit_sect_);
|
||||
int fs_mult_ RTC_GUARDED_BY(crit_sect_);
|
||||
int last_output_sample_rate_hz_ RTC_GUARDED_BY(crit_sect_);
|
||||
size_t output_size_samples_ RTC_GUARDED_BY(crit_sect_);
|
||||
size_t decoder_frame_length_ RTC_GUARDED_BY(crit_sect_);
|
||||
Modes last_mode_ RTC_GUARDED_BY(crit_sect_);
|
||||
Operations last_operation_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<int16_t[]> mute_factor_array_ RTC_GUARDED_BY(crit_sect_);
|
||||
size_t decoded_buffer_length_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(crit_sect_);
|
||||
uint32_t playout_timestamp_ RTC_GUARDED_BY(crit_sect_);
|
||||
bool new_codec_ RTC_GUARDED_BY(crit_sect_);
|
||||
uint32_t timestamp_ RTC_GUARDED_BY(crit_sect_);
|
||||
bool reset_decoder_ RTC_GUARDED_BY(crit_sect_);
|
||||
rtc::Optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(crit_sect_);
|
||||
rtc::Optional<uint8_t> current_cng_rtp_payload_type_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
uint32_t ssrc_ RTC_GUARDED_BY(crit_sect_);
|
||||
bool first_packet_ RTC_GUARDED_BY(crit_sect_);
|
||||
const BackgroundNoiseMode background_noise_mode_ RTC_GUARDED_BY(crit_sect_);
|
||||
NetEqPlayoutMode playout_mode_ RTC_GUARDED_BY(crit_sect_);
|
||||
bool enable_fast_accelerate_ RTC_GUARDED_BY(crit_sect_);
|
||||
std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(crit_sect_);
|
||||
bool nack_enabled_ RTC_GUARDED_BY(crit_sect_);
|
||||
const bool enable_muted_state_ RTC_GUARDED_BY(crit_sect_);
|
||||
AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(crit_sect_) =
|
||||
AudioFrame::kVadPassive;
|
||||
std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
|
||||
RTC_GUARDED_BY(crit_sect_);
|
||||
std::vector<uint32_t> last_decoded_timestamps_ RTC_GUARDED_BY(crit_sect_);
|
||||
|
||||
private:
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
|
||||
1520
modules/audio_coding/neteq/neteq_impl_unittest.cc
Normal file
1520
modules/audio_coding/neteq/neteq_impl_unittest.cc
Normal file
File diff suppressed because it is too large
Load Diff
337
modules/audio_coding/neteq/neteq_network_stats_unittest.cc
Normal file
337
modules/audio_coding/neteq/neteq_network_stats_unittest.cc
Normal file
@ -0,0 +1,337 @@
|
||||
/*
|
||||
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/common_types.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/neteq_external_decoder_test.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/rtp_generator.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
#include "webrtc/test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace test {
|
||||
|
||||
using ::testing::_;
|
||||
using ::testing::SetArgPointee;
|
||||
using ::testing::Return;
|
||||
|
||||
class MockAudioDecoder final : public AudioDecoder {
|
||||
public:
|
||||
// TODO(nisse): Valid overrides commented out, because the gmock
|
||||
// methods don't use any override declarations, and we want to avoid
|
||||
// warnings from -Winconsistent-missing-override. See
|
||||
// http://crbug.com/428099.
|
||||
static const int kPacketDuration = 960; // 48 kHz * 20 ms
|
||||
|
||||
MockAudioDecoder(int sample_rate_hz, size_t num_channels)
|
||||
: sample_rate_hz_(sample_rate_hz),
|
||||
num_channels_(num_channels),
|
||||
fec_enabled_(false) {}
|
||||
~MockAudioDecoder() /* override */ { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
|
||||
MOCK_METHOD0(Reset, void());
|
||||
|
||||
class MockFrame : public AudioDecoder::EncodedAudioFrame {
|
||||
public:
|
||||
MockFrame(size_t num_channels) : num_channels_(num_channels) {}
|
||||
|
||||
size_t Duration() const override { return kPacketDuration; }
|
||||
|
||||
rtc::Optional<DecodeResult> Decode(
|
||||
rtc::ArrayView<int16_t> decoded) const override {
|
||||
const size_t output_size =
|
||||
sizeof(int16_t) * kPacketDuration * num_channels_;
|
||||
if (decoded.size() >= output_size) {
|
||||
memset(decoded.data(), 0,
|
||||
sizeof(int16_t) * kPacketDuration * num_channels_);
|
||||
return rtc::Optional<DecodeResult>(
|
||||
{kPacketDuration * num_channels_, kSpeech});
|
||||
} else {
|
||||
ADD_FAILURE() << "Expected decoded.size() to be >= output_size ("
|
||||
<< decoded.size() << " vs. " << output_size << ")";
|
||||
return rtc::Optional<DecodeResult>();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const size_t num_channels_;
|
||||
};
|
||||
|
||||
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
|
||||
uint32_t timestamp) /* override */ {
|
||||
std::vector<ParseResult> results;
|
||||
if (fec_enabled_) {
|
||||
std::unique_ptr<MockFrame> fec_frame(new MockFrame(num_channels_));
|
||||
results.emplace_back(timestamp - kPacketDuration, 1,
|
||||
std::move(fec_frame));
|
||||
}
|
||||
|
||||
std::unique_ptr<MockFrame> frame(new MockFrame(num_channels_));
|
||||
results.emplace_back(timestamp, 0, std::move(frame));
|
||||
return results;
|
||||
}
|
||||
|
||||
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const
|
||||
/* override */ {
|
||||
ADD_FAILURE() << "Since going through ParsePayload, PacketDuration should "
|
||||
"never get called.";
|
||||
return kPacketDuration;
|
||||
}
|
||||
|
||||
bool PacketHasFec(
|
||||
const uint8_t* encoded, size_t encoded_len) const /* override */ {
|
||||
ADD_FAILURE() << "Since going through ParsePayload, PacketHasFec should "
|
||||
"never get called.";
|
||||
return fec_enabled_;
|
||||
}
|
||||
|
||||
int SampleRateHz() const /* override */ { return sample_rate_hz_; }
|
||||
|
||||
size_t Channels() const /* override */ { return num_channels_; }
|
||||
|
||||
void set_fec_enabled(bool enable_fec) { fec_enabled_ = enable_fec; }
|
||||
|
||||
bool fec_enabled() const { return fec_enabled_; }
|
||||
|
||||
protected:
|
||||
int DecodeInternal(const uint8_t* encoded,
|
||||
size_t encoded_len,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) /* override */ {
|
||||
ADD_FAILURE() << "Since going through ParsePayload, DecodeInternal should "
|
||||
"never get called.";
|
||||
return -1;
|
||||
}
|
||||
|
||||
private:
|
||||
const int sample_rate_hz_;
|
||||
const size_t num_channels_;
|
||||
bool fec_enabled_;
|
||||
};
|
||||
|
||||
class NetEqNetworkStatsTest : public NetEqExternalDecoderTest {
|
||||
public:
|
||||
static const int kPayloadSizeByte = 30;
|
||||
static const int kFrameSizeMs = 20;
|
||||
|
||||
enum logic {
|
||||
kIgnore,
|
||||
kEqual,
|
||||
kSmallerThan,
|
||||
kLargerThan,
|
||||
};
|
||||
|
||||
struct NetEqNetworkStatsCheck {
|
||||
logic current_buffer_size_ms;
|
||||
logic preferred_buffer_size_ms;
|
||||
logic jitter_peaks_found;
|
||||
logic packet_loss_rate;
|
||||
logic expand_rate;
|
||||
logic speech_expand_rate;
|
||||
logic preemptive_rate;
|
||||
logic accelerate_rate;
|
||||
logic secondary_decoded_rate;
|
||||
logic secondary_discarded_rate;
|
||||
logic clockdrift_ppm;
|
||||
logic added_zero_samples;
|
||||
NetEqNetworkStatistics stats_ref;
|
||||
};
|
||||
|
||||
NetEqNetworkStatsTest(NetEqDecoder codec,
|
||||
int sample_rate_hz,
|
||||
MockAudioDecoder* decoder)
|
||||
: NetEqExternalDecoderTest(codec, sample_rate_hz, decoder),
|
||||
external_decoder_(decoder),
|
||||
samples_per_ms_(sample_rate_hz / 1000),
|
||||
frame_size_samples_(kFrameSizeMs * samples_per_ms_),
|
||||
rtp_generator_(new test::RtpGenerator(samples_per_ms_)),
|
||||
last_lost_time_(0),
|
||||
packet_loss_interval_(0xffffffff) {
|
||||
Init();
|
||||
}
|
||||
|
||||
bool Lost(uint32_t send_time) {
|
||||
if (send_time - last_lost_time_ >= packet_loss_interval_) {
|
||||
last_lost_time_ = send_time;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void SetPacketLossRate(double loss_rate) {
|
||||
packet_loss_interval_ = (loss_rate >= 1e-3 ?
|
||||
static_cast<double>(kFrameSizeMs) / loss_rate : 0xffffffff);
|
||||
}
|
||||
|
||||
// |stats_ref|
|
||||
// expects.x = -1, do not care
|
||||
// expects.x = 0, 'x' in current stats should equal 'x' in |stats_ref|
|
||||
// expects.x = 1, 'x' in current stats should < 'x' in |stats_ref|
|
||||
// expects.x = 2, 'x' in current stats should > 'x' in |stats_ref|
|
||||
void CheckNetworkStatistics(NetEqNetworkStatsCheck expects) {
|
||||
NetEqNetworkStatistics stats;
|
||||
neteq()->NetworkStatistics(&stats);
|
||||
|
||||
#define CHECK_NETEQ_NETWORK_STATS(x)\
|
||||
switch (expects.x) {\
|
||||
case kEqual:\
|
||||
EXPECT_EQ(stats.x, expects.stats_ref.x);\
|
||||
break;\
|
||||
case kSmallerThan:\
|
||||
EXPECT_LT(stats.x, expects.stats_ref.x);\
|
||||
break;\
|
||||
case kLargerThan:\
|
||||
EXPECT_GT(stats.x, expects.stats_ref.x);\
|
||||
break;\
|
||||
default:\
|
||||
break;\
|
||||
}
|
||||
|
||||
CHECK_NETEQ_NETWORK_STATS(current_buffer_size_ms);
|
||||
CHECK_NETEQ_NETWORK_STATS(preferred_buffer_size_ms);
|
||||
CHECK_NETEQ_NETWORK_STATS(jitter_peaks_found);
|
||||
CHECK_NETEQ_NETWORK_STATS(packet_loss_rate);
|
||||
CHECK_NETEQ_NETWORK_STATS(expand_rate);
|
||||
CHECK_NETEQ_NETWORK_STATS(speech_expand_rate);
|
||||
CHECK_NETEQ_NETWORK_STATS(preemptive_rate);
|
||||
CHECK_NETEQ_NETWORK_STATS(accelerate_rate);
|
||||
CHECK_NETEQ_NETWORK_STATS(secondary_decoded_rate);
|
||||
CHECK_NETEQ_NETWORK_STATS(secondary_discarded_rate);
|
||||
CHECK_NETEQ_NETWORK_STATS(clockdrift_ppm);
|
||||
CHECK_NETEQ_NETWORK_STATS(added_zero_samples);
|
||||
|
||||
#undef CHECK_NETEQ_NETWORK_STATS
|
||||
|
||||
// Compare with CurrentDelay, which should be identical.
|
||||
EXPECT_EQ(stats.current_buffer_size_ms, neteq()->CurrentDelayMs());
|
||||
}
|
||||
|
||||
void RunTest(int num_loops, NetEqNetworkStatsCheck expects) {
|
||||
uint32_t time_now;
|
||||
uint32_t next_send_time;
|
||||
|
||||
// Initiate |last_lost_time_|.
|
||||
time_now = next_send_time = last_lost_time_ =
|
||||
rtp_generator_->GetRtpHeader(kPayloadType, frame_size_samples_,
|
||||
&rtp_header_);
|
||||
for (int k = 0; k < num_loops; ++k) {
|
||||
// Delay by one frame such that the FEC can come in.
|
||||
while (time_now + kFrameSizeMs >= next_send_time) {
|
||||
next_send_time = rtp_generator_->GetRtpHeader(kPayloadType,
|
||||
frame_size_samples_,
|
||||
&rtp_header_);
|
||||
if (!Lost(next_send_time)) {
|
||||
static const uint8_t payload[kPayloadSizeByte] = {0};
|
||||
InsertPacket(rtp_header_, payload, next_send_time);
|
||||
}
|
||||
}
|
||||
GetOutputAudio(&output_frame_);
|
||||
time_now += kOutputLengthMs;
|
||||
}
|
||||
CheckNetworkStatistics(expects);
|
||||
neteq()->FlushBuffers();
|
||||
}
|
||||
|
||||
void DecodeFecTest() {
|
||||
external_decoder_->set_fec_enabled(false);
|
||||
NetEqNetworkStatsCheck expects = {
|
||||
kIgnore, // current_buffer_size_ms
|
||||
kIgnore, // preferred_buffer_size_ms
|
||||
kIgnore, // jitter_peaks_found
|
||||
kEqual, // packet_loss_rate
|
||||
kEqual, // expand_rate
|
||||
kEqual, // voice_expand_rate
|
||||
kIgnore, // preemptive_rate
|
||||
kEqual, // accelerate_rate
|
||||
kEqual, // decoded_fec_rate
|
||||
kEqual, // discarded_fec_rate
|
||||
kIgnore, // clockdrift_ppm
|
||||
kEqual, // added_zero_samples
|
||||
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
|
||||
};
|
||||
RunTest(50, expects);
|
||||
|
||||
// Next we introduce packet losses.
|
||||
SetPacketLossRate(0.1);
|
||||
expects.stats_ref.packet_loss_rate = 1337;
|
||||
expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 1065;
|
||||
RunTest(50, expects);
|
||||
|
||||
// Next we enable FEC.
|
||||
external_decoder_->set_fec_enabled(true);
|
||||
// If FEC fills in the lost packets, no packet loss will be counted.
|
||||
expects.stats_ref.packet_loss_rate = 0;
|
||||
expects.stats_ref.expand_rate = expects.stats_ref.speech_expand_rate = 0;
|
||||
expects.stats_ref.secondary_decoded_rate = 2006;
|
||||
expects.stats_ref.secondary_discarded_rate = 14336;
|
||||
RunTest(50, expects);
|
||||
}
|
||||
|
||||
void NoiseExpansionTest() {
|
||||
NetEqNetworkStatsCheck expects = {
|
||||
kIgnore, // current_buffer_size_ms
|
||||
kIgnore, // preferred_buffer_size_ms
|
||||
kIgnore, // jitter_peaks_found
|
||||
kEqual, // packet_loss_rate
|
||||
kEqual, // expand_rate
|
||||
kEqual, // speech_expand_rate
|
||||
kIgnore, // preemptive_rate
|
||||
kEqual, // accelerate_rate
|
||||
kEqual, // decoded_fec_rate
|
||||
kEqual, // discard_fec_rate
|
||||
kIgnore, // clockdrift_ppm
|
||||
kEqual, // added_zero_samples
|
||||
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
|
||||
};
|
||||
RunTest(50, expects);
|
||||
|
||||
SetPacketLossRate(1);
|
||||
expects.stats_ref.expand_rate = 16384;
|
||||
expects.stats_ref.speech_expand_rate = 5324;
|
||||
RunTest(10, expects); // Lost 10 * 20ms in a row.
|
||||
}
|
||||
|
||||
private:
|
||||
MockAudioDecoder* external_decoder_;
|
||||
const int samples_per_ms_;
|
||||
const size_t frame_size_samples_;
|
||||
std::unique_ptr<test::RtpGenerator> rtp_generator_;
|
||||
RTPHeader rtp_header_;
|
||||
uint32_t last_lost_time_;
|
||||
uint32_t packet_loss_interval_;
|
||||
AudioFrame output_frame_;
|
||||
};
|
||||
|
||||
TEST(NetEqNetworkStatsTest, DecodeFec) {
|
||||
MockAudioDecoder decoder(48000, 1);
|
||||
NetEqNetworkStatsTest test(NetEqDecoder::kDecoderOpus, 48000, &decoder);
|
||||
test.DecodeFecTest();
|
||||
EXPECT_CALL(decoder, Die()).Times(1);
|
||||
}
|
||||
|
||||
TEST(NetEqNetworkStatsTest, StereoDecodeFec) {
|
||||
MockAudioDecoder decoder(48000, 2);
|
||||
NetEqNetworkStatsTest test(NetEqDecoder::kDecoderOpus, 48000, &decoder);
|
||||
test.DecodeFecTest();
|
||||
EXPECT_CALL(decoder, Die()).Times(1);
|
||||
}
|
||||
|
||||
TEST(NetEqNetworkStatsTest, NoiseExpansionTest) {
|
||||
MockAudioDecoder decoder(48000, 1);
|
||||
NetEqNetworkStatsTest test(NetEqDecoder::kDecoderOpus, 48000, &decoder);
|
||||
test.NoiseExpansionTest();
|
||||
EXPECT_CALL(decoder, Die()).Times(1);
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace webrtc
|
||||
442
modules/audio_coding/neteq/neteq_stereo_unittest.cc
Normal file
442
modules/audio_coding/neteq/neteq_stereo_unittest.cc
Normal file
@ -0,0 +1,442 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Test to verify correct stereo and multi-channel operation.
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <list>
|
||||
|
||||
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
|
||||
#include "webrtc/common_types.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/pcm16b/pcm16b.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/input_audio_file.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tools/rtp_generator.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
struct TestParameters {
|
||||
int frame_size;
|
||||
int sample_rate;
|
||||
size_t num_channels;
|
||||
};
|
||||
|
||||
// This is a parameterized test. The test parameters are supplied through a
|
||||
// TestParameters struct, which is obtained through the GetParam() method.
|
||||
//
|
||||
// The objective of the test is to create a mono input signal and a
|
||||
// multi-channel input signal, where each channel is identical to the mono
|
||||
// input channel. The two input signals are processed through their respective
|
||||
// NetEq instances. After that, the output signals are compared. The expected
|
||||
// result is that each channel in the multi-channel output is identical to the
|
||||
// mono output.
|
||||
class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
|
||||
protected:
|
||||
static const int kTimeStepMs = 10;
|
||||
static const size_t kMaxBlockSize = 480; // 10 ms @ 48 kHz.
|
||||
static const uint8_t kPayloadTypeMono = 95;
|
||||
static const uint8_t kPayloadTypeMulti = 96;
|
||||
|
||||
NetEqStereoTest()
|
||||
: num_channels_(GetParam().num_channels),
|
||||
sample_rate_hz_(GetParam().sample_rate),
|
||||
samples_per_ms_(sample_rate_hz_ / 1000),
|
||||
frame_size_ms_(GetParam().frame_size),
|
||||
frame_size_samples_(
|
||||
static_cast<size_t>(frame_size_ms_ * samples_per_ms_)),
|
||||
output_size_samples_(10 * samples_per_ms_),
|
||||
rtp_generator_mono_(samples_per_ms_),
|
||||
rtp_generator_(samples_per_ms_),
|
||||
payload_size_bytes_(0),
|
||||
multi_payload_size_bytes_(0),
|
||||
last_send_time_(0),
|
||||
last_arrival_time_(0) {
|
||||
NetEq::Config config;
|
||||
config.sample_rate_hz = sample_rate_hz_;
|
||||
rtc::scoped_refptr<AudioDecoderFactory> factory =
|
||||
CreateBuiltinAudioDecoderFactory();
|
||||
neteq_mono_ = NetEq::Create(config, factory);
|
||||
neteq_ = NetEq::Create(config, factory);
|
||||
input_ = new int16_t[frame_size_samples_];
|
||||
encoded_ = new uint8_t[2 * frame_size_samples_];
|
||||
input_multi_channel_ = new int16_t[frame_size_samples_ * num_channels_];
|
||||
encoded_multi_channel_ = new uint8_t[frame_size_samples_ * 2 *
|
||||
num_channels_];
|
||||
}
|
||||
|
||||
~NetEqStereoTest() {
|
||||
delete neteq_mono_;
|
||||
delete neteq_;
|
||||
delete [] input_;
|
||||
delete [] encoded_;
|
||||
delete [] input_multi_channel_;
|
||||
delete [] encoded_multi_channel_;
|
||||
}
|
||||
|
||||
virtual void SetUp() {
|
||||
const std::string file_name =
|
||||
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
|
||||
input_file_.reset(new test::InputAudioFile(file_name));
|
||||
NetEqDecoder mono_decoder;
|
||||
NetEqDecoder multi_decoder;
|
||||
switch (sample_rate_hz_) {
|
||||
case 8000:
|
||||
mono_decoder = NetEqDecoder::kDecoderPCM16B;
|
||||
if (num_channels_ == 2) {
|
||||
multi_decoder = NetEqDecoder::kDecoderPCM16B_2ch;
|
||||
} else if (num_channels_ == 5) {
|
||||
multi_decoder = NetEqDecoder::kDecoderPCM16B_5ch;
|
||||
} else {
|
||||
FAIL() << "Only 2 and 5 channels supported for 8000 Hz.";
|
||||
}
|
||||
break;
|
||||
case 16000:
|
||||
mono_decoder = NetEqDecoder::kDecoderPCM16Bwb;
|
||||
if (num_channels_ == 2) {
|
||||
multi_decoder = NetEqDecoder::kDecoderPCM16Bwb_2ch;
|
||||
} else {
|
||||
FAIL() << "More than 2 channels is not supported for 16000 Hz.";
|
||||
}
|
||||
break;
|
||||
case 32000:
|
||||
mono_decoder = NetEqDecoder::kDecoderPCM16Bswb32kHz;
|
||||
if (num_channels_ == 2) {
|
||||
multi_decoder = NetEqDecoder::kDecoderPCM16Bswb32kHz_2ch;
|
||||
} else {
|
||||
FAIL() << "More than 2 channels is not supported for 32000 Hz.";
|
||||
}
|
||||
break;
|
||||
case 48000:
|
||||
mono_decoder = NetEqDecoder::kDecoderPCM16Bswb48kHz;
|
||||
if (num_channels_ == 2) {
|
||||
multi_decoder = NetEqDecoder::kDecoderPCM16Bswb48kHz_2ch;
|
||||
} else {
|
||||
FAIL() << "More than 2 channels is not supported for 48000 Hz.";
|
||||
}
|
||||
break;
|
||||
default:
|
||||
FAIL() << "We shouldn't get here.";
|
||||
}
|
||||
ASSERT_EQ(NetEq::kOK, neteq_mono_->RegisterPayloadType(mono_decoder, "mono",
|
||||
kPayloadTypeMono));
|
||||
ASSERT_EQ(NetEq::kOK,
|
||||
neteq_->RegisterPayloadType(multi_decoder, "multi-channel",
|
||||
kPayloadTypeMulti));
|
||||
}
|
||||
|
||||
virtual void TearDown() {}
|
||||
|
||||
int GetNewPackets() {
|
||||
if (!input_file_->Read(frame_size_samples_, input_)) {
|
||||
return -1;
|
||||
}
|
||||
payload_size_bytes_ = WebRtcPcm16b_Encode(input_, frame_size_samples_,
|
||||
encoded_);
|
||||
if (frame_size_samples_ * 2 != payload_size_bytes_) {
|
||||
return -1;
|
||||
}
|
||||
int next_send_time = rtp_generator_mono_.GetRtpHeader(kPayloadTypeMono,
|
||||
frame_size_samples_,
|
||||
&rtp_header_mono_);
|
||||
test::InputAudioFile::DuplicateInterleaved(input_, frame_size_samples_,
|
||||
num_channels_,
|
||||
input_multi_channel_);
|
||||
multi_payload_size_bytes_ = WebRtcPcm16b_Encode(
|
||||
input_multi_channel_, frame_size_samples_ * num_channels_,
|
||||
encoded_multi_channel_);
|
||||
if (frame_size_samples_ * 2 * num_channels_ != multi_payload_size_bytes_) {
|
||||
return -1;
|
||||
}
|
||||
rtp_generator_.GetRtpHeader(kPayloadTypeMulti, frame_size_samples_,
|
||||
&rtp_header_);
|
||||
return next_send_time;
|
||||
}
|
||||
|
||||
virtual void VerifyOutput(size_t num_samples) {
|
||||
const int16_t* output_data = output_.data();
|
||||
const int16_t* output_multi_channel_data = output_multi_channel_.data();
|
||||
for (size_t i = 0; i < num_samples; ++i) {
|
||||
for (size_t j = 0; j < num_channels_; ++j) {
|
||||
ASSERT_EQ(output_data[i],
|
||||
output_multi_channel_data[i * num_channels_ + j])
|
||||
<< "Diff in sample " << i << ", channel " << j << ".";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
virtual int GetArrivalTime(int send_time) {
|
||||
int arrival_time = last_arrival_time_ + (send_time - last_send_time_);
|
||||
last_send_time_ = send_time;
|
||||
last_arrival_time_ = arrival_time;
|
||||
return arrival_time;
|
||||
}
|
||||
|
||||
virtual bool Lost() { return false; }
|
||||
|
||||
void RunTest(int num_loops) {
|
||||
// Get next input packets (mono and multi-channel).
|
||||
int next_send_time;
|
||||
int next_arrival_time;
|
||||
do {
|
||||
next_send_time = GetNewPackets();
|
||||
ASSERT_NE(-1, next_send_time);
|
||||
next_arrival_time = GetArrivalTime(next_send_time);
|
||||
} while (Lost()); // If lost, immediately read the next packet.
|
||||
|
||||
int time_now = 0;
|
||||
for (int k = 0; k < num_loops; ++k) {
|
||||
while (time_now >= next_arrival_time) {
|
||||
// Insert packet in mono instance.
|
||||
ASSERT_EQ(NetEq::kOK,
|
||||
neteq_mono_->InsertPacket(rtp_header_mono_,
|
||||
rtc::ArrayView<const uint8_t>(
|
||||
encoded_, payload_size_bytes_),
|
||||
next_arrival_time));
|
||||
// Insert packet in multi-channel instance.
|
||||
ASSERT_EQ(NetEq::kOK,
|
||||
neteq_->InsertPacket(
|
||||
rtp_header_,
|
||||
rtc::ArrayView<const uint8_t>(encoded_multi_channel_,
|
||||
multi_payload_size_bytes_),
|
||||
next_arrival_time));
|
||||
// Get next input packets (mono and multi-channel).
|
||||
do {
|
||||
next_send_time = GetNewPackets();
|
||||
ASSERT_NE(-1, next_send_time);
|
||||
next_arrival_time = GetArrivalTime(next_send_time);
|
||||
} while (Lost()); // If lost, immediately read the next packet.
|
||||
}
|
||||
// Get audio from mono instance.
|
||||
bool muted;
|
||||
EXPECT_EQ(NetEq::kOK, neteq_mono_->GetAudio(&output_, &muted));
|
||||
ASSERT_FALSE(muted);
|
||||
EXPECT_EQ(1u, output_.num_channels_);
|
||||
EXPECT_EQ(output_size_samples_, output_.samples_per_channel_);
|
||||
// Get audio from multi-channel instance.
|
||||
ASSERT_EQ(NetEq::kOK, neteq_->GetAudio(&output_multi_channel_, &muted));
|
||||
ASSERT_FALSE(muted);
|
||||
EXPECT_EQ(num_channels_, output_multi_channel_.num_channels_);
|
||||
EXPECT_EQ(output_size_samples_,
|
||||
output_multi_channel_.samples_per_channel_);
|
||||
std::ostringstream ss;
|
||||
ss << "Lap number " << k << ".";
|
||||
SCOPED_TRACE(ss.str()); // Print out the parameter values on failure.
|
||||
// Compare mono and multi-channel.
|
||||
ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_));
|
||||
|
||||
time_now += kTimeStepMs;
|
||||
}
|
||||
}
|
||||
|
||||
const size_t num_channels_;
|
||||
const int sample_rate_hz_;
|
||||
const int samples_per_ms_;
|
||||
const int frame_size_ms_;
|
||||
const size_t frame_size_samples_;
|
||||
const size_t output_size_samples_;
|
||||
NetEq* neteq_mono_;
|
||||
NetEq* neteq_;
|
||||
test::RtpGenerator rtp_generator_mono_;
|
||||
test::RtpGenerator rtp_generator_;
|
||||
int16_t* input_;
|
||||
int16_t* input_multi_channel_;
|
||||
uint8_t* encoded_;
|
||||
uint8_t* encoded_multi_channel_;
|
||||
AudioFrame output_;
|
||||
AudioFrame output_multi_channel_;
|
||||
RTPHeader rtp_header_mono_;
|
||||
RTPHeader rtp_header_;
|
||||
size_t payload_size_bytes_;
|
||||
size_t multi_payload_size_bytes_;
|
||||
int last_send_time_;
|
||||
int last_arrival_time_;
|
||||
std::unique_ptr<test::InputAudioFile> input_file_;
|
||||
};
|
||||
|
||||
class NetEqStereoTestNoJitter : public NetEqStereoTest {
|
||||
protected:
|
||||
NetEqStereoTestNoJitter()
|
||||
: NetEqStereoTest() {
|
||||
// Start the sender 100 ms before the receiver to pre-fill the buffer.
|
||||
// This is to avoid doing preemptive expand early in the test.
|
||||
// TODO(hlundin): Mock the decision making instead to control the modes.
|
||||
last_arrival_time_ = -100;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(NetEqStereoTestNoJitter, RunTest) {
|
||||
RunTest(8);
|
||||
}
|
||||
|
||||
class NetEqStereoTestPositiveDrift : public NetEqStereoTest {
|
||||
protected:
|
||||
NetEqStereoTestPositiveDrift()
|
||||
: NetEqStereoTest(),
|
||||
drift_factor(0.9) {
|
||||
// Start the sender 100 ms before the receiver to pre-fill the buffer.
|
||||
// This is to avoid doing preemptive expand early in the test.
|
||||
// TODO(hlundin): Mock the decision making instead to control the modes.
|
||||
last_arrival_time_ = -100;
|
||||
}
|
||||
virtual int GetArrivalTime(int send_time) {
|
||||
int arrival_time = last_arrival_time_ +
|
||||
drift_factor * (send_time - last_send_time_);
|
||||
last_send_time_ = send_time;
|
||||
last_arrival_time_ = arrival_time;
|
||||
return arrival_time;
|
||||
}
|
||||
|
||||
double drift_factor;
|
||||
};
|
||||
|
||||
TEST_P(NetEqStereoTestPositiveDrift, RunTest) {
|
||||
RunTest(100);
|
||||
}
|
||||
|
||||
class NetEqStereoTestNegativeDrift : public NetEqStereoTestPositiveDrift {
|
||||
protected:
|
||||
NetEqStereoTestNegativeDrift()
|
||||
: NetEqStereoTestPositiveDrift() {
|
||||
drift_factor = 1.1;
|
||||
last_arrival_time_ = 0;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_P(NetEqStereoTestNegativeDrift, RunTest) {
|
||||
RunTest(100);
|
||||
}
|
||||
|
||||
class NetEqStereoTestDelays : public NetEqStereoTest {
|
||||
protected:
|
||||
static const int kDelayInterval = 10;
|
||||
static const int kDelay = 1000;
|
||||
NetEqStereoTestDelays()
|
||||
: NetEqStereoTest(),
|
||||
frame_index_(0) {
|
||||
}
|
||||
|
||||
virtual int GetArrivalTime(int send_time) {
|
||||
// Deliver immediately, unless we have a back-log.
|
||||
int arrival_time = std::min(last_arrival_time_, send_time);
|
||||
if (++frame_index_ % kDelayInterval == 0) {
|
||||
// Delay this packet.
|
||||
arrival_time += kDelay;
|
||||
}
|
||||
last_send_time_ = send_time;
|
||||
last_arrival_time_ = arrival_time;
|
||||
return arrival_time;
|
||||
}
|
||||
|
||||
int frame_index_;
|
||||
};
|
||||
|
||||
TEST_P(NetEqStereoTestDelays, RunTest) {
|
||||
RunTest(1000);
|
||||
}
|
||||
|
||||
class NetEqStereoTestLosses : public NetEqStereoTest {
|
||||
protected:
|
||||
static const int kLossInterval = 10;
|
||||
NetEqStereoTestLosses()
|
||||
: NetEqStereoTest(),
|
||||
frame_index_(0) {
|
||||
}
|
||||
|
||||
virtual bool Lost() {
|
||||
return (++frame_index_) % kLossInterval == 0;
|
||||
}
|
||||
|
||||
// TODO(hlundin): NetEq is not giving bitexact results for these cases.
|
||||
virtual void VerifyOutput(size_t num_samples) {
|
||||
for (size_t i = 0; i < num_samples; ++i) {
|
||||
const int16_t* output_data = output_.data();
|
||||
const int16_t* output_multi_channel_data = output_multi_channel_.data();
|
||||
auto first_channel_sample =
|
||||
output_multi_channel_data[i * num_channels_];
|
||||
for (size_t j = 0; j < num_channels_; ++j) {
|
||||
const int kErrorMargin = 200;
|
||||
EXPECT_NEAR(output_data[i],
|
||||
output_multi_channel_data[i * num_channels_ + j],
|
||||
kErrorMargin)
|
||||
<< "Diff in sample " << i << ", channel " << j << ".";
|
||||
EXPECT_EQ(first_channel_sample,
|
||||
output_multi_channel_data[i * num_channels_ + j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int frame_index_;
|
||||
};
|
||||
|
||||
TEST_P(NetEqStereoTestLosses, RunTest) {
|
||||
RunTest(100);
|
||||
}
|
||||
|
||||
|
||||
// Creates a list of parameter sets.
|
||||
std::list<TestParameters> GetTestParameters() {
|
||||
std::list<TestParameters> l;
|
||||
const int sample_rates[] = {8000, 16000, 32000};
|
||||
const int num_rates = sizeof(sample_rates) / sizeof(sample_rates[0]);
|
||||
// Loop through sample rates.
|
||||
for (int rate_index = 0; rate_index < num_rates; ++rate_index) {
|
||||
int sample_rate = sample_rates[rate_index];
|
||||
// Loop through all frame sizes between 10 and 60 ms.
|
||||
for (int frame_size = 10; frame_size <= 60; frame_size += 10) {
|
||||
TestParameters p;
|
||||
p.frame_size = frame_size;
|
||||
p.sample_rate = sample_rate;
|
||||
p.num_channels = 2;
|
||||
l.push_back(p);
|
||||
if (sample_rate == 8000) {
|
||||
// Add a five-channel test for 8000 Hz.
|
||||
p.num_channels = 5;
|
||||
l.push_back(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
// Pretty-printing the test parameters in case of an error.
|
||||
void PrintTo(const TestParameters& p, ::std::ostream* os) {
|
||||
*os << "{frame_size = " << p.frame_size <<
|
||||
", num_channels = " << p.num_channels <<
|
||||
", sample_rate = " << p.sample_rate << "}";
|
||||
}
|
||||
|
||||
// Instantiate the tests. Each test is instantiated using the function above,
|
||||
// so that all different parameter combinations are tested.
|
||||
INSTANTIATE_TEST_CASE_P(MultiChannel,
|
||||
NetEqStereoTestNoJitter,
|
||||
::testing::ValuesIn(GetTestParameters()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MultiChannel,
|
||||
NetEqStereoTestPositiveDrift,
|
||||
::testing::ValuesIn(GetTestParameters()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MultiChannel,
|
||||
NetEqStereoTestNegativeDrift,
|
||||
::testing::ValuesIn(GetTestParameters()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MultiChannel,
|
||||
NetEqStereoTestDelays,
|
||||
::testing::ValuesIn(GetTestParameters()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MultiChannel,
|
||||
NetEqStereoTestLosses,
|
||||
::testing::ValuesIn(GetTestParameters()));
|
||||
|
||||
} // namespace webrtc
|
||||
1637
modules/audio_coding/neteq/neteq_unittest.cc
Normal file
1637
modules/audio_coding/neteq/neteq_unittest.cc
Normal file
File diff suppressed because it is too large
Load Diff
32
modules/audio_coding/neteq/neteq_unittest.proto
Normal file
32
modules/audio_coding/neteq/neteq_unittest.proto
Normal file
@ -0,0 +1,32 @@
|
||||
syntax = "proto2";
|
||||
option optimize_for = LITE_RUNTIME;
|
||||
package webrtc.neteq_unittest;
|
||||
|
||||
message NetEqNetworkStatistics {
|
||||
// Next field number 18.
|
||||
optional uint32 current_buffer_size_ms = 1;
|
||||
optional uint32 preferred_buffer_size_ms = 2;
|
||||
optional uint32 jitter_peaks_found = 3;
|
||||
optional uint32 packet_loss_rate = 4;
|
||||
optional uint32 packet_discard_rate = 5 [deprecated = true];
|
||||
optional uint32 expand_rate = 6;
|
||||
optional uint32 speech_expand_rate = 7;
|
||||
optional uint32 preemptive_rate = 8;
|
||||
optional uint32 accelerate_rate = 9;
|
||||
optional uint32 secondary_decoded_rate = 10;
|
||||
optional uint32 secondary_discarded_rate = 17;
|
||||
optional int32 clockdrift_ppm = 11;
|
||||
optional uint64 added_zero_samples = 12;
|
||||
optional int32 mean_waiting_time_ms = 13;
|
||||
optional int32 median_waiting_time_ms = 14;
|
||||
optional int32 min_waiting_time_ms = 15;
|
||||
optional int32 max_waiting_time_ms = 16;
|
||||
}
|
||||
|
||||
message RtcpStatistics {
|
||||
optional uint32 fraction_lost = 1;
|
||||
optional uint32 cumulative_lost = 2;
|
||||
optional uint32 extended_max_sequence_number = 3;
|
||||
optional uint32 jitter = 4;
|
||||
}
|
||||
|
||||
216
modules/audio_coding/neteq/normal.cc
Normal file
216
modules/audio_coding/neteq/normal.cc
Normal file
@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/normal.h"
|
||||
|
||||
#include <string.h> // memset, memcpy
|
||||
|
||||
#include <algorithm> // min
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
int Normal::Process(const int16_t* input,
|
||||
size_t length,
|
||||
Modes last_mode,
|
||||
int16_t* external_mute_factor_array,
|
||||
AudioMultiVector* output) {
|
||||
if (length == 0) {
|
||||
// Nothing to process.
|
||||
output->Clear();
|
||||
return static_cast<int>(length);
|
||||
}
|
||||
|
||||
RTC_DCHECK(output->Empty());
|
||||
// Output should be empty at this point.
|
||||
if (length % output->Channels() != 0) {
|
||||
// The length does not match the number of channels.
|
||||
output->Clear();
|
||||
return 0;
|
||||
}
|
||||
output->PushBackInterleaved(input, length);
|
||||
|
||||
const int fs_mult = fs_hz_ / 8000;
|
||||
RTC_DCHECK_GT(fs_mult, 0);
|
||||
// fs_shift = log2(fs_mult), rounded down.
|
||||
// Note that |fs_shift| is not "exact" for 48 kHz.
|
||||
// TODO(hlundin): Investigate this further.
|
||||
const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);
|
||||
|
||||
// Check if last RecOut call resulted in an Expand. If so, we have to take
|
||||
// care of some cross-fading and unmuting.
|
||||
if (last_mode == kModeExpand) {
|
||||
// Generate interpolation data using Expand.
|
||||
// First, set Expand parameters to appropriate values.
|
||||
expand_->SetParametersForNormalAfterExpand();
|
||||
|
||||
// Call Expand.
|
||||
AudioMultiVector expanded(output->Channels());
|
||||
expand_->Process(&expanded);
|
||||
expand_->Reset();
|
||||
|
||||
size_t length_per_channel = length / output->Channels();
|
||||
std::unique_ptr<int16_t[]> signal(new int16_t[length_per_channel]);
|
||||
for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) {
|
||||
// Adjust muting factor (main muting factor times expand muting factor).
|
||||
external_mute_factor_array[channel_ix] = static_cast<int16_t>(
|
||||
(external_mute_factor_array[channel_ix] *
|
||||
expand_->MuteFactor(channel_ix)) >> 14);
|
||||
|
||||
(*output)[channel_ix].CopyTo(length_per_channel, 0, signal.get());
|
||||
|
||||
// Find largest absolute value in new data.
|
||||
int16_t decoded_max =
|
||||
WebRtcSpl_MaxAbsValueW16(signal.get(), length_per_channel);
|
||||
// Adjust muting factor if needed (to BGN level).
|
||||
size_t energy_length =
|
||||
std::min(static_cast<size_t>(fs_mult * 64), length_per_channel);
|
||||
int scaling = 6 + fs_shift
|
||||
- WebRtcSpl_NormW32(decoded_max * decoded_max);
|
||||
scaling = std::max(scaling, 0); // |scaling| should always be >= 0.
|
||||
int32_t energy = WebRtcSpl_DotProductWithScale(signal.get(), signal.get(),
|
||||
energy_length, scaling);
|
||||
int32_t scaled_energy_length =
|
||||
static_cast<int32_t>(energy_length >> scaling);
|
||||
if (scaled_energy_length > 0) {
|
||||
energy = energy / scaled_energy_length;
|
||||
} else {
|
||||
energy = 0;
|
||||
}
|
||||
|
||||
int mute_factor;
|
||||
if ((energy != 0) &&
|
||||
(energy > background_noise_.Energy(channel_ix))) {
|
||||
// Normalize new frame energy to 15 bits.
|
||||
scaling = WebRtcSpl_NormW32(energy) - 16;
|
||||
// We want background_noise_.energy() / energy in Q14.
|
||||
int32_t bgn_energy = WEBRTC_SPL_SHIFT_W32(
|
||||
background_noise_.Energy(channel_ix), scaling + 14);
|
||||
int16_t energy_scaled =
|
||||
static_cast<int16_t>(WEBRTC_SPL_SHIFT_W32(energy, scaling));
|
||||
int32_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled);
|
||||
mute_factor = WebRtcSpl_SqrtFloor(ratio << 14);
|
||||
} else {
|
||||
mute_factor = 16384; // 1.0 in Q14.
|
||||
}
|
||||
if (mute_factor > external_mute_factor_array[channel_ix]) {
|
||||
external_mute_factor_array[channel_ix] =
|
||||
static_cast<int16_t>(std::min(mute_factor, 16384));
|
||||
}
|
||||
|
||||
// If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
|
||||
int increment = 64 / fs_mult;
|
||||
for (size_t i = 0; i < length_per_channel; i++) {
|
||||
// Scale with mute factor.
|
||||
RTC_DCHECK_LT(channel_ix, output->Channels());
|
||||
RTC_DCHECK_LT(i, output->Size());
|
||||
int32_t scaled_signal = (*output)[channel_ix][i] *
|
||||
external_mute_factor_array[channel_ix];
|
||||
// Shift 14 with proper rounding.
|
||||
(*output)[channel_ix][i] =
|
||||
static_cast<int16_t>((scaled_signal + 8192) >> 14);
|
||||
// Increase mute_factor towards 16384.
|
||||
external_mute_factor_array[channel_ix] = static_cast<int16_t>(std::min(
|
||||
external_mute_factor_array[channel_ix] + increment, 16384));
|
||||
}
|
||||
|
||||
// Interpolate the expanded data into the new vector.
|
||||
// (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
|
||||
size_t win_length = samples_per_ms_;
|
||||
int16_t win_slope_Q14 = default_win_slope_Q14_;
|
||||
RTC_DCHECK_LT(channel_ix, output->Channels());
|
||||
if (win_length > output->Size()) {
|
||||
win_length = output->Size();
|
||||
win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
|
||||
}
|
||||
int16_t win_up_Q14 = 0;
|
||||
for (size_t i = 0; i < win_length; i++) {
|
||||
win_up_Q14 += win_slope_Q14;
|
||||
(*output)[channel_ix][i] =
|
||||
(win_up_Q14 * (*output)[channel_ix][i] +
|
||||
((1 << 14) - win_up_Q14) * expanded[channel_ix][i] + (1 << 13)) >>
|
||||
14;
|
||||
}
|
||||
RTC_DCHECK_GT(win_up_Q14,
|
||||
(1 << 14) - 32); // Worst case rouding is a length of 34
|
||||
}
|
||||
} else if (last_mode == kModeRfc3389Cng) {
|
||||
RTC_DCHECK_EQ(output->Channels(), 1); // Not adapted for multi-channel yet.
|
||||
static const size_t kCngLength = 48;
|
||||
RTC_DCHECK_LE(8 * fs_mult, kCngLength);
|
||||
int16_t cng_output[kCngLength];
|
||||
// Reset mute factor and start up fresh.
|
||||
external_mute_factor_array[0] = 16384;
|
||||
ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
|
||||
|
||||
if (cng_decoder) {
|
||||
// Generate long enough for 48kHz.
|
||||
if (!cng_decoder->Generate(cng_output, 0)) {
|
||||
// Error returned; set return vector to all zeros.
|
||||
memset(cng_output, 0, sizeof(cng_output));
|
||||
}
|
||||
} else {
|
||||
// If no CNG instance is defined, just copy from the decoded data.
|
||||
// (This will result in interpolating the decoded with itself.)
|
||||
(*output)[0].CopyTo(fs_mult * 8, 0, cng_output);
|
||||
}
|
||||
// Interpolate the CNG into the new vector.
|
||||
// (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
|
||||
size_t win_length = samples_per_ms_;
|
||||
int16_t win_slope_Q14 = default_win_slope_Q14_;
|
||||
if (win_length > kCngLength) {
|
||||
win_length = kCngLength;
|
||||
win_slope_Q14 = (1 << 14) / static_cast<int16_t>(win_length);
|
||||
}
|
||||
int16_t win_up_Q14 = 0;
|
||||
for (size_t i = 0; i < win_length; i++) {
|
||||
win_up_Q14 += win_slope_Q14;
|
||||
(*output)[0][i] =
|
||||
(win_up_Q14 * (*output)[0][i] +
|
||||
((1 << 14) - win_up_Q14) * cng_output[i] + (1 << 13)) >>
|
||||
14;
|
||||
}
|
||||
RTC_DCHECK_GT(win_up_Q14,
|
||||
(1 << 14) - 32); // Worst case rouding is a length of 34
|
||||
} else if (external_mute_factor_array[0] < 16384) {
|
||||
// Previous was neither of Expand, FadeToBGN or RFC3389_CNG, but we are
|
||||
// still ramping up from previous muting.
|
||||
// If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
|
||||
int increment = 64 / fs_mult;
|
||||
size_t length_per_channel = length / output->Channels();
|
||||
for (size_t i = 0; i < length_per_channel; i++) {
|
||||
for (size_t channel_ix = 0; channel_ix < output->Channels();
|
||||
++channel_ix) {
|
||||
// Scale with mute factor.
|
||||
RTC_DCHECK_LT(channel_ix, output->Channels());
|
||||
RTC_DCHECK_LT(i, output->Size());
|
||||
int32_t scaled_signal = (*output)[channel_ix][i] *
|
||||
external_mute_factor_array[channel_ix];
|
||||
// Shift 14 with proper rounding.
|
||||
(*output)[channel_ix][i] =
|
||||
static_cast<int16_t>((scaled_signal + 8192) >> 14);
|
||||
// Increase mute_factor towards 16384.
|
||||
external_mute_factor_array[channel_ix] = static_cast<int16_t>(std::min(
|
||||
16384, external_mute_factor_array[channel_ix] + increment));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return static_cast<int>(length);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
75
modules/audio_coding/neteq/normal.h
Normal file
75
modules/audio_coding/neteq/normal.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
|
||||
|
||||
#include <string.h> // Access to size_t.
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/defines.h"
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/rtc_base/safe_conversions.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class BackgroundNoise;
|
||||
class DecoderDatabase;
|
||||
class Expand;
|
||||
|
||||
// This class provides the "Normal" DSP operation, that is performed when
|
||||
// there is no data loss, no need to stretch the timing of the signal, and
|
||||
// no other "special circumstances" are at hand.
|
||||
class Normal {
|
||||
public:
|
||||
Normal(int fs_hz,
|
||||
DecoderDatabase* decoder_database,
|
||||
const BackgroundNoise& background_noise,
|
||||
Expand* expand)
|
||||
: fs_hz_(fs_hz),
|
||||
decoder_database_(decoder_database),
|
||||
background_noise_(background_noise),
|
||||
expand_(expand),
|
||||
samples_per_ms_(rtc::CheckedDivExact(fs_hz_, 1000)),
|
||||
default_win_slope_Q14_(
|
||||
rtc::dchecked_cast<uint16_t>((1 << 14) / samples_per_ms_)) {}
|
||||
|
||||
virtual ~Normal() {}
|
||||
|
||||
// Performs the "Normal" operation. The decoder data is supplied in |input|,
|
||||
// having |length| samples in total for all channels (interleaved). The
|
||||
// result is written to |output|. The number of channels allocated in
|
||||
// |output| defines the number of channels that will be used when
|
||||
// de-interleaving |input|. |last_mode| contains the mode used in the previous
|
||||
// GetAudio call (i.e., not the current one), and |external_mute_factor| is
|
||||
// a pointer to the mute factor in the NetEqImpl class.
|
||||
int Process(const int16_t* input, size_t length,
|
||||
Modes last_mode,
|
||||
int16_t* external_mute_factor_array,
|
||||
AudioMultiVector* output);
|
||||
|
||||
private:
|
||||
int fs_hz_;
|
||||
DecoderDatabase* decoder_database_;
|
||||
const BackgroundNoise& background_noise_;
|
||||
Expand* expand_;
|
||||
const size_t samples_per_ms_;
|
||||
const int16_t default_win_slope_Q14_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(Normal);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NORMAL_H_
|
||||
176
modules/audio_coding/neteq/normal_unittest.cc
Normal file
176
modules/audio_coding/neteq/normal_unittest.cc
Normal file
@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for Normal class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/normal.h"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/mock/mock_expand.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
using ::testing::_;
|
||||
using ::testing::Invoke;
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
int ExpandProcess120ms(AudioMultiVector* output) {
|
||||
AudioMultiVector dummy_audio(1, 11520u);
|
||||
dummy_audio.CopyTo(output);
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(Normal, CreateAndDestroy) {
|
||||
MockDecoderDatabase db;
|
||||
int fs = 8000;
|
||||
size_t channels = 1;
|
||||
BackgroundNoise bgn(channels);
|
||||
SyncBuffer sync_buffer(1, 1000);
|
||||
RandomVector random_vector;
|
||||
StatisticsCalculator statistics;
|
||||
Expand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs, channels);
|
||||
Normal normal(fs, &db, bgn, &expand);
|
||||
EXPECT_CALL(db, Die()); // Called when |db| goes out of scope.
|
||||
}
|
||||
|
||||
TEST(Normal, AvoidDivideByZero) {
|
||||
WebRtcSpl_Init();
|
||||
MockDecoderDatabase db;
|
||||
int fs = 8000;
|
||||
size_t channels = 1;
|
||||
BackgroundNoise bgn(channels);
|
||||
SyncBuffer sync_buffer(1, 1000);
|
||||
RandomVector random_vector;
|
||||
StatisticsCalculator statistics;
|
||||
MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs,
|
||||
channels);
|
||||
Normal normal(fs, &db, bgn, &expand);
|
||||
|
||||
int16_t input[1000] = {0};
|
||||
std::unique_ptr<int16_t[]> mute_factor_array(new int16_t[channels]);
|
||||
for (size_t i = 0; i < channels; ++i) {
|
||||
mute_factor_array[i] = 16384;
|
||||
}
|
||||
AudioMultiVector output(channels);
|
||||
|
||||
// Zero input length.
|
||||
EXPECT_EQ(
|
||||
0,
|
||||
normal.Process(input, 0, kModeExpand, mute_factor_array.get(), &output));
|
||||
EXPECT_EQ(0u, output.Size());
|
||||
|
||||
// Try to make energy_length >> scaling = 0;
|
||||
EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
|
||||
EXPECT_CALL(expand, Process(_));
|
||||
EXPECT_CALL(expand, Reset());
|
||||
// If input_size_samples < 64, then energy_length in Normal::Process() will
|
||||
// be equal to input_size_samples. Since the input is all zeros, decoded_max
|
||||
// will be zero, and scaling will be >= 6. Thus, energy_length >> scaling = 0,
|
||||
// and using this as a denominator would lead to problems.
|
||||
int input_size_samples = 63;
|
||||
EXPECT_EQ(input_size_samples,
|
||||
normal.Process(input,
|
||||
input_size_samples,
|
||||
kModeExpand,
|
||||
mute_factor_array.get(),
|
||||
&output));
|
||||
|
||||
EXPECT_CALL(db, Die()); // Called when |db| goes out of scope.
|
||||
EXPECT_CALL(expand, Die()); // Called when |expand| goes out of scope.
|
||||
}
|
||||
|
||||
TEST(Normal, InputLengthAndChannelsDoNotMatch) {
|
||||
WebRtcSpl_Init();
|
||||
MockDecoderDatabase db;
|
||||
int fs = 8000;
|
||||
size_t channels = 2;
|
||||
BackgroundNoise bgn(channels);
|
||||
SyncBuffer sync_buffer(channels, 1000);
|
||||
RandomVector random_vector;
|
||||
StatisticsCalculator statistics;
|
||||
MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, fs,
|
||||
channels);
|
||||
Normal normal(fs, &db, bgn, &expand);
|
||||
|
||||
int16_t input[1000] = {0};
|
||||
std::unique_ptr<int16_t[]> mute_factor_array(new int16_t[channels]);
|
||||
for (size_t i = 0; i < channels; ++i) {
|
||||
mute_factor_array[i] = 16384;
|
||||
}
|
||||
AudioMultiVector output(channels);
|
||||
|
||||
// Let the number of samples be one sample less than 80 samples per channel.
|
||||
size_t input_len = 80 * channels - 1;
|
||||
EXPECT_EQ(
|
||||
0,
|
||||
normal.Process(
|
||||
input, input_len, kModeExpand, mute_factor_array.get(), &output));
|
||||
EXPECT_EQ(0u, output.Size());
|
||||
|
||||
EXPECT_CALL(db, Die()); // Called when |db| goes out of scope.
|
||||
EXPECT_CALL(expand, Die()); // Called when |expand| goes out of scope.
|
||||
}
|
||||
|
||||
TEST(Normal, LastModeExpand120msPacket) {
|
||||
WebRtcSpl_Init();
|
||||
MockDecoderDatabase db;
|
||||
const int kFs = 48000;
|
||||
const size_t kPacketsizeBytes = 11520u;
|
||||
const size_t kChannels = 1;
|
||||
BackgroundNoise bgn(kChannels);
|
||||
SyncBuffer sync_buffer(kChannels, 1000);
|
||||
RandomVector random_vector;
|
||||
StatisticsCalculator statistics;
|
||||
MockExpand expand(&bgn, &sync_buffer, &random_vector, &statistics, kFs,
|
||||
kChannels);
|
||||
Normal normal(kFs, &db, bgn, &expand);
|
||||
|
||||
int16_t input[kPacketsizeBytes] = {0};
|
||||
|
||||
std::unique_ptr<int16_t[]> mute_factor_array(new int16_t[kChannels]);
|
||||
for (size_t i = 0; i < kChannels; ++i) {
|
||||
mute_factor_array[i] = 16384;
|
||||
}
|
||||
|
||||
AudioMultiVector output(kChannels);
|
||||
|
||||
EXPECT_CALL(expand, SetParametersForNormalAfterExpand());
|
||||
EXPECT_CALL(expand, Process(_)).WillOnce(Invoke(ExpandProcess120ms));
|
||||
EXPECT_CALL(expand, Reset());
|
||||
EXPECT_EQ(static_cast<int>(kPacketsizeBytes),
|
||||
normal.Process(input,
|
||||
kPacketsizeBytes,
|
||||
kModeExpand,
|
||||
mute_factor_array.get(),
|
||||
&output));
|
||||
|
||||
EXPECT_EQ(kPacketsizeBytes, output.Size());
|
||||
|
||||
EXPECT_CALL(db, Die()); // Called when |db| goes out of scope.
|
||||
EXPECT_CALL(expand, Die()); // Called when |expand| goes out of scope.
|
||||
}
|
||||
|
||||
// TODO(hlundin): Write more tests.
|
||||
|
||||
} // namespace webrtc
|
||||
35
modules/audio_coding/neteq/packet.cc
Normal file
35
modules/audio_coding/neteq/packet.cc
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/packet.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
Packet::Packet() = default;
|
||||
Packet::Packet(Packet&& b) = default;
|
||||
|
||||
Packet::~Packet() = default;
|
||||
|
||||
Packet& Packet::operator=(Packet&& b) = default;
|
||||
|
||||
Packet Packet::Clone() const {
|
||||
RTC_CHECK(!frame);
|
||||
|
||||
Packet clone;
|
||||
clone.timestamp = timestamp;
|
||||
clone.sequence_number = sequence_number;
|
||||
clone.payload_type = payload_type;
|
||||
clone.payload.SetData(payload.data(), payload.size());
|
||||
clone.priority = priority;
|
||||
|
||||
return clone;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
124
modules/audio_coding/neteq/packet.h
Normal file
124
modules/audio_coding/neteq/packet.h
Normal file
@ -0,0 +1,124 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_H_
|
||||
|
||||
#include <list>
|
||||
#include <memory>
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
|
||||
#include "webrtc/rtc_base/buffer.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Struct for holding RTP packets.
|
||||
struct Packet {
|
||||
struct Priority {
|
||||
Priority() : codec_level(0), red_level(0) {}
|
||||
Priority(int codec_level, int red_level)
|
||||
: codec_level(codec_level), red_level(red_level) {
|
||||
CheckInvariant();
|
||||
}
|
||||
|
||||
int codec_level;
|
||||
int red_level;
|
||||
|
||||
// Priorities are sorted low-to-high, first on the level the codec
|
||||
// prioritizes it, then on the level of RED packet it is; i.e. if it is a
|
||||
// primary or secondary payload of a RED packet. For example: with Opus, an
|
||||
// Fec packet (which the decoder prioritizes lower than a regular packet)
|
||||
// will not be used if there is _any_ RED payload for the same
|
||||
// timeframe. The highest priority packet will have levels {0, 0}. Negative
|
||||
// priorities are not allowed.
|
||||
bool operator<(const Priority& b) const {
|
||||
CheckInvariant();
|
||||
b.CheckInvariant();
|
||||
if (codec_level == b.codec_level)
|
||||
return red_level < b.red_level;
|
||||
|
||||
return codec_level < b.codec_level;
|
||||
}
|
||||
bool operator==(const Priority& b) const {
|
||||
CheckInvariant();
|
||||
b.CheckInvariant();
|
||||
return codec_level == b.codec_level && red_level == b.red_level;
|
||||
}
|
||||
bool operator!=(const Priority& b) const { return !(*this == b); }
|
||||
bool operator>(const Priority& b) const { return b < *this; }
|
||||
bool operator<=(const Priority& b) const { return !(b > *this); }
|
||||
bool operator>=(const Priority& b) const { return !(b < *this); }
|
||||
|
||||
private:
|
||||
void CheckInvariant() const {
|
||||
RTC_DCHECK_GE(codec_level, 0);
|
||||
RTC_DCHECK_GE(red_level, 0);
|
||||
}
|
||||
};
|
||||
|
||||
uint32_t timestamp;
|
||||
uint16_t sequence_number;
|
||||
uint8_t payload_type;
|
||||
// Datagram excluding RTP header and header extension.
|
||||
rtc::Buffer payload;
|
||||
Priority priority;
|
||||
std::unique_ptr<TickTimer::Stopwatch> waiting_time;
|
||||
std::unique_ptr<AudioDecoder::EncodedAudioFrame> frame;
|
||||
|
||||
Packet();
|
||||
Packet(Packet&& b);
|
||||
~Packet();
|
||||
|
||||
// Packets should generally be moved around but sometimes it's useful to make
|
||||
// a copy, for example for testing purposes. NOTE: Will only work for
|
||||
// un-parsed packets, i.e. |frame| must be unset. The payload will, however,
|
||||
// be copied. |waiting_time| will also not be copied.
|
||||
Packet Clone() const;
|
||||
|
||||
Packet& operator=(Packet&& b);
|
||||
|
||||
// Comparison operators. Establish a packet ordering based on (1) timestamp,
|
||||
// (2) sequence number and (3) redundancy.
|
||||
// Timestamp and sequence numbers are compared taking wrap-around into
|
||||
// account. For two packets with the same sequence number and timestamp a
|
||||
// primary payload is considered "smaller" than a secondary.
|
||||
bool operator==(const Packet& rhs) const {
|
||||
return (this->timestamp == rhs.timestamp &&
|
||||
this->sequence_number == rhs.sequence_number &&
|
||||
this->priority == rhs.priority);
|
||||
}
|
||||
bool operator!=(const Packet& rhs) const { return !operator==(rhs); }
|
||||
bool operator<(const Packet& rhs) const {
|
||||
if (this->timestamp == rhs.timestamp) {
|
||||
if (this->sequence_number == rhs.sequence_number) {
|
||||
// Timestamp and sequence numbers are identical - deem the left hand
|
||||
// side to be "smaller" (i.e., "earlier") if it has higher priority.
|
||||
return this->priority < rhs.priority;
|
||||
}
|
||||
return (static_cast<uint16_t>(rhs.sequence_number -
|
||||
this->sequence_number) < 0xFFFF / 2);
|
||||
}
|
||||
return (static_cast<uint32_t>(rhs.timestamp - this->timestamp) <
|
||||
0xFFFFFFFF / 2);
|
||||
}
|
||||
bool operator>(const Packet& rhs) const { return rhs.operator<(*this); }
|
||||
bool operator<=(const Packet& rhs) const { return !operator>(rhs); }
|
||||
bool operator>=(const Packet& rhs) const { return !operator<(rhs); }
|
||||
|
||||
bool empty() const { return !frame && payload.empty(); }
|
||||
};
|
||||
|
||||
// A list of packets.
|
||||
typedef std::list<Packet> PacketList;
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_H_
|
||||
294
modules/audio_coding/neteq/packet_buffer.cc
Normal file
294
modules/audio_coding/neteq/packet_buffer.cc
Normal file
@ -0,0 +1,294 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// This is the implementation of the PacketBuffer class. It is mostly based on
|
||||
// an STL list. The list is kept sorted at all times so that the next packet to
|
||||
// decode is at the beginning of the list.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
|
||||
|
||||
#include <algorithm> // find_if()
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
|
||||
#include "webrtc/rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
// Predicate used when inserting packets in the buffer list.
|
||||
// Operator() returns true when |packet| goes before |new_packet|.
|
||||
class NewTimestampIsLarger {
|
||||
public:
|
||||
explicit NewTimestampIsLarger(const Packet& new_packet)
|
||||
: new_packet_(new_packet) {
|
||||
}
|
||||
bool operator()(const Packet& packet) {
|
||||
return (new_packet_ >= packet);
|
||||
}
|
||||
|
||||
private:
|
||||
const Packet& new_packet_;
|
||||
};
|
||||
|
||||
// Returns true if both payload types are known to the decoder database, and
|
||||
// have the same sample rate.
|
||||
bool EqualSampleRates(uint8_t pt1,
|
||||
uint8_t pt2,
|
||||
const DecoderDatabase& decoder_database) {
|
||||
auto* di1 = decoder_database.GetDecoderInfo(pt1);
|
||||
auto* di2 = decoder_database.GetDecoderInfo(pt2);
|
||||
return di1 && di2 && di1->SampleRateHz() == di2->SampleRateHz();
|
||||
}
|
||||
|
||||
void LogPacketDiscarded(int codec_level, StatisticsCalculator* stats) {
|
||||
RTC_CHECK(stats);
|
||||
if (codec_level > 0) {
|
||||
stats->SecondaryPacketsDiscarded(1);
|
||||
} else {
|
||||
stats->PacketsDiscarded(1);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Constructor. |tick_timer| is not owned and must outlive the buffer; it is
// used to stamp each inserted packet with a waiting-time stopwatch.
PacketBuffer::PacketBuffer(size_t max_number_of_packets,
                           const TickTimer* tick_timer)
    : max_number_of_packets_(max_number_of_packets), tick_timer_(tick_timer) {}
|
||||
|
||||
// Destructor. All packets in the buffer will be destroyed.
|
||||
PacketBuffer::~PacketBuffer() {
  // Packets own their payloads/frames, so clearing the list releases them.
  Flush();
}
|
||||
|
||||
// Flush the buffer. All packets in the buffer will be destroyed.
|
||||
void PacketBuffer::Flush() {
  // std::list::clear destroys every Packet, releasing its payload/frame.
  buffer_.clear();
}
|
||||
|
||||
// Returns true when the buffer holds no packets.
bool PacketBuffer::Empty() const {
  return buffer_.empty();
}
|
||||
|
||||
// Inserts |packet| into the sorted list. A full buffer is flushed first
// (returning kFlushed). A packet whose timestamp duplicates an existing one
// is either discarded (existing packet has higher priority) or replaces the
// existing packet (new one has higher priority). Note: the search/duplicate
// logic below depends on the exact rit/rit.base() relationship.
int PacketBuffer::InsertPacket(Packet&& packet, StatisticsCalculator* stats) {
  if (packet.empty()) {
    LOG(LS_WARNING) << "InsertPacket invalid packet";
    return kInvalidPacket;
  }

  RTC_DCHECK_GE(packet.priority.codec_level, 0);
  RTC_DCHECK_GE(packet.priority.red_level, 0);

  int return_val = kOK;

  // Start the per-packet stopwatch measuring time spent in the buffer.
  packet.waiting_time = tick_timer_->GetNewStopwatch();

  if (buffer_.size() >= max_number_of_packets_) {
    // Buffer is full. Flush it.
    Flush();
    LOG(LS_WARNING) << "Packet buffer flushed";
    return_val = kFlushed;
  }

  // Get an iterator pointing to the place in the buffer where the new packet
  // should be inserted. The list is searched from the back, since the most
  // likely case is that the new packet should be near the end of the list.
  PacketList::reverse_iterator rit = std::find_if(
      buffer_.rbegin(), buffer_.rend(),
      NewTimestampIsLarger(packet));

  // The new packet is to be inserted to the right of |rit|. If it has the same
  // timestamp as |rit|, which has a higher priority, do not insert the new
  // packet to list.
  if (rit != buffer_.rend() && packet.timestamp == rit->timestamp) {
    LogPacketDiscarded(packet.priority.codec_level, stats);
    return return_val;
  }

  // The new packet is to be inserted to the left of |it|. If it has the same
  // timestamp as |it|, which has a lower priority, replace |it| with the new
  // packet.
  PacketList::iterator it = rit.base();
  if (it != buffer_.end() && packet.timestamp == it->timestamp) {
    LogPacketDiscarded(packet.priority.codec_level, stats);
    it = buffer_.erase(it);
  }
  buffer_.insert(it, std::move(packet));  // Insert the packet at that position.

  return return_val;
}
|
||||
|
||||
// Inserts every packet of |packet_list| into the buffer, tracking the current
// speech and CNG payload types and flushing the buffer on an apparent codec
// change (legacy behavior — see TODO in the header). The list is always
// emptied before returning, on success or error.
int PacketBuffer::InsertPacketList(
    PacketList* packet_list,
    const DecoderDatabase& decoder_database,
    rtc::Optional<uint8_t>* current_rtp_payload_type,
    rtc::Optional<uint8_t>* current_cng_rtp_payload_type,
    StatisticsCalculator* stats) {
  RTC_DCHECK(stats);
  bool flushed = false;
  for (auto& packet : *packet_list) {
    if (decoder_database.IsComfortNoise(packet.payload_type)) {
      if (*current_cng_rtp_payload_type &&
          **current_cng_rtp_payload_type != packet.payload_type) {
        // New CNG payload type implies new codec type.
        *current_rtp_payload_type = rtc::Optional<uint8_t>();
        Flush();
        flushed = true;
      }
      *current_cng_rtp_payload_type =
          rtc::Optional<uint8_t>(packet.payload_type);
    } else if (!decoder_database.IsDtmf(packet.payload_type)) {
      // This must be speech.
      if ((*current_rtp_payload_type &&
           **current_rtp_payload_type != packet.payload_type) ||
          (*current_cng_rtp_payload_type &&
           !EqualSampleRates(packet.payload_type,
                             **current_cng_rtp_payload_type,
                             decoder_database))) {
        // Speech payload type changed, or its sample rate no longer matches
        // the current CNG payload type: treat as a codec change and flush.
        *current_cng_rtp_payload_type = rtc::Optional<uint8_t>();
        Flush();
        flushed = true;
      }
      *current_rtp_payload_type = rtc::Optional<uint8_t>(packet.payload_type);
    }
    // |packet| is moved from here; the whole list is cleared below anyway.
    int return_val = InsertPacket(std::move(packet), stats);
    if (return_val == kFlushed) {
      // The buffer flushed, but this is not an error. We can still continue.
      flushed = true;
    } else if (return_val != kOK) {
      // An error occurred. Delete remaining packets in list and return.
      packet_list->clear();
      return return_val;
    }
  }
  packet_list->clear();
  return flushed ? kFlushed : kOK;
}
|
||||
|
||||
int PacketBuffer::NextTimestamp(uint32_t* next_timestamp) const {
|
||||
if (Empty()) {
|
||||
return kBufferEmpty;
|
||||
}
|
||||
if (!next_timestamp) {
|
||||
return kInvalidPointer;
|
||||
}
|
||||
*next_timestamp = buffer_.front().timestamp;
|
||||
return kOK;
|
||||
}
|
||||
|
||||
int PacketBuffer::NextHigherTimestamp(uint32_t timestamp,
|
||||
uint32_t* next_timestamp) const {
|
||||
if (Empty()) {
|
||||
return kBufferEmpty;
|
||||
}
|
||||
if (!next_timestamp) {
|
||||
return kInvalidPointer;
|
||||
}
|
||||
PacketList::const_iterator it;
|
||||
for (it = buffer_.begin(); it != buffer_.end(); ++it) {
|
||||
if (it->timestamp >= timestamp) {
|
||||
// Found a packet matching the search.
|
||||
*next_timestamp = it->timestamp;
|
||||
return kOK;
|
||||
}
|
||||
}
|
||||
return kNotFound;
|
||||
}
|
||||
|
||||
// Returns a pointer to the first packet without removing it, or nullptr when
// the buffer is empty. The pointer is invalidated by any mutating call.
const Packet* PacketBuffer::PeekNextPacket() const {
  if (buffer_.empty())
    return nullptr;
  return &buffer_.front();
}
|
||||
|
||||
// Moves the first packet out of the buffer and returns it; returns an empty
// optional when the buffer is empty.
rtc::Optional<Packet> PacketBuffer::GetNextPacket() {
  if (Empty()) {
    // Buffer is empty.
    return rtc::Optional<Packet>();
  }

  // Move before pop: the element must be taken while it is still in the list.
  rtc::Optional<Packet> packet(std::move(buffer_.front()));
  // Assert that the packet sanity checks in InsertPacket method works.
  RTC_DCHECK(!packet->empty());
  buffer_.pop_front();

  return packet;
}
|
||||
|
||||
// Deletes the first packet and records the discard in |stats|.
int PacketBuffer::DiscardNextPacket(StatisticsCalculator* stats) {
  if (Empty()) {
    return kBufferEmpty;
  }
  // Assert that the packet sanity checks in InsertPacket method works.
  const Packet& packet = buffer_.front();
  RTC_DCHECK(!packet.empty());
  LogPacketDiscarded(packet.priority.codec_level, stats);
  buffer_.pop_front();
  return kOK;
}
|
||||
|
||||
// Removes (and logs) every packet older than |timestamp_limit| but within
// |horizon_samples| of it; horizon_samples == 0 means half the 32-bit
// timestamp range. A packet exactly at the limit is kept.
void PacketBuffer::DiscardOldPackets(uint32_t timestamp_limit,
                                     uint32_t horizon_samples,
                                     StatisticsCalculator* stats) {
  buffer_.remove_if([timestamp_limit, horizon_samples, stats](const Packet& p) {
    if (timestamp_limit == p.timestamp ||
        !IsObsoleteTimestamp(p.timestamp, timestamp_limit, horizon_samples)) {
      return false;
    }
    LogPacketDiscarded(p.priority.codec_level, stats);
    return true;
  });
}
|
||||
|
||||
// Convenience wrapper: horizon 0 means "half the timestamp range", i.e.
// every packet strictly older than |timestamp_limit| is discarded.
void PacketBuffer::DiscardAllOldPackets(uint32_t timestamp_limit,
                                        StatisticsCalculator* stats) {
  DiscardOldPackets(timestamp_limit, 0, stats);
}
|
||||
|
||||
void PacketBuffer::DiscardPacketsWithPayloadType(uint8_t payload_type,
|
||||
StatisticsCalculator* stats) {
|
||||
buffer_.remove_if([payload_type, stats](const Packet& p) {
|
||||
if (p.payload_type != payload_type) {
|
||||
return false;
|
||||
}
|
||||
LogPacketDiscarded(p.priority.codec_level, stats);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
// Returns the packet count, including duplicates and redundant packets.
size_t PacketBuffer::NumPacketsInBuffer() const {
  return buffer_.size();
}
|
||||
|
||||
// Estimates the number of audio samples in the buffer. Packets with a parsed
// primary frame contribute their real duration; packets without a frame are
// assumed to last as long as the most recent known duration (seeded with
// |last_decoded_length|). Redundant/duplicate frames contribute nothing.
size_t PacketBuffer::NumSamplesInBuffer(size_t last_decoded_length) const {
  size_t num_samples = 0;
  size_t last_duration = last_decoded_length;
  for (const Packet& packet : buffer_) {
    if (packet.frame) {
      // TODO(hlundin): Verify that it's fine to count all packets and remove
      // this check.
      if (packet.priority != Packet::Priority(0, 0)) {
        // Non-primary packet: skipped entirely (adds no samples).
        continue;
      }
      size_t duration = packet.frame->Duration();
      if (duration > 0) {
        last_duration = duration;  // Save the most up-to-date (valid) duration.
      }
    }
    num_samples += last_duration;
  }
  return num_samples;
}
|
||||
|
||||
// Reports the current fill level and the configured capacity. Both output
// pointers must be non-null.
void PacketBuffer::BufferStat(int* num_packets, int* max_num_packets) const {
  *num_packets = static_cast<int>(buffer_.size());
  *max_num_packets = static_cast<int>(max_number_of_packets_);
}
|
||||
|
||||
} // namespace webrtc
|
||||
148
modules/audio_coding/neteq/packet_buffer.h
Normal file
148
modules/audio_coding/neteq/packet_buffer.h
Normal file
@ -0,0 +1,148 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
|
||||
|
||||
#include "webrtc/api/optional.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet.h"
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class DecoderDatabase;
|
||||
class StatisticsCalculator;
|
||||
class TickTimer;
|
||||
|
||||
// This is the actual buffer holding the packets before decoding.
|
||||
class PacketBuffer {
 public:
  // Return codes used by most member functions below.
  enum BufferReturnCodes {
    kOK = 0,
    kFlushed,
    kNotFound,
    kBufferEmpty,
    kInvalidPacket,
    kInvalidPointer
  };

  // Constructor creates a buffer which can hold a maximum of
  // |max_number_of_packets| packets.
  PacketBuffer(size_t max_number_of_packets, const TickTimer* tick_timer);

  // Deletes all packets in the buffer before destroying the buffer.
  virtual ~PacketBuffer();

  // Flushes the buffer and deletes all packets in it.
  virtual void Flush();

  // Returns true for an empty buffer.
  virtual bool Empty() const;

  // Inserts |packet| into the buffer. The buffer will take over ownership of
  // the packet object.
  // Returns PacketBuffer::kOK on success, PacketBuffer::kFlushed if the buffer
  // was flushed due to overfilling.
  virtual int InsertPacket(Packet&& packet, StatisticsCalculator* stats);

  // Inserts a list of packets into the buffer. The buffer will take over
  // ownership of the packet objects.
  // Returns PacketBuffer::kOK if all packets were inserted successfully.
  // If the buffer was flushed due to overfilling, only a subset of the list is
  // inserted, and PacketBuffer::kFlushed is returned.
  // The last three parameters are included for legacy compatibility.
  // TODO(hlundin): Redesign to not use current_*_payload_type and
  // decoder_database.
  virtual int InsertPacketList(
      PacketList* packet_list,
      const DecoderDatabase& decoder_database,
      rtc::Optional<uint8_t>* current_rtp_payload_type,
      rtc::Optional<uint8_t>* current_cng_rtp_payload_type,
      StatisticsCalculator* stats);

  // Gets the timestamp for the first packet in the buffer and writes it to the
  // output variable |next_timestamp|.
  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
  // PacketBuffer::kOK otherwise.
  virtual int NextTimestamp(uint32_t* next_timestamp) const;

  // Gets the timestamp for the first packet in the buffer with a timestamp no
  // lower than the input limit |timestamp|. The result is written to the output
  // variable |next_timestamp|.
  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
  // PacketBuffer::kOK otherwise.
  virtual int NextHigherTimestamp(uint32_t timestamp,
                                  uint32_t* next_timestamp) const;

  // Returns a (constant) pointer to the first packet in the buffer. Returns
  // NULL if the buffer is empty.
  virtual const Packet* PeekNextPacket() const;

  // Extracts the first packet in the buffer and returns it.
  // Returns an empty optional if the buffer is empty.
  virtual rtc::Optional<Packet> GetNextPacket();

  // Discards the first packet in the buffer. The packet is deleted.
  // Returns PacketBuffer::kBufferEmpty if the buffer is empty,
  // PacketBuffer::kOK otherwise.
  virtual int DiscardNextPacket(StatisticsCalculator* stats);

  // Discards all packets that are (strictly) older than timestamp_limit,
  // but newer than timestamp_limit - horizon_samples. Setting horizon_samples
  // to zero implies that the horizon is set to half the timestamp range. That
  // is, if a packet is more than 2^31 timestamps into the future compared with
  // timestamp_limit (including wrap-around), it is considered old.
  virtual void DiscardOldPackets(uint32_t timestamp_limit,
                                 uint32_t horizon_samples,
                                 StatisticsCalculator* stats);

  // Discards all packets that are (strictly) older than timestamp_limit.
  virtual void DiscardAllOldPackets(uint32_t timestamp_limit,
                                    StatisticsCalculator* stats);

  // Removes all packets with a specific payload type from the buffer.
  virtual void DiscardPacketsWithPayloadType(uint8_t payload_type,
                                             StatisticsCalculator* stats);

  // Returns the number of packets in the buffer, including duplicates and
  // redundant packets.
  virtual size_t NumPacketsInBuffer() const;

  // Returns the number of samples in the buffer, including samples carried in
  // duplicate and redundant packets.
  virtual size_t NumSamplesInBuffer(size_t last_decoded_length) const;

  // Writes the current packet count and the configured maximum to the two
  // output parameters. Both pointers must be non-null.
  virtual void BufferStat(int* num_packets, int* max_num_packets) const;

  // Static method returning true if |timestamp| is older than |timestamp_limit|
  // but less than |horizon_samples| behind |timestamp_limit|. For instance,
  // with timestamp_limit = 100 and horizon_samples = 10, a timestamp in the
  // range (90, 100) is considered obsolete, and will yield true.
  // Setting |horizon_samples| to 0 is the same as setting it to 2^31, i.e.,
  // half the 32-bit timestamp range.
  static bool IsObsoleteTimestamp(uint32_t timestamp,
                                  uint32_t timestamp_limit,
                                  uint32_t horizon_samples) {
    return IsNewerTimestamp(timestamp_limit, timestamp) &&
           (horizon_samples == 0 ||
            IsNewerTimestamp(timestamp, timestamp_limit - horizon_samples));
  }

 private:
  size_t max_number_of_packets_;  // Capacity; exceeding it triggers a flush.
  PacketList buffer_;             // Sorted list; front is next to decode.
  const TickTimer* tick_timer_;   // Not owned.
  RTC_DISALLOW_COPY_AND_ASSIGN(PacketBuffer);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_PACKET_BUFFER_H_
|
||||
737
modules/audio_coding/neteq/packet_buffer_unittest.cc
Normal file
737
modules/audio_coding/neteq/packet_buffer_unittest.cc
Normal file
@ -0,0 +1,737 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for PacketBuffer class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/packet_buffer.h"
|
||||
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/mock/mock_statistics_calculator.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/tick_timer.h"
|
||||
#include "webrtc/test/gmock.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
using ::testing::Return;
|
||||
using ::testing::StrictMock;
|
||||
using ::testing::_;
|
||||
using ::testing::InSequence;
|
||||
using ::testing::MockFunction;
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Helper class to generate packets. Packets must be deleted by the user.
|
||||
class PacketGenerator {
 public:
  PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);
  virtual ~PacketGenerator() {}
  // Re-seeds the generator state; the next packet produced will carry these
  // values.
  void Reset(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);
  // Produces a packet with the current state, then advances the sequence
  // number by one and the timestamp by |frame_size_|.
  Packet NextPacket(int payload_size_bytes);

  // State is public so tests can inspect/tweak it directly.
  uint16_t seq_no_;   // Sequence number of the next packet.
  uint32_t ts_;       // Timestamp of the next packet.
  uint8_t pt_;        // Payload type stamped on every packet.
  int frame_size_;    // Timestamp increment per packet.
};
|
||||
|
||||
// Constructor simply delegates to Reset().
PacketGenerator::PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt,
                                 int frame_size) {
  Reset(seq_no, ts, pt, frame_size);
}
|
||||
|
||||
void PacketGenerator::Reset(uint16_t seq_no, uint32_t ts, uint8_t pt,
|
||||
int frame_size) {
|
||||
seq_no_ = seq_no;
|
||||
ts_ = ts;
|
||||
pt_ = pt;
|
||||
frame_size_ = frame_size;
|
||||
}
|
||||
|
||||
// Builds a packet from the current state, then advances the state for the
// next call (sequence number +1, timestamp +frame_size_).
Packet PacketGenerator::NextPacket(int payload_size_bytes) {
  Packet packet;
  packet.sequence_number = seq_no_;
  packet.timestamp = ts_;
  packet.payload_type = pt_;
  packet.payload.SetSize(payload_size_bytes);
  ++seq_no_;
  ts_ += frame_size_;
  return packet;
}
|
||||
|
||||
// Fixture row describing one packet to insert plus the expected outcome.
struct PacketsToInsert {
  uint16_t sequence_number;
  uint32_t timestamp;
  uint8_t payload_type;
  bool primary;  // True for a primary (codec_level 0) payload.
  // Order of this packet to appear upon extraction, after inserting a series
  // of packets. A negative number means that it should have been discarded
  // before extraction.
  int extract_order;
};
|
||||
|
||||
// Start of test definitions.
|
||||
|
||||
// Verifies that a newly created buffer is empty and that destruction (at
// scope exit) cleans up without leaks.
TEST(PacketBuffer, CreateAndDestroy) {
  TickTimer tick_timer;
  // Automatic storage replaces the former raw new/delete pair; leaving scope
  // still exercises the destructor path.
  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
  EXPECT_TRUE(buffer.Empty());
}
|
||||
|
||||
// Inserts a single packet and checks that it becomes the front of the buffer
// with the expected timestamp and contents.
TEST(PacketBuffer, InsertPacket) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
  PacketGenerator gen(17u, 4711u, 0, 10);
  StrictMock<MockStatisticsCalculator> mock_stats;

  const int payload_len = 100;
  const Packet packet = gen.NextPacket(payload_len);
  EXPECT_EQ(0, buffer.InsertPacket(packet.Clone(), &mock_stats));
  uint32_t next_ts;
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
  EXPECT_EQ(4711u, next_ts);
  EXPECT_FALSE(buffer.Empty());
  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
  const Packet* next_packet = buffer.PeekNextPacket();
  EXPECT_EQ(packet, *next_packet);  // Compare contents.

  // Do not explicitly flush buffer or delete packet to test that it is deleted
  // with the buffer. (Tested with Valgrind or similar tool.)
}
|
||||
|
||||
// Test to flush buffer.
|
||||
TEST(PacketBuffer, FlushBuffer) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
  PacketGenerator gen(0, 0, 0, 10);
  const int payload_len = 10;
  StrictMock<MockStatisticsCalculator> mock_stats;

  // Insert 10 small packets; should be ok.
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(PacketBuffer::kOK,
              buffer.InsertPacket(gen.NextPacket(payload_len), &mock_stats));
  }
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
  EXPECT_FALSE(buffer.Empty());

  // Flushing must empty the buffer and release all payloads.
  buffer.Flush();
  // Buffer should delete the payloads itself.
  EXPECT_EQ(0u, buffer.NumPacketsInBuffer());
  EXPECT_TRUE(buffer.Empty());
}
|
||||
|
||||
// Test to fill the buffer over the limits, and verify that it flushes.
|
||||
// Test to fill the buffer over the limits, and verify that it flushes.
TEST(PacketBuffer, OverfillBuffer) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
  PacketGenerator gen(0, 0, 0, 10);
  StrictMock<MockStatisticsCalculator> mock_stats;

  // Insert 10 small packets; should be ok.
  const int payload_len = 10;
  // Loop index scoped to the loop; it was declared outside without being
  // used afterwards.
  for (int i = 0; i < 10; ++i) {
    EXPECT_EQ(PacketBuffer::kOK,
              buffer.InsertPacket(gen.NextPacket(payload_len), &mock_stats));
  }
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
  uint32_t next_ts;
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
  EXPECT_EQ(0u, next_ts);  // Expect first inserted packet to be first in line.

  const Packet packet = gen.NextPacket(payload_len);
  // Insert 11th packet; should flush the buffer and insert it after flushing.
  EXPECT_EQ(PacketBuffer::kFlushed,
            buffer.InsertPacket(packet.Clone(), &mock_stats));
  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts));
  // Expect last inserted packet to be first in line.
  EXPECT_EQ(packet.timestamp, next_ts);

  // Flush buffer to delete all packets.
  buffer.Flush();
}
|
||||
|
||||
// Test inserting a list of packets.
|
||||
// Inserts a whole list of same-payload-type packets and verifies that the
// list is depleted and the tracked payload type is updated.
TEST(PacketBuffer, InsertPacketList) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
  PacketGenerator gen(0, 0, 0, 10);
  PacketList list;
  const int payload_len = 10;

  // Insert 10 small packets.
  for (int i = 0; i < 10; ++i) {
    list.push_back(gen.NextPacket(payload_len));
  }

  MockDecoderDatabase decoder_database;
  auto factory = CreateBuiltinAudioDecoderFactory();
  const DecoderDatabase::DecoderInfo info(NetEqDecoder::kDecoderPCMu, factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(0))
      .WillRepeatedly(Return(&info));

  StrictMock<MockStatisticsCalculator> mock_stats;

  rtc::Optional<uint8_t> current_pt;
  rtc::Optional<uint8_t> current_cng_pt;
  EXPECT_EQ(PacketBuffer::kOK,
            buffer.InsertPacketList(&list, decoder_database, &current_pt,
                                    &current_cng_pt, &mock_stats));
  EXPECT_TRUE(list.empty());  // The PacketBuffer should have depleted the list.
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());
  EXPECT_EQ(rtc::Optional<uint8_t>(0),
            current_pt);  // Current payload type changed to 0.
  EXPECT_FALSE(current_cng_pt);  // CNG payload type not changed.

  buffer.Flush();  // Clean up.

  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
|
||||
|
||||
// Test inserting a list of packets. Last packet is of a different payload type.
|
||||
// Expecting the buffer to flush.
|
||||
// TODO(hlundin): Remove this test when legacy operation is no longer needed.
|
||||
// Inserts a list whose final packet carries a different (non-CNG) payload
// type; the buffer is expected to flush and keep only the last packet.
TEST(PacketBuffer, InsertPacketListChangePayloadType) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
  PacketGenerator gen(0, 0, 0, 10);
  PacketList list;
  const int payload_len = 10;

  // Insert 10 small packets.
  for (int i = 0; i < 10; ++i) {
    list.push_back(gen.NextPacket(payload_len));
  }
  // Insert 11th packet of another payload type (not CNG).
  {
    Packet packet = gen.NextPacket(payload_len);
    packet.payload_type = 1;
    list.push_back(std::move(packet));
  }

  MockDecoderDatabase decoder_database;
  auto factory = CreateBuiltinAudioDecoderFactory();
  const DecoderDatabase::DecoderInfo info0(NetEqDecoder::kDecoderPCMu, factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(0))
      .WillRepeatedly(Return(&info0));
  const DecoderDatabase::DecoderInfo info1(NetEqDecoder::kDecoderPCMa, factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(1))
      .WillRepeatedly(Return(&info1));

  StrictMock<MockStatisticsCalculator> mock_stats;

  rtc::Optional<uint8_t> current_pt;
  rtc::Optional<uint8_t> current_cng_pt;
  EXPECT_EQ(PacketBuffer::kFlushed,
            buffer.InsertPacketList(&list, decoder_database, &current_pt,
                                    &current_cng_pt, &mock_stats));
  EXPECT_TRUE(list.empty());  // The PacketBuffer should have depleted the list.
  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());  // Only the last packet.
  EXPECT_EQ(rtc::Optional<uint8_t>(1),
            current_pt);  // Current payload type changed to 1.
  EXPECT_FALSE(current_cng_pt);  // CNG payload type not changed.

  buffer.Flush();  // Clean up.

  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
|
||||
|
||||
// Inserts an interleaved mix of primary and redundant packets (including
// sequence-number and timestamp wrap-around) and verifies both the discard
// accounting and the final extraction order.
TEST(PacketBuffer, ExtractOrderRedundancy) {
  TickTimer tick_timer;
  PacketBuffer buffer(100, &tick_timer);  // 100 packets.
  const int kPackets = 18;
  const int kFrameSize = 10;
  const int kPayloadLength = 10;

  // Fields: {sequence_number, timestamp, payload_type (used here as a
  // primary/redundant marker), primary, extract_order}.
  PacketsToInsert packet_facts[kPackets] = {
      {0xFFFD, 0xFFFFFFD7, 0, true, 0},
      {0xFFFE, 0xFFFFFFE1, 0, true, 1},
      {0xFFFE, 0xFFFFFFD7, 1, false, -1},
      {0xFFFF, 0xFFFFFFEB, 0, true, 2},
      {0xFFFF, 0xFFFFFFE1, 1, false, -1},
      {0x0000, 0xFFFFFFF5, 0, true, 3},
      {0x0000, 0xFFFFFFEB, 1, false, -1},
      {0x0001, 0xFFFFFFFF, 0, true, 4},
      {0x0001, 0xFFFFFFF5, 1, false, -1},
      {0x0002, 0x0000000A, 0, true, 5},
      {0x0002, 0xFFFFFFFF, 1, false, -1},
      {0x0003, 0x0000000A, 1, false, -1},
      {0x0004, 0x0000001E, 0, true, 7},
      {0x0004, 0x00000014, 1, false, 6},
      {0x0005, 0x0000001E, 0, true, -1},
      {0x0005, 0x00000014, 1, false, -1},
      {0x0006, 0x00000028, 0, true, 8},
      {0x0006, 0x0000001E, 1, false, -1},
  };

  const size_t kExpectPacketsInBuffer = 9;

  std::vector<Packet> expect_order(kExpectPacketsInBuffer);

  PacketGenerator gen(0, 0, 0, kFrameSize);

  StrictMock<MockStatisticsCalculator> mock_stats;

  // Interleaving the EXPECT_CALL sequence with expectations on the MockFunction
  // check ensures that exactly one call to PacketsDiscarded happens in each
  // DiscardNextPacket call.
  InSequence s;
  MockFunction<void(int check_point_id)> check;
  for (int i = 0; i < kPackets; ++i) {
    gen.Reset(packet_facts[i].sequence_number,
              packet_facts[i].timestamp,
              packet_facts[i].payload_type,
              kFrameSize);
    Packet packet = gen.NextPacket(kPayloadLength);
    packet.priority.codec_level = packet_facts[i].primary ? 0 : 1;
    if (packet_facts[i].extract_order < 0) {
      // This packet is expected to be discarded during insertion; set up the
      // matching stats expectation (secondary vs primary counter).
      if (packet.priority.codec_level > 0) {
        EXPECT_CALL(mock_stats, SecondaryPacketsDiscarded(1));
      } else {
        EXPECT_CALL(mock_stats, PacketsDiscarded(1));
      }
    }
    EXPECT_CALL(check, Call(i));
    EXPECT_EQ(PacketBuffer::kOK,
              buffer.InsertPacket(packet.Clone(), &mock_stats));
    if (packet_facts[i].extract_order >= 0) {
      expect_order[packet_facts[i].extract_order] = std::move(packet);
    }
    check.Call(i);
  }

  EXPECT_EQ(kExpectPacketsInBuffer, buffer.NumPacketsInBuffer());

  for (size_t i = 0; i < kExpectPacketsInBuffer; ++i) {
    const rtc::Optional<Packet> packet = buffer.GetNextPacket();
    EXPECT_EQ(packet, expect_order[i]);  // Compare contents.
  }
  EXPECT_TRUE(buffer.Empty());
}
|
||||
|
||||
// Inserts 10 packets and verifies the three discard paths: one-by-one
// (DiscardNextPacket), horizon-limited (DiscardOldPackets), and
// unconditional (DiscardAllOldPackets), including the stats accounting.
TEST(PacketBuffer, DiscardPackets) {
  TickTimer tick_timer;
  PacketBuffer buffer(100, &tick_timer);  // 100 packets.
  const uint16_t start_seq_no = 17;
  const uint32_t start_ts = 4711;
  const uint32_t ts_increment = 10;
  PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
  // Unused local `PacketList list;` removed; packets are inserted directly.
  const int payload_len = 10;
  StrictMock<MockStatisticsCalculator> mock_stats;

  constexpr int kTotalPackets = 10;
  // Insert 10 small packets.
  for (int i = 0; i < kTotalPackets; ++i) {
    buffer.InsertPacket(gen.NextPacket(payload_len), &mock_stats);
  }
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());

  uint32_t current_ts = start_ts;

  // Discard them one by one and make sure that the right packets are at the
  // front of the buffer.
  constexpr int kDiscardPackets = 5;

  // Interleaving the EXPECT_CALL sequence with expectations on the MockFunction
  // check ensures that exactly one call to PacketsDiscarded happens in each
  // DiscardNextPacket call.
  InSequence s;
  MockFunction<void(int check_point_id)> check;
  for (int i = 0; i < kDiscardPackets; ++i) {
    uint32_t ts;
    EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts));
    EXPECT_EQ(current_ts, ts);
    EXPECT_CALL(mock_stats, PacketsDiscarded(1));
    EXPECT_CALL(check, Call(i));
    EXPECT_EQ(PacketBuffer::kOK, buffer.DiscardNextPacket(&mock_stats));
    current_ts += ts_increment;
    check.Call(i);
  }

  constexpr int kRemainingPackets = kTotalPackets - kDiscardPackets;
  // This will discard all remaining packets but one. The oldest packet is older
  // than the indicated horizon_samples, and will thus be left in the buffer.
  constexpr size_t kSkipPackets = 1;
  EXPECT_CALL(mock_stats, PacketsDiscarded(1))
      .Times(kRemainingPackets - kSkipPackets);
  EXPECT_CALL(check, Call(17));  // Arbitrary id number.
  buffer.DiscardOldPackets(start_ts + kTotalPackets * ts_increment,
                           kRemainingPackets * ts_increment, &mock_stats);
  check.Call(17);  // Same arbitrary id number.

  EXPECT_EQ(kSkipPackets, buffer.NumPacketsInBuffer());
  uint32_t ts;
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts));
  EXPECT_EQ(current_ts, ts);

  // Discard all remaining packets.
  EXPECT_CALL(mock_stats, PacketsDiscarded(kSkipPackets));
  buffer.DiscardAllOldPackets(start_ts + kTotalPackets * ts_increment,
                              &mock_stats);

  EXPECT_TRUE(buffer.Empty());
}
|
||||
|
||||
// Verifies that packets inserted out of order come back out of the buffer in
// timestamp order.
TEST(PacketBuffer, Reordering) {
  TickTimer tick_timer;
  PacketBuffer buffer(100, &tick_timer);  // 100 packets.
  const uint16_t start_seq_no = 17;
  const uint32_t start_ts = 4711;
  const uint32_t ts_increment = 10;
  PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
  const int payload_len = 10;

  // Generate 10 small packets and insert them into a PacketList. Insert every
  // odd packet to the front, and every even packet to the back, thus creating
  // a (rather strange) reordering.
  PacketList list;
  for (int i = 0; i < 10; ++i) {
    Packet packet = gen.NextPacket(payload_len);
    if (i % 2) {
      list.push_front(std::move(packet));
    } else {
      list.push_back(std::move(packet));
    }
  }

  MockDecoderDatabase decoder_database;
  auto factory = CreateBuiltinAudioDecoderFactory();
  // All packets use payload type 0; answer every lookup with the same info.
  const DecoderDatabase::DecoderInfo info(NetEqDecoder::kDecoderPCMu, factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(0))
      .WillRepeatedly(Return(&info));
  rtc::Optional<uint8_t> current_pt;
  rtc::Optional<uint8_t> current_cng_pt;

  StrictMock<MockStatisticsCalculator> mock_stats;

  EXPECT_EQ(PacketBuffer::kOK,
            buffer.InsertPacketList(&list, decoder_database, &current_pt,
                                    &current_cng_pt, &mock_stats));
  EXPECT_EQ(10u, buffer.NumPacketsInBuffer());

  // Extract them and make sure that come out in the right order.
  uint32_t current_ts = start_ts;
  for (int i = 0; i < 10; ++i) {
    const rtc::Optional<Packet> packet = buffer.GetNextPacket();
    ASSERT_TRUE(packet);
    EXPECT_EQ(current_ts, packet->timestamp);
    current_ts += ts_increment;
  }
  EXPECT_TRUE(buffer.Empty());

  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
|
||||
|
||||
// The test first inserts a packet with narrow-band CNG, then a packet with
|
||||
// wide-band speech. The expected behavior of the packet buffer is to detect a
|
||||
// change in sample rate, even though no speech packet has been inserted before,
|
||||
// and flush out the CNG packet.
|
||||
TEST(PacketBuffer, CngFirstThenSpeechWithNewSampleRate) {
  TickTimer tick_timer;
  PacketBuffer buffer(10, &tick_timer);  // 10 packets.
  const uint8_t kCngPt = 13;
  const int kPayloadLen = 10;
  const uint8_t kSpeechPt = 100;

  MockDecoderDatabase decoder_database;
  auto factory = CreateBuiltinAudioDecoderFactory();
  // Register a narrow-band CNG decoder for |kCngPt| ...
  const DecoderDatabase::DecoderInfo info_cng(NetEqDecoder::kDecoderCNGnb,
                                              factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(kCngPt))
      .WillRepeatedly(Return(&info_cng));
  // ... and a wide-band speech decoder for |kSpeechPt|.
  const DecoderDatabase::DecoderInfo info_speech(NetEqDecoder::kDecoderPCM16Bwb,
                                                 factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(kSpeechPt))
      .WillRepeatedly(Return(&info_speech));

  // Insert first packet, which is narrow-band CNG.
  PacketGenerator gen(0, 0, kCngPt, 10);
  PacketList list;
  list.push_back(gen.NextPacket(kPayloadLen));
  rtc::Optional<uint8_t> current_pt;
  rtc::Optional<uint8_t> current_cng_pt;

  StrictMock<MockStatisticsCalculator> mock_stats;

  EXPECT_EQ(PacketBuffer::kOK,
            buffer.InsertPacketList(&list, decoder_database, &current_pt,
                                    &current_cng_pt, &mock_stats));
  EXPECT_TRUE(list.empty());
  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
  ASSERT_TRUE(buffer.PeekNextPacket());
  EXPECT_EQ(kCngPt, buffer.PeekNextPacket()->payload_type);
  EXPECT_FALSE(current_pt);  // Current payload type not set.
  EXPECT_EQ(rtc::Optional<uint8_t>(kCngPt),
            current_cng_pt);  // CNG payload type set.

  // Insert second packet, which is wide-band speech.
  {
    Packet packet = gen.NextPacket(kPayloadLen);
    packet.payload_type = kSpeechPt;
    list.push_back(std::move(packet));
  }
  // Expect the buffer to flush out the CNG packet, since it does not match the
  // new speech sample rate.
  EXPECT_EQ(PacketBuffer::kFlushed,
            buffer.InsertPacketList(&list, decoder_database, &current_pt,
                                    &current_cng_pt, &mock_stats));
  EXPECT_TRUE(list.empty());
  EXPECT_EQ(1u, buffer.NumPacketsInBuffer());
  ASSERT_TRUE(buffer.PeekNextPacket());
  EXPECT_EQ(kSpeechPt, buffer.PeekNextPacket()->payload_type);

  EXPECT_EQ(rtc::Optional<uint8_t>(kSpeechPt),
            current_pt);  // Current payload type set.
  EXPECT_FALSE(current_cng_pt);  // CNG payload type reset.

  buffer.Flush();                        // Clean up.
  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
|
||||
|
||||
// Exercises the error paths of PacketBuffer: rejecting an empty-payload
// packet, all operations on an empty buffer, null output pointers, and an
// invalid packet in the middle of an inserted list.
TEST(PacketBuffer, Failures) {
  const uint16_t start_seq_no = 17;
  const uint32_t start_ts = 4711;
  const uint32_t ts_increment = 10;
  int payload_len = 100;
  PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
  TickTimer tick_timer;
  StrictMock<MockStatisticsCalculator> mock_stats;

  PacketBuffer* buffer = new PacketBuffer(100, &tick_timer);  // 100 packets.
  {
    // A packet whose payload has been cleared must be rejected.
    Packet packet = gen.NextPacket(payload_len);
    packet.payload.Clear();
    EXPECT_EQ(PacketBuffer::kInvalidPacket,
              buffer->InsertPacket(std::move(packet), &mock_stats));
  }
  // Buffer should still be empty. Test all empty-checks.
  uint32_t temp_ts;
  EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->NextTimestamp(&temp_ts));
  EXPECT_EQ(PacketBuffer::kBufferEmpty,
            buffer->NextHigherTimestamp(0, &temp_ts));
  EXPECT_EQ(NULL, buffer->PeekNextPacket());
  EXPECT_FALSE(buffer->GetNextPacket());

  // Discarding packets will not invoke mock_stats.PacketDiscarded() because the
  // packet buffer is empty.
  EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->DiscardNextPacket(&mock_stats));
  buffer->DiscardAllOldPackets(0, &mock_stats);

  // Insert one packet to make the buffer non-empty.
  EXPECT_EQ(PacketBuffer::kOK,
            buffer->InsertPacket(gen.NextPacket(payload_len), &mock_stats));
  // Null output pointers must be rejected even when the buffer has data.
  EXPECT_EQ(PacketBuffer::kInvalidPointer, buffer->NextTimestamp(NULL));
  EXPECT_EQ(PacketBuffer::kInvalidPointer,
            buffer->NextHigherTimestamp(0, NULL));
  delete buffer;

  // Insert packet list of three packets, where the second packet has an invalid
  // payload. Expect first packet to be inserted, and the remaining two to be
  // discarded.
  buffer = new PacketBuffer(100, &tick_timer);  // 100 packets.
  PacketList list;
  list.push_back(gen.NextPacket(payload_len));  // Valid packet.
  {
    Packet packet = gen.NextPacket(payload_len);
    packet.payload.Clear();  // Invalid.
    list.push_back(std::move(packet));
  }
  list.push_back(gen.NextPacket(payload_len));  // Valid packet.
  MockDecoderDatabase decoder_database;
  auto factory = CreateBuiltinAudioDecoderFactory();
  const DecoderDatabase::DecoderInfo info(NetEqDecoder::kDecoderPCMu, factory);
  EXPECT_CALL(decoder_database, GetDecoderInfo(0))
      .WillRepeatedly(Return(&info));
  rtc::Optional<uint8_t> current_pt;
  rtc::Optional<uint8_t> current_cng_pt;
  EXPECT_EQ(PacketBuffer::kInvalidPacket,
            buffer->InsertPacketList(&list, decoder_database, &current_pt,
                                     &current_cng_pt, &mock_stats));
  EXPECT_TRUE(list.empty());  // The PacketBuffer should have depleted the list.
  EXPECT_EQ(1u, buffer->NumPacketsInBuffer());
  delete buffer;
  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
|
||||
|
||||
// Test packet comparison function.
|
||||
// The function should return true if the first packet "goes before" the second.
|
||||
TEST(PacketBuffer, ComparePackets) {
  PacketGenerator gen(0, 0, 0, 10);
  Packet a(gen.NextPacket(10));  // SN = 0, TS = 0.
  Packet b(gen.NextPacket(10));  // SN = 1, TS = 10.

  // Each helper runs all six comparison operators on the pair (x, y) and
  // verifies a mutually consistent outcome.
  auto expect_x_before_y = [](const Packet& x, const Packet& y) {
    EXPECT_FALSE(x == y);
    EXPECT_TRUE(x != y);
    EXPECT_TRUE(x < y);
    EXPECT_FALSE(x > y);
    EXPECT_TRUE(x <= y);
    EXPECT_FALSE(x >= y);
  };
  auto expect_x_after_y = [](const Packet& x, const Packet& y) {
    EXPECT_FALSE(x == y);
    EXPECT_TRUE(x != y);
    EXPECT_FALSE(x < y);
    EXPECT_TRUE(x > y);
    EXPECT_FALSE(x <= y);
    EXPECT_TRUE(x >= y);
  };
  auto expect_x_equal_to_y = [](const Packet& x, const Packet& y) {
    EXPECT_TRUE(x == y);
    EXPECT_FALSE(x != y);
    EXPECT_FALSE(x < y);
    EXPECT_FALSE(x > y);
    EXPECT_TRUE(x <= y);
    EXPECT_TRUE(x >= y);
  };

  expect_x_before_y(a, b);

  // Testing wrap-around case; 'a' is earlier but has a larger timestamp value.
  a.timestamp = 0xFFFFFFFF - 10;
  expect_x_before_y(a, b);

  // Test equal packets.
  expect_x_equal_to_y(a, a);

  // Test equal timestamps but different sequence numbers (0 and 1).
  a.timestamp = b.timestamp;
  expect_x_before_y(a, b);

  // Test equal timestamps but different sequence numbers (32767 and 1).
  a.sequence_number = 0xFFFF;
  expect_x_before_y(a, b);

  // Test equal timestamps and sequence numbers, but differing priorities.
  a.sequence_number = b.sequence_number;
  a.priority = {1, 0};
  b.priority = {0, 0};
  expect_x_after_y(a, b);  // a after b.

  Packet c(gen.NextPacket(0));  // SN = 2, TS = 20.
  Packet d(gen.NextPacket(0));  // SN = 3, TS = 20.
  c.timestamp = b.timestamp;
  d.timestamp = b.timestamp;
  c.sequence_number = b.sequence_number;
  d.sequence_number = b.sequence_number;
  c.priority = {1, 1};
  d.priority = {0, 1};
  expect_x_after_y(c, d);  // c after d.
  expect_x_after_y(c, a);  // c after a.
  expect_x_after_y(c, b);  // c after b.
  expect_x_after_y(a, d);  // a after d.
  expect_x_after_y(d, b);  // d after b.
}
|
||||
|
||||
namespace {
|
||||
// Runs PacketBuffer::IsObsoleteTimestamp() through its boundary cases around
// |limit_timestamp|, both with the implicit half-range horizon (horizon == 0)
// and with a small fixed horizon.
void TestIsObsoleteTimestamp(uint32_t limit_timestamp) {
  static const uint32_t k2Pow31Minus1 = 0x7FFFFFFF;
  // Shorthand that binds the limit timestamp of this run.
  auto obsolete = [limit_timestamp](uint32_t timestamp, uint32_t horizon) {
    return PacketBuffer::IsObsoleteTimestamp(timestamp, limit_timestamp,
                                             horizon);
  };

  // Check with zero horizon, which implies that the horizon is at 2^31, i.e.,
  // half the timestamp range.
  static const uint32_t kZeroHorizon = 0;
  // Timestamp on the limit is not old.
  EXPECT_FALSE(obsolete(limit_timestamp, kZeroHorizon));
  // 1 sample behind is old.
  EXPECT_TRUE(obsolete(limit_timestamp - 1, kZeroHorizon));
  // 2^31 - 1 samples behind is old.
  EXPECT_TRUE(obsolete(limit_timestamp - k2Pow31Minus1, kZeroHorizon));
  // 1 sample ahead is not old.
  EXPECT_FALSE(obsolete(limit_timestamp + 1, kZeroHorizon));
  // If |t1-t2|=2^31 and t1>t2, t2 is older than t1 but not the opposite.
  const uint32_t other_timestamp = limit_timestamp + (1 << 31);
  const uint32_t lowest_timestamp = std::min(limit_timestamp, other_timestamp);
  const uint32_t highest_timestamp = std::max(limit_timestamp, other_timestamp);
  EXPECT_TRUE(PacketBuffer::IsObsoleteTimestamp(
      lowest_timestamp, highest_timestamp, kZeroHorizon));
  EXPECT_FALSE(PacketBuffer::IsObsoleteTimestamp(
      highest_timestamp, lowest_timestamp, kZeroHorizon));

  // Fixed horizon at 10 samples.
  static const uint32_t kHorizon = 10;
  // Timestamp on the limit is not old.
  EXPECT_FALSE(obsolete(limit_timestamp, kHorizon));
  // 1 sample behind is old.
  EXPECT_TRUE(obsolete(limit_timestamp - 1, kHorizon));
  // 9 samples behind is old.
  EXPECT_TRUE(obsolete(limit_timestamp - 9, kHorizon));
  // 10 samples behind is not old.
  EXPECT_FALSE(obsolete(limit_timestamp - 10, kHorizon));
  // 2^31 - 1 samples behind is not old.
  EXPECT_FALSE(obsolete(limit_timestamp - k2Pow31Minus1, kHorizon));
  // 1 sample ahead is not old.
  EXPECT_FALSE(obsolete(limit_timestamp + 1, kHorizon));
  // 2^31 samples ahead is not old.
  EXPECT_FALSE(obsolete(limit_timestamp + (1 << 31), kHorizon));
}
|
||||
} // namespace
|
||||
|
||||
// Test the IsObsoleteTimestamp method with different limit timestamps.
|
||||
// Test the IsObsoleteTimestamp method with different limit timestamps,
// covering both edges of the uint32_t range and the midpoint.
TEST(PacketBuffer, IsObsoleteTimestamp) {
  static const uint32_t kLimits[] = {
      0,
      1,
      0xFFFFFFFF,  // -1 in uint32_t.
      0x80000000,  // 2^31.
      0x80000001,  // 2^31 + 1.
      0x7FFFFFFF,  // 2^31 - 1.
  };
  for (const uint32_t limit : kLimits) {
    TestIsObsoleteTimestamp(limit);
  }
}
|
||||
} // namespace webrtc
|
||||
89
modules/audio_coding/neteq/post_decode_vad.cc
Normal file
89
modules/audio_coding/neteq/post_decode_vad.cc
Normal file
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
PostDecodeVad::~PostDecodeVad() {
  // Release the VAD instance, if one was ever created by Enable().
  if (vad_instance_ != nullptr) {
    WebRtcVad_Free(vad_instance_);
  }
}
||||
|
||||
void PostDecodeVad::Enable() {
|
||||
if (!vad_instance_) {
|
||||
// Create the instance.
|
||||
vad_instance_ = WebRtcVad_Create();
|
||||
if (vad_instance_ == nullptr) {
|
||||
// Failed to create instance.
|
||||
Disable();
|
||||
return;
|
||||
}
|
||||
}
|
||||
Init();
|
||||
enabled_ = true;
|
||||
}
|
||||
|
||||
// Disables the post-decode VAD and marks it as not running. The underlying
// VAD instance (if any) is kept and freed only in the destructor.
void PostDecodeVad::Disable() {
  enabled_ = false;
  running_ = false;
}
|
||||
|
||||
void PostDecodeVad::Init() {
|
||||
running_ = false;
|
||||
if (vad_instance_) {
|
||||
WebRtcVad_Init(vad_instance_);
|
||||
WebRtcVad_set_mode(vad_instance_, kVadMode);
|
||||
running_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the VAD over one frame of decoded audio and updates |active_speech_|.
// |signal| holds |length| samples at sample rate |fs_hz|; |speech_type| is the
// decoder's own classification and |sid_frame| flags a SID (comfort-noise
// description) frame.
void PostDecodeVad::Update(int16_t* signal, size_t length,
                           AudioDecoder::SpeechType speech_type,
                           bool sid_frame,
                           int fs_hz) {
  if (!vad_instance_ || !enabled_) {
    // Nothing to do if the VAD is disabled or no instance was allocated.
    return;
  }

  if (speech_type == AudioDecoder::kComfortNoise || sid_frame ||
      fs_hz > 16000) {
    // TODO(hlundin): Remove restriction on fs_hz.
    // Comfort-noise input (or an unsupported sample rate): stop running the
    // VAD and assume active speech until it is re-initialized below.
    running_ = false;
    active_speech_ = true;
    sid_interval_counter_ = 0;
  } else if (!running_) {
    // Count consecutive non-CNG updates while the VAD is stopped.
    ++sid_interval_counter_;
  }

  // Re-initialize (and thereby restart) the VAD after |kVadAutoEnable|
  // updates without CNG/SID input.
  if (sid_interval_counter_ >= kVadAutoEnable) {
    Init();
  }

  if (length > 0 && running_) {
    size_t vad_sample_index = 0;
    active_speech_ = false;
    // Loop through frame sizes 30, 20, and 10 ms.
    for (int vad_frame_size_ms = 30; vad_frame_size_ms >= 10;
         vad_frame_size_ms -= 10) {
      size_t vad_frame_size_samples =
          static_cast<size_t>(vad_frame_size_ms * fs_hz / 1000);
      // Consume as many whole frames of this size as possible; smaller frame
      // sizes pick up the remainder in later iterations.
      while (length - vad_sample_index >= vad_frame_size_samples) {
        int vad_return = WebRtcVad_Process(
            vad_instance_, fs_hz, &signal[vad_sample_index],
            vad_frame_size_samples);
        // Flag active speech if any sub-frame is classified as speech.
        active_speech_ |= (vad_return == 1);
        vad_sample_index += vad_frame_size_samples;
      }
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
72
modules/audio_coding/neteq/post_decode_vad.h
Normal file
72
modules/audio_coding/neteq/post_decode_vad.h
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
|
||||
|
||||
#include <string> // size_t
|
||||
|
||||
#include "webrtc/api/audio_codecs/audio_decoder.h"
|
||||
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
|
||||
#include "webrtc/common_types.h" // NULL
|
||||
#include "webrtc/modules/audio_coding/neteq/defines.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Voice activity detector that runs on decoded audio. Wraps a WebRtcVad
// instance and keeps track of whether the most recently processed audio
// contained active speech.
class PostDecodeVad {
 public:
  PostDecodeVad()
      : enabled_(false),
        running_(false),
        active_speech_(true),
        sid_interval_counter_(0),
        vad_instance_(NULL) {
  }

  virtual ~PostDecodeVad();

  // Enables post-decode VAD.
  void Enable();

  // Disables post-decode VAD.
  void Disable();

  // Initializes post-decode VAD.
  void Init();

  // Updates post-decode VAD with the audio data in |signal| having |length|
  // samples. The data is of type |speech_type|, at the sample rate |fs_hz|.
  void Update(int16_t* signal, size_t length,
              AudioDecoder::SpeechType speech_type, bool sid_frame, int fs_hz);

  // Accessors.
  bool enabled() const { return enabled_; }
  bool running() const { return running_; }
  bool active_speech() const { return active_speech_; }

 private:
  static const int kVadMode = 0;  // Sets aggressiveness to "Normal".
  // Number of Update() calls without CNG/SID before re-enabling VAD.
  static const int kVadAutoEnable = 3000;

  bool enabled_;              // True between Enable() and Disable().
  bool running_;              // True while the VAD instance is initialized.
  bool active_speech_;        // Result of the latest VAD run.
  int sid_interval_counter_;  // Counts Update() calls without CNG/SID input.
  ::VadInst* vad_instance_;   // Owned; freed in the destructor.

  RTC_DISALLOW_COPY_AND_ASSIGN(PostDecodeVad);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_POST_DECODE_VAD_H_
|
||||
25
modules/audio_coding/neteq/post_decode_vad_unittest.cc
Normal file
25
modules/audio_coding/neteq/post_decode_vad_unittest.cc
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for PostDecodeVad class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
|
||||
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Verifies that a PostDecodeVad can be constructed and destroyed without ever
// being enabled (i.e., with no VAD instance allocated).
TEST(PostDecodeVad, CreateAndDestroy) {
  PostDecodeVad vad;
}
|
||||
|
||||
// TODO(hlundin): Write more tests.
|
||||
|
||||
} // namespace webrtc
|
||||
114
modules/audio_coding/neteq/preemptive_expand.cc
Normal file
114
modules/audio_coding/neteq/preemptive_expand.cc
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/preemptive_expand.h"
|
||||
|
||||
#include <algorithm> // min, max
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Entry point for the preemptive expand operation. Reads |input_length|
// interleaved samples from |input|; the first |old_data_length| samples per
// channel are data that must not be modified. Writes the (possibly stretched)
// result to |output| and reports the number of inserted samples in
// |length_change_samples|. Returns kError when the input is too short, in
// which case the input is copied unmodified to |output|.
PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(
    const int16_t* input,
    size_t input_length,
    size_t old_data_length,
    AudioMultiVector* output,
    size_t* length_change_samples) {
  old_data_length_per_channel_ = old_data_length;
  // Input length must be (almost) 30 ms.
  // Also, the new part must be at least |overlap_samples_| elements.
  static const size_t k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
  if (num_channels_ == 0 ||
      input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_ ||
      old_data_length >= input_length / num_channels_ - overlap_samples_) {
    // Length of input data too short to do preemptive expand. Simply move all
    // data from input to output.
    output->PushBackInterleaved(input, input_length);
    return kError;
  }
  const bool kFastMode = false;  // Fast mode is not available for PE Expand.
  // Delegate the common time-stretching machinery to the base class.
  return TimeStretch::Process(input, input_length, kFastMode, output,
                              length_change_samples);
}
|
||||
|
||||
void PreemptiveExpand::SetParametersForPassiveSpeech(size_t len,
                                                     int16_t* best_correlation,
                                                     size_t* peak_index) const {
  // When the signal does not contain any active speech, the correlation does
  // not matter. Simply set it to zero.
  *best_correlation = 0;

  // For low energy expansion, the new data can be less than 15 ms, but we
  // must ensure that |peak_index| is not larger than the length of the new
  // data (i.e., the part of |len| beyond the old, already-consumed samples).
  *peak_index = std::min(*peak_index,
                         len - old_data_length_per_channel_);
}
|
||||
|
||||
// Decides whether the stretch is allowed and, if so, performs it by
// overlap-adding a repeated |peak_index|-long segment into the output.
PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
    const int16_t* input,
    size_t input_length,
    size_t peak_index,
    int16_t best_correlation,
    bool active_speech,
    bool /*fast_mode*/,
    AudioMultiVector* output) const {
  // Pre-calculate common multiplication with |fs_mult_|.
  // 120 corresponds to 15 ms.
  size_t fs_mult_120 = static_cast<size_t>(fs_mult_ * 120);
  // Check for strong correlation (>0.9 in Q14) and at least 15 ms new data,
  // or passive speech.
  if (((best_correlation > kCorrelationThreshold) &&
       (old_data_length_per_channel_ <= fs_mult_120)) ||
      !active_speech) {
    // Do the preemptive expand operation by overlap add.

    // Set length of the first part, not to be modified.
    size_t unmodified_length = std::max(old_data_length_per_channel_,
                                        fs_mult_120);
    // Copy first part, including cross-fade region.
    output->PushBackInterleaved(
        input, (unmodified_length + peak_index) * num_channels_);
    // Copy the last |peak_index| samples up to 15 ms to |temp_vector|.
    AudioMultiVector temp_vector(num_channels_);
    temp_vector.PushBackInterleaved(
        &input[(unmodified_length - peak_index) * num_channels_],
        peak_index * num_channels_);
    // Cross-fade |temp_vector| onto the end of |output|; this inserts the
    // extra |peak_index| samples.
    output->CrossFade(temp_vector, peak_index);
    // Copy the last unmodified part, 15 ms + pitch period until the end.
    output->PushBackInterleaved(
        &input[unmodified_length * num_channels_],
        input_length - unmodified_length * num_channels_);

    if (active_speech) {
      return kSuccess;
    } else {
      // Stretch was performed, but on passive (low-energy) speech.
      return kSuccessLowEnergy;
    }
  } else {
    // Preemptive expand not allowed. Simply move all data from input to
    // output.
    output->PushBackInterleaved(input, input_length);
    return kNoStretch;
  }
}
|
||||
|
||||
// Creates a new PreemptiveExpand object. The caller takes ownership of the
// returned pointer.
PreemptiveExpand* PreemptiveExpandFactory::Create(
    int sample_rate_hz,
    size_t num_channels,
    const BackgroundNoise& background_noise,
    size_t overlap_samples) const {
  return new PreemptiveExpand(
      sample_rate_hz, num_channels, background_noise, overlap_samples);
}
|
||||
|
||||
} // namespace webrtc
|
||||
88
modules/audio_coding/neteq/preemptive_expand.h
Normal file
88
modules/audio_coding/neteq/preemptive_expand.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/time_stretch.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class BackgroundNoise;
|
||||
|
||||
// This class implements the PreemptiveExpand operation. Most of the work is
|
||||
// done in the base class TimeStretch, which is shared with the Accelerate
|
||||
// operation. In the PreemptiveExpand class, the operations that are specific to
|
||||
// PreemptiveExpand are implemented.
|
||||
class PreemptiveExpand : public TimeStretch {
|
||||
public:
|
||||
PreemptiveExpand(int sample_rate_hz,
|
||||
size_t num_channels,
|
||||
const BackgroundNoise& background_noise,
|
||||
size_t overlap_samples)
|
||||
: TimeStretch(sample_rate_hz, num_channels, background_noise),
|
||||
old_data_length_per_channel_(0),
|
||||
overlap_samples_(overlap_samples) {
|
||||
}
|
||||
|
||||
// This method performs the actual PreemptiveExpand operation. The samples are
|
||||
// read from |input|, of length |input_length| elements, and are written to
|
||||
// |output|. The number of samples added through time-stretching is
|
||||
// is provided in the output |length_change_samples|. The method returns
|
||||
// the outcome of the operation as an enumerator value.
|
||||
ReturnCodes Process(const int16_t *pw16_decoded,
|
||||
size_t len,
|
||||
size_t old_data_len,
|
||||
AudioMultiVector* output,
|
||||
size_t* length_change_samples);
|
||||
|
||||
protected:
|
||||
// Sets the parameters |best_correlation| and |peak_index| to suitable
|
||||
// values when the signal contains no active speech.
|
||||
void SetParametersForPassiveSpeech(size_t input_length,
|
||||
int16_t* best_correlation,
|
||||
size_t* peak_index) const override;
|
||||
|
||||
// Checks the criteria for performing the time-stretching operation and,
|
||||
// if possible, performs the time-stretching.
|
||||
ReturnCodes CheckCriteriaAndStretch(const int16_t* input,
|
||||
size_t input_length,
|
||||
size_t peak_index,
|
||||
int16_t best_correlation,
|
||||
bool active_speech,
|
||||
bool /*fast_mode*/,
|
||||
AudioMultiVector* output) const override;
|
||||
|
||||
private:
|
||||
size_t old_data_length_per_channel_;
|
||||
size_t overlap_samples_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(PreemptiveExpand);
|
||||
};
|
||||
|
||||
// Virtual factory for PreemptiveExpand objects, allowing an alternative
// implementation to be substituted. Create() transfers ownership of the
// returned object to the caller.
struct PreemptiveExpandFactory {
  PreemptiveExpandFactory() {}
  virtual ~PreemptiveExpandFactory() {}

  virtual PreemptiveExpand* Create(
      int sample_rate_hz,
      size_t num_channels,
      const BackgroundNoise& background_noise,
      size_t overlap_samples) const;
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_PREEMPTIVE_EXPAND_H_
|
||||
57
modules/audio_coding/neteq/random_vector.cc
Normal file
57
modules/audio_coding/neteq/random_vector.cc
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Fixed table of 256 precomputed pseudo-random int16_t samples.
// Generate() indexes into it with |seed_ & (kRandomTableSize - 1)|, so
// kRandomTableSize must stay a power of two for that masking to be valid.
const int16_t RandomVector::kRandomTable[RandomVector::kRandomTableSize] = {
    2680, 5532, 441, 5520, 16170, -5146, -1024, -8733, 3115, 9598, -10380,
    -4959, -1280, -21716, 7133, -1522, 13458, -3902, 2789, -675, 3441, 5016,
    -13599, -4003, -2739, 3922, -7209, 13352, -11617, -7241, 12905, -2314, 5426,
    10121, -9702, 11207, -13542, 1373, 816, -5934, -12504, 4798, 1811, 4112,
    -613, 201, -10367, -2960, -2419, 3442, 4299, -6116, -6092, 1552, -1650,
    -480, -1237, 18720, -11858, -8303, -8212, 865, -2890, -16968, 12052, -5845,
    -5912, 9777, -5665, -6294, 5426, -4737, -6335, 1652, 761, 3832, 641, -8552,
    -9084, -5753, 8146, 12156, -4915, 15086, -1231, -1869, 11749, -9319, -6403,
    11407, 6232, -1683, 24340, -11166, 4017, -10448, 3153, -2936, 6212, 2891,
    -866, -404, -4807, -2324, -1917, -2388, -6470, -3895, -10300, 5323, -5403,
    2205, 4640, 7022, -21186, -6244, -882, -10031, -3395, -12885, 7155, -5339,
    5079, -2645, -9515, 6622, 14651, 15852, 359, 122, 8246, -3502, -6696, -3679,
    -13535, -1409, -704, -7403, -4007, 1798, 279, -420, -12796, -14219, 1141,
    3359, 11434, 7049, -6684, -7473, 14283, -4115, -9123, -8969, 4152, 4117,
    13792, 5742, 16168, 8661, -1609, -6095, 1881, 14380, -5588, 6758, -6425,
    -22969, -7269, 7031, 1119, -1611, -5850, -11281, 3559, -8952, -10146, -4667,
    -16251, -1538, 2062, -1012, -13073, 227, -3142, -5265, 20, 5770, -7559,
    4740, -4819, 992, -8208, -7130, -4652, 6725, 7369, -1036, 13144, -1588,
    -5304, -2344, -449, -5705, -8894, 5205, -17904, -11188, -1022, 4852, 10101,
    -5255, -4200, -752, 7941, -1543, 5959, 14719, 13346, 17045, -15605, -1678,
    -1600, -9230, 68, 23348, 1172, 7750, 11212, -18227, 9956, 4161, 883, 3947,
    4341, 1014, -4889, -2603, 1246, -5630, -3596, -870, -1298, 2784, -3317,
    -6612, -20541, 4166, 4181, -8625, 3562, 12890, 4761, 3205, -12259, -8579 };
|
||||
|
||||
// Restores the generator to its initial state (matching the values the
// constructor in random_vector.h uses), so the output sequence repeats.
void RandomVector::Reset() {
  seed_ = 777;
  seed_increment_ = 1;
}
|
||||
|
||||
void RandomVector::Generate(size_t length, int16_t* output) {
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
seed_ += seed_increment_;
|
||||
size_t position = seed_ & (kRandomTableSize - 1);
|
||||
output[i] = kRandomTable[position];
|
||||
}
|
||||
}
|
||||
|
||||
// Grows the per-sample seed step by |increase_by|, wrapping the result so it
// stays within [0, kRandomTableSize), i.e. a valid table-index stride.
void RandomVector::IncreaseSeedIncrement(int16_t increase_by) {
  seed_increment_ = (seed_increment_ + increase_by) & (kRandomTableSize - 1);
}
|
||||
} // namespace webrtc
|
||||
50
modules/audio_coding/neteq/random_vector.h
Normal file
50
modules/audio_coding/neteq/random_vector.h
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
|
||||
|
||||
#include <string.h> // size_t
|
||||
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This class generates pseudo-random samples by stepping through a fixed
// 256-entry table of precomputed int16_t values.
class RandomVector {
 public:
  // Table size; must be a power of two, since lookups mask the seed with
  // kRandomTableSize - 1.
  static const size_t kRandomTableSize = 256;
  static const int16_t kRandomTable[kRandomTableSize];

  RandomVector()
      : seed_(777),
        seed_increment_(1) {
  }

  // Restores |seed_| and |seed_increment_| to their initial values.
  void Reset();

  // Writes |length| pseudo-random samples to |output|.
  void Generate(size_t length, int16_t* output);

  // Adds |increase_by| to the seed increment, wrapped to the table size.
  void IncreaseSeedIncrement(int16_t increase_by);

  // Accessors and mutators.
  int16_t seed_increment() { return seed_increment_; }
  void set_seed_increment(int16_t value) { seed_increment_ = value; }

 private:
  uint32_t seed_;            // Running state; low bits index the table.
  int16_t seed_increment_;   // Step added to |seed_| per generated sample.

  RTC_DISALLOW_COPY_AND_ASSIGN(RandomVector);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_RANDOM_VECTOR_H_
|
||||
25
modules/audio_coding/neteq/random_vector_unittest.cc
Normal file
25
modules/audio_coding/neteq/random_vector_unittest.cc
Normal file
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for RandomVector class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
|
||||
|
||||
#include "webrtc/test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Smoke test: constructing and destroying a RandomVector must be clean.
TEST(RandomVector, CreateAndDestroy) {
  RandomVector random_vector;
}
|
||||
|
||||
// TODO(hlundin): Write more tests.
|
||||
|
||||
} // namespace webrtc
|
||||
162
modules/audio_coding/neteq/red_payload_splitter.cc
Normal file
162
modules/audio_coding/neteq/red_payload_splitter.cc
Normal file
@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/red_payload_splitter.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/decoder_database.h"
|
||||
#include "webrtc/rtc_base/checks.h"
|
||||
#include "webrtc/rtc_base/logging.h"
|
||||
#include "webrtc/rtc_base/safe_conversions.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// The method loops through a list of packets {A, B, C, ...}. Each packet is
|
||||
// split into its corresponding RED payloads, {A1, A2, ...}, which is
|
||||
// temporarily held in the list |new_packets|.
|
||||
// When the first packet in |packet_list| has been processed, the orignal packet
|
||||
// is replaced by the new ones in |new_packets|, so that |packet_list| becomes:
|
||||
// {A1, A2, ..., B, C, ...}. The method then continues with B, and C, until all
|
||||
// the original packets have been replaced by their split payloads.
|
||||
// Splits every RED packet in |packet_list| into one Packet per redundant
// block, splicing the resulting packets in place of the original. Returns
// false if any packet was corrupt (length mismatch) or contained more than
// kMaxRedBlocks blocks; processing still continues with the next packet.
bool RedPayloadSplitter::SplitRed(PacketList* packet_list) {
  // Too many RED blocks indicates that something is wrong. Clamp it at some
  // reasonable value.
  const size_t kMaxRedBlocks = 32;
  bool ret = true;
  PacketList::iterator it = packet_list->begin();
  while (it != packet_list->end()) {
    const Packet& red_packet = *it;
    assert(!red_packet.payload.empty());
    const uint8_t* payload_ptr = red_packet.payload.data();

    // Read RED headers (according to RFC 2198):
    //
    //    0                   1                    2                   3
    //    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    //   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    //   |F|   block PT  |  timestamp offset         |   block length    |
    //   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    //   Last RED header:
    //    0 1 2 3 4 5 6 7
    //   +-+-+-+-+-+-+-+-+
    //   |0|   Block PT  |
    //   +-+-+-+-+-+-+-+-+

    // Parsed form of one RED block header.
    struct RedHeader {
      uint8_t payload_type;
      uint32_t timestamp;      // Absolute timestamp (offset already applied).
      size_t payload_length;
    };

    std::vector<RedHeader> new_headers;
    bool last_block = false;
    // |sum_length| accumulates header bytes plus declared payload lengths,
    // so that the last block's payload length can be derived by subtraction.
    size_t sum_length = 0;
    while (!last_block) {
      RedHeader new_header;
      // Check the F bit. If F == 0, this was the last block.
      last_block = ((*payload_ptr & 0x80) == 0);
      // Bits 1 through 7 are payload type.
      new_header.payload_type = payload_ptr[0] & 0x7F;
      if (last_block) {
        // No more header data to read.
        ++sum_length;  // Account for RED header size of 1 byte.
        // The last (primary) block carries the packet's own timestamp and
        // occupies whatever bytes remain after all headers and earlier blocks.
        new_header.timestamp = red_packet.timestamp;
        new_header.payload_length = red_packet.payload.size() - sum_length;
        payload_ptr += 1;  // Advance to first payload byte.
      } else {
        // Bits 8 through 21 are timestamp offset.
        int timestamp_offset =
            (payload_ptr[1] << 6) + ((payload_ptr[2] & 0xFC) >> 2);
        new_header.timestamp = red_packet.timestamp - timestamp_offset;
        // Bits 22 through 31 are payload length.
        new_header.payload_length =
            ((payload_ptr[2] & 0x03) << 8) + payload_ptr[3];
        payload_ptr += 4;  // Advance to next RED header.
      }
      sum_length += new_header.payload_length;
      // Account for RED header size of 4 bytes. On the final iteration this
      // overcounts (the last header is 1 byte, added above), but |sum_length|
      // is not read again after the loop exits, so it is harmless.
      sum_length += 4;
      // Store in new list of packets.
      new_headers.push_back(new_header);
    }

    if (new_headers.size() <= kMaxRedBlocks) {
      // Populate the new packets with payload data.
      // |payload_ptr| now points at the first payload byte.
      PacketList new_packets;  // An empty list to store the split packets in.
      for (size_t i = 0; i != new_headers.size(); ++i) {
        const auto& new_header = new_headers[i];
        size_t payload_length = new_header.payload_length;
        if (payload_ptr + payload_length >
            red_packet.payload.data() + red_packet.payload.size()) {
          // The block lengths in the RED headers do not match the overall
          // packet length. Something is corrupt. Discard this and the remaining
          // payloads from this packet.
          LOG(LS_WARNING) << "SplitRed length mismatch";
          ret = false;
          break;
        }

        Packet new_packet;
        new_packet.timestamp = new_header.timestamp;
        new_packet.payload_type = new_header.payload_type;
        new_packet.sequence_number = red_packet.sequence_number;
        // The last block has red_level 0 (primary); earlier blocks get
        // increasing levels. push_front reverses the order so the primary
        // payload ends up first in |new_packets|.
        new_packet.priority.red_level =
            rtc::dchecked_cast<int>((new_headers.size() - 1) - i);
        new_packet.payload.SetData(payload_ptr, payload_length);
        new_packets.push_front(std::move(new_packet));
        payload_ptr += payload_length;
      }
      // Insert new packets into original list, before the element pointed to by
      // iterator |it|.
      packet_list->splice(it, std::move(new_packets));
    } else {
      LOG(LS_WARNING) << "SplitRed too many blocks: " << new_headers.size();
      ret = false;
    }
    // Remove |it| from the packet list. This operation effectively moves the
    // iterator |it| to the next packet in the list. Thus, we do not have to
    // increment it manually.
    it = packet_list->erase(it);
  }
  return ret;
}
|
||||
|
||||
// Scans |packet_list| and discards every non-DTMF, non-CNG packet whose
// payload type differs from the first non-DTMF, non-CNG payload type seen.
// Returns how many packets were discarded.
int RedPayloadSplitter::CheckRedPayloads(
    PacketList* packet_list,
    const DecoderDatabase& decoder_database) {
  int main_payload_type = -1;  // -1 until the first speech payload is seen.
  int num_deleted_packets = 0;
  auto it = packet_list->begin();
  while (it != packet_list->end()) {
    const uint8_t this_payload_type = it->payload_type;
    // DTMF and comfort-noise packets are always kept, regardless of type.
    const bool is_signaling =
        decoder_database.IsDtmf(this_payload_type) ||
        decoder_database.IsComfortNoise(this_payload_type);
    if (!is_signaling && main_payload_type == -1) {
      // First non-DTMF, non-CNG packet fixes the accepted payload type.
      main_payload_type = this_payload_type;
    } else if (!is_signaling && this_payload_type != main_payload_type) {
      // Redundant payloads of a different type are not allowed. erase()
      // already advances the iterator to the next packet.
      it = packet_list->erase(it);
      ++num_deleted_packets;
      continue;
    }
    ++it;
  }
  return num_deleted_packets;
}
|
||||
|
||||
} // namespace webrtc
|
||||
51
modules/audio_coding/neteq/red_payload_splitter.h
Normal file
51
modules/audio_coding/neteq/red_payload_splitter.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/packet.h"
|
||||
#include "webrtc/rtc_base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Forward declarations.
|
||||
class DecoderDatabase;
|
||||
|
||||
// This class handles splitting of RED payloads into smaller parts.
// Codec-specific packet splitting can be performed by
// AudioDecoder::ParsePayload.
class RedPayloadSplitter {
 public:
  RedPayloadSplitter() {}

  virtual ~RedPayloadSplitter() {}

  // Splits each packet in |packet_list| into its separate RED payloads. Each
  // RED payload is packetized into a Packet. The original elements in
  // |packet_list| are properly deleted, and replaced by the new packets.
  // Note that all packets in |packet_list| must be RED payloads, i.e., have
  // RED headers according to RFC 2198 at the very beginning of the payload.
  // Returns false if any packet could not be split (corrupt payload or too
  // many RED blocks); true otherwise.
  virtual bool SplitRed(PacketList* packet_list);

  // Checks all packets in |packet_list|. Packets that are DTMF events or
  // comfort noise payloads are always kept. Apart from those, only one single
  // payload type is accepted; any packet with another payload type is
  // discarded. Returns the number of discarded packets.
  virtual int CheckRedPayloads(PacketList* packet_list,
                               const DecoderDatabase& decoder_database);

 private:
  RTC_DISALLOW_COPY_AND_ASSIGN(RedPayloadSplitter);
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_RED_PAYLOAD_SPLITTER_H_
|
||||
344
modules/audio_coding/neteq/red_payload_splitter_unittest.cc
Normal file
344
modules/audio_coding/neteq/red_payload_splitter_unittest.cc
Normal file
@ -0,0 +1,344 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Unit tests for RedPayloadSplitter class.
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/red_payload_splitter.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <memory>
|
||||
#include <utility> // pair
|
||||
|
||||
#include "webrtc/api/audio_codecs/builtin_audio_decoder_factory.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/mock/mock_decoder_database.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/packet.h"
|
||||
#include "webrtc/test/gtest.h"
|
||||
#include "webrtc/test/mock_audio_decoder_factory.h"
|
||||
|
||||
using ::testing::Return;
|
||||
using ::testing::ReturnNull;
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kRedPayloadType = 100;
|
||||
static const size_t kPayloadLength = 10;
|
||||
static const size_t kRedHeaderLength = 4; // 4 bytes RED header.
|
||||
static const uint16_t kSequenceNumber = 0;
|
||||
static const uint32_t kBaseTimestamp = 0x12345678;
|
||||
|
||||
// A possible Opus packet that contains FEC is the following.
|
||||
// The frame is 20 ms in duration.
|
||||
//
|
||||
// 0 1 2 3
|
||||
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// |0|0|0|0|1|0|0|0|x|1|x|x|x|x|x|x|x| |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
|
||||
// | Compressed frame 1 (N-2 bytes)... :
|
||||
// : |
|
||||
// | |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// Writes a minimal Opus-like packet with the FEC bit pattern into |payload|:
// the first two bytes are set to 0x08 and 0x40, and all remaining bytes are
// filled with |payload_value|. Buffers shorter than two bytes are left
// untouched.
void CreateOpusFecPayload(uint8_t* payload,
                          size_t payload_length,
                          uint8_t payload_value) {
  if (payload_length < 2) {
    return;
  }
  payload[0] = 0x08;
  payload[1] = 0x40;
  for (size_t i = 2; i < payload_length; ++i) {
    payload[i] = payload_value;
  }
}
|
||||
|
||||
// RED headers (according to RFC 2198):
|
||||
//
|
||||
// 0 1 2 3
|
||||
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
// |F| block PT | timestamp offset | block length |
|
||||
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
//
|
||||
// Last RED header:
|
||||
// 0 1 2 3 4 5 6 7
|
||||
// +-+-+-+-+-+-+-+-+
|
||||
// |0| Block PT |
|
||||
// +-+-+-+-+-+-+-+-+
|
||||
|
||||
// Creates a RED packet, with |num_payloads| payloads, with payload types given
|
||||
// by the values in array |payload_types| (which must be of length
|
||||
// |num_payloads|). Each redundant payload is |timestamp_offset| samples
|
||||
// "behind" the the previous payload.
|
||||
// Builds a RED packet containing |num_payloads| blocks with payload types
// taken from |payload_types| (length |num_payloads|). Each redundant block is
// |timestamp_offset| samples behind the next; block |i|'s payload bytes are
// filled with the value |i| (or with an Opus FEC pattern if
// |embed_opus_fec| is set). Layout: all RED headers first, then the payloads.
Packet CreateRedPayload(size_t num_payloads,
                        uint8_t* payload_types,
                        int timestamp_offset,
                        bool embed_opus_fec = false) {
  Packet packet;
  packet.payload_type = kRedPayloadType;
  packet.timestamp = kBaseTimestamp;
  packet.sequence_number = kSequenceNumber;
  // Size: last block uses a 1-byte header (+ payload); every earlier block
  // uses a 4-byte header (+ payload).
  packet.payload.SetSize((kPayloadLength + 1) +
                         (num_payloads - 1) *
                             (kPayloadLength + kRedHeaderLength));
  uint8_t* payload_ptr = packet.payload.data();
  for (size_t i = 0; i < num_payloads; ++i) {
    // Write the RED headers.
    if (i == num_payloads - 1) {
      // Special case for last payload: 1-byte header with F = 0.
      *payload_ptr = payload_types[i] & 0x7F;  // F = 0;
      ++payload_ptr;
      break;
    }
    *payload_ptr = payload_types[i] & 0x7F;
    // Not the last block; set F = 1.
    *payload_ptr |= 0x80;
    ++payload_ptr;
    // 14-bit timestamp offset: top 8 bits in byte 1, low 6 bits in the top of
    // byte 2 (per RFC 2198 header layout).
    int this_offset = (num_payloads - i - 1) * timestamp_offset;
    *payload_ptr = this_offset >> 6;
    ++payload_ptr;
    assert(kPayloadLength <= 1023);  // Max length described by 10 bits.
    *payload_ptr = ((this_offset & 0x3F) << 2) | (kPayloadLength >> 8);
    ++payload_ptr;
    *payload_ptr = kPayloadLength & 0xFF;
    ++payload_ptr;
  }
  for (size_t i = 0; i < num_payloads; ++i) {
    // Write |i| to all bytes in each payload.
    if (embed_opus_fec) {
      CreateOpusFecPayload(payload_ptr, kPayloadLength,
                           static_cast<uint8_t>(i));
    } else {
      memset(payload_ptr, static_cast<int>(i), kPayloadLength);
    }
    payload_ptr += kPayloadLength;
  }
  return packet;
}
|
||||
|
||||
// Create a packet with all payload bytes set to |payload_value|.
|
||||
// Builds a test packet of |payload_length| bytes, all set to |payload_value|
// (or formatted as an Opus FEC payload when |opus_fec| is true), with the
// fixed base timestamp and sequence number used throughout these tests.
Packet CreatePacket(uint8_t payload_type,
                    size_t payload_length,
                    uint8_t payload_value,
                    bool opus_fec = false) {
  Packet packet;
  packet.sequence_number = kSequenceNumber;
  packet.timestamp = kBaseTimestamp;
  packet.payload_type = payload_type;
  packet.payload.SetSize(payload_length);
  uint8_t* const data = packet.payload.data();
  const size_t size = packet.payload.size();
  if (opus_fec) {
    CreateOpusFecPayload(data, size, payload_value);
  } else {
    memset(data, payload_value, size);
  }
  return packet;
}
|
||||
|
||||
// Checks that |packet| has the attributes given in the remaining parameters.
|
||||
// Checks that |packet| carries exactly the given length, payload type,
// sequence number, timestamp, priority, and that every payload byte equals
// |payload_value|.
void VerifyPacket(const Packet& packet,
                  size_t payload_length,
                  uint8_t payload_type,
                  uint16_t sequence_number,
                  uint32_t timestamp,
                  uint8_t payload_value,
                  Packet::Priority priority) {
  EXPECT_EQ(payload_length, packet.payload.size());
  EXPECT_EQ(payload_type, packet.payload_type);
  EXPECT_EQ(sequence_number, packet.sequence_number);
  EXPECT_EQ(timestamp, packet.timestamp);
  EXPECT_EQ(priority, packet.priority);
  // Payload content check aborts on first mismatch (ASSERT, not EXPECT).
  ASSERT_FALSE(packet.payload.empty());
  for (size_t i = 0; i < packet.payload.size(); ++i) {
    ASSERT_EQ(payload_value, packet.payload.data()[i]);
  }
}
|
||||
|
||||
void VerifyPacket(const Packet& packet,
|
||||
size_t payload_length,
|
||||
uint8_t payload_type,
|
||||
uint16_t sequence_number,
|
||||
uint32_t timestamp,
|
||||
uint8_t payload_value,
|
||||
bool primary) {
|
||||
return VerifyPacket(packet, payload_length, payload_type, sequence_number,
|
||||
timestamp, payload_value,
|
||||
Packet::Priority{0, primary ? 0 : 1});
|
||||
}
|
||||
|
||||
// Start of test definitions.
|
||||
|
||||
// Smoke test: heap-allocating and freeing a RedPayloadSplitter must be clean.
// Use a smart pointer instead of raw new/delete so the object is released
// even if an assertion in a future version of this test fails.
TEST(RedPayloadSplitter, CreateAndDestroy) {
  auto splitter = std::make_unique<RedPayloadSplitter>();
  splitter.reset();
}
|
||||
|
||||
// Packet A is split into A1 and A2.
|
||||
// One RED packet with two blocks must split into two packets, primary first.
TEST(RedPayloadSplitter, OnePacketTwoPayloads) {
  uint8_t payload_types[] = {0, 0};
  const int kTimestampOffset = 160;
  PacketList packet_list;
  packet_list.push_back(CreateRedPayload(2, payload_types, kTimestampOffset));
  RedPayloadSplitter splitter;
  EXPECT_TRUE(splitter.SplitRed(&packet_list));
  ASSERT_EQ(2u, packet_list.size());
  // Check first packet. The first in list should always be the primary payload.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
               kSequenceNumber, kBaseTimestamp, 1, true);
  packet_list.pop_front();
  // Check second packet: the redundant block, one timestamp offset behind.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
               kSequenceNumber, kBaseTimestamp - kTimestampOffset, 0, false);
}
|
||||
|
||||
// Packets A and B are not split at all. Only the RED header in each packet is
|
||||
// removed.
|
||||
// Two RED packets with a single block each are not split; only the RED
// header is stripped from each.
TEST(RedPayloadSplitter, TwoPacketsOnePayload) {
  uint8_t payload_types[] = {0};
  const int kTimestampOffset = 160;
  // Create first packet, with a single RED payload.
  PacketList packet_list;
  packet_list.push_back(CreateRedPayload(1, payload_types, kTimestampOffset));
  // Create second packet, with a single RED payload.
  {
    Packet packet = CreateRedPayload(1, payload_types, kTimestampOffset);
    // Manually change timestamp and sequence number of second packet.
    packet.timestamp += kTimestampOffset;
    packet.sequence_number++;
    packet_list.push_back(std::move(packet));
  }
  RedPayloadSplitter splitter;
  EXPECT_TRUE(splitter.SplitRed(&packet_list));
  ASSERT_EQ(2u, packet_list.size());
  // Check first packet.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
               kSequenceNumber, kBaseTimestamp, 0, true);
  packet_list.pop_front();
  // Check second packet.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
               kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 0, true);
}
|
||||
|
||||
// Packets A and B are split into packets A1, A2, A3, B1, B2, B3, with
|
||||
// attributes as follows:
|
||||
//
|
||||
// A1* A2 A3 B1* B2 B3
|
||||
// Payload type 0 1 2 0 1 2
|
||||
// Timestamp b b-o b-2o b+o b b-o
|
||||
// Sequence number 0 0 0 1 1 1
|
||||
//
|
||||
// b = kBaseTimestamp, o = kTimestampOffset, * = primary.
|
||||
// Two RED packets with three blocks each split into six packets, ordered
// primary-first within each original packet (see the table in the comment
// preceding this test).
TEST(RedPayloadSplitter, TwoPacketsThreePayloads) {
  uint8_t payload_types[] = {2, 1, 0};  // Primary is the last one.
  const int kTimestampOffset = 160;
  // Create first packet, with 3 RED payloads.
  PacketList packet_list;
  packet_list.push_back(CreateRedPayload(3, payload_types, kTimestampOffset));
  // Create second packet, also with 3 RED payloads.
  {
    Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset);
    // Manually change timestamp and sequence number of second packet.
    packet.timestamp += kTimestampOffset;
    packet.sequence_number++;
    packet_list.push_back(std::move(packet));
  }
  RedPayloadSplitter splitter;
  EXPECT_TRUE(splitter.SplitRed(&packet_list));
  ASSERT_EQ(6u, packet_list.size());
  // Check first packet, A1.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2],
               kSequenceNumber, kBaseTimestamp, 2, {0, 0});
  packet_list.pop_front();
  // Check second packet, A2.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
               kSequenceNumber, kBaseTimestamp - kTimestampOffset, 1, {0, 1});
  packet_list.pop_front();
  // Check third packet, A3.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
               kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0,
               {0, 2});
  packet_list.pop_front();
  // Check fourth packet, B1.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[2],
               kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 2,
               {0, 0});
  packet_list.pop_front();
  // Check fifth packet, B2.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[1],
               kSequenceNumber + 1, kBaseTimestamp, 1, {0, 1});
  packet_list.pop_front();
  // Check sixth packet, B3.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
               kSequenceNumber + 1, kBaseTimestamp - kTimestampOffset, 0,
               {0, 2});
}
|
||||
|
||||
// Creates a list with 4 packets with these payload types:
|
||||
// 0 = CNGnb
|
||||
// 1 = PCMu
|
||||
// 2 = DTMF (AVT)
|
||||
// 3 = iLBC
|
||||
// We expect the method CheckRedPayloads to discard the iLBC packet, since it
|
||||
// is a non-CNG, non-DTMF payload of another type than the first speech payload
|
||||
// found in the list (which is PCMu).
|
||||
// CheckRedPayloads must drop the iLBC packet: it is a non-CNG, non-DTMF
// payload of a different type than the first speech payload (PCMu).
TEST(RedPayloadSplitter, CheckRedPayloads) {
  PacketList packet_list;
  for (uint8_t i = 0; i <= 3; ++i) {
    // Create packet with payload type |i|, payload length 10 bytes, all 0.
    packet_list.push_back(CreatePacket(i, 10, 0));
  }

  // Use a real DecoderDatabase object here instead of a mock, since it is
  // easier to just register the payload types and let the actual implementation
  // do its job.
  DecoderDatabase decoder_database(
      new rtc::RefCountedObject<MockAudioDecoderFactory>);
  decoder_database.RegisterPayload(0, NetEqDecoder::kDecoderCNGnb, "cng-nb");
  decoder_database.RegisterPayload(1, NetEqDecoder::kDecoderPCMu, "pcmu");
  decoder_database.RegisterPayload(2, NetEqDecoder::kDecoderAVT, "avt");
  decoder_database.RegisterPayload(3, NetEqDecoder::kDecoderILBC, "ilbc");

  RedPayloadSplitter splitter;
  splitter.CheckRedPayloads(&packet_list, decoder_database);

  ASSERT_EQ(3u, packet_list.size());  // Should have dropped the last packet.
  // Verify packets. The loop verifies that payload types 0, 1, and 2 are in the
  // list.
  for (int i = 0; i <= 2; ++i) {
    VerifyPacket(packet_list.front(), 10, i, kSequenceNumber, kBaseTimestamp, 0,
                 true);
    packet_list.pop_front();
  }
  EXPECT_TRUE(packet_list.empty());
}
|
||||
|
||||
// Packet A is split into A1, A2 and A3. But the length parameter is off, so
|
||||
// the last payloads should be discarded.
|
||||
// A RED packet whose declared block lengths exceed the actual payload must
// make SplitRed return false, keeping only the blocks that fit.
TEST(RedPayloadSplitter, WrongPayloadLength) {
  uint8_t payload_types[] = {0, 0, 0};
  const int kTimestampOffset = 160;
  PacketList packet_list;
  {
    Packet packet = CreateRedPayload(3, payload_types, kTimestampOffset);
    // Manually tamper with the payload length of the packet.
    // This is one byte too short for the second payload (out of three).
    // We expect only the first payload to be returned.
    packet.payload.SetSize(packet.payload.size() - (kPayloadLength + 1));
    packet_list.push_back(std::move(packet));
  }
  RedPayloadSplitter splitter;
  EXPECT_FALSE(splitter.SplitRed(&packet_list));
  ASSERT_EQ(1u, packet_list.size());
  // Check first packet.
  VerifyPacket(packet_list.front(), kPayloadLength, payload_types[0],
               kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset, 0,
               {0, 2});
  packet_list.pop_front();
}
|
||||
|
||||
} // namespace webrtc
|
||||
96
modules/audio_coding/neteq/rtcp.cc
Normal file
96
modules/audio_coding/neteq/rtcp.cc
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/rtcp.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/modules/include/module_common_types.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Resets all receiver statistics, treating |start_sequence_number| as the
// first sequence number of the stream.
void Rtcp::Init(uint16_t start_sequence_number) {
  // Sequence-number tracking.
  base_seq_no_ = start_sequence_number;
  max_seq_no_ = start_sequence_number;
  cycles_ = 0;
  // Packet counters.
  received_packets_ = 0;
  received_packets_prior_ = 0;
  expected_prior_ = 0;
  // Jitter state.
  jitter_ = 0;
  transit_ = 0;
}
|
||||
|
||||
// Feeds one received RTP packet into the statistics: updates the packet
// count, the extended highest sequence number (with wrap-around handling),
// and the RFC 3550 interarrival jitter estimate.
void Rtcp::Update(const RTPHeader& rtp_header, uint32_t receive_timestamp) {
  // Update number of received packets, and largest packet number received.
  received_packets_++;
  // Signed 16-bit difference: positive (including after wrap) means the new
  // sequence number is "ahead" of |max_seq_no_|.
  int16_t sn_diff = rtp_header.sequenceNumber - max_seq_no_;
  if (sn_diff >= 0) {
    if (rtp_header.sequenceNumber < max_seq_no_) {
      // Wrap-around detected.
      cycles_++;
    }
    max_seq_no_ = rtp_header.sequenceNumber;
  }

  // Calculate jitter according to RFC 3550, and update previous timestamps.
  // Note that the value in |jitter_| is in Q4.
  if (received_packets_ > 1) {
    // |ts_diff| is the change in transit time relative to the previous packet.
    int32_t ts_diff = receive_timestamp - (rtp_header.timestamp - transit_);
    int64_t jitter_diff = (std::abs(int64_t{ts_diff}) << 4) - jitter_;
    // Calculate 15 * jitter_ / 16 + jitter_diff / 16 (with proper rounding).
    jitter_ = jitter_ + ((jitter_diff + 8) >> 4);
    RTC_DCHECK_GE(jitter_, 0);
  }
  // Remember the transit time (RTP timestamp minus arrival timestamp) for the
  // next jitter calculation.
  transit_ = rtp_header.timestamp - receive_timestamp;
}
|
||||
|
||||
// Fills |stats| with RFC 3550 receiver-report fields: extended highest
// sequence number, cumulative and fractional packet loss, and jitter.
// When |no_reset| is false, the "prior" counters are updated so the next
// call reports the fraction lost since this one; when true, the interval
// state is left untouched (a pure read).
void Rtcp::GetStatistics(bool no_reset, RtcpStatistics* stats) {
  // Extended highest sequence number received.
  stats->extended_highest_sequence_number =
      (static_cast<int>(cycles_) << 16) + max_seq_no_;

  // Calculate expected number of packets and compare it with the number of
  // packets that were actually received. The cumulative number of lost packets
  // can be extracted.
  uint32_t expected_packets =
      stats->extended_highest_sequence_number - base_seq_no_ + 1;
  if (received_packets_ == 0) {
    // No packets received, assume none lost.
    stats->packets_lost = 0;
  } else if (expected_packets > received_packets_) {
    stats->packets_lost = expected_packets - received_packets_;
    if (stats->packets_lost > 0xFFFFFF) {
      // Clamp to the 24-bit field size of the RTCP report.
      stats->packets_lost = 0xFFFFFF;
    }
  } else {
    stats->packets_lost = 0;
  }

  // Fraction lost since last report.
  uint32_t expected_since_last = expected_packets - expected_prior_;
  uint32_t received_since_last = received_packets_ - received_packets_prior_;
  if (!no_reset) {
    expected_prior_ = expected_packets;
    received_packets_prior_ = received_packets_;
  }
  int32_t lost = expected_since_last - received_since_last;
  if (expected_since_last == 0 || lost <= 0 || received_packets_ == 0) {
    stats->fraction_lost = 0;
  } else {
    // 8-bit fixed-point fraction, capped at 0xFF.
    stats->fraction_lost = std::min(0xFFU, (lost << 8) / expected_since_last);
  }

  stats->jitter = jitter_ >> 4;  // Scaling from Q4.
}
|
||||
|
||||
} // namespace webrtc
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user