Initial upload of NetEq4

This is the first public upload of the new NetEq, version 4.

It has been through extensive internal review during the course of
the project.

TEST=trybots

Review URL: https://webrtc-codereview.appspot.com/1073005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3425 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
henrik.lundin@webrtc.org
2013-01-29 12:09:21 +00:00
parent 63e0964039
commit d94659dc27
129 changed files with 24101 additions and 103 deletions

View File

@ -138,109 +138,7 @@
'test/NetEqRTPplay.cc',
],
},
{
'target_name': 'RTPencode',
'type': 'executable',
'dependencies': [
'NetEqTestTools',# Test helpers
'G711',
'G722',
'PCM16B',
'iLBC',
'iSAC',
'CNG',
'<(webrtc_root)/common_audio/common_audio.gyp:vad',
],
'defines': [
# TODO: Make codec selection conditional on definitions in target NetEq
'CODEC_ILBC',
'CODEC_PCM16B',
'CODEC_G711',
'CODEC_G722',
'CODEC_ISAC',
'CODEC_PCM16B_WB',
'CODEC_ISAC_SWB',
'CODEC_ISAC_FB',
'CODEC_PCM16B_32KHZ',
'CODEC_CNGCODEC8',
'CODEC_CNGCODEC16',
'CODEC_CNGCODEC32',
'CODEC_ATEVENT_DECODE',
'CODEC_RED',
],
'include_dirs': [
'interface',
'test',
],
'sources': [
'test/RTPencode.cc',
],
},
{
'target_name': 'RTPjitter',
'type': 'executable',
'dependencies': [
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPjitter.cc',
],
},
{
'target_name': 'RTPanalyze',
'type': 'executable',
'dependencies': [
'NetEqTestTools',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPanalyze.cc',
],
},
{
'target_name': 'RTPchange',
'type': 'executable',
'dependencies': [
'NetEqTestTools',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPchange.cc',
],
},
{
'target_name': 'RTPtimeshift',
'type': 'executable',
'dependencies': [
'NetEqTestTools',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPtimeshift.cc',
],
},
{
'target_name': 'RTPcat',
'type': 'executable',
'dependencies': [
'NetEqTestTools',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPcat.cc',
],
},
{
'target_name': 'rtp_to_text',
'type': 'executable',
'dependencies': [
'NetEqTestTools',
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
],
'sources': [
'test/rtp_to_text.cc',
],
},
{
'target_name': 'NetEqTestTools',
# Collection of useful functions used in other tests

View File

@ -0,0 +1,4 @@
henrik.lundin@webrtc.org
tina.legrand@webrtc.org
turajs@webrtc.org
minyue@webrtc.org

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/accelerate.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
Accelerate::ReturnCodes Accelerate::Process(
    const int16_t* input,
    int input_length,
    AudioMultiVector<int16_t>* output,
    int16_t* length_change_samples) {
  // Accelerate needs (almost) 30 ms of audio per channel to work with.
  static const int kSamplesPer15Ms = 120;  // 15 ms at 8 kHz sample rate.
  const bool too_short =
      num_channels_ == 0 ||
      input_length / num_channels_ < (2 * kSamplesPer15Ms - 1) * fs_mult_;
  if (!too_short) {
    return TimeStretch::Process(input, input_length, output,
                                length_change_samples);
  }
  // Not enough data to do accelerate. Pass the input through unmodified and
  // report the failure to the caller.
  output->PushBackInterleaved(input, input_length);
  return kError;
}
// Accelerate-specific parameter selection for input that holds no active
// speech. Only |best_correlation| is written (set to 0); the |len| and
// |peak_index| arguments are intentionally unused.
void Accelerate::SetParametersForPassiveSpeech(int /*len*/,
int16_t* best_correlation,
int* /*peak_index*/) const {
// When the signal does not contain any active speech, the correlation does
// not matter. Simply set it to zero.
*best_correlation = 0;
}
Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch(
    const int16_t* input, int input_length, size_t peak_index,
    int16_t best_correlation, bool active_speech,
    AudioMultiVector<int16_t>* output) const {
  // Stretching requires either strong correlation or passive speech.
  const bool stretch_allowed =
      (best_correlation > kCorrelationThreshold) || !active_speech;
  if (!stretch_allowed) {
    // Accelerate not allowed. Hand the input over to the output untouched.
    output->PushBackInterleaved(input, input_length);
    return kNoStretch;
  }

  // Accelerate by overlap-add. 120 samples correspond to 15 ms at 8 kHz, so
  // |samples_15ms| is the per-channel sample count of 15 ms at this rate.
  size_t samples_15ms = fs_mult_ * 120;
  assert(samples_15ms >= peak_index);  // Should be handled in Process().

  // First part, 0 to 15 ms, is copied as is.
  output->PushBackInterleaved(input, samples_15ms * num_channels_);

  // The |peak_index| samples that start at 15 ms go into a temporary vector,
  // which is then cross-faded onto the end of |output|.
  AudioMultiVector<int16_t> pitch_period(num_channels_);
  pitch_period.PushBackInterleaved(&input[samples_15ms * num_channels_],
                                   peak_index * num_channels_);
  output->CrossFade(pitch_period, peak_index);

  // The remainder, from 15 ms + pitch period until the end, is unmodified.
  output->PushBackInterleaved(
      &input[(samples_15ms + peak_index) * num_channels_],
      input_length - (samples_15ms + peak_index) * num_channels_);

  return active_speech ? kSuccess : kSuccessLowEnergy;
}
} // namespace webrtc

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_ACCELERATE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_ACCELERATE_H_
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq4/time_stretch.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
// This class implements the Accelerate operation. Most of the work is done
// in the base class TimeStretch, which is shared with the PreemptiveExpand
// operation. In the Accelerate class, the operations that are specific to
// Accelerate are implemented.
class Accelerate : public TimeStretch {
public:
// All constructor arguments are forwarded unchanged to TimeStretch.
Accelerate(int sample_rate_hz, size_t num_channels,
const BackgroundNoise& background_noise)
: TimeStretch(sample_rate_hz, num_channels, background_noise) {
}
virtual ~Accelerate() {}
// This method performs the actual Accelerate operation. The samples are
// read from |input|, of length |input_length| elements, and are written to
// |output|. The number of samples removed through time-stretching is
// provided in the output |length_change_samples|. The method returns
// the outcome of the operation as an enumerator value.
ReturnCodes Process(const int16_t* input,
int input_length,
AudioMultiVector<int16_t>* output,
int16_t* length_change_samples);
protected:
// Sets the parameters |best_correlation| and |peak_index| to suitable
// values when the signal contains no active speech.
virtual void SetParametersForPassiveSpeech(int len,
int16_t* best_correlation,
int* peak_index) const;
// Checks the criteria for performing the time-stretching operation and,
// if possible, performs the time-stretching.
virtual ReturnCodes CheckCriteriaAndStretch(
const int16_t* input, int input_length, size_t peak_index,
int16_t best_correlation, bool active_speech,
AudioMultiVector<int16_t>* output) const;
private:
DISALLOW_COPY_AND_ASSIGN(Accelerate);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_ACCELERATE_H_

View File

@ -0,0 +1,206 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h"
namespace webrtc {
// Returns true if |codec_type| is one of the decoder types handled in this
// build, and false for any other value. Several of the cases are only present
// when the corresponding WEBRTC_CODEC_* macro is defined, so the answer
// depends on the build configuration.
bool AudioDecoder::CodecSupported(NetEqDecoder codec_type) {
switch (codec_type) {
case kDecoderPCMu:
case kDecoderPCMa:
case kDecoderPCMu_2ch:
case kDecoderPCMa_2ch:
#ifdef WEBRTC_CODEC_ILBC
case kDecoderILBC:
#endif
#if defined(WEBRTC_CODEC_ISACFX) || defined(WEBRTC_CODEC_ISAC)
case kDecoderISAC:
#endif
#ifdef WEBRTC_CODEC_ISAC
case kDecoderISACswb:
#endif
#ifdef WEBRTC_CODEC_PCM16
case kDecoderPCM16B:
case kDecoderPCM16Bwb:
case kDecoderPCM16Bswb32kHz:
case kDecoderPCM16Bswb48kHz:
case kDecoderPCM16B_2ch:
case kDecoderPCM16Bwb_2ch:
case kDecoderPCM16Bswb32kHz_2ch:
case kDecoderPCM16Bswb48kHz_2ch:
case kDecoderPCM16B_5ch:
#endif
#ifdef WEBRTC_CODEC_G722
case kDecoderG722:
#endif
#ifdef WEBRTC_CODEC_OPUS
case kDecoderOpus:
case kDecoderOpus_2ch:
#endif
// The remaining types are listed unconditionally.
case kDecoderRED:
case kDecoderAVT:
case kDecoderCNGnb:
case kDecoderCNGwb:
case kDecoderCNGswb32kHz:
case kDecoderCNGswb48kHz:
case kDecoderArbitrary: {
return true;
}
default: {
return false;
}
}
}
// Returns the sample rate in Hz associated with |codec_type|, or -1 if the
// type has no defined sample rate in this build. Cases guarded by
// WEBRTC_CODEC_* macros are only present when the macro is defined.
int AudioDecoder::CodecSampleRateHz(NetEqDecoder codec_type) {
switch (codec_type) {
case kDecoderPCMu:
case kDecoderPCMa:
case kDecoderPCMu_2ch:
case kDecoderPCMa_2ch:
#ifdef WEBRTC_CODEC_ILBC
case kDecoderILBC:
#endif
#ifdef WEBRTC_CODEC_PCM16
case kDecoderPCM16B:
case kDecoderPCM16B_2ch:
case kDecoderPCM16B_5ch:
#endif
case kDecoderCNGnb: {
return 8000;
}
#if defined(WEBRTC_CODEC_ISACFX) || defined(WEBRTC_CODEC_ISAC)
case kDecoderISAC:
#endif
#ifdef WEBRTC_CODEC_PCM16
case kDecoderPCM16Bwb:
case kDecoderPCM16Bwb_2ch:
#endif
#ifdef WEBRTC_CODEC_G722
case kDecoderG722:
#endif
case kDecoderCNGwb: {
return 16000;
}
#ifdef WEBRTC_CODEC_ISAC
case kDecoderISACswb:
#endif
#ifdef WEBRTC_CODEC_PCM16
case kDecoderPCM16Bswb32kHz:
case kDecoderPCM16Bswb32kHz_2ch:
#endif
case kDecoderCNGswb32kHz: {
return 32000;
}
#ifdef WEBRTC_CODEC_PCM16
case kDecoderPCM16Bswb48kHz:
case kDecoderPCM16Bswb48kHz_2ch: {
return 48000;
}
#endif
#ifdef WEBRTC_CODEC_OPUS
// Opus is reported as 32 kHz here.
case kDecoderOpus:
case kDecoderOpus_2ch: {
return 32000;
}
#endif
// Note that the 48 kHz CNG type is reported as 32 kHz; see the TODO below.
case kDecoderCNGswb48kHz: {
// TODO(tlegrand): Remove limitation once ACM has full 48 kHz support.
return 32000;
}
default: {
return -1; // Undefined sample rate.
}
}
}
// Factory method: returns a newly allocated decoder object matching
// |codec_type|, or NULL if CodecSupported() rejects the type or if no decoder
// class exists for it (kDecoderRED, kDecoderAVT and kDecoderArbitrary).
// NOTE(review): the object is created with new and not released here, so the
// caller presumably takes ownership -- confirm against the callers.
AudioDecoder* AudioDecoder::CreateAudioDecoder(NetEqDecoder codec_type) {
if (!CodecSupported(codec_type)) {
return NULL;
}
switch (codec_type) {
case kDecoderPCMu:
return new AudioDecoderPcmU;
case kDecoderPCMa:
return new AudioDecoderPcmA;
case kDecoderPCMu_2ch:
return new AudioDecoderPcmUMultiCh(2);
case kDecoderPCMa_2ch:
return new AudioDecoderPcmAMultiCh(2);
#ifdef WEBRTC_CODEC_ILBC
case kDecoderILBC:
return new AudioDecoderIlbc;
#endif
// When WEBRTC_CODEC_ISACFX is defined it takes precedence over
// WEBRTC_CODEC_ISAC for kDecoderISAC.
#if defined(WEBRTC_CODEC_ISACFX)
case kDecoderISAC:
return new AudioDecoderIsacFix;
#elif defined(WEBRTC_CODEC_ISAC)
case kDecoderISAC:
return new AudioDecoderIsac;
#endif
#ifdef WEBRTC_CODEC_ISAC
case kDecoderISACswb:
return new AudioDecoderIsacSwb;
#endif
#ifdef WEBRTC_CODEC_PCM16
case kDecoderPCM16B:
case kDecoderPCM16Bwb:
case kDecoderPCM16Bswb32kHz:
case kDecoderPCM16Bswb48kHz:
return new AudioDecoderPcm16B(codec_type);
case kDecoderPCM16B_2ch:
case kDecoderPCM16Bwb_2ch:
case kDecoderPCM16Bswb32kHz_2ch:
case kDecoderPCM16Bswb48kHz_2ch:
case kDecoderPCM16B_5ch:
return new AudioDecoderPcm16BMultiCh(codec_type);
#endif
#ifdef WEBRTC_CODEC_G722
case kDecoderG722:
return new AudioDecoderG722;
#endif
#ifdef WEBRTC_CODEC_OPUS
case kDecoderOpus:
case kDecoderOpus_2ch:
return new AudioDecoderOpus(codec_type);
#endif
case kDecoderCNGnb:
case kDecoderCNGwb:
case kDecoderCNGswb32kHz:
case kDecoderCNGswb48kHz:
return new AudioDecoderCng(codec_type);
// These types pass CodecSupported() but have no decoder class; NULL is
// returned for them, as for any unknown type.
case kDecoderRED:
case kDecoderAVT:
case kDecoderArbitrary:
default: {
return NULL;
}
}
}
// Maps the numeric speech type reported by a codec to the SpeechType enum:
// 0 and 1 map to kSpeech, 2 maps to kComfortNoise. Any other value triggers
// an assert in debug builds and otherwise falls back to kSpeech.
AudioDecoder::SpeechType AudioDecoder::ConvertSpeechType(int16_t type) {
  // TODO(hlundin): Both iSAC and Opus return 0 for speech.
  if (type == 0 || type == 1) {
    return kSpeech;
  }
  if (type == 2) {
    return kComfortNoise;
  }
  assert(false);
  return kSpeech;
}
} // namespace webrtc

View File

@ -0,0 +1,347 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h"
#include <assert.h>
#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
#include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h"
#ifdef WEBRTC_CODEC_G722
#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h"
#endif
#ifdef WEBRTC_CODEC_ILBC
#include "webrtc/modules/audio_coding/codecs/ilbc/interface/ilbc.h"
#endif
#ifdef WEBRTC_CODEC_ISACFX
#include "webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h"
#endif
#ifdef WEBRTC_CODEC_ISAC
#include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h"
#endif
#ifdef WEBRTC_CODEC_OPUS
#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
#endif
#ifdef WEBRTC_CODEC_PCM16
#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
#endif
namespace webrtc {
// PCMu
// Decodes |encoded_len| bytes of PCMu (G.711) payload from |encoded| into
// |decoded|. The speech type reported by the codec is converted and written to
// |speech_type|. Returns the value returned by WebRtcG711_DecodeU().
int AudioDecoderPcmU::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  int16_t ret = WebRtcG711_DecodeU(
      state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
// Returns the packet duration computed as |encoded_len| divided by the number
// of channels. The payload contents in |encoded| are not inspected.
int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded,
                                     size_t encoded_len) {
  // One encoded byte per sample per channel.
  const size_t samples_per_channel = encoded_len / channels_;
  return static_cast<int>(samples_per_channel);
}
// PCMa
// Decodes |encoded_len| bytes of PCMa (G.711) payload from |encoded| into
// |decoded|. The speech type reported by the codec is converted and written to
// |speech_type|. Returns the value returned by WebRtcG711_DecodeA().
int AudioDecoderPcmA::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  int16_t ret = WebRtcG711_DecodeA(
      state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
// Returns the packet duration computed as |encoded_len| divided by the number
// of channels. The payload contents in |encoded| are not inspected.
int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded,
                                     size_t encoded_len) {
  // One encoded byte per sample per channel.
  const size_t samples_per_channel = encoded_len / channels_;
  return static_cast<int>(samples_per_channel);
}
// PCM16B
#ifdef WEBRTC_CODEC_PCM16
// Constructs a PCM16B decoder of the given |type|, which is passed on to the
// AudioDecoder base class. Only the four single-channel PCM16B types are
// accepted; any other value trips the assert in debug builds.
AudioDecoderPcm16B::AudioDecoderPcm16B(enum NetEqDecoder type)
: AudioDecoder(type) {
assert(type == kDecoderPCM16B ||
type == kDecoderPCM16Bwb ||
type == kDecoderPCM16Bswb32kHz ||
type == kDecoderPCM16Bswb48kHz);
}
// Decodes |encoded_len| bytes of PCM16B payload from |encoded| into |decoded|.
// The speech type reported by the codec is converted and written to
// |speech_type|. Returns the value returned by WebRtcPcm16b_DecodeW16().
int AudioDecoderPcm16B::Decode(const uint8_t* encoded, size_t encoded_len,
                               int16_t* decoded, SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  int16_t ret = WebRtcPcm16b_DecodeW16(
      state_, reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
// Returns the packet duration computed from |encoded_len| and the number of
// channels. The payload contents in |encoded| are not inspected.
int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded,
                                       size_t encoded_len) {
  // Two encoded bytes per sample per channel.
  const size_t samples_per_channel = encoded_len / (2 * channels_);
  return static_cast<int>(samples_per_channel);
}
// Constructs a multi-channel PCM16B decoder. The channel count is derived from
// |type|: 2 for the *_2ch types and 5 for kDecoderPCM16B_5ch. Any other type
// trips an assert in debug builds.
AudioDecoderPcm16BMultiCh::AudioDecoderPcm16BMultiCh(
    enum NetEqDecoder type)
    : AudioDecoderPcm16B(kDecoderPCM16B) {  // Placeholder; replaced below.
  // The base class is constructed with a single-channel type, so the actual
  // type is stored only after construction.
  codec_type_ = type;
  const bool two_channels = codec_type_ == kDecoderPCM16B_2ch ||
                            codec_type_ == kDecoderPCM16Bwb_2ch ||
                            codec_type_ == kDecoderPCM16Bswb32kHz_2ch ||
                            codec_type_ == kDecoderPCM16Bswb48kHz_2ch;
  if (two_channels) {
    channels_ = 2;
  } else if (codec_type_ == kDecoderPCM16B_5ch) {
    channels_ = 5;
  } else {
    assert(false);
  }
}
#endif
// iLBC
#ifdef WEBRTC_CODEC_ILBC
// Creates the underlying iLBC decoder instance and stores it in |state_|.
AudioDecoderIlbc::AudioDecoderIlbc() : AudioDecoder(kDecoderILBC) {
  iLBC_decinst_t** instance = reinterpret_cast<iLBC_decinst_t**>(&state_);
  WebRtcIlbcfix_DecoderCreate(instance);
}
// Releases the iLBC decoder instance held in |state_|.
AudioDecoderIlbc::~AudioDecoderIlbc() {
  iLBC_decinst_t* instance = static_cast<iLBC_decinst_t*>(state_);
  WebRtcIlbcfix_DecoderFree(instance);
}
// Decodes |encoded_len| bytes of iLBC payload from |encoded| into |decoded|.
// The speech type reported by the codec is converted and written to
// |speech_type|. Returns the value returned by WebRtcIlbcfix_Decode().
int AudioDecoderIlbc::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  int16_t ret = WebRtcIlbcfix_Decode(static_cast<iLBC_decinst_t*>(state_),
                                     reinterpret_cast<const int16_t*>(encoded),
                                     static_cast<int16_t>(encoded_len), decoded,
                                     &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
int AudioDecoderIlbc::DecodePlc(int num_frames, int16_t* decoded) {
return WebRtcIlbcfix_NetEqPlc(static_cast<iLBC_decinst_t*>(state_),
decoded, num_frames);
}
int AudioDecoderIlbc::Init() {
return WebRtcIlbcfix_Decoderinit30Ms(static_cast<iLBC_decinst_t*>(state_));
}
#endif
// iSAC float
#ifdef WEBRTC_CODEC_ISAC
// Creates the underlying iSAC instance in |state_| and sets its decoder sample
// rate to 16000.
AudioDecoderIsac::AudioDecoderIsac() : AudioDecoder(kDecoderISAC) {
  ISACStruct** instance = reinterpret_cast<ISACStruct**>(&state_);
  WebRtcIsac_Create(instance);
  WebRtcIsac_SetDecSampRate(static_cast<ISACStruct*>(state_), 16000);
}
// Releases the iSAC instance held in |state_|.
AudioDecoderIsac::~AudioDecoderIsac() {
  ISACStruct* instance = static_cast<ISACStruct*>(state_);
  WebRtcIsac_Free(instance);
}
// Decodes |encoded_len| bytes of iSAC payload from |encoded| into |decoded|.
// The speech type reported by the codec is converted and written to
// |speech_type|. Returns the value returned by WebRtcIsac_Decode().
int AudioDecoderIsac::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  int16_t ret = WebRtcIsac_Decode(static_cast<ISACStruct*>(state_),
                                  reinterpret_cast<const uint16_t*>(encoded),
                                  static_cast<int16_t>(encoded_len), decoded,
                                  &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
// Same contract as Decode(), but the payload is handed to
// WebRtcIsac_DecodeRcu() instead. Returns that function's result.
int AudioDecoderIsac::DecodeRedundant(const uint8_t* encoded,
                                      size_t encoded_len, int16_t* decoded,
                                      SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  int16_t ret = WebRtcIsac_DecodeRcu(static_cast<ISACStruct*>(state_),
                                     reinterpret_cast<const uint16_t*>(encoded),
                                     static_cast<int16_t>(encoded_len), decoded,
                                     &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
int AudioDecoderIsac::DecodePlc(int num_frames, int16_t* decoded) {
return WebRtcIsac_DecodePlc(static_cast<ISACStruct*>(state_),
decoded, num_frames);
}
int AudioDecoderIsac::Init() {
return WebRtcIsac_DecoderInit(static_cast<ISACStruct*>(state_));
}
int AudioDecoderIsac::IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp) {
return WebRtcIsac_UpdateBwEstimate(static_cast<ISACStruct*>(state_),
reinterpret_cast<const uint16_t*>(payload),
payload_len,
rtp_sequence_number,
rtp_timestamp,
arrival_timestamp);
}
int AudioDecoderIsac::ErrorCode() {
return WebRtcIsac_GetErrorCode(static_cast<ISACStruct*>(state_));
}
// iSAC SWB
// Builds on the AudioDecoderIsac constructor, then switches the codec type to
// kDecoderISACswb and the decoder sample rate to 32000.
AudioDecoderIsacSwb::AudioDecoderIsacSwb() : AudioDecoderIsac() {
  codec_type_ = kDecoderISACswb;
  ISACStruct* instance = static_cast<ISACStruct*>(state_);
  WebRtcIsac_SetDecSampRate(instance, 32000);
}
#endif
// iSAC fix
#ifdef WEBRTC_CODEC_ISACFX
// Creates the underlying fixed-point iSAC instance and stores it in |state_|.
AudioDecoderIsacFix::AudioDecoderIsacFix() : AudioDecoder(kDecoderISAC) {
  ISACFIX_MainStruct** instance =
      reinterpret_cast<ISACFIX_MainStruct**>(&state_);
  WebRtcIsacfix_Create(instance);
}
// Releases the fixed-point iSAC instance held in |state_|.
AudioDecoderIsacFix::~AudioDecoderIsacFix() {
  ISACFIX_MainStruct* instance = static_cast<ISACFIX_MainStruct*>(state_);
  WebRtcIsacfix_Free(instance);
}
// Decodes |encoded_len| bytes of iSAC payload from |encoded| into |decoded|
// using the fixed-point implementation. The speech type reported by the codec
// is converted and written to |speech_type|. Returns the value returned by
// WebRtcIsacfix_Decode().
int AudioDecoderIsacFix::Decode(const uint8_t* encoded, size_t encoded_len,
                                int16_t* decoded, SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  int16_t ret = WebRtcIsacfix_Decode(static_cast<ISACFIX_MainStruct*>(state_),
                                     reinterpret_cast<const uint16_t*>(encoded),
                                     static_cast<int16_t>(encoded_len), decoded,
                                     &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
int AudioDecoderIsacFix::Init() {
return WebRtcIsacfix_DecoderInit(static_cast<ISACFIX_MainStruct*>(state_));
}
int AudioDecoderIsacFix::IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp) {
return WebRtcIsacfix_UpdateBwEstimate(
static_cast<ISACFIX_MainStruct*>(state_),
reinterpret_cast<const uint16_t*>(payload), payload_len,
rtp_sequence_number, rtp_timestamp, arrival_timestamp);
}
int AudioDecoderIsacFix::ErrorCode() {
return WebRtcIsacfix_GetErrorCode(static_cast<ISACFIX_MainStruct*>(state_));
}
#endif
// G.722
#ifdef WEBRTC_CODEC_G722
// Creates the underlying G.722 decoder instance and stores it in |state_|.
AudioDecoderG722::AudioDecoderG722() : AudioDecoder(kDecoderG722) {
  G722DecInst** instance = reinterpret_cast<G722DecInst**>(&state_);
  WebRtcG722_CreateDecoder(instance);
}
// Releases the G.722 decoder instance held in |state_|.
AudioDecoderG722::~AudioDecoderG722() {
  G722DecInst* instance = static_cast<G722DecInst*>(state_);
  WebRtcG722_FreeDecoder(instance);
}
// Decodes |encoded_len| bytes of G.722 payload from |encoded| into |decoded|.
// The speech type reported by the codec is converted and written to
// |speech_type|. Returns the value returned by WebRtcG722_Decode().
int AudioDecoderG722::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  int16_t ret = WebRtcG722_Decode(
      static_cast<G722DecInst*>(state_),
      const_cast<int16_t*>(reinterpret_cast<const int16_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
int AudioDecoderG722::Init() {
return WebRtcG722_DecoderInit(static_cast<G722DecInst*>(state_));
}
// Returns the packet duration computed from |encoded_len| and the number of
// channels. The payload contents in |encoded| are not inspected.
int AudioDecoderG722::PacketDuration(const uint8_t* encoded,
                                     size_t encoded_len) {
  // 1/2 encoded byte per sample per channel, i.e., two samples per byte.
  const size_t samples_per_channel = 2 * encoded_len / channels_;
  return static_cast<int>(samples_per_channel);
}
#endif
// Opus
#ifdef WEBRTC_CODEC_OPUS
// Constructs an Opus decoder. The channel count is 2 for kDecoderOpus_2ch and
// 1 for any other |type|, and is passed on when creating the Opus instance
// that is stored in |state_|.
AudioDecoderOpus::AudioDecoderOpus(enum NetEqDecoder type)
    : AudioDecoder(type) {
  channels_ = (type == kDecoderOpus_2ch) ? 2 : 1;
  OpusDecInst** instance = reinterpret_cast<OpusDecInst**>(&state_);
  WebRtcOpus_DecoderCreate(instance, channels_);
}
// Releases the Opus decoder instance held in |state_|.
AudioDecoderOpus::~AudioDecoderOpus() {
  OpusDecInst* instance = static_cast<OpusDecInst*>(state_);
  WebRtcOpus_DecoderFree(instance);
}
// Decodes |encoded_len| bytes of Opus payload from |encoded| into |decoded|.
// The speech type reported by the codec is converted and written to
// |speech_type|. Returns the value returned by WebRtcOpus_Decode(). Only
// single-channel decoding is allowed here (asserted in debug builds).
int AudioDecoderOpus::Decode(const uint8_t* encoded, size_t encoded_len,
                             int16_t* decoded, SpeechType* speech_type) {
  // Default to 1 (speech) so that ConvertSpeechType() never reads an
  // indeterminate value if the codec returns without setting the type.
  int16_t temp_type = 1;
  assert(channels_ == 1);
  // TODO(hlundin): Allow 2 channels when WebRtcOpus_Decode provides both
  // channels interleaved.
  int16_t ret = WebRtcOpus_Decode(
      static_cast<OpusDecInst*>(state_),
      const_cast<int16_t*>(reinterpret_cast<const int16_t*>(encoded)),
      static_cast<int16_t>(encoded_len), decoded, &temp_type);
  *speech_type = ConvertSpeechType(temp_type);
  return ret;
}
int AudioDecoderOpus::Init() {
return WebRtcOpus_DecoderInit(static_cast<OpusDecInst*>(state_));
}
int AudioDecoderOpus::PacketDuration(const uint8_t* encoded,
size_t encoded_len) {
return WebRtcOpus_DurationEst(static_cast<OpusDecInst*>(state_),
encoded, encoded_len);
}
#endif
// Constructs a comfort noise "decoder" of the given |type| and creates the
// underlying CNG decoder instance in |state_|. |type| must be one of the four
// CNG decoder types; this and the successful creation of the instance are
// asserted in debug builds.
AudioDecoderCng::AudioDecoderCng(enum NetEqDecoder type)
    : AudioDecoder(type) {
  // Every alternative must compare against |type|; a bare enumerator would be
  // evaluated for truthiness instead of being matched against |type|.
  assert(type == kDecoderCNGnb || type == kDecoderCNGwb ||
         type == kDecoderCNGswb32kHz || type == kDecoderCNGswb48kHz);
  WebRtcCng_CreateDec(reinterpret_cast<CNG_dec_inst**>(&state_));
  assert(state_);
}
// Releases the CNG decoder instance, if one is held in |state_|.
AudioDecoderCng::~AudioDecoderCng() {
  if (!state_) {
    return;
  }
  WebRtcCng_FreeDec(static_cast<CNG_dec_inst*>(state_));
}
int AudioDecoderCng::Init() {
assert(state_);
return WebRtcCng_InitDec(static_cast<CNG_dec_inst*>(state_));
}
} // namespace webrtc

View File

@ -0,0 +1,223 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_DECODER_IMPL_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_DECODER_IMPL_H_
#include <assert.h>
#ifndef AUDIO_DECODER_UNITTEST
// If this is compiled as a part of the audio_decoder_unittest, the codec
// selection is made in the gypi file instead of in engine_configurations.h.
#include "webrtc/engine_configurations.h"
#endif
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// AudioDecoder implementation registered with the kDecoderPCMu type.
class AudioDecoderPcmU : public AudioDecoder {
public:
AudioDecoderPcmU() : AudioDecoder(kDecoderPCMu) {}
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
// Nothing to initialize; always returns 0.
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len);
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmU);
};
// AudioDecoder implementation registered with the kDecoderPCMa type.
class AudioDecoderPcmA : public AudioDecoder {
public:
AudioDecoderPcmA() : AudioDecoder(kDecoderPCMa) {}
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
// Nothing to initialize; always returns 0.
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len);
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmA);
};
// Multi-channel variant of AudioDecoderPcmU. The only difference is that the
// number of channels is set from the constructor argument.
class AudioDecoderPcmUMultiCh : public AudioDecoderPcmU {
public:
// |channels| must be greater than 0 (asserted in debug builds).
explicit AudioDecoderPcmUMultiCh(size_t channels) : AudioDecoderPcmU() {
assert(channels > 0);
channels_ = channels;
}
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmUMultiCh);
};
// Multi-channel variant of AudioDecoderPcmA. The only difference is that the
// number of channels is set from the constructor argument.
class AudioDecoderPcmAMultiCh : public AudioDecoderPcmA {
public:
// |channels| must be greater than 0 (asserted in debug builds).
explicit AudioDecoderPcmAMultiCh(size_t channels) : AudioDecoderPcmA() {
assert(channels > 0);
channels_ = channels;
}
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmAMultiCh);
};
#ifdef WEBRTC_CODEC_PCM16
// This class handles all four types (i.e., sample rates) of PCM16B codecs.
// The type is specified in the constructor parameter |type|.
class AudioDecoderPcm16B : public AudioDecoder {
public:
explicit AudioDecoderPcm16B(enum NetEqDecoder type);
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
// Nothing to initialize; always returns 0.
virtual int Init() { return 0; }
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len);
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcm16B);
};
// This class handles the multi-channel types of the PCM16B codecs, i.e., the
// four sample rates in two-channel configuration plus the five-channel type.
// The type is specified in the constructor parameter |type|, and the number
// of channels is derived from the type.
class AudioDecoderPcm16BMultiCh : public AudioDecoderPcm16B {
public:
explicit AudioDecoderPcm16BMultiCh(enum NetEqDecoder type);
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcm16BMultiCh);
};
#endif
#ifdef WEBRTC_CODEC_ILBC
// AudioDecoder implementation for iLBC. Only available when WEBRTC_CODEC_ILBC
// is defined. The codec instance is created in the constructor and freed in
// the destructor.
class AudioDecoderIlbc : public AudioDecoder {
public:
AudioDecoderIlbc();
virtual ~AudioDecoderIlbc();
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
// This decoder provides its own packet-loss concealment through DecodePlc().
virtual bool HasDecodePlc() const { return true; }
virtual int DecodePlc(int num_frames, int16_t* decoded);
virtual int Init();
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderIlbc);
};
#endif
#ifdef WEBRTC_CODEC_ISAC
// AudioDecoder implementation for iSAC (WebRtcIsac_* API). Only available
// when WEBRTC_CODEC_ISAC is defined. The codec instance is created in the
// constructor and freed in the destructor.
class AudioDecoderIsac : public AudioDecoder {
public:
AudioDecoderIsac();
virtual ~AudioDecoderIsac();
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
// Decodes the payload as redundant data.
virtual int DecodeRedundant(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
// This decoder provides its own packet-loss concealment through DecodePlc().
virtual bool HasDecodePlc() const { return true; }
virtual int DecodePlc(int num_frames, int16_t* decoded);
virtual int Init();
// Passes information about an incoming packet on to the codec.
virtual int IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp);
// Returns the error code reported by the codec instance.
virtual int ErrorCode();
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderIsac);
};
// Variant of AudioDecoderIsac registered with the kDecoderISACswb type. All
// decoding methods are inherited; only the constructor differs.
class AudioDecoderIsacSwb : public AudioDecoderIsac {
public:
AudioDecoderIsacSwb();
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderIsacSwb);
};
#endif
#ifdef WEBRTC_CODEC_ISACFX
// AudioDecoder implementation for iSAC using the WebRtcIsacfix_* API. Only
// available when WEBRTC_CODEC_ISACFX is defined. The codec instance is
// created in the constructor and freed in the destructor.
class AudioDecoderIsacFix : public AudioDecoder {
public:
AudioDecoderIsacFix();
virtual ~AudioDecoderIsacFix();
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
virtual int Init();
// Passes information about an incoming packet on to the codec.
virtual int IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp);
// Returns the error code reported by the codec instance.
virtual int ErrorCode();
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderIsacFix);
};
#endif
#ifdef WEBRTC_CODEC_G722
// AudioDecoder implementation for G.722. Only available when
// WEBRTC_CODEC_G722 is defined. The codec instance is created in the
// constructor and freed in the destructor.
class AudioDecoderG722 : public AudioDecoder {
public:
AudioDecoderG722();
virtual ~AudioDecoderG722();
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
// This decoder does not provide its own packet-loss concealment.
virtual bool HasDecodePlc() const { return false; }
virtual int Init();
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len);
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderG722);
};
#endif
#ifdef WEBRTC_CODEC_OPUS
// AudioDecoder implementation for Opus. Only available when
// WEBRTC_CODEC_OPUS is defined. The codec instance is created in the
// constructor and freed in the destructor.
class AudioDecoderOpus : public AudioDecoder {
public:
// |type| selects the decoder type, from which the channel count is derived.
explicit AudioDecoderOpus(enum NetEqDecoder type);
virtual ~AudioDecoderOpus();
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type);
virtual int Init();
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len);
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderOpus);
};
#endif
// AudioDecoderCng is a special type of AudioDecoder. It inherits from
// AudioDecoder just to fit in the DecoderDatabase. None of the class methods
// should be used, except constructor, destructor, and accessors.
// TODO(hlundin): Consider the possibility to create a super-class to
// AudioDecoder that is stored in DecoderDatabase. Then AudioDecoder and a
// specific CngDecoder class could both inherit from that class.
class AudioDecoderCng : public AudioDecoder {
public:
explicit AudioDecoderCng(enum NetEqDecoder type);
virtual ~AudioDecoderCng();
// Not meant to be called; always fails with -1.
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type) { return -1; }
virtual int Init();
// Not meant to be called; always fails with -1.
virtual int IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp) { return -1; }
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoderCng);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_DECODER_IMPL_H_

View File

@ -0,0 +1,647 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h"
#include <assert.h>
#include <stdlib.h>
#include <string>
#include "gtest/gtest.h"
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h"
#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h"
#include "webrtc/modules/audio_coding/codecs/ilbc/interface/ilbc.h"
#include "webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h"
#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
#include "webrtc/system_wrappers/interface/data_log.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
class AudioDecoderTest : public ::testing::Test {
protected:
// Sets all pointers to NULL and all sizes to 0, and points |input_file_| to
// the 32 kHz test file under the project root. Note that SetUp() requires
// |data_length_| to have been given a non-zero value before it runs.
AudioDecoderTest()
: input_fp_(NULL),
input_(NULL),
encoded_(NULL),
decoded_(NULL),
frame_size_(0),
data_length_(0),
encoded_bytes_(0),
decoder_(NULL) {
input_file_ = webrtc::test::ProjectRootPath() +
"resources/audio_coding/testfile32kHz.pcm";
}
// Nothing to do here; resources are released in TearDown().
virtual ~AudioDecoderTest() {}
// Allocates the input, encoded and decoded buffers based on |data_length_|,
// reads |data_length_| samples from the input file into |input_|, and sets up
// the data log. Each ASSERT_* below aborts the remainder of SetUp() on
// failure, in which case later members keep their constructor values.
virtual void SetUp() {
// Create arrays.
ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0";
input_ = new int16_t[data_length_];
// The encoded buffer holds twice as many bytes as there are input samples.
encoded_ = new uint8_t[data_length_ * 2];
decoded_ = new int16_t[data_length_];
// Open input file.
input_fp_ = fopen(input_file_.c_str(), "rb");
ASSERT_TRUE(input_fp_ != NULL) << "Failed to open file " << input_file_;
// Read data to |input_|.
ASSERT_EQ(data_length_,
fread(input_, sizeof(int16_t), data_length_, input_fp_)) <<
"Could not read enough data from file";
// Logging to view input and output in Matlab.
// Use 'gyp -Denable_data_logging=1' to enable logging.
DataLog::CreateLog();
DataLog::AddTable("CodecTest");
DataLog::AddColumn("CodecTest", "input", 1);
DataLog::AddColumn("CodecTest", "output", 1);
}
// Releases everything acquired by the test: the decoder, the input file, the
// sample/byte buffers and the data log. Safe to run after a SetUp() that
// failed part-way, since every member is checked or may legally be NULL.
virtual void TearDown() {
  delete decoder_;
  decoder_ = NULL;
  // Close input file. |input_fp_| is still NULL if SetUp() failed before or
  // while opening the file, and calling fclose() with NULL is undefined
  // behavior.
  if (input_fp_ != NULL) {
    fclose(input_fp_);
    input_fp_ = NULL;
  }
  // Delete arrays.
  delete [] input_;
  input_ = NULL;
  delete [] encoded_;
  encoded_ = NULL;
  delete [] decoded_;
  decoded_ = NULL;
  // Close log.
  DataLog::ReturnLog();
}
virtual void InitEncoder() { }
// This method must be implemented for all tests derived from this class.
virtual int EncodeFrame(const int16_t* input, size_t input_len,
uint8_t* output) = 0;
// Encodes and decodes audio. The absolute difference between the input and
// output is compared vs |tolerance|, and the mean-squared error is compared
// with |mse|. The encoded stream should contain |expected_bytes|.
void EncodeDecodeTest(size_t expected_bytes, int tolerance, double mse,
int delay = 0) {
ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0";
size_t processed_samples = 0u;
encoded_bytes_ = 0u;
InitEncoder();
EXPECT_EQ(0, decoder_->Init());
while (processed_samples + frame_size_ <= data_length_) {
size_t enc_len = EncodeFrame(&input_[processed_samples], frame_size_,
&encoded_[encoded_bytes_]);
AudioDecoder::SpeechType speech_type;
size_t dec_len = decoder_->Decode(&encoded_[encoded_bytes_], enc_len,
&decoded_[processed_samples],
&speech_type);
EXPECT_EQ(frame_size_, dec_len);
encoded_bytes_ += enc_len;
processed_samples += frame_size_;
}
EXPECT_EQ(expected_bytes, encoded_bytes_);
CompareInputOutput(processed_samples, tolerance, delay);
EXPECT_LE(MseInputOutput(processed_samples, delay), mse);
}
// The absolute difference between the input and output is compared vs
// |tolerance|. The parameter |delay| is used to correct for codec delays.
void CompareInputOutput(size_t num_samples, int tolerance, int delay) const {
assert(num_samples <= data_length_);
for (unsigned int n = 0; n < num_samples - delay; ++n) {
ASSERT_NEAR(input_[n], decoded_[n + delay], tolerance) <<
"Exit test on first diff; n = " << n;
DataLog::InsertCell("CodecTest", "input", input_[n]);
DataLog::InsertCell("CodecTest", "output", decoded_[n]);
DataLog::NextRow("CodecTest");
}
}
// Calculates mean-squared error between input and output. The parameter
// |delay| is used to correct for codec delays.
double MseInputOutput(size_t num_samples, int delay) const {
assert(num_samples <= data_length_);
if (num_samples == 0) return 0.0;
double squared_sum = 0.0;
for (unsigned int n = 0; n < num_samples - delay; ++n) {
squared_sum += (input_[n] - decoded_[n + delay]) *
(input_[n] - decoded_[n + delay]);
}
return squared_sum / (num_samples - delay);
}
// Encodes a payload and decodes it twice with decoder re-init before each
// decode. Verifies that the decoded result is the same.
void ReInitTest() {
uint8_t* encoded = encoded_;
uint8_t* encoded_copy = encoded_ + 2 * frame_size_;
int16_t* output1 = decoded_;
int16_t* output2 = decoded_ + frame_size_;
InitEncoder();
size_t enc_len = EncodeFrame(input_, frame_size_, encoded);
// Copy payload since iSAC fix destroys it during decode.
// Issue: http://code.google.com/p/webrtc/issues/detail?id=845.
// TODO(hlundin): Remove if the iSAC bug gets fixed.
memcpy(encoded_copy, encoded, enc_len);
AudioDecoder::SpeechType speech_type1, speech_type2;
EXPECT_EQ(0, decoder_->Init());
size_t dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1);
EXPECT_EQ(frame_size_, dec_len);
// Re-init decoder and decode again.
EXPECT_EQ(0, decoder_->Init());
dec_len = decoder_->Decode(encoded_copy, enc_len, output2, &speech_type2);
EXPECT_EQ(frame_size_, dec_len);
for (unsigned int n = 0; n < frame_size_; ++n) {
ASSERT_EQ(output1[n], output2[n]) << "Exit test on first diff; n = " << n;
}
EXPECT_EQ(speech_type1, speech_type2);
}
// Call DecodePlc and verify that the correct number of samples is produced.
void DecodePlcTest() {
InitEncoder();
size_t enc_len = EncodeFrame(input_, frame_size_, encoded_);
AudioDecoder::SpeechType speech_type;
EXPECT_EQ(0, decoder_->Init());
size_t dec_len =
decoder_->Decode(encoded_, enc_len, decoded_, &speech_type);
EXPECT_EQ(frame_size_, dec_len);
// Call DecodePlc and verify that we get one frame of data.
// (Overwrite the output from the above Decode call, but that does not
// matter.)
dec_len = decoder_->DecodePlc(1, decoded_);
EXPECT_EQ(frame_size_, dec_len);
}
std::string input_file_;
FILE* input_fp_;
int16_t* input_;
uint8_t* encoded_;
int16_t* decoded_;
size_t frame_size_;
size_t data_length_;
size_t encoded_bytes_;
AudioDecoder* decoder_;
};
// Fixture for AudioDecoderPcmU. Frames are 160 samples long and are encoded
// with WebRtcG711_EncodeU, which is expected to emit one byte per sample.
class AudioDecoderPcmUTest : public AudioDecoderTest {
 protected:
  AudioDecoderPcmUTest() : AudioDecoderTest() {
    frame_size_ = 160;
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderPcmU;
    assert(decoder_);
  }

  // Encodes one frame and returns the payload length in bytes.
  virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
                          uint8_t* output) {
    // The encoder API takes non-const int16_t pointers for both buffers.
    int16_t* const samples = const_cast<int16_t*>(input);
    int16_t* const payload = reinterpret_cast<int16_t*>(output);
    const int payload_bytes =
        WebRtcG711_EncodeU(NULL, samples, input_len_samples, payload);
    EXPECT_EQ(input_len_samples, static_cast<size_t>(payload_bytes));
    return payload_bytes;
  }
};
// Fixture for AudioDecoderPcmA. Frames are 160 samples long and are encoded
// with WebRtcG711_EncodeA, which is expected to emit one byte per sample.
class AudioDecoderPcmATest : public AudioDecoderTest {
 protected:
  AudioDecoderPcmATest() : AudioDecoderTest() {
    frame_size_ = 160;
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderPcmA;
    assert(decoder_);
  }

  // Encodes one frame and returns the payload length in bytes.
  virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
                          uint8_t* output) {
    // The encoder API takes non-const int16_t pointers for both buffers.
    int16_t* const samples = const_cast<int16_t*>(input);
    int16_t* const payload = reinterpret_cast<int16_t*>(output);
    const int payload_bytes =
        WebRtcG711_EncodeA(NULL, samples, input_len_samples, payload);
    EXPECT_EQ(input_len_samples, static_cast<size_t>(payload_bytes));
    return payload_bytes;
  }
};
// Fixture for AudioDecoderPcm16B (created as kDecoderPCM16B). Frames are 160
// samples long and are encoded with WebRtcPcm16b_EncodeW16, which is expected
// to emit two bytes per sample.
class AudioDecoderPcm16BTest : public AudioDecoderTest {
 protected:
  AudioDecoderPcm16BTest() : AudioDecoderTest() {
    frame_size_ = 160;
    data_length_ = 10 * frame_size_;
    decoder_ = new AudioDecoderPcm16B(kDecoderPCM16B);
    assert(decoder_);
  }

  // Encodes one frame and returns the payload length in bytes.
  virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
                          uint8_t* output) {
    // The encoder API takes non-const int16_t pointers for both buffers.
    int16_t* const samples = const_cast<int16_t*>(input);
    int16_t* const payload = reinterpret_cast<int16_t*>(output);
    const int payload_bytes =
        WebRtcPcm16b_EncodeW16(samples, input_len_samples, payload);
    EXPECT_EQ(2 * input_len_samples, static_cast<size_t>(payload_bytes));
    return payload_bytes;
  }
};
class AudioDecoderIlbcTest : public AudioDecoderTest {
protected:
AudioDecoderIlbcTest() : AudioDecoderTest() {
frame_size_ = 240;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderIlbc;
assert(decoder_);
assert(WebRtcIlbcfix_EncoderCreate(&encoder_) == 0);
}
~AudioDecoderIlbcTest() {
WebRtcIlbcfix_EncoderFree(encoder_);
}
virtual void InitEncoder() {
ASSERT_EQ(0, WebRtcIlbcfix_EncoderInit(encoder_, 30)); // 30 ms.
}
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
uint8_t* output) {
int enc_len_bytes =
WebRtcIlbcfix_Encode(encoder_, input, input_len_samples,
reinterpret_cast<int16_t*>(output));
EXPECT_EQ(50, enc_len_bytes);
return enc_len_bytes;
}
// Overload the default test since iLBC's function WebRtcIlbcfix_NetEqPlc does
// not return any data. It simply resets a few states and returns 0.
void DecodePlcTest() {
InitEncoder();
size_t enc_len = EncodeFrame(input_, frame_size_, encoded_);
AudioDecoder::SpeechType speech_type;
EXPECT_EQ(0, decoder_->Init());
size_t dec_len =
decoder_->Decode(encoded_, enc_len, decoded_, &speech_type);
EXPECT_EQ(frame_size_, dec_len);
// Simply call DecodePlc and verify that we get 0 as return value.
EXPECT_EQ(0, decoder_->DecodePlc(1, decoded_));
}
iLBC_encinst_t* encoder_;
};
class AudioDecoderIsacFloatTest : public AudioDecoderTest {
protected:
AudioDecoderIsacFloatTest() : AudioDecoderTest() {
input_size_ = 160;
frame_size_ = 480;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderIsac;
assert(decoder_);
assert(WebRtcIsac_Create(&encoder_) == 0);
assert(WebRtcIsac_SetEncSampRate(encoder_, 16000) == 0);
}
~AudioDecoderIsacFloatTest() {
WebRtcIsac_Free(encoder_);
}
virtual void InitEncoder() {
ASSERT_EQ(0, WebRtcIsac_EncoderInit(encoder_, 1)); // Fixed mode.
ASSERT_EQ(0, WebRtcIsac_Control(encoder_, 32000, 30)); // 32 kbps, 30 ms.
}
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
uint8_t* output) {
// Insert 3 * 10 ms. Expect non-zero output on third call.
EXPECT_EQ(0, WebRtcIsac_Encode(encoder_, input,
reinterpret_cast<int16_t*>(output)));
input += input_size_;
EXPECT_EQ(0, WebRtcIsac_Encode(encoder_, input,
reinterpret_cast<int16_t*>(output)));
input += input_size_;
int enc_len_bytes =
WebRtcIsac_Encode(encoder_, input, reinterpret_cast<int16_t*>(output));
EXPECT_GT(enc_len_bytes, 0);
return enc_len_bytes;
}
ISACStruct* encoder_;
int input_size_;
};
class AudioDecoderIsacSwbTest : public AudioDecoderTest {
protected:
AudioDecoderIsacSwbTest() : AudioDecoderTest() {
input_size_ = 320;
frame_size_ = 960;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderIsacSwb;
assert(decoder_);
assert(WebRtcIsac_Create(&encoder_) == 0);
assert(WebRtcIsac_SetEncSampRate(encoder_, 32000) == 0);
}
~AudioDecoderIsacSwbTest() {
WebRtcIsac_Free(encoder_);
}
virtual void InitEncoder() {
ASSERT_EQ(0, WebRtcIsac_EncoderInit(encoder_, 1)); // Fixed mode.
ASSERT_EQ(0, WebRtcIsac_Control(encoder_, 32000, 30)); // 32 kbps, 30 ms.
}
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
uint8_t* output) {
// Insert 3 * 10 ms. Expect non-zero output on third call.
EXPECT_EQ(0, WebRtcIsac_Encode(encoder_, input,
reinterpret_cast<int16_t*>(output)));
input += input_size_;
EXPECT_EQ(0, WebRtcIsac_Encode(encoder_, input,
reinterpret_cast<int16_t*>(output)));
input += input_size_;
int enc_len_bytes =
WebRtcIsac_Encode(encoder_, input, reinterpret_cast<int16_t*>(output));
EXPECT_GT(enc_len_bytes, 0);
return enc_len_bytes;
}
ISACStruct* encoder_;
int input_size_;
};
class AudioDecoderIsacFixTest : public AudioDecoderTest {
protected:
AudioDecoderIsacFixTest() : AudioDecoderTest() {
input_size_ = 160;
frame_size_ = 480;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderIsacFix;
assert(decoder_);
assert(WebRtcIsacfix_Create(&encoder_) == 0);
}
~AudioDecoderIsacFixTest() {
WebRtcIsacfix_Free(encoder_);
}
virtual void InitEncoder() {
ASSERT_EQ(0, WebRtcIsacfix_EncoderInit(encoder_, 1)); // Fixed mode.
ASSERT_EQ(0,
WebRtcIsacfix_Control(encoder_, 32000, 30)); // 32 kbps, 30 ms.
}
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
uint8_t* output) {
// Insert 3 * 10 ms. Expect non-zero output on third call.
EXPECT_EQ(0, WebRtcIsacfix_Encode(encoder_, input,
reinterpret_cast<int16_t*>(output)));
input += input_size_;
EXPECT_EQ(0, WebRtcIsacfix_Encode(encoder_, input,
reinterpret_cast<int16_t*>(output)));
input += input_size_;
int enc_len_bytes = WebRtcIsacfix_Encode(
encoder_, input, reinterpret_cast<int16_t*>(output));
EXPECT_GT(enc_len_bytes, 0);
return enc_len_bytes;
}
ISACFIX_MainStruct* encoder_;
int input_size_;
};
class AudioDecoderG722Test : public AudioDecoderTest {
protected:
AudioDecoderG722Test() : AudioDecoderTest() {
frame_size_ = 160;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderG722;
assert(decoder_);
assert(WebRtcG722_CreateEncoder(&encoder_) == 0);
}
~AudioDecoderG722Test() {
WebRtcG722_FreeEncoder(encoder_);
}
virtual void InitEncoder() {
ASSERT_EQ(0, WebRtcG722_EncoderInit(encoder_));
}
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
uint8_t* output) {
int enc_len_bytes =
WebRtcG722_Encode(encoder_, const_cast<int16_t*>(input),
input_len_samples,
reinterpret_cast<int16_t*>(output));
EXPECT_EQ(80, enc_len_bytes);
return enc_len_bytes;
}
G722EncInst* encoder_;
};
class AudioDecoderOpusTest : public AudioDecoderTest {
protected:
AudioDecoderOpusTest() : AudioDecoderTest() {
frame_size_ = 320;
data_length_ = 10 * frame_size_;
decoder_ = new AudioDecoderOpus(kDecoderOpus);
assert(decoder_);
assert(WebRtcOpus_EncoderCreate(&encoder_, 1) == 0);
}
~AudioDecoderOpusTest() {
WebRtcOpus_EncoderFree(encoder_);
}
virtual void InitEncoder() {}
virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
uint8_t* output) {
// Upsample from 32 to 48 kHz.
Resampler rs;
rs.Reset(32000, 48000, kResamplerSynchronous);
const int max_resamp_len_samples = input_len_samples * 3 / 2;
int16_t* resamp_input = new int16_t[max_resamp_len_samples];
int resamp_len_samples;
EXPECT_EQ(0, rs.Push(input, input_len_samples, resamp_input,
max_resamp_len_samples, resamp_len_samples));
EXPECT_EQ(max_resamp_len_samples, resamp_len_samples);
int enc_len_bytes =
WebRtcOpus_Encode(encoder_, resamp_input,
resamp_len_samples, data_length_, output);
EXPECT_GT(enc_len_bytes, 0);
delete [] resamp_input;
return enc_len_bytes;
}
OpusEncInst* encoder_;
};
// Round-trips the test signal through the PCMu fixture, then checks decoder
// re-initialization. This decoder is expected not to offer DecodePlc.
TEST_F(AudioDecoderPcmUTest, EncodeDecode) {
  const int kTolerance = 251;      // Largest accepted per-sample difference.
  const double kMaxMse = 1734.0;   // Largest accepted mean-squared error.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMu));
  EncodeDecodeTest(data_length_, kTolerance, kMaxMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// Round-trips the test signal through the PCMa fixture, then checks decoder
// re-initialization. This decoder is expected not to offer DecodePlc.
TEST_F(AudioDecoderPcmATest, EncodeDecode) {
  const int kTolerance = 308;      // Largest accepted per-sample difference.
  const double kMaxMse = 1931.0;   // Largest accepted mean-squared error.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMa));
  EncodeDecodeTest(data_length_, kTolerance, kMaxMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// Round-trips the test signal through the PCM16B fixture and requires a
// bit-exact result (zero tolerance, zero MSE). The payload is expected to be
// two bytes per sample. This decoder is expected not to offer DecodePlc.
TEST_F(AudioDecoderPcm16BTest, EncodeDecode) {
  const int kTolerance = 0;
  const double kMaxMse = 0.0;
  // All four PCM16B rates must be reported as supported.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B));
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bwb));
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb32kHz));
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb48kHz));
  EncodeDecodeTest(2 * data_length_, kTolerance, kMaxMse);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// Round-trips the test signal through the iLBC fixture (500 payload bytes
// expected in total), then checks re-initialization and the PLC call.
TEST_F(AudioDecoderIlbcTest, EncodeDecode) {
  const int kTolerance = 6808;     // Largest accepted per-sample difference.
  const double kMaxMse = 2.13e6;   // Largest accepted mean-squared error.
  const int kDelay = 80;           // Delay from input to output.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderILBC));
  EncodeDecodeTest(500, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_TRUE(decoder_->HasDecodePlc());
  DecodePlcTest();
}
// Round-trips the test signal through the floating-point iSAC fixture (883
// payload bytes expected in total), then checks re-initialization and the
// PLC call.
TEST_F(AudioDecoderIsacFloatTest, EncodeDecode) {
  const int kTolerance = 3399;      // Largest accepted per-sample difference.
  const double kMaxMse = 434951.0;  // Largest accepted mean-squared error.
  const int kDelay = 48;            // Delay from input to output.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISAC));
  EncodeDecodeTest(883, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_TRUE(decoder_->HasDecodePlc());
  DecodePlcTest();
}
// Round-trips the test signal through the iSAC super-wideband fixture (853
// payload bytes expected in total), then checks re-initialization and the
// PLC call.
TEST_F(AudioDecoderIsacSwbTest, EncodeDecode) {
  const int kTolerance = 19757;    // Largest accepted per-sample difference.
  const double kMaxMse = 8.18e6;   // Largest accepted mean-squared error.
  const int kDelay = 160;          // Delay from input to output.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISACswb));
  EncodeDecodeTest(853, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_TRUE(decoder_->HasDecodePlc());
  DecodePlcTest();
}
// Round-trips the test signal through the fixed-point iSAC fixture (735
// payload bytes expected in total), then checks re-initialization. This
// decoder is expected not to offer DecodePlc.
TEST_F(AudioDecoderIsacFixTest, EncodeDecode) {
  const int kTolerance = 11034;    // Largest accepted per-sample difference.
  const double kMaxMse = 3.46e6;   // Largest accepted mean-squared error.
  const int kDelay = 54;           // Delay from input to output.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISAC));
  EncodeDecodeTest(735, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// Round-trips the test signal through the G.722 fixture (payload expected to
// be half a byte per sample), then checks re-initialization. This decoder is
// expected not to offer DecodePlc.
TEST_F(AudioDecoderG722Test, EncodeDecode) {
  const int kTolerance = 6176;      // Largest accepted per-sample difference.
  const double kMaxMse = 238630.0;  // Largest accepted mean-squared error.
  const int kDelay = 22;            // Delay from input to output.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722));
  EncodeDecodeTest(data_length_ / 2, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// Round-trips the test signal through the Opus fixture (731 payload bytes
// expected in total), then checks re-initialization. This decoder is
// expected not to offer DecodePlc.
TEST_F(AudioDecoderOpusTest, EncodeDecode) {
  const int kTolerance = 6176;      // Largest accepted per-sample difference.
  const double kMaxMse = 238630.0;  // Largest accepted mean-squared error.
  const int kDelay = 22;            // Delay from input to output.
  EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus));
  EncodeDecodeTest(731, kTolerance, kMaxMse, kDelay);
  ReInitTest();
  EXPECT_FALSE(decoder_->HasDecodePlc());
}
// Checks the value returned by AudioDecoder::CodecSampleRateHz() for each
// decoder type listed below. The expected value is -1 for kDecoderG722_2ch,
// kDecoderRED, kDecoderAVT, kDecoderArbitrary and the two CELT types.
TEST(AudioDecoder, CodecSampleRateHz) {
// G.711 and iLBC.
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMu));
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMa));
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMu_2ch));
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMa_2ch));
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderILBC));
// iSAC.
EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderISAC));
EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderISACswb));
// PCM16B, single- and multi-channel.
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16B));
EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bwb));
EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb32kHz));
EXPECT_EQ(48000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb48kHz));
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16B_2ch));
EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bwb_2ch));
EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb32kHz_2ch));
EXPECT_EQ(48000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb48kHz_2ch));
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16B_5ch));
// G.722, RED and AVT.
EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderG722));
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderG722_2ch));
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderRED));
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderAVT));
// Comfort noise.
EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderCNGnb));
EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderCNGwb));
EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderCNGswb32kHz));
// TODO(tlegrand): Change 32000 to 48000 below once ACM has 48 kHz support.
EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderCNGswb48kHz));
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderArbitrary));
// NOTE(review): Opus is expected to report 32000 here, while the Opus test
// fixture in this file feeds the encoder 48 kHz audio -- confirm intent.
EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderOpus));
EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderOpus_2ch));
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderCELT_32));
EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderCELT_32_2ch));
}
// Checks the value returned by AudioDecoder::CodecSupported() for each
// decoder type listed below. Only kDecoderG722_2ch, kDecoderCELT_32 and
// kDecoderCELT_32_2ch are expected to be reported as unsupported.
TEST(AudioDecoder, CodecSupported) {
// G.711 and iLBC.
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMu));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMa));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMu_2ch));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMa_2ch));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderILBC));
// iSAC.
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISAC));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISACswb));
// PCM16B, single- and multi-channel.
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bwb));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb32kHz));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb48kHz));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B_2ch));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bwb_2ch));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb32kHz_2ch));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb48kHz_2ch));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B_5ch));
// G.722, RED and AVT.
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722));
EXPECT_FALSE(AudioDecoder::CodecSupported(kDecoderG722_2ch));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderRED));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderAVT));
// Comfort noise and the arbitrary decoder type.
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGnb));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGwb));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGswb32kHz));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGswb48kHz));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderArbitrary));
// Opus and CELT.
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus));
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus_2ch));
EXPECT_FALSE(AudioDecoder::CodecSupported(kDecoderCELT_32));
EXPECT_FALSE(AudioDecoder::CodecSupported(kDecoderCELT_32_2ch));
}
} // namespace webrtc

View File

@ -0,0 +1,228 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include <assert.h>
#include <algorithm>
#include "webrtc/typedefs.h"
namespace webrtc {
// Builds a multi-vector with |N| default-constructed channels. |N| must be
// positive; when assert() is compiled out, a zero |N| yields one channel.
template<typename T>
AudioMultiVector<T>::AudioMultiVector(size_t N) {
  assert(N > 0);
  const size_t num_channels = (N < 1) ? 1 : N;
  while (channels_.size() < num_channels) {
    channels_.push_back(new AudioVector<T>);
  }
}
// Builds a multi-vector with |N| channels, each constructed with
// |initial_size|. |N| must be positive; when assert() is compiled out, a zero
// |N| yields one channel.
template<typename T>
AudioMultiVector<T>::AudioMultiVector(size_t N, size_t initial_size) {
  assert(N > 0);
  const size_t num_channels = (N < 1) ? 1 : N;
  while (channels_.size() < num_channels) {
    channels_.push_back(new AudioVector<T>(initial_size));
  }
}
// Deletes every channel object held in |channels_|.
template<typename T>
AudioMultiVector<T>::~AudioMultiVector() {
  for (size_t i = 0; i < channels_.size(); ++i) {
    delete channels_[i];
  }
}
// Clears every channel.
template<typename T>
void AudioMultiVector<T>::Clear() {
  size_t channel = 0;
  while (channel < Channels()) {
    channels_[channel]->Clear();
    ++channel;
  }
}
// Clears every channel and then extends it by |length| elements.
template<typename T>
void AudioMultiVector<T>::Zeros(size_t length) {
  for (size_t channel = 0; channel < Channels(); ++channel) {
    AudioVector<T>* const vec = channels_[channel];
    vec->Clear();
    vec->Extend(length);
  }
}
// For every channel of this object, calls AudioVector::CopyFrom() with the
// corresponding channel of |copy_to| as argument. Does nothing if |copy_to|
// is NULL. Both objects must have the same number of channels; if they do
// not, the call asserts in debug builds and is a no-op otherwise, which is
// how the other two-vector methods of this class treat a mismatch.
template<typename T>
void AudioMultiVector<T>::CopyFrom(AudioMultiVector<T>* copy_to) const {
  if (!copy_to) {
    return;
  }
  assert(Channels() == copy_to->Channels());
  // Without this check, a |copy_to| with fewer channels would be indexed out
  // of range below.
  if (Channels() != copy_to->Channels()) {
    return;
  }
  for (size_t i = 0; i < Channels(); ++i) {
    channels_[i]->CopyFrom(&(*copy_to)[i]);
  }
}
// De-interleaves |length| elements from |append_this| and appends them to the
// channels: element k of channel c is read from |append_this[c + k * C]|,
// where C is the number of channels. |length| must be a multiple of C.
template<typename T>
void AudioMultiVector<T>::PushBackInterleaved(const T* append_this,
                                              size_t length) {
  assert(length % Channels() == 0);
  const size_t samples_per_channel = length / Channels();
  // Scratch buffer holding one de-interleaved channel at a time.
  T* scratch = new T[samples_per_channel];
  for (size_t channel = 0; channel < Channels(); ++channel) {
    for (size_t k = 0; k < samples_per_channel; ++k) {
      scratch[k] = append_this[channel + k * Channels()];
    }
    channels_[channel]->PushBack(scratch, samples_per_channel);
  }
  delete [] scratch;
}
// Appends each channel of |append_this| to the matching channel of this
// object. The channel counts must agree; on mismatch the call asserts in
// debug builds and otherwise does nothing.
template<typename T>
void AudioMultiVector<T>::PushBack(const AudioMultiVector<T>& append_this) {
  assert(Channels() == append_this.Channels());
  if (Channels() != append_this.Channels()) {
    return;
  }
  for (size_t channel = 0; channel < Channels(); ++channel) {
    channels_[channel]->PushBack(append_this[channel]);
  }
}
// Appends, channel by channel, the elements of |append_this| from position
// |index| up to its end. |index| must be smaller than the size of
// |append_this|; when assert() is compiled out, a too-large |index| is
// clamped to the last element. The channel counts must agree; on mismatch
// the call asserts in debug builds and otherwise does nothing. An empty
// |append_this| appends nothing.
template<typename T>
void AudioMultiVector<T>::PushBackFromIndex(
    const AudioMultiVector<T>& append_this,
    size_t index) {
  assert(index < append_this.Size());
  assert(Channels() == append_this.Channels());
  // With an empty source, |Size() - 1| below would wrap around and the
  // computed length would be bogus, so there is nothing sensible to append.
  if (append_this.Empty() || Channels() != append_this.Channels()) {
    return;
  }
  index = std::min(index, append_this.Size() - 1);
  const size_t length = append_this.Size() - index;
  for (size_t i = 0; i < Channels(); ++i) {
    channels_[i]->PushBack(&append_this[i][index], length);
  }
}
// Forwards PopFront(|length|) to every channel.
template<typename T>
void AudioMultiVector<T>::PopFront(size_t length) {
  size_t channel = 0;
  while (channel < Channels()) {
    channels_[channel]->PopFront(length);
    ++channel;
  }
}
// Forwards PopBack(|length|) to every channel.
template<typename T>
void AudioMultiVector<T>::PopBack(size_t length) {
  size_t channel = 0;
  while (channel < Channels()) {
    channels_[channel]->PopBack(length);
    ++channel;
  }
}
// Reads interleaved data starting at the first element; see
// ReadInterleavedFromIndex() for the details.
template<typename T>
size_t AudioMultiVector<T>::ReadInterleaved(size_t length,
                                            T* destination) const {
  const size_t kStartIndex = 0;
  return ReadInterleavedFromIndex(kStartIndex, length, destination);
}
// Writes up to |length| elements per channel, starting at |start_index|, to
// |destination| in channel-interleaved order. |length| is reduced if fewer
// elements are available after |start_index|. Returns the total number of
// elements written, or 0 if |destination| is NULL. |start_index| must not
// exceed Size(); when assert() is compiled out it is clamped to Size().
template<typename T>
size_t AudioMultiVector<T>::ReadInterleavedFromIndex(size_t start_index,
                                                     size_t length,
                                                     T* destination) const {
  if (!destination) {
    return 0;
  }
  size_t index = 0;  // Number of elements written to |destination| so far.
  assert(start_index <= Size());
  start_index = std::min(start_index, Size());
  // |start_index| <= Size() holds here, so this subtraction cannot wrap.
  // Comparing |length| against it avoids the overflow that the sum
  // |length + start_index| is exposed to for very large |length| values.
  const size_t available = Size() - start_index;
  if (length > available) {
    length = available;
  }
  for (size_t i = 0; i < length; ++i) {
    for (size_t channel = 0; channel < Channels(); ++channel) {
      destination[index] = (*this)[channel][i + start_index];
      ++index;
    }
  }
  return index;
}
// Reads the last |length| elements of every channel (or all of them, if
// fewer are stored) and writes them interleaved to |destination|.
template<typename T>
size_t AudioMultiVector<T>::ReadInterleavedFromEnd(size_t length,
                                                   T* destination) const {
  // Cannot read more than Size() elements.
  const size_t capped_length = std::min(length, Size());
  const size_t first_index = Size() - capped_length;
  return ReadInterleavedFromIndex(first_index, capped_length, destination);
}
// Forwards OverwriteAt() to every channel, using the start of the matching
// channel of |insert_this| as source. |length| must not exceed the size of
// |insert_this| and is capped to it when assert() is compiled out. The
// channel counts must agree; on mismatch the call asserts in debug builds
// and otherwise does nothing.
template<typename T>
void AudioMultiVector<T>::OverwriteAt(const AudioMultiVector<T>& insert_this,
                                      size_t length,
                                      size_t position) {
  assert(Channels() == insert_this.Channels());
  assert(length <= insert_this.Size());
  // Cap |length| at the length of |insert_this|.
  length = std::min(length, insert_this.Size());
  if (Channels() != insert_this.Channels()) {
    return;
  }
  for (size_t channel = 0; channel < Channels(); ++channel) {
    const T* const source = &insert_this[channel][0];
    channels_[channel]->OverwriteAt(source, length, position);
  }
}
// Forwards CrossFade() to every channel, using the matching channel of
// |append_this|. The channel counts must agree; on mismatch the call asserts
// in debug builds and otherwise does nothing.
template<typename T>
void AudioMultiVector<T>::CrossFade(const AudioMultiVector<T>& append_this,
                                    size_t fade_length) {
  assert(Channels() == append_this.Channels());
  if (Channels() != append_this.Channels()) {
    return;
  }
  for (size_t channel = 0; channel < Channels(); ++channel) {
    channels_[channel]->CrossFade(append_this[channel], fade_length);
  }
}
// Returns the size of the first channel.
template<typename T>
size_t AudioMultiVector<T>::Size() const {
  const AudioVector<T>* const first_channel = channels_[0];
  assert(first_channel);
  return first_channel->Size();
}
// Extends every channel by the shortfall if Size() is smaller than
// |required_size|; otherwise leaves the object untouched.
template<typename T>
void AudioMultiVector<T>::AssertSize(size_t required_size) {
  if (Size() >= required_size) {
    return;
  }
  const size_t shortfall = required_size - Size();
  for (size_t i = 0; i < Channels(); ++i) {
    channels_[i]->Extend(shortfall);
  }
}
// Returns whether the first channel is empty.
template<typename T>
bool AudioMultiVector<T>::Empty() const {
  const AudioVector<T>* const first_channel = channels_[0];
  assert(first_channel);
  return first_channel->Empty();
}
// Read-only access to channel |index|. No bounds check is performed.
template<typename T>
const AudioVector<T>& AudioMultiVector<T>::operator[](size_t index) const {
  const AudioVector<T>* const channel = channels_[index];
  return *channel;
}
// Mutable access to channel |index|. No bounds check is performed.
template<typename T>
AudioVector<T>& AudioMultiVector<T>::operator[](size_t index) {
  AudioVector<T>* const channel = channels_[index];
  return *channel;
}
// Explicitly instantiate the template for the element types below. The
// member functions are defined in this .cc file, so presumably these are the
// only element types other translation units can link against -- confirm
// before relying on a new element type.
template class AudioMultiVector<int16_t>;
template class AudioMultiVector<int32_t>;
template class AudioMultiVector<double>;
} // namespace webrtc

View File

@ -0,0 +1,132 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_MULTI_VECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_MULTI_VECTOR_H_
#include <cstring> // Access to size_t.
#include <vector>
#include "webrtc/modules/audio_coding/neteq4/audio_vector.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
namespace webrtc {
// A container holding one AudioVector<T> per audio channel. Most operations
// are forwarded to every channel.
template <typename T>
class AudioMultiVector {
public:
// Creates an empty AudioMultiVector with |N| audio channels. |N| must be
// larger than 0.
explicit AudioMultiVector(size_t N);
// Creates an AudioMultiVector with |N| audio channels, each channel having
// an initial size. |N| must be larger than 0.
AudioMultiVector(size_t N, size_t initial_size);
// Deletes all channel objects.
virtual ~AudioMultiVector();
// Deletes all values and makes the vector empty.
virtual void Clear();
// Clears the vector and inserts |length| zeros into each channel.
virtual void Zeros(size_t length);
// Copies all values from this vector to |copy_to|. Any contents in |copy_to|
// are deleted. After the operation is done, |copy_to| will be an exact
// replica of this object. The source and the destination must have the same
// number of channels. Note that, despite the method name, this object is the
// source and |copy_to| is the destination.
virtual void CopyFrom(AudioMultiVector<T>* copy_to) const;
// Appends the contents of array |append_this| to the end of this
// object. The array is assumed to be channel-interleaved. |length| must be
// a multiple of this object's number of channels.
// The length of this object is increased by |length| divided by the
// number of channels.
virtual void PushBackInterleaved(const T* append_this, size_t length);
// Appends the contents of AudioMultiVector |append_this| to this object. The
// length of this object is increased by the length of |append_this|.
virtual void PushBack(const AudioMultiVector<T>& append_this);
// Appends the contents of AudioMultiVector |append_this| to this object,
// taken from |index| up until the end of |append_this|. The length of this
// object is increased.
virtual void PushBackFromIndex(const AudioMultiVector<T>& append_this,
size_t index);
// Removes |length| elements from the beginning of this object, from each
// channel.
virtual void PopFront(size_t length);
// Removes |length| elements from the end of this object, from each
// channel.
virtual void PopBack(size_t length);
// Reads |length| samples from each channel and writes them interleaved to
// |destination|. The total number of elements written to |destination| is
// returned, i.e., |length| * number of channels. If the AudioMultiVector
// contains fewer than |length| samples per channel, this is reflected in the
// return value.
virtual size_t ReadInterleaved(size_t length, T* destination) const;
// Like ReadInterleaved() above, but reads from |start_index| instead of from
// the beginning.
virtual size_t ReadInterleavedFromIndex(size_t start_index,
size_t length,
T* destination) const;
// Like ReadInterleaved() above, but reads from the end instead of from
// the beginning.
virtual size_t ReadInterleavedFromEnd(size_t length,
T* destination) const;
// Overwrites each channel in this AudioMultiVector with values taken from
// |insert_this|. The values are taken from the beginning of |insert_this| and
// are inserted starting at |position|. |length| values are written into each
// channel. If |length| and |position| are selected such that the new data
// extends beyond the end of the current AudioVector, the vector is extended
// to accommodate the new data. |length| is limited to the length of
// |insert_this|.
virtual void OverwriteAt(const AudioMultiVector<T>& insert_this,
size_t length,
size_t position);
// Appends |append_this| to the end of the current vector. Lets the two
// vectors overlap by |fade_length| samples (per channel), and cross-fade
// linearly in this region.
virtual void CrossFade(const AudioMultiVector<T>& append_this,
size_t fade_length);
// Returns the number of channels.
virtual size_t Channels() const { return channels_.size(); }
// Returns the number of elements per channel in this AudioMultiVector.
virtual size_t Size() const;
// Verifies that each channel can hold at least |required_size| elements. If
// not, extends accordingly.
virtual void AssertSize(size_t required_size);
// Returns true if this AudioMultiVector holds no elements.
virtual bool Empty() const;
// Accesses and modifies a channel (i.e., an AudioVector object) of this
// AudioMultiVector.
const AudioVector<T>& operator[](size_t index) const;
AudioVector<T>& operator[](size_t index);
protected:
// One AudioVector per channel.
std::vector<AudioVector<T>*> channels_;
private:
DISALLOW_COPY_AND_ASSIGN(AudioMultiVector);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_MULTI_VECTOR_H_

View File

@ -0,0 +1,304 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include <assert.h>
#include <stdlib.h>
#include <string>
#include "gtest/gtest.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This is a value-parameterized test. The test cases are instantiated with
// different values for the test parameter, which is used to determine the
// number of channels in the AudioMultiVector. Note that it is not possible
// to combine typed testing with value-parameterized testing, and since the
// tests for AudioVector already cover a number of different type parameters,
// this test focuses on testing different numbers of channels while keeping
// the value type constant.
// Fixture parameterized on the number of channels. It provides a
// single-channel ramp in |array_| and an interleaved array in which channel
// c (counting from 1) holds the values c * 100 + i for i = 0 .. kLength - 1.
class AudioMultiVectorTest : public ::testing::TestWithParam<size_t> {
 protected:
  typedef int16_t T;  // Use this value type for all tests.

  AudioMultiVectorTest()
      : num_channels_(GetParam()),  // Get the test parameter.
        interleaved_length_(num_channels_ * kLength),
        array_interleaved_(new T[num_channels_ * kLength]) {
  }

  ~AudioMultiVectorTest() {
    delete [] array_interleaved_;
  }

  virtual void SetUp() {
    // Populate test arrays.
    for (size_t i = 0; i < kLength; ++i) {
      array_[i] = static_cast<T>(i);
      // Write 100, 101, 102, ... for first channel.
      // Write 200, 201, 202, ... for second channel.
      // And so on.
      for (size_t channel = 0; channel < num_channels_; ++channel) {
        array_interleaved_[i * num_channels_ + channel] =
            (channel + 1) * 100 + i;
      }
    }
  }

  enum {
    kLength = 10  // Number of elements per channel.
  };

  const size_t num_channels_;   // The test parameter.
  size_t interleaved_length_;   // Total length of |array_interleaved_|.
  T array_[kLength];            // Holds 0, 1, ..., kLength - 1.
  T* array_interleaved_;        // Channel-interleaved test data.
};
// Constructs AudioMultiVector objects with and without an initial length and
// checks emptiness, channel count and size. Destruction happens implicitly
// when the objects go out of scope.
TEST_P(AudioMultiVectorTest, CreateAndDestroy) {
  AudioMultiVector<T> empty_vec(num_channels_);
  EXPECT_TRUE(empty_vec.Empty());
  EXPECT_EQ(num_channels_, empty_vec.Channels());
  EXPECT_EQ(0u, empty_vec.Size());

  size_t requested_size = 17;
  AudioMultiVector<T> sized_vec(num_channels_, requested_size);
  EXPECT_FALSE(sized_vec.Empty());
  EXPECT_EQ(num_channels_, sized_vec.Channels());
  EXPECT_EQ(requested_size, sized_vec.Size());
}
// Writes every sample through the subscript operators and reads it back
// through a const AudioVector reference.
TEST_P(AudioMultiVectorTest, SubscriptOperator) {
  AudioMultiVector<T> vec(num_channels_, kLength);
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    for (size_t i = 0; i < kLength; ++i) {
      const T written = static_cast<T>(i);
      vec[channel][i] = written;
      // Bind to a const reference so that the const element accessor is used.
      const AudioVector<T>& const_channel = vec[channel];
      EXPECT_EQ(written, const_channel[i]);
    }
  }
}
// Exercises PushBackInterleaved() and CopyFrom(), and uses Clear() to verify
// that copying an empty vector empties the destination.
TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) {
  AudioMultiVector<T> source(num_channels_);
  source.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector<T> replica(num_channels_);
  // Note the direction: CopyFrom() copies from the object it is called on
  // into its argument.
  source.CopyFrom(&replica);
  ASSERT_EQ(num_channels_, source.Channels());
  ASSERT_EQ(kLength, source.Size());
  ASSERT_EQ(num_channels_, replica.Channels());
  ASSERT_EQ(kLength, replica.Size());
  for (size_t channel = 0; channel < source.Channels(); ++channel) {
    for (size_t i = 0; i < kLength; ++i) {
      const T expected = static_cast<T>((channel + 1) * 100 + i);
      EXPECT_EQ(expected, source[channel][i]);
      EXPECT_EQ(source[channel][i], replica[channel][i]);
    }
  }

  // Empty the source and check that it reports as empty.
  source.Clear();
  EXPECT_TRUE(source.Empty());

  // Copying the now-empty source must leave the destination empty as well.
  source.CopyFrom(&replica);
  EXPECT_TRUE(replica.Empty());
}
// Passes a NULL destination to CopyFrom(). The call is expected to be a no-op;
// the test only verifies that it does not crash.
TEST_P(AudioMultiVectorTest, CopyToNull) {
  AudioMultiVector<T> source(num_channels_);
  source.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector<T>* null_destination = NULL;
  source.CopyFrom(null_destination);
}
// Appends one AudioMultiVector to another with PushBack() and checks that the
// result is one continuous ramp per channel.
TEST_P(AudioMultiVectorTest, PushBackVector) {
  AudioMultiVector<T> head(num_channels_, kLength);
  AudioMultiVector<T> tail(num_channels_, kLength);
  // Per channel, |head| holds [0, 1, ..., kLength - 1] + 100 * channel and
  // |tail| continues with [kLength, ..., 2 * kLength - 1] + 100 * channel.
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    const size_t channel_offset = 100 * channel;
    for (size_t i = 0; i < kLength; ++i) {
      head[channel][i] = static_cast<T>(i + channel_offset);
      tail[channel][i] = static_cast<T>(i + channel_offset + kLength);
    }
  }

  head.PushBack(tail);

  ASSERT_EQ(2u * kLength, head.Size());
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    const size_t channel_offset = 100 * channel;
    for (size_t i = 0; i < 2 * kLength; ++i) {
      EXPECT_EQ(static_cast<T>(i + channel_offset), head[channel][i]);
    }
  }
}
// Appends the last two samples per channel of one vector to an empty vector
// using PushBackFromIndex().
TEST_P(AudioMultiVectorTest, PushBackFromIndex) {
  AudioMultiVector<T> source(num_channels_);
  source.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector<T> destination(num_channels_);

  // Start reading |source| at its second last element.
  destination.PushBackFromIndex(source, kLength - 2);

  ASSERT_EQ(2u, destination.Size());
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    for (size_t i = 0; i < 2; ++i) {
      // Position of the corresponding sample in the interleaved test data.
      const size_t interleaved_index =
          channel + num_channels_ * (kLength - 2 + i);
      EXPECT_EQ(array_interleaved_[interleaved_index],
                destination[channel][i]);
    }
  }
}
// Fills the vector with non-zero data, then calls Zeros() and verifies that
// the vector has the requested length and contains only zeros.
TEST_P(AudioMultiVectorTest, Zeros) {
  AudioMultiVector<T> vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  vec.Zeros(2 * kLength);
  ASSERT_EQ(num_channels_, vec.Channels());
  ASSERT_EQ(2u * kLength, vec.Size());
  for (size_t channel = 0; channel < num_channels_; ++channel) {
    for (size_t sample = 0; sample < 2 * kLength; ++sample) {
      EXPECT_EQ(0, vec[channel][sample]);
    }
  }
}
// Tests the ReadInterleaved method, both for a partial read and for a request
// that exceeds the number of samples stored in the vector.
TEST_P(AudioMultiVectorTest, ReadInterleaved) {
  AudioMultiVector<T> vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  // Value-initialize the buffer so that a short read is detected as a
  // mismatch instead of comparing indeterminate values.
  T* output = new T[interleaved_length_]();

  // Read 5 samples per channel. The return value counts samples over all
  // channels.
  const size_t read_samples = 5;
  const size_t expected_partial = num_channels_ * read_samples;
  EXPECT_EQ(expected_partial, vec.ReadInterleaved(read_samples, output));
  // Verify every sample that was reported as read, for all channels.
  for (size_t i = 0; i < expected_partial; ++i) {
    EXPECT_EQ(array_interleaved_[i], output[i]);
  }

  // Wipe the buffer so that the next check cannot pass on stale data (all
  // test samples are >= 100, so zero never matches).
  for (size_t i = 0; i < interleaved_length_; ++i) {
    output[i] = 0;
  }

  // Read too many samples. Expect to get all samples from the vector.
  EXPECT_EQ(interleaved_length_,
            vec.ReadInterleaved(kLength + 1, output));
  for (size_t i = 0; i < interleaved_length_; ++i) {
    EXPECT_EQ(array_interleaved_[i], output[i]);
  }
  delete [] output;
}
// Calls ReadInterleaved() with a NULL destination; the call is expected to
// report that zero samples were read.
TEST_P(AudioMultiVectorTest, ReadInterleavedToNull) {
  AudioMultiVector<T> vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  T* null_output = NULL;
  // Request 5 samples.
  size_t requested_samples = 5;
  EXPECT_EQ(0u, vec.ReadInterleaved(requested_samples, null_output));
}
// Removes samples from the front with PopFront(), first a single sample and
// then more samples than the vector holds.
TEST_P(AudioMultiVectorTest, PopFront) {
  AudioMultiVector<T> vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  vec.PopFront(1);  // Drops the first sample of every channel.
  ASSERT_EQ(kLength - 1u, vec.Size());
  // Sample |i| of the shortened vector corresponds to frame |i| + 1 of the
  // interleaved test data.
  for (size_t i = 0; i < kLength - 1; ++i) {
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      const size_t interleaved_index = (i + 1) * num_channels_ + channel;
      EXPECT_EQ(array_interleaved_[interleaved_index], vec[channel][i]);
    }
  }
  vec.PopFront(kLength);  // Asks for more samples than are left.
  EXPECT_EQ(0u, vec.Size());
}
// Removes samples from the back with PopBack(), first a single sample and
// then more samples than the vector holds.
TEST_P(AudioMultiVectorTest, PopBack) {
  AudioMultiVector<T> vec(num_channels_);
  vec.PushBackInterleaved(array_interleaved_, interleaved_length_);
  vec.PopBack(1);  // Drops the last sample of every channel.
  ASSERT_EQ(kLength - 1u, vec.Size());
  // The remaining samples line up with the start of the interleaved test
  // data, frame by frame.
  for (size_t i = 0; i < kLength - 1; ++i) {
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      const size_t interleaved_index = i * num_channels_ + channel;
      EXPECT_EQ(array_interleaved_[interleaved_index], vec[channel][i]);
    }
  }
  vec.PopBack(kLength);  // Asks for more samples than are left.
  EXPECT_EQ(0u, vec.Size());
}
// Checks that AssertSize() leaves the vector alone when it is already long
// enough, and grows every channel when it is too short.
TEST_P(AudioMultiVectorTest, AssertSize) {
  AudioMultiVector<T> vec(num_channels_, kLength);
  EXPECT_EQ(kLength, vec.Size());

  // Requests that do not exceed the current size must not change anything.
  vec.AssertSize(0);
  vec.AssertSize(kLength - 1);
  EXPECT_EQ(kLength, vec.Size());

  // Requesting one more element than currently held must grow the vector.
  vec.AssertSize(kLength + 1);
  EXPECT_EQ(kLength + 1u, vec.Size());
  // Each individual channel must have grown as well.
  for (size_t ch = 0; ch < vec.Channels(); ++ch) {
    EXPECT_EQ(kLength + 1u, vec[ch].Size());
  }
}
// Test the OverwriteAt method with another AudioMultiVector as input argument.
TEST_P(AudioMultiVectorTest, OverwriteAt) {
  AudioMultiVector<T> vec1(num_channels_);
  vec1.PushBackInterleaved(array_interleaved_, interleaved_length_);
  AudioMultiVector<T> vec2(num_channels_);
  vec2.Zeros(3);  // 3 zeros in each channel.
  // Overwrite 3 samples of vec1, starting at position 5, with the contents of
  // vec2.
  vec1.OverwriteAt(vec2, 3, 5);
  // Verify result.
  ASSERT_EQ(kLength, vec1.Size());  // Length remains the same.
  T* ptr = array_interleaved_;
  // Check every sample, including the last one, so that the untouched tail
  // after the overwritten region is verified as well.
  for (size_t i = 0; i < kLength; ++i) {
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      if (i >= 5 && i <= 7) {
        // Elements 5, 6, 7 should have been replaced with zeros.
        EXPECT_EQ(0, vec1[channel][i]);
      } else {
        EXPECT_EQ(*ptr, vec1[channel][i]);
      }
      ++ptr;
    }
  }
}
// Run every AudioMultiVectorTest with 1, 2 and 5 channels. The values are
// cast to size_t to match the fixture's TestWithParam<size_t> parameter type.
INSTANTIATE_TEST_CASE_P(TestNumChannels,
AudioMultiVectorTest,
::testing::Values(static_cast<size_t>(1),
static_cast<size_t>(2),
static_cast<size_t>(5)));
} // namespace webrtc

View File

@ -0,0 +1,202 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/audio_vector.h"
#include <assert.h>
#include <algorithm>
#include "webrtc/typedefs.h"
namespace webrtc {
// Removes all elements, leaving the vector with size zero.
template<typename T>
void AudioVector<T>::Clear() {
  vector_.erase(vector_.begin(), vector_.end());
}
// Copies the contents of this vector into |copy_to|, replacing whatever
// |copy_to| held before. Note the direction: data flows from this object to
// the argument. A NULL |copy_to| is ignored.
template<typename T>
void AudioVector<T>::CopyFrom(AudioVector<T>* copy_to) const {
  // Copying onto itself is a no-op. It is handled explicitly because
  // assigning a vector from a range of its own iterators violates the
  // container's preconditions.
  if (!copy_to || copy_to == this) {
    return;
  }
  copy_to->vector_ = vector_;
}
// Inserts all elements of |prepend_this| before the current first element.
template<typename T>
void AudioVector<T>::PushFront(const AudioVector<T>& prepend_this) {
  const std::vector<T>& source = prepend_this.vector_;
  vector_.insert(vector_.begin(), source.begin(), source.end());
}
// Inserts the |length| elements of the array |prepend_this| before the
// current first element.
template<typename T>
void AudioVector<T>::PushFront(const T* prepend_this, size_t length) {
  // Prepending is the same as inserting at position zero.
  const size_t front_position = 0;
  InsertAt(prepend_this, length, front_position);
}
// Appends all elements of |append_this| to the end of this vector.
template<typename T>
void AudioVector<T>::PushBack(const AudioVector<T>& append_this) {
  // Read the number of elements to append once, before modifying |vector_|.
  // If |append_this| is this very object, its size grows with every
  // push_back, and a loop bounded by the live size would never terminate.
  const size_t append_length = append_this.Size();
  // Reserving up front also guarantees that no reallocation happens while
  // elements are being read from |append_this|.
  vector_.reserve(vector_.size() + append_length);
  for (size_t i = 0; i < append_length; ++i) {
    vector_.push_back(append_this[i]);
  }
}
// Appends the |length| elements of the array |append_this| to the end of this
// vector.
template<typename T>
void AudioVector<T>::PushBack(const T* append_this, size_t length) {
  vector_.reserve(vector_.size() + length);
  const T* const source_end = append_this + length;
  for (const T* source = append_this; source != source_end; ++source) {
    vector_.push_back(*source);
  }
}
// Removes |length| elements from the beginning of the vector. If |length| is
// at least the current size, the vector becomes empty.
template<typename T>
void AudioVector<T>::PopFront(size_t length) {
  // Never remove more elements than the vector holds.
  const size_t remove_count = std::min(length, vector_.size());
  // Erases the half-open range [begin, begin + remove_count).
  vector_.erase(vector_.begin(), vector_.begin() + remove_count);
}
// Removes |length| elements from the end of the vector. If |length| is at
// least the current size, the vector becomes empty.
template<typename T>
void AudioVector<T>::PopBack(size_t length) {
  if (length >= vector_.size()) {
    // Guard against unsigned wrap-around in the subtraction below.
    vector_.clear();
    return;
  }
  vector_.resize(vector_.size() - length);
}
// Grows the vector by |extra_length| elements at the end; the new elements
// are set to zero.
template<typename T>
void AudioVector<T>::Extend(size_t extra_length) {
  vector_.resize(vector_.size() + extra_length, static_cast<T>(0));
}
// Inserts the |length| elements of the array |insert_this| so that the first
// inserted element ends up at index |position|. A |position| larger than the
// current size is treated as the current size, i.e., the data is appended.
template<typename T>
void AudioVector<T>::InsertAt(const T* insert_this,
                              size_t length,
                              size_t position) {
  // Clamp the position so that the insert iterator stays within the vector.
  const size_t capped_position = std::min(vector_.size(), position);
  vector_.insert(vector_.begin() + capped_position,
                 insert_this,
                 insert_this + length);
}
// Inserts |length| zero-valued elements starting at index |position|. A
// |position| larger than the current size is treated as the current size.
template<typename T>
void AudioVector<T>::InsertZerosAt(size_t length,
                                   size_t position) {
  // Clamp the position so that the insert iterator stays within the vector.
  const size_t capped_position = std::min(vector_.size(), position);
  vector_.insert(vector_.begin() + capped_position, length, 0);
}
// Replaces |length| elements, starting at index |position|, with the values
// in the array |insert_this|. A |position| larger than the current size is
// treated as the current size. If the new data reaches past the current end,
// the vector is extended to hold it.
template<typename T>
void AudioVector<T>::OverwriteAt(const T* insert_this,
                                 size_t length,
                                 size_t position) {
  position = std::min(vector_.size(), position);
  const size_t required_size = position + length;
  if (required_size > vector_.size()) {
    // Writing past the current end is allowed; make room first.
    Extend(required_size - vector_.size());
  }
  std::copy(insert_this, insert_this + length, vector_.begin() + position);
}
// Appends |append_this| to this vector, letting the last |fade_length|
// elements of this vector overlap with the first |fade_length| elements of
// |append_this|. The overlapping region is mixed with a linear cross-fade;
// the rest of |append_this| is appended unchanged.
template<typename T>
void AudioVector<T>::CrossFade(const AudioVector<T>& append_this,
                               size_t fade_length) {
  // Fade length cannot be longer than the current vector or |append_this|.
  assert(fade_length <= Size());
  assert(fade_length <= append_this.Size());
  // Clamp as well, so that builds with asserts disabled stay within bounds.
  fade_length = std::min(fade_length, Size());
  fade_length = std::min(fade_length, append_this.Size());
  size_t position = Size() - fade_length;
  // Cross fade the overlapping regions.
  // |alpha| is the mixing factor in Q14.
  // TODO(hlundin): Consider skipping +1 in the denominator to produce a
  // smoother cross-fade, in particular at the end of the fade.
  int alpha_step = 16384 / (fade_length + 1);
  int alpha = 16384;
  for (size_t i = 0; i < fade_length; ++i) {
    alpha -= alpha_step;
    // Weighted sum in Q14; adding 8192 rounds to nearest before the shift.
    vector_[position + i] = (alpha * vector_[position + i] +
        (16384 - alpha) * append_this[i] + 8192) >> 14;
  }
  assert(alpha >= 0);  // Verify that the slope was correct.
  // Append what is left of |append_this|. Skip this when the whole of
  // |append_this| was consumed by the fade: append_this[fade_length] would
  // then refer to an element one past the end of the vector.
  const size_t samples_to_push_back = append_this.Size() - fade_length;
  if (samples_to_push_back > 0) {
    PushBack(&append_this[fade_length], samples_to_push_back);
  }
}
// Template specialization for double. The only difference is in the calculation
// of the cross-faded value, where we divide by 16384 instead of shifting with
// 14 steps, and also not adding 8192 before scaling.
template<>
void AudioVector<double>::CrossFade(const AudioVector<double>& append_this,
                                    size_t fade_length) {
  // Fade length cannot be longer than the current vector or |append_this|.
  assert(fade_length <= Size());
  assert(fade_length <= append_this.Size());
  // Clamp as well, so that builds with asserts disabled stay within bounds.
  fade_length = std::min(fade_length, Size());
  fade_length = std::min(fade_length, append_this.Size());
  size_t position = Size() - fade_length;
  // Cross fade the overlapping regions.
  // |alpha| is the mixing factor in Q14.
  // TODO(hlundin): Consider skipping +1 in the denominator to produce a
  // smoother cross-fade, in particular at the end of the fade.
  int alpha_step = 16384 / (fade_length + 1);
  int alpha = 16384;
  for (size_t i = 0; i < fade_length; ++i) {
    alpha -= alpha_step;
    vector_[position + i] = (alpha * vector_[position + i] +
        (16384 - alpha) * append_this[i]) / 16384;
  }
  assert(alpha >= 0);  // Verify that the slope was correct.
  // Append what is left of |append_this|. Skip this when the whole of
  // |append_this| was consumed by the fade: append_this[fade_length] would
  // then refer to an element one past the end of the vector.
  const size_t samples_to_push_back = append_this.Size() - fade_length;
  if (samples_to_push_back > 0) {
    PushBack(&append_this[fade_length], samples_to_push_back);
  }
}
// Read-only element access. |index| is passed straight to the underlying
// std::vector; no range check is made here.
template<typename T>
const T& AudioVector<T>::operator[](size_t index) const {
  const T& element = vector_[index];
  return element;
}
// Mutable element access. |index| is passed straight to the underlying
// std::vector; no range check is made here.
template<typename T>
T& AudioVector<T>::operator[](size_t index) {
  T& element = vector_[index];
  return element;
}
// Explicitly instantiate the template for the supported sample types. The
// member functions are defined in this file rather than in the header, so
// these are the element types for which definitions are emitted here.
template class AudioVector<int16_t>;
template class AudioVector<int32_t>;
template class AudioVector<double>;
} // namespace webrtc

View File

@ -0,0 +1,105 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_VECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_VECTOR_H_
#include <cstring> // Access to size_t.
#include <vector>
#include "webrtc/system_wrappers/interface/constructor_magic.h"
namespace webrtc {
// A resizable sequence of audio samples of type T, stored in a std::vector.
// Copy construction and assignment are disabled; use CopyFrom() to duplicate
// the contents.
template <typename T>
class AudioVector {
public:
// Creates an empty AudioVector.
AudioVector() {}
// Creates an AudioVector with |initial_size| elements, all set to zero.
explicit AudioVector(size_t initial_size)
: vector_(initial_size, 0) {}
virtual ~AudioVector() {}
// Deletes all values and makes the vector empty.
virtual void Clear();
// Copies all values from this vector to |copy_to|. Any contents in |copy_to|
// are deleted before the copy operation. After the operation is done,
// |copy_to| will be an exact replica of this object. Note the direction: this
// object is the source and |copy_to| is the destination. A NULL |copy_to| is
// ignored.
virtual void CopyFrom(AudioVector<T>* copy_to) const;
// Prepends the contents of AudioVector |prepend_this| to this object. The
// length of this object is increased with the length of |prepend_this|.
virtual void PushFront(const AudioVector<T>& prepend_this);
// Same as above, but with an array |prepend_this| with |length| elements as
// source.
virtual void PushFront(const T* prepend_this, size_t length);
// Appends the contents of AudioVector |append_this| to the end of this
// object. The length of this object is increased with the length of
// |append_this|.
virtual void PushBack(const AudioVector<T>& append_this);
// Same as above, but with an array |append_this| with |length| elements as
// source.
virtual void PushBack(const T* append_this, size_t length);
// Removes |length| elements from the beginning of this object. If |length| is
// larger than or equal to the current size, all elements are removed.
virtual void PopFront(size_t length);
// Removes |length| elements from the end of this object. If |length| is
// larger than or equal to the current size, all elements are removed.
virtual void PopBack(size_t length);
// Extends this object with |extra_length| elements at the end. The new
// elements are initialized to zero.
virtual void Extend(size_t extra_length);
// Inserts |length| elements taken from the array |insert_this| at |position|.
// The length of the AudioVector is increased by |length|.
// |position| = 0 means that the new values are prepended to the vector.
// |position| = Size() means that the new values are appended to the vector.
// A |position| larger than Size() is capped at Size().
virtual void InsertAt(const T* insert_this, size_t length, size_t position);
// Like InsertAt, but inserts |length| zero elements at |position|.
virtual void InsertZerosAt(size_t length, size_t position);
// Overwrites |length| elements of this AudioVector with values taken from the
// array |insert_this|, starting at |position|. The definition of |position|
// is the same as for InsertAt(). If |length| and |position| are selected
// such that the new data extends beyond the end of the current AudioVector,
// the vector is extended to accommodate the new data.
virtual void OverwriteAt(const T* insert_this,
size_t length,
size_t position);
// Appends |append_this| to the end of the current vector. Lets the two
// vectors overlap by |fade_length| samples, and cross-fade linearly in this
// region. |fade_length| must not exceed the size of either vector.
virtual void CrossFade(const AudioVector<T>& append_this, size_t fade_length);
// Returns the number of elements in this AudioVector.
virtual size_t Size() const { return vector_.size(); }
// Returns true if this AudioVector is empty.
virtual bool Empty() const { return vector_.empty(); }
// Accesses and modifies an element of AudioVector. |index| is not range
// checked.
const T& operator[](size_t index) const;
T& operator[](size_t index);
private:
// Underlying sample storage.
std::vector<T> vector_;
DISALLOW_COPY_AND_ASSIGN(AudioVector);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_VECTOR_H_

View File

@ -0,0 +1,408 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/audio_vector.h"
#include <assert.h>
#include <stdlib.h>
#include <string>
#include "gtest/gtest.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// The tests in this file are so called typed tests (see e.g.,
// http://code.google.com/p/googletest/wiki/AdvancedGuide#Typed_Tests).
// This means that the tests are written with the typename T as an unknown
// template type. The tests are then instantiated for a few types; int16_t,
// int32_t and double in this case. Each test is then run once for each of these
// types.
// A few special tricks are needed. For instance, the member variable |array_|
// in the test fixture must be accessed using this->array_ in the tests. Also,
// the enumerator value kLength must be accessed with TestFixture::kLength.
// Typed test fixture providing a small ramp of test data of the sample type
// under test.
template<typename T>
class AudioVectorTest : public ::testing::Test {
 protected:
  enum {
    kLength = 10  // Number of elements in |array_|.
  };

  virtual void SetUp() {
    // Fill |array_| with the ramp 0, 1, ..., kLength - 1.
    size_t n = 0;
    while (n < kLength) {
      array_[n] = static_cast<T>(n);
      ++n;
    }
  }

  T array_[kLength];
};
// Instantiate typed tests with int16_t, int32_t, and double. Every TYPED_TEST
// registered for AudioVectorTest is run once for each type in this list.
typedef ::testing::Types<int16_t, int32_t, double> MyTypes;
TYPED_TEST_CASE(AudioVectorTest, MyTypes);
// Constructs AudioVector objects with and without an initial length and
// checks size and emptiness. Destruction happens implicitly when the objects
// go out of scope.
TYPED_TEST(AudioVectorTest, CreateAndDestroy) {
  AudioVector<TypeParam> empty_vec;
  EXPECT_TRUE(empty_vec.Empty());
  EXPECT_EQ(0u, empty_vec.Size());

  size_t requested_size = 17;
  AudioVector<TypeParam> sized_vec(requested_size);
  EXPECT_FALSE(sized_vec.Empty());
  EXPECT_EQ(requested_size, sized_vec.Size());
}
// Writes each element through the subscript operator and reads it back into
// a const reference.
TYPED_TEST(AudioVectorTest, SubscriptOperator) {
  AudioVector<TypeParam> vec(TestFixture::kLength);
  for (size_t i = 0; i < TestFixture::kLength; ++i) {
    const TypeParam written = static_cast<TypeParam>(i);
    vec[i] = written;
    const TypeParam& read_back = vec[i];  // Bound to a const reference.
    EXPECT_EQ(written, read_back);
  }
}
// Exercises PushBack() with an array and CopyFrom(), and uses Clear() to
// verify that copying an empty vector empties the destination.
TYPED_TEST(AudioVectorTest, PushBackAndCopy) {
  AudioVector<TypeParam> source;
  AudioVector<TypeParam> replica;
  source.PushBack(this->array_, TestFixture::kLength);
  // Note the direction: CopyFrom() copies from the object it is called on
  // into its argument.
  source.CopyFrom(&replica);
  ASSERT_EQ(TestFixture::kLength, source.Size());
  ASSERT_EQ(TestFixture::kLength, replica.Size());
  for (size_t i = 0; i < TestFixture::kLength; ++i) {
    const TypeParam& expected = this->array_[i];
    EXPECT_EQ(expected, source[i]);
    EXPECT_EQ(expected, replica[i]);
  }

  // Empty the source and check that it reports as empty.
  source.Clear();
  EXPECT_TRUE(source.Empty());

  // Copying the now-empty source must leave the destination empty as well.
  source.CopyFrom(&replica);
  EXPECT_TRUE(replica.Empty());
}
// Passes a NULL destination to CopyFrom(). The call is expected to be a no-op;
// the test only verifies that it does not crash.
TYPED_TEST(AudioVectorTest, CopyToNull) {
  AudioVector<TypeParam> source;
  source.PushBack(this->array_, TestFixture::kLength);
  AudioVector<TypeParam>* null_destination = NULL;
  source.CopyFrom(null_destination);
}
// Appends one AudioVector to another with PushBack() and checks that the
// result is one continuous ramp.
TYPED_TEST(AudioVectorTest, PushBackVector) {
  static const size_t kLength = 10;
  AudioVector<TypeParam> head(kLength);
  AudioVector<TypeParam> tail(kLength);
  // |head| holds [0, 1, ..., kLength - 1] and |tail| continues with
  // [kLength, kLength + 1, ..., 2 * kLength - 1].
  for (size_t i = 0; i < kLength; ++i) {
    head[i] = static_cast<TypeParam>(i);
    tail[i] = static_cast<TypeParam>(i + kLength);
  }

  head.PushBack(tail);

  ASSERT_EQ(2 * kLength, head.Size());
  for (size_t i = 0; i < 2 * kLength; ++i) {
    EXPECT_EQ(static_cast<TypeParam>(i), head[i]);
  }
}
// Prepends an array to an empty vector with PushFront() and checks the
// resulting contents.
TYPED_TEST(AudioVectorTest, PushFront) {
  AudioVector<TypeParam> vec;
  vec.PushFront(this->array_, TestFixture::kLength);
  ASSERT_EQ(TestFixture::kLength, vec.Size());
  for (size_t index = 0; index < TestFixture::kLength; ++index) {
    EXPECT_EQ(this->array_[index], vec[index]);
  }
}
// Prepends one AudioVector to another with PushFront() and checks that the
// result is one continuous ramp.
TYPED_TEST(AudioVectorTest, PushFrontVector) {
  static const size_t kLength = 10;
  AudioVector<TypeParam> head(kLength);
  AudioVector<TypeParam> tail(kLength);
  // |head| holds [0, 1, ..., kLength - 1] and |tail| continues with
  // [kLength, kLength + 1, ..., 2 * kLength - 1].
  for (size_t i = 0; i < kLength; ++i) {
    head[i] = static_cast<TypeParam>(i);
    tail[i] = static_cast<TypeParam>(i + kLength);
  }

  // Put |head| in front of |tail|; |tail| then holds the whole ramp.
  tail.PushFront(head);

  ASSERT_EQ(2 * kLength, tail.Size());
  for (size_t i = 0; i < 2 * kLength; ++i) {
    EXPECT_EQ(static_cast<TypeParam>(i), tail[i]);
  }
}
// Removes elements from the front with PopFront(), first a single element and
// then more elements than the vector holds.
TYPED_TEST(AudioVectorTest, PopFront) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  vec.PopFront(1);  // Drops the first element.
  EXPECT_EQ(TestFixture::kLength - 1u, vec.Size());
  // The remaining elements are the ramp shifted by one.
  for (size_t index = 0; index < TestFixture::kLength - 1; ++index) {
    EXPECT_EQ(static_cast<TypeParam>(index + 1), vec[index]);
  }
  vec.PopFront(TestFixture::kLength);  // Asks for more than is left.
  EXPECT_EQ(0u, vec.Size());
}
// Removes elements from the back with PopBack(), first a single element and
// then more elements than the vector holds.
TYPED_TEST(AudioVectorTest, PopBack) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  vec.PopBack(1);  // Drops the last element.
  EXPECT_EQ(TestFixture::kLength - 1u, vec.Size());
  // The remaining elements are the start of the ramp.
  for (size_t index = 0; index < TestFixture::kLength - 1; ++index) {
    EXPECT_EQ(static_cast<TypeParam>(index), vec[index]);
  }
  vec.PopBack(TestFixture::kLength);  // Asks for more than is left.
  EXPECT_EQ(0u, vec.Size());
}
// Extends a filled vector with Extend() and checks that the added elements
// are zero.
TYPED_TEST(AudioVectorTest, Extend) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  vec.Extend(5);  // Five new elements are added at the end.
  ASSERT_EQ(TestFixture::kLength + 5u, vec.Size());
  // Every added element must be zero.
  for (size_t added = 0; added < 5; ++added) {
    EXPECT_EQ(0, vec[TestFixture::kLength + added]);
  }
}
// Inserts an array in the middle of the vector with InsertAt().
TYPED_TEST(AudioVectorTest, InsertAt) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  static const int kNewLength = 5;
  TypeParam new_array[kNewLength];
  // The inserted data is {100, 101, 102, ...}.
  for (int k = 0; k < kNewLength; ++k) {
    new_array[k] = 100 + k;
  }
  int insert_position = 5;
  vec.InsertAt(new_array, kNewLength, insert_position);
  // Expected layout:
  // {0, 1, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1,
  //  |insert_position|, |insert_position| + 1, ..., kLength - 1}.
  for (int pos = 0; pos < TestFixture::kLength + kNewLength; ++pos) {
    if (pos < insert_position) {
      // Original data ahead of the insertion point.
      EXPECT_EQ(this->array_[pos], vec[pos]);
    } else if (pos < insert_position + kNewLength) {
      // The inserted data.
      EXPECT_EQ(new_array[pos - insert_position], vec[pos]);
    } else {
      // Original data, shifted back by the inserted length.
      EXPECT_EQ(this->array_[pos - kNewLength], vec[pos]);
    }
  }
}
// Inserts zeros in the middle of the vector with InsertZerosAt() and compares
// the outcome with a reference built by InsertAt() from an all-zero array.
TYPED_TEST(AudioVectorTest, InsertZerosAt) {
  static const int kNewLength = 5;
  int insert_position = 5;

  AudioVector<TypeParam> vec;
  AudioVector<TypeParam> vec_ref;
  vec.PushBack(this->array_, TestFixture::kLength);
  vec_ref.PushBack(this->array_, TestFixture::kLength);

  vec.InsertZerosAt(kNewLength, insert_position);

  TypeParam zeros[kNewLength] = {0};  // Every element is zero.
  vec_ref.InsertAt(zeros, kNewLength, insert_position);

  // The two vectors must match element by element.
  ASSERT_EQ(vec_ref.Size(), vec.Size());
  for (size_t index = 0; index < vec.Size(); ++index) {
    EXPECT_EQ(vec_ref[index], vec[index]);
  }
}
// Inserts an array at the very start of the vector with InsertAt().
TYPED_TEST(AudioVectorTest, InsertAtBeginning) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  static const int kNewLength = 5;
  TypeParam new_array[kNewLength];
  // The inserted data is {100, 101, 102, ...}.
  for (int k = 0; k < kNewLength; ++k) {
    new_array[k] = 100 + k;
  }
  int insert_position = 0;
  vec.InsertAt(new_array, kNewLength, insert_position);
  // Expected layout:
  // {100, 101, ..., 100 + kNewLength - 1, 0, 1, ..., kLength - 1}.
  for (int pos = 0; pos < kNewLength + TestFixture::kLength; ++pos) {
    if (pos < kNewLength) {
      // The inserted data comes first.
      EXPECT_EQ(new_array[pos], vec[pos]);
    } else {
      // The original data follows, shifted back by the inserted length.
      EXPECT_EQ(this->array_[pos - kNewLength], vec[pos]);
    }
  }
}
// Inserts an array at the very end of the vector with InsertAt().
TYPED_TEST(AudioVectorTest, InsertAtEnd) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  static const int kNewLength = 5;
  TypeParam new_array[kNewLength];
  // The inserted data is {100, 101, 102, ...}.
  for (int k = 0; k < kNewLength; ++k) {
    new_array[k] = 100 + k;
  }
  int insert_position = TestFixture::kLength;
  vec.InsertAt(new_array, kNewLength, insert_position);
  // Expected layout:
  // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1}.
  for (int pos = 0; pos < TestFixture::kLength + kNewLength; ++pos) {
    if (pos < TestFixture::kLength) {
      // The original data is untouched.
      EXPECT_EQ(this->array_[pos], vec[pos]);
    } else {
      // The inserted data follows.
      EXPECT_EQ(new_array[pos - TestFixture::kLength], vec[pos]);
    }
  }
}
// Calls InsertAt() with a position past the end of the vector. This must not
// be an error: the position is expected to be capped at the vector length, so
// the outcome is the same as inserting at the end.
TYPED_TEST(AudioVectorTest, InsertBeyondEnd) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  static const int kNewLength = 5;
  TypeParam new_array[kNewLength];
  // The inserted data is {100, 101, 102, ...}.
  for (int k = 0; k < kNewLength; ++k) {
    new_array[k] = 100 + k;
  }
  int insert_position = TestFixture::kLength + 10;  // Past the end.
  vec.InsertAt(new_array, kNewLength, insert_position);
  // Expected layout:
  // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1}.
  for (int pos = 0; pos < TestFixture::kLength + kNewLength; ++pos) {
    if (pos < TestFixture::kLength) {
      // The original data is untouched.
      EXPECT_EQ(this->array_[pos], vec[pos]);
    } else {
      // The inserted data follows.
      EXPECT_EQ(new_array[pos - TestFixture::kLength], vec[pos]);
    }
  }
}
// Overwrites part of the vector with OverwriteAt(), using a position for
// which all new values fit inside the existing vector.
TYPED_TEST(AudioVectorTest, OverwriteAt) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  static const int kNewLength = 5;
  TypeParam new_array[kNewLength];
  // The new data is {100, 101, 102, ...}.
  for (int k = 0; k < kNewLength; ++k) {
    new_array[k] = 100 + k;
  }
  int insert_position = 2;
  vec.OverwriteAt(new_array, kNewLength, insert_position);
  // Expected layout:
  // {0, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1,
  //  |insert_position| + kNewLength, ..., kLength - 1}.
  for (int pos = 0; pos < TestFixture::kLength; ++pos) {
    const bool overwritten =
        pos >= insert_position && pos < insert_position + kNewLength;
    if (overwritten) {
      EXPECT_EQ(new_array[pos - insert_position], vec[pos]);
    } else {
      // Everything outside the overwritten region keeps its original value.
      EXPECT_EQ(this->array_[pos], vec[pos]);
    }
  }
}
// Calls OverwriteAt() with a position for which part of the new data reaches
// past the current end of the vector. This is valid; the vector is expected
// to grow to hold the new values.
TYPED_TEST(AudioVectorTest, OverwriteBeyondEnd) {
  AudioVector<TypeParam> vec;
  vec.PushBack(this->array_, TestFixture::kLength);
  static const int kNewLength = 5;
  TypeParam new_array[kNewLength];
  // The new data is {100, 101, 102, ...}.
  for (int k = 0; k < kNewLength; ++k) {
    new_array[k] = 100 + k;
  }
  int insert_position = TestFixture::kLength - 2;
  vec.OverwriteAt(new_array, kNewLength, insert_position);
  ASSERT_EQ(TestFixture::kLength - 2u + kNewLength, vec.Size());
  // Expected layout:
  // {0, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1}.
  int pos = 0;
  while (pos < insert_position) {
    // Original data ahead of the overwritten region.
    EXPECT_EQ(this->array_[pos], vec[pos]);
    ++pos;
  }
  for (int k = 0; k < kNewLength; ++k, ++pos) {
    EXPECT_EQ(new_array[k], vec[pos]);
  }
  // The checks above must have covered |vec| all the way to its end.
  EXPECT_EQ(vec.Size(), static_cast<size_t>(pos));
}
// Cross-fades an all-zero vector into a vector of constant value 100 and
// checks the untouched regions exactly and the fade region to within +/-1.
TYPED_TEST(AudioVectorTest, CrossFade) {
  static const size_t kLength = 100;
  static const size_t kFadeLength = 10;
  AudioVector<TypeParam> fade_out(kLength);
  AudioVector<TypeParam> fade_in(kLength);
  // |fade_out| is all zeros and |fade_in| is all 100.
  for (size_t i = 0; i < kLength; ++i) {
    fade_out[i] = 0;
    fade_in[i] = 100;
  }

  fade_out.CrossFade(fade_in, kFadeLength);

  // The two vectors overlap by kFadeLength elements.
  ASSERT_EQ(2 * kLength - kFadeLength, fade_out.Size());
  // Everything ahead of the fade region is still zero.
  const size_t fade_start = kLength - kFadeLength;
  for (size_t i = 0; i < fade_start; ++i) {
    EXPECT_EQ(0, fade_out[i]);
  }
  // Inside the fade region the values ramp linearly from 0 towards 100.
  for (size_t i = 0 ; i < kFadeLength; ++i) {
    EXPECT_NEAR((i + 1) * 100 / (kFadeLength + 1),
                fade_out[fade_start + i], 1);
  }
  // Everything after the fade region is the unmodified appended data.
  for (size_t i = kLength; i < fade_out.Size(); ++i) {
    EXPECT_EQ(100, fade_out[i]);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,251 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
#include <assert.h>
#include <algorithm> // min, max
#include <cstring> // memcpy
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq4/post_decode_vad.h"
namespace webrtc {
// Marks the estimator as uninitialized and restores the default parameters
// for every channel.
void BackgroundNoise::Reset() {
  initialized_ = false;
  for (size_t i = 0; i != num_channels_; ++i) {
    channel_parameters_[i].Reset();
  }
  // |mode_| is deliberately left untouched.
}
void BackgroundNoise::Update(const AudioMultiVector<int16_t>& input,
const PostDecodeVad& vad) {
if (vad.running() && vad.active_speech()) {
// Do not update the background noise parameters if we know that the signal
// is active speech.
return;
}
int32_t auto_correlation[kMaxLpcOrder + 1];
int16_t fiter_output[kMaxLpcOrder + kResidualLength];
int16_t reflection_coefficients[kMaxLpcOrder];
int16_t lpc_coefficients[kMaxLpcOrder + 1];
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0};
int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder];
memcpy(temp_signal,
&input[channel_ix][input.Size() - kVecLen],
sizeof(int16_t) * kVecLen);
int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen,
auto_correlation);
if ((!vad.running() &&
sample_energy < parameters.energy_update_threshold) ||
(vad.running() && !vad.active_speech())) {
// Generate LPC coefficients.
if (auto_correlation[0] > 0) {
// Regardless of whether the filter is actually updated or not,
// update energy threshold levels, since we have in fact observed
// a low energy signal.
if (sample_energy < parameters.energy_update_threshold) {
// Never go under 1.0 in average sample energy.
parameters.energy_update_threshold = std::max(sample_energy, 1);
parameters.low_energy_update_threshold = 0;
}
// Only update BGN if filter is stable, i.e., if return value from
// Levinson-Durbin function is 1.
if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients,
reflection_coefficients,
kMaxLpcOrder) != 1) {
return;
}
} else {
// Center value in auto-correlation is not positive. Do not update.
return;
}
// Generate the CNG gain factor by looking at the energy of the residual.
WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength,
fiter_output, lpc_coefficients,
kMaxLpcOrder + 1, kResidualLength);
int32_t residual_energy = WebRtcSpl_DotProductWithScale(fiter_output,
fiter_output,
kResidualLength,
0);
// Check spectral flatness.
// Comparing the residual variance with the input signal variance tells
// if the spectrum is flat or not.
// If 20 * residual_energy >= sample_energy << 6, the spectrum is flat
// enough. Also ensure that the energy is non-zero.
if ((residual_energy * 20 >= (sample_energy << 6)) &&
(sample_energy > 0)) {
// Spectrum is flat enough; save filter parameters.
// |temp_signal| + |kVecLen| - |kMaxLpcOrder| points at the first of the
// |kMaxLpcOrder| samples in the residual signal, which will form the
// filter state for the next noise generation.
SaveParameters(channel_ix, lpc_coefficients,
temp_signal + kVecLen - kMaxLpcOrder, sample_energy,
residual_energy);
}
} else {
// Will only happen if post-decode VAD is disabled and |sample_energy| is
// not low enough. Increase the threshold for update so that it increases
// by a factor 4 in 4 seconds.
IncrementEnergyThreshold(channel_ix, sample_energy);
}
}
return;
}
// Returns the stored energy value for |channel|.
int32_t BackgroundNoise::Energy(size_t channel) const {
  assert(channel < num_channels_);
  const ChannelParameters& parameters = channel_parameters_[channel];
  return parameters.energy;
}
// Stores |value| as the mute factor for |channel|.
void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) {
  assert(channel < num_channels_);
  ChannelParameters& parameters = channel_parameters_[channel];
  parameters.mute_factor = value;
}
// Returns the mute factor currently stored for |channel|.
int16_t BackgroundNoise::MuteFactor(size_t channel) const {
  assert(channel < num_channels_);
  const ChannelParameters& parameters = channel_parameters_[channel];
  return parameters.mute_factor;
}
// Returns a pointer to the first of the |kMaxLpcOrder| + 1 filter
// coefficients stored for |channel|.
const int16_t* BackgroundNoise::Filter(size_t channel) const {
  assert(channel < num_channels_);
  const ChannelParameters& parameters = channel_parameters_[channel];
  return parameters.filter;
}
// Returns a pointer to the first of the |kMaxLpcOrder| filter state samples
// stored for |channel|.
const int16_t* BackgroundNoise::FilterState(size_t channel) const {
  assert(channel < num_channels_);
  const ChannelParameters& parameters = channel_parameters_[channel];
  return parameters.filter_state;
}
void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input,
size_t length) {
assert(channel < num_channels_);
length = std::min(length, static_cast<size_t>(kMaxLpcOrder));
memcpy(channel_parameters_[channel].filter_state, input,
length * sizeof(int16_t));
}
// Returns the scale factor stored for |channel|.
int16_t BackgroundNoise::Scale(size_t channel) const {
  assert(channel < num_channels_);
  const ChannelParameters& parameters = channel_parameters_[channel];
  return parameters.scale;
}
// Returns the scale shift stored for |channel|.
int16_t BackgroundNoise::ScaleShift(size_t channel) const {
  assert(channel < num_channels_);
  const ChannelParameters& parameters = channel_parameters_[channel];
  return parameters.scale_shift;
}
// Computes |kMaxLpcOrder| + 1 correlation values of |signal| with itself and
// writes them to |auto_correlation|. Returns the lag-zero value shifted down
// to an energy-per-sample figure.
int32_t BackgroundNoise::CalculateAutoCorrelation(
    const int16_t* signal, size_t length, int32_t* auto_correlation) const {
  static const int kCorrelationStep = -1;

  // Derive the scaling used in the correlation from the largest absolute
  // sample value. The scaling is never allowed to be negative.
  const int16_t max_abs_sample = WebRtcSpl_MaxAbsValueW16(signal, length);
  int scaling =
      kLogVecLen - WebRtcSpl_NormW32(max_abs_sample * max_abs_sample);
  if (scaling < 0) {
    scaling = 0;
  }

  WebRtcSpl_CrossCorrelation(auto_correlation, signal, signal,
                             length, kMaxLpcOrder + 1, scaling,
                             kCorrelationStep);

  // Apply the remaining shifts needed to normalize energy to energy/sample.
  return auto_correlation[0] >> (kLogVecLen - scaling);
}
// Raises the energy update threshold of |channel| by approximately the factor
// (1 + |kThresholdIncrement| / 65536), and then makes sure the threshold does
// not fall below a bound derived from a slowly decaying maximum of
// |sample_energy|.
void BackgroundNoise::IncrementEnergyThreshold(size_t channel,
int32_t sample_energy) {
// TODO(hlundin): Simplify the below threshold update. What this code
// does is simply "threshold += (increment * threshold) >> 16", but due
// to the limited-width operations, it is not exactly the same. The
// difference should be inaudible, but bit-exactness would not be
// maintained.
assert(channel < num_channels_);
ChannelParameters& parameters = channel_parameters_[channel];
// |low_energy_update_threshold| acts as a 16-bit fractional accumulator for
// |energy_update_threshold|. The increment is first accumulated there: the
// contribution from the accumulator itself, followed by the contributions
// from the lowest byte and the second lowest byte of the threshold.
int32_t temp_energy =
WEBRTC_SPL_MUL_16_16_RSFT(kThresholdIncrement,
parameters.low_energy_update_threshold, 16);
temp_energy += kThresholdIncrement *
(parameters.energy_update_threshold & 0xFF);
temp_energy += (kThresholdIncrement *
((parameters.energy_update_threshold>>8) & 0xFF)) << 8;
parameters.low_energy_update_threshold += temp_energy;
// The contribution from the bits above the 16 lowest is added directly to
// the threshold.
parameters.energy_update_threshold += kThresholdIncrement *
(parameters.energy_update_threshold>>16);
// Move the carry out of the accumulator into the threshold, and keep only the
// 16 lowest bits in the accumulator.
parameters.energy_update_threshold +=
parameters.low_energy_update_threshold >> 16;
parameters.low_energy_update_threshold =
parameters.low_energy_update_threshold & 0x0FFFF;
// Update maximum energy.
// Decrease by a factor 1/1024 each time.
parameters.max_energy = parameters.max_energy -
(parameters.max_energy >> 10);
if (sample_energy > parameters.max_energy) {
parameters.max_energy = sample_energy;
}
// Set |energy_update_threshold| to no less than 60 dB lower than
// |max_energy|. Adding 524288 (half of 2^20) assures proper rounding in the
// 20-bit right shift.
int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20;
if (energy_update_threshold > parameters.energy_update_threshold) {
parameters.energy_update_threshold = energy_update_threshold;
}
}
// Stores a new set of background noise parameters for |channel|.
// |lpc_coefficients| must point to |kMaxLpcOrder| + 1 filter coefficients and
// |filter_state| to |kMaxLpcOrder| state samples. |sample_energy| is saved as
// the new energy level (but never lower than 1) and also becomes the new
// update threshold. The noise scale and scale shift are derived from
// |residual_energy|. Marks the object as initialized.
void BackgroundNoise::SaveParameters(size_t channel,
                                     const int16_t* lpc_coefficients,
                                     const int16_t* filter_state,
                                     int32_t sample_energy,
                                     int32_t residual_energy) {
  assert(channel < num_channels_);
  ChannelParameters& parameters = channel_parameters_[channel];
  memcpy(parameters.filter, lpc_coefficients,
         (kMaxLpcOrder + 1) * sizeof(int16_t));
  memcpy(parameters.filter_state, filter_state,
         kMaxLpcOrder * sizeof(int16_t));
  // Save energy level and update energy threshold levels.
  // Never get under 1.0 in average sample energy.
  parameters.energy = std::max(sample_energy, 1);
  parameters.energy_update_threshold = parameters.energy;
  parameters.low_energy_update_threshold = 0;

  // Normalize residual_energy to 29 or 30 bits before sqrt.
  int norm_shift = WebRtcSpl_NormW32(residual_energy) - 1;
  if (norm_shift & 0x1) {
    norm_shift -= 1;  // Even number of shifts required.
  }
  // |norm_shift| becomes negative when WebRtcSpl_NormW32() returns 0, that
  // is, when |residual_energy| has no headroom left. Shifting by a negative
  // count is undefined, so shift in the opposite direction in that case.
  if (norm_shift >= 0) {
    residual_energy = residual_energy << norm_shift;
  } else {
    residual_energy = residual_energy >> -norm_shift;
  }

  // Calculate scale and shift factor.
  parameters.scale = WebRtcSpl_SqrtFloor(residual_energy);
  // Add 13 to the |scale_shift|, since the random numbers table is in
  // Q13.
  // TODO(hlundin): Move the "13" to where the |scale_shift| is used?
  parameters.scale_shift = 13 + ((kLogResidualLength + norm_shift) / 2);

  initialized_ = true;
}
} // namespace webrtc

View File

@ -0,0 +1,145 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BACKGROUND_NOISE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BACKGROUND_NOISE_H_
#include <cstring> // size_t
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class PostDecodeVad;
// This class handles estimation of background noise parameters.
// Holds one set of noise parameters (energy levels, filter coefficients,
// filter state, mute factor and scaling) per channel, and updates them from
// decoded audio through Update().
class BackgroundNoise {
 public:
  enum BackgroundNoiseMode {
    kBgnOn,  // Default behavior with eternal noise.
    kBgnFade,  // Noise fades to zero after some time.
    kBgnOff  // Background noise is always zero.
  };

  // TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10.
  // Will work anyway, but probably sound a little worse.
  static const int kMaxLpcOrder = 8;  // 32000 / 8000 + 4.

  // Creates an estimator for |num_channels| channels, with all parameters at
  // their default values and the mode set to |kBgnOn|.
  explicit BackgroundNoise(size_t num_channels)
      : num_channels_(num_channels),
        channel_parameters_(new ChannelParameters[num_channels_]),
        mode_(kBgnOn) {
    Reset();
  }

  virtual ~BackgroundNoise() {
  }

  // Restores the default parameters for all channels and marks the object as
  // not initialized. The mode is not changed.
  void Reset();

  // Updates the parameter estimates based on the signal currently in the
  // |sync_buffer|, and on the latest decision in |vad| if it is running.
  void Update(const AudioMultiVector<int16_t>& sync_buffer,
              const PostDecodeVad& vad);

  // Returns the energy for |channel|.
  int32_t Energy(size_t channel) const;

  // Sets the value of the mute factor for |channel| to |value|.
  void SetMuteFactor(size_t channel, int16_t value);

  // Returns the mute factor for |channel|.
  int16_t MuteFactor(size_t channel) const;

  // Returns a pointer to the filter coefficients for |channel|.
  const int16_t* Filter(size_t channel) const;

  // Returns a pointer to the filter state for |channel|.
  const int16_t* FilterState(size_t channel) const;

  // Copies |length| elements from |input| to the filter state. Will not copy
  // more than |kMaxLpcOrder| elements.
  void SetFilterState(size_t channel, const int16_t* input, size_t length);

  // Returns the scale for |channel|.
  int16_t Scale(size_t channel) const;

  // Returns the scale shift for |channel|.
  int16_t ScaleShift(size_t channel) const;

  // Accessors.
  bool initialized() const { return initialized_; }
  BackgroundNoiseMode mode() const { return mode_; }

 private:
  static const int kThresholdIncrement = 229;  // 0.0035 in Q16.
  static const int kVecLen = 256;
  static const int kLogVecLen = 8;  // log2(kVecLen).
  static const int kResidualLength = 64;
  static const int kLogResidualLength = 6;  // log2(kResidualLength)

  // The complete set of noise parameters for one channel.
  struct ChannelParameters {
    // Constructor.
    ChannelParameters() {
      Reset();
    }

    // Sets all parameters to their default values.
    void Reset() {
      energy = 2500;
      max_energy = 0;
      energy_update_threshold = 500000;
      low_energy_update_threshold = 0;
      memset(filter_state, 0, sizeof(filter_state));
      memset(filter, 0, sizeof(filter));
      filter[0] = 4096;
      mute_factor = 0;
      scale = 20000;
      scale_shift = 24;
    }

    int32_t energy;
    int32_t max_energy;
    int32_t energy_update_threshold;
    int32_t low_energy_update_threshold;
    int16_t filter_state[kMaxLpcOrder];
    int16_t filter[kMaxLpcOrder + 1];
    int16_t mute_factor;
    int16_t scale;
    int16_t scale_shift;
  };

  // Calculates |kMaxLpcOrder| + 1 auto-correlation values for |signal| and
  // writes them to |auto_correlation|. Returns the energy per sample.
  int32_t CalculateAutoCorrelation(const int16_t* signal,
                                   size_t length,
                                   int32_t* auto_correlation) const;

  // Increments the energy threshold by a factor 1 + |kThresholdIncrement|.
  void IncrementEnergyThreshold(size_t channel, int32_t sample_energy);

  // Updates the filter parameters.
  void SaveParameters(size_t channel,
                      const int16_t* lpc_coefficients,
                      const int16_t* filter_state,
                      int32_t sample_energy,
                      int32_t residual_energy);

  size_t num_channels_;
  scoped_array<ChannelParameters> channel_parameters_;
  bool initialized_;
  BackgroundNoiseMode mode_;

  DISALLOW_COPY_AND_ASSIGN(BackgroundNoise);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BACKGROUND_NOISE_H_

View File

@ -0,0 +1,26 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for BackgroundNoise class.
#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
#include "gtest/gtest.h"
namespace webrtc {
// Verifies that a single-channel object can be constructed and destructed.
TEST(BackgroundNoise, CreateAndDestroy) {
  const size_t kNumChannels = 1;
  BackgroundNoise bgn(kNumChannels);
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h"
#include <algorithm> // Provide access to std::max.
namespace webrtc {
// Creates the filter in its reset state.
BufferLevelFilter::BufferLevelFilter() {
  // Reset() is virtual, but during construction it is always this class's
  // version that runs. The qualified call makes that explicit.
  BufferLevelFilter::Reset();
}
// Restores the default filter factor (253) and clears the filtered level.
void BufferLevelFilter::Reset() {
  level_factor_ = 253;
  filtered_current_level_ = 0;
}
// Feeds the current buffer size |buffer_size_packets| (Q0) into the filter.
// If |time_stretched_samples| is non-zero and |packet_len_samples| is
// positive, the corresponding number of packets is then subtracted from the
// filtered level, which is kept non-negative. |time_stretched_samples| may be
// negative, in which case the filtered level increases.
void BufferLevelFilter::Update(int buffer_size_packets,
                               int time_stretched_samples,
                               int packet_len_samples) {
  // Filter:
  // |filtered_current_level_| = |level_factor_| * |filtered_current_level_| +
  //                            (1 - |level_factor_|) * |buffer_size_packets|
  // |level_factor_| and |filtered_current_level_| are in Q8.
  // |buffer_size_packets| is in Q0.
  filtered_current_level_ = ((level_factor_ * filtered_current_level_) >> 8) +
      ((256 - level_factor_) * buffer_size_packets);

  // Account for time-scale operations (accelerate and pre-emptive expand).
  if (time_stretched_samples && packet_len_samples > 0) {
    // Time-scaling has been performed since last filter update. Subtract the
    // value of |time_stretched_samples| from |filtered_current_level_| after
    // converting |time_stretched_samples| from samples to packets in Q8.
    // The conversion to Q8 multiplies by 256 instead of shifting, because
    // left-shifting a negative value is not well defined.
    // Make sure that the filtered value remains non-negative.
    filtered_current_level_ = std::max(0,
        filtered_current_level_ -
        (time_stretched_samples * 256) / packet_len_samples);
  }
}
// Selects the filter factor (Q8) from |target_buffer_level|. A higher target
// level gives a larger factor:
//   target <= 1 -> 251, target <= 3 -> 252, target <= 7 -> 253, else 254.
void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level) {
  if (target_buffer_level > 7) {
    level_factor_ = 254;
    return;
  }
  if (target_buffer_level > 3) {
    level_factor_ = 253;
    return;
  }
  if (target_buffer_level > 1) {
    level_factor_ = 252;
    return;
  }
  level_factor_ = 251;
}
} // namespace webrtc

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BUFFER_LEVEL_FILTER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BUFFER_LEVEL_FILTER_H_
#include "webrtc/system_wrappers/interface/constructor_magic.h"
namespace webrtc {
// Keeps a filtered version of the packet buffer level. The filtered level and
// the filter factor are both stored in Q8. All methods are virtual.
class BufferLevelFilter {
public:
// Constructs the filter and calls Reset().
BufferLevelFilter();
virtual ~BufferLevelFilter() {}
// Restores the default filter factor and sets the filtered level to zero.
virtual void Reset();
// Updates the filter. Current buffer size is |buffer_size_packets| (Q0).
// If |time_stretched_samples| is non-zero, the value is converted to the
// corresponding number of packets, and is subtracted from the filtered
// value (thus bypassing the filter operation). |packet_len_samples| is the
// number of audio samples carried in each incoming packet.
virtual void Update(int buffer_size_packets, int time_stretched_samples,
int packet_len_samples);
// Set the current target buffer level (obtained from
// DelayManager::base_target_level()). Used to select the appropriate
// filter coefficient.
virtual void SetTargetBufferLevel(int target_buffer_level);
// Returns the filtered buffer level in Q8.
virtual int filtered_current_level() const { return filtered_current_level_; }
private:
int level_factor_; // Filter factor for the buffer level filter in Q8.
int filtered_current_level_; // Filtered current buffer level in Q8.
DISALLOW_COPY_AND_ASSIGN(BufferLevelFilter);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BUFFER_LEVEL_FILTER_H_

View File

@ -0,0 +1,162 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for BufferLevelFilter class.
#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h"
#include <cmath> // Access to pow function.
#include "gtest/gtest.h"
namespace webrtc {
// Verifies that a heap-allocated filter starts out with a filtered level of
// zero and can be deleted.
TEST(BufferLevelFilter, CreateAndDestroy) {
  BufferLevelFilter* const level_filter = new BufferLevelFilter();
  EXPECT_EQ(0, level_filter->filtered_current_level());
  delete level_filter;
}
// Feeds a constant input to the filter a number of times, and compares the
// filtered level with the theoretical response for the coefficient 251/256.
TEST(BufferLevelFilter, ConvergenceTest) {
  BufferLevelFilter filter;
  for (int times = 10; times <= 50; times += 10) {
    for (int value = 100; value <= 200; value += 10) {
      // Print out the parameter values on failure.
      std::ostringstream ss;
      ss << "times = " << times << ", value = " << value;
      SCOPED_TRACE(ss.str());

      filter.Reset();
      filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.
      int remaining = times;
      while (remaining-- > 0) {
        filter.Update(value, 0 /* time_stretched_samples */,
                      160 /* packet_len_samples */);
      }

      // Theoretically, the filtered value is
      // (1 - (251/256) ^ |times|) * |value|.
      const double theoretical_level =
          (1 - pow(251.0 / 256.0, times)) * value;
      const int expected_value = static_cast<int>(theoretical_level);
      // filtered_current_level() returns the value in Q8.
      // Intermediate-stage rounding in the filter implementation can make the
      // actual value differ slightly from the theoretical one, hence the
      // tolerance of +/-1.
      const int actual_value = filter.filtered_current_level() >> 8;
      EXPECT_NEAR(expected_value, actual_value, 1);
    }
  }
}
// Verify that target buffer level impacts on the filter convergence.
TEST(BufferLevelFilter, FilterFactor) {
  // Each case updates the filter 10 times with the value 100.
  const int kTimes = 10;
  const int kValue = 100;
  // For filter coefficient c / 256, the filtered value is expected to be
  // (1 - (c/256) ^ |kTimes|) * |kValue|.
  static const struct {
    int target_buffer_level;
    int expected_value;
  } kCases[] = {
    { 3, 14 },  // Makes filter coefficient 252/256.
    { 7, 11 },  // Makes filter coefficient 253/256.
    { 8, 7 }  // Makes filter coefficient 254/256.
  };

  BufferLevelFilter filter;
  for (size_t c = 0; c < sizeof(kCases) / sizeof(kCases[0]); ++c) {
    // Start every case from the reset state.
    filter.Reset();
    filter.SetTargetBufferLevel(kCases[c].target_buffer_level);
    for (int i = 0; i < kTimes; ++i) {
      filter.Update(kValue, 0 /* time_stretched_samples */,
                    160 /* packet_len_samples */);
    }
    // filtered_current_level() returns the value in Q8.
    EXPECT_EQ(kCases[c].expected_value, filter.filtered_current_level() >> 8);
  }
}
// Verifies that |time_stretched_samples| is ignored when the packet length is
// zero, and otherwise moves the filtered level by the corresponding number of
// packets.
TEST(BufferLevelFilter, TimeStretchedSamples) {
  const int kTimes = 10;
  const int kValue = 100;
  const int kPacketSizeSamples = 160;
  const int kNumPacketsStretched = 2;
  const int kTimeStretchedSamples = kNumPacketsStretched * kPacketSizeSamples;
  // With coefficient 251/256, the filtered value after |kTimes| updates is
  // expected to be (1 - (251/256) ^ |kTimes|) * |kValue|.
  const int kExpectedValue = 17;

  BufferLevelFilter filter;
  filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.

  // Update |kTimes| times with the packet size set to 0. Do not expect the
  // parameter |kTimeStretchedSamples| to have any effect.
  for (int i = 0; i < kTimes; ++i) {
    filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */);
  }
  // filtered_current_level() returns the value in Q8.
  int level = filter.filtered_current_level() >> 8;
  EXPECT_EQ(kExpectedValue, level);

  // Update again, now with a non-zero packet length. The current filtered
  // value is used as input, in order to isolate the impact of
  // |kTimeStretchedSamples|.
  filter.Update(level, kTimeStretchedSamples, kPacketSizeSamples);
  level = filter.filtered_current_level() >> 8;
  EXPECT_EQ(kExpectedValue - kNumPacketsStretched, level);

  // Try negative value and verify that we come back to the previous result.
  filter.Update(level, -kTimeStretchedSamples, kPacketSizeSamples);
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
}
// Verifies the handling of a negative |time_stretched_samples| value that is
// not a whole number of packets.
TEST(BufferLevelFilter, TimeStretchedSamplesNegativeUnevenFrames) {
  BufferLevelFilter filter;
  filter.SetTargetBufferLevel(1);  // Makes filter coefficient 251/256.
  // Update 10 times with value 100.
  const int kTimes = 10;
  const int kValue = 100;
  const int kPacketSizeSamples = 160;
  // A negative, non-integer number of packets. The conversion to a whole
  // number of samples truncates toward zero and is made explicit here.
  const int kTimeStretchedSamples =
      static_cast<int>(-3.1415 * kPacketSizeSamples);
  for (int i = 0; i < kTimes; ++i) {
    // Packet size set to 0. Do not expect the parameter
    // |kTimeStretchedSamples| to have any effect.
    filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */);
  }
  // Expect the filtered value to be
  // (1 - (251/256) ^ |kTimes|) * |kValue|.
  const int kExpectedValue = 17;
  // filtered_current_level() returns the value in Q8.
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);

  // Update filter again, now with non-zero value for packet length.
  // Set the current filtered value to be the input, in order to isolate the
  // impact of |kTimeStretchedSamples|. Since |kTimeStretchedSamples| is
  // negative, the filtered level is expected to increase.
  filter.Update(filter.filtered_current_level() >> 8, kTimeStretchedSamples,
                kPacketSizeSamples);
  EXPECT_EQ(21, filter.filtered_current_level() >> 8);
  // Apply the opposite (positive) amount and verify that we come back to the
  // previous result.
  filter.Update(filter.filtered_current_level() >> 8, -kTimeStretchedSamples,
                kPacketSizeSamples);
  EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
}
} // namespace webrtc

View File

@ -0,0 +1 @@
CODE_REVIEW_SERVER: https://chromereviews.googleplex.com

View File

@ -0,0 +1,134 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/comfort_noise.h"
#include <assert.h>
#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h"
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
namespace webrtc {
// Clears the stored error code and arms the first-call handling in
// Generate() again.
void ComfortNoise::Reset() {
  internal_error_code_ = 0;
  first_call_ = true;
}
// Passes the payload of |packet| to the comfort noise decoder registered for
// the packet's payload type, and makes that decoder the active CNG decoder.
// The packet and its payload are always deleted before returning.
// Returns kOK on success, kUnknownPayloadType if no decoder is found for the
// payload type, and kInternalError if the decoder rejects the payload (the
// decoder's error code is then saved in |internal_error_code_|).
int ComfortNoise::UpdateParameters(Packet* packet) {
  assert(packet);  // Existence is verified by caller.
  int return_code = kOK;
  // Get comfort noise decoder.
  AudioDecoder* cng_decoder = decoder_database_->GetDecoder(
      packet->header.payloadType);
  if (cng_decoder) {
    decoder_database_->SetActiveCngDecoder(packet->header.payloadType);
    CNG_dec_inst* cng_inst = static_cast<CNG_dec_inst*>(cng_decoder->state());
    if (WebRtcCng_UpdateSid(cng_inst,
                            packet->payload,
                            packet->payload_length) < 0) {
      internal_error_code_ = WebRtcCng_GetErrorCodeDec(cng_inst);
      return_code = kInternalError;
    }
  } else {
    return_code = kUnknownPayloadType;
  }
  // Single cleanup point; the packet is consumed regardless of the outcome.
  delete [] packet->payload;
  delete packet;
  return return_code;
}
// Generates comfort noise into |output| using the active CNG decoder. On the
// first call after construction or Reset(), |overlap_length_| extra samples
// are generated and mixed into the end of |sync_buffer_| with tapering
// windows, and are then removed from the front of |output|.
// Returns kOK on success, kMultiChannelNotSupported if |output| is not mono,
// kUnknownPayloadType if there is no active CNG decoder, and kInternalError
// if the decoder fails.
int ComfortNoise::Generate(size_t requested_length,
                           AudioMultiVector<int16_t>* output) {
  // TODO(hlundin): Change to an enumerator and skip assert.
  assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
         fs_hz_ == 48000);
  assert(output->Channels() == 1);  // Not adapted for multi-channel yet.
  if (output->Channels() != 1) {
    return kMultiChannelNotSupported;
  }

  // On the first call, generate noise for the overlap as well, and signal the
  // start of a new period to the decoder.
  const int16_t new_period = first_call_ ? 1 : 0;
  const int16_t number_of_samples = first_call_ ?
      requested_length + overlap_length_ : requested_length;
  output->AssertSize(number_of_samples);

  // Get the decoder from the database.
  AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
  if (!cng_decoder) {
    return kUnknownPayloadType;
  }
  CNG_dec_inst* cng_inst = static_cast<CNG_dec_inst*>(cng_decoder->state());
  // The expression &(*output)[0][0] is a pointer to the first element in
  // the first channel.
  if (WebRtcCng_Generate(cng_inst, &(*output)[0][0], number_of_samples,
                         new_period) < 0) {
    // Error returned.
    output->Zeros(requested_length);
    internal_error_code_ = WebRtcCng_GetErrorCodeDec(cng_inst);
    return kInternalError;
  }

  if (first_call_) {
    // Set tapering window parameters. Values are in Q15.
    int16_t muting_window;  // Mixing factor for overlap data.
    int16_t muting_window_increment;  // Mixing factor increment (negative).
    int16_t unmuting_window;  // Mixing factor for comfort noise.
    int16_t unmuting_window_increment;  // Mixing factor increment.
    switch (fs_hz_) {
      case 8000:
        muting_window = DspHelper::kMuteFactorStart8kHz;
        muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
        unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
        unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
        break;
      case 16000:
        muting_window = DspHelper::kMuteFactorStart16kHz;
        muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
        unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
        unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
        break;
      case 32000:
        muting_window = DspHelper::kMuteFactorStart32kHz;
        muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
        unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
        unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
        break;
      default:  // fs_hz_ == 48000
        muting_window = DspHelper::kMuteFactorStart48kHz;
        muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
        unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
        unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
        break;
    }

    // Do overlap-add between the new vector and the last |overlap_length_|
    // samples in the first channel of |sync_buffer_|:
    // overlap[i] = muting_window * overlap[i] + unmuting_window * output[i].
    const size_t start_ix = sync_buffer_->Size() - overlap_length_;
    for (size_t i = 0; i < overlap_length_; ++i) {
      const size_t target_ix = start_ix + i;
      const int32_t faded_out = (*sync_buffer_)[0][target_ix] * muting_window;
      // The expression (*output)[0][i] is the i-th element in the first
      // channel.
      const int32_t faded_in = (*output)[0][i] * unmuting_window;
      // Add 16384 for rounding before going back from Q15.
      (*sync_buffer_)[0][target_ix] = (faded_out + faded_in + 16384) >> 15;
      muting_window += muting_window_increment;
      unmuting_window += unmuting_window_increment;
    }
    // Remove |overlap_length_| samples from the front of |output| since they
    // were mixed into |sync_buffer_| above.
    output->PopFront(overlap_length_);
  }
  first_call_ = false;
  return kOK;
}
} // namespace webrtc

View File

@ -0,0 +1,73 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_COMFORT_NOISE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_COMFORT_NOISE_H_
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class DecoderDatabase;
class SyncBuffer;
struct Packet;
// This class acts as an interface to the CNG generator.
class ComfortNoise {
 public:
  enum ReturnCodes {
    kOK = 0,
    kUnknownPayloadType,
    kInternalError,
    kMultiChannelNotSupported
  };

  // Creates a comfort noise object for sample rate |fs_hz|. The overlap
  // length is set to 5 samples per 8000 Hz of sample rate. The
  // |decoder_database| and |sync_buffer| pointers are stored, not copied.
  ComfortNoise(int fs_hz, DecoderDatabase* decoder_database,
               SyncBuffer* sync_buffer)
      : fs_hz_(fs_hz),
        first_call_(true),
        overlap_length_(5 * fs_hz_ / 8000),
        decoder_database_(decoder_database),
        sync_buffer_(sync_buffer),
        internal_error_code_(0) {
  }

  // Resets the state. Should be called before each new comfort noise period.
  void Reset();

  // Update the comfort noise generator with the parameters in |packet|.
  // Will delete the packet.
  int UpdateParameters(Packet* packet);

  // Generates |requested_length| samples of comfort noise and writes to
  // |output|. If this is the first call after Reset (or first after creating
  // the object), it will also mix in comfort noise at the end of the
  // SyncBuffer object provided in the constructor.
  int Generate(size_t requested_length, AudioMultiVector<int16_t>* output);

  // Returns the last error code that was produced by the comfort noise
  // decoder. Returns 0 if no error has been encountered since the last reset.
  int internal_error_code() const { return internal_error_code_; }

 private:
  int fs_hz_;
  bool first_call_;
  size_t overlap_length_;
  DecoderDatabase* decoder_database_;
  SyncBuffer* sync_buffer_;
  int internal_error_code_;
  DISALLOW_COPY_AND_ASSIGN(ComfortNoise);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_COMFORT_NOISE_H_

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for ComfortNoise class.
#include "webrtc/modules/audio_coding/neteq4/comfort_noise.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
namespace webrtc {
// Verifies that a ComfortNoise object can be constructed and destructed.
TEST(ComfortNoise, CreateAndDestroy) {
  const int kSampleRateHz = 8000;
  MockDecoderDatabase decoder_database;
  SyncBuffer sync_buffer(1, 1000);
  ComfortNoise comfort_noise(kSampleRateHz, &decoder_database, &sync_buffer);
  // Die() is called when |decoder_database| goes out of scope.
  EXPECT_CALL(decoder_database, Die());
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,184 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/decision_logic.h"
#include <assert.h>
#include <algorithm>
#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h"
#include "webrtc/modules/audio_coding/neteq4/decision_logic_fax.h"
#include "webrtc/modules/audio_coding/neteq4/decision_logic_normal.h"
#include "webrtc/modules/audio_coding/neteq4/delay_manager.h"
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
// Factory: builds the DecisionLogic subclass that matches |playout_mode|.
// kPlayoutOn and kPlayoutStreaming yield a DecisionLogicNormal; kPlayoutFax
// and kPlayoutOff yield a DecisionLogicFax. The returned object is allocated
// with new.
DecisionLogic* DecisionLogic::Create(int fs_hz,
                                     int output_size_samples,
                                     NetEqPlayoutMode playout_mode,
                                     DecoderDatabase* decoder_database,
                                     const PacketBuffer& packet_buffer,
                                     DelayManager* delay_manager,
                                     BufferLevelFilter* buffer_level_filter) {
  const bool wants_normal_logic =
      (playout_mode == kPlayoutOn) || (playout_mode == kPlayoutStreaming);
  if (wants_normal_logic) {
    return new DecisionLogicNormal(fs_hz, output_size_samples, playout_mode,
                                   decoder_database, packet_buffer,
                                   delay_manager, buffer_level_filter);
  }

  const bool wants_fax_logic =
      (playout_mode == kPlayoutFax) || (playout_mode == kPlayoutOff);
  if (wants_fax_logic) {
    return new DecisionLogicFax(fs_hz, output_size_samples, playout_mode,
                                decoder_database, packet_buffer,
                                delay_manager, buffer_level_filter);
  }

  // Not reachable for the playout modes handled above; kept so that every
  // path returns a value.
  assert(false);
  return NULL;
}
// Stores the collaborators and puts all counters in their start state.
// |fs_mult_| and |output_size_samples_| are not part of the initializer list;
// they are set by the SetSampleRate() call in the constructor body.
DecisionLogic::DecisionLogic(int fs_hz,
int output_size_samples,
NetEqPlayoutMode playout_mode,
DecoderDatabase* decoder_database,
const PacketBuffer& packet_buffer,
DelayManager* delay_manager,
BufferLevelFilter* buffer_level_filter)
: decoder_database_(decoder_database),
packet_buffer_(packet_buffer),
delay_manager_(delay_manager),
buffer_level_filter_(buffer_level_filter),
cng_state_(kCngOff),
generated_noise_samples_(0),
packet_length_samples_(0),
sample_memory_(0),
prev_time_scale_(false),
timescale_hold_off_(kMinTimescaleInterval),
num_consecutive_expands_(0),
playout_mode_(playout_mode) {
// Tell the delay manager whether the streaming playout mode was selected.
delay_manager_->set_streaming_mode(playout_mode_ == kPlayoutStreaming);
SetSampleRate(fs_hz, output_size_samples);
}
// Resets the CNG state, the generated-noise, packet-length and sample-memory
// counters, the time-scale flag and the consecutive-expand counter.
// Note that |timescale_hold_off_| is cleared to 0 here, whereas the
// constructor and SoftReset() set it to kMinTimescaleInterval.
void DecisionLogic::Reset() {
cng_state_ = kCngOff;
generated_noise_samples_ = 0;
packet_length_samples_ = 0;
sample_memory_ = 0;
prev_time_scale_ = false;
timescale_hold_off_ = 0;
num_consecutive_expands_ = 0;
}
// Partial reset: clears the packet length, the sample memory and the
// time-scale flag, and re-arms the time-scale hold-off counter.
// |cng_state_|, |generated_noise_samples_| and |num_consecutive_expands_| are
// left untouched.
void DecisionLogic::SoftReset() {
packet_length_samples_ = 0;
sample_memory_ = 0;
prev_time_scale_ = false;
timescale_hold_off_ = kMinTimescaleInterval;
}
// Records the sample rate as a multiple of 8 kHz in |fs_mult_|, and stores
// the output block size. Only 8, 16, 32 and 48 kHz are accepted; any other
// rate trips an assert.
void DecisionLogic::SetSampleRate(int fs_hz, int output_size_samples) {
  // TODO(hlundin): Change to an enumerator and skip assert.
  switch (fs_hz) {
    case 8000:
    case 16000:
    case 32000:
    case 48000:
      break;
    default:
      assert(false);
  }
  output_size_samples_ = output_size_samples;
  fs_mult_ = fs_hz / 8000;
}
// Common front end for all decision trees. Updates the noise counter and CNG
// state from |prev_mode|, computes the current buffer size, feeds the buffer
// level filter, and then delegates the actual decision to
// GetDecisionSpecialized().
Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
                                      const Expand& expand,
                                      int decoder_frame_length,
                                      const RTPHeader* packet_header,
                                      Modes prev_mode,
                                      bool play_dtmf, bool* reset_decoder) {
  const bool prev_was_rfc3389_cng = (prev_mode == kModeRfc3389Cng);
  const bool prev_was_internal_cng = (prev_mode == kModeCodecInternalCng);
  if (prev_was_rfc3389_cng || prev_was_internal_cng ||
      prev_mode == kModeExpand) {
    // The last block was CNG, or Expand (which may have been covering up for
    // a lost CNG packet): count its samples as generated noise.
    generated_noise_samples_ += output_size_samples_;
    // Remember which kind of CNG is on, in case comfort noise gets
    // interrupted by DTMF. Expand leaves the CNG state as it was.
    if (prev_was_rfc3389_cng) {
      cng_state_ = kCngRfc3389On;
    } else if (prev_was_internal_cng) {
      cng_state_ = kCngInternalOn;
    }
  }

  // Samples available for playout: what is left in the sync buffer (excluding
  // the overlap region) plus what is waiting in the packet buffer.
  const int sync_buffer_samples =
      sync_buffer.FutureLength() - expand.overlap_length();
  const int total_buffered_samples =
      sync_buffer_samples +
      packet_buffer_.NumSamplesInBuffer(decoder_database_,
                                        decoder_frame_length);
  LOG(LS_VERBOSE) << "Buffers: " << packet_buffer_.NumPacketsInBuffer() <<
      " packets * " << decoder_frame_length << " samples/packet + " <<
      sync_buffer_samples << " samples in sync buffer = " <<
      total_buffered_samples;

  // The time-scale flag only survives if the previous mode really was an
  // accelerate or preemptive-expand operation.
  switch (prev_mode) {
    case kModeAccelerateSuccess:
    case kModeAccelerateLowEnergy:
    case kModePreemptiveExpandSuccess:
    case kModePreemptiveExpandLowEnergy:
      break;
    default:
      prev_time_scale_ = false;
      break;
  }

  FilterBufferLevel(total_buffered_samples, prev_mode);

  return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length,
                                packet_header, prev_mode, play_dtmf,
                                reset_decoder);
}
// Keeps track of the number of expand decisions in a row: the counter grows
// by one for an expand decision and is cleared by any other decision.
void DecisionLogic::ExpandDecision(bool is_expand_decision) {
  num_consecutive_expands_ =
      is_expand_decision ? num_consecutive_expands_ + 1 : 0;
}
void DecisionLogic::FilterBufferLevel(int buffer_size_samples,
Modes prev_mode) {
const int elapsed_time_ms = output_size_samples_ / (8 * fs_mult_);
delay_manager_->UpdateCounters(elapsed_time_ms);
// Do not update buffer history if currently playing CNG since it will bias
// the filtered buffer level.
if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) {
buffer_level_filter_->SetTargetBufferLevel(
delay_manager_->base_target_level());
int buffer_size_packets = 0;
if (packet_length_samples_ > 0) {
// Calculate size in packets.
buffer_size_packets = buffer_size_samples / packet_length_samples_;
}
int sample_memory_local = 0;
if (prev_time_scale_) {
sample_memory_local = sample_memory_;
timescale_hold_off_ = kMinTimescaleInterval;
}
buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
packet_length_samples_);
prev_time_scale_ = false;
}
timescale_hold_off_ = std::max(timescale_hold_off_ - 1, 0);
}
} // namespace webrtc

View File

@ -0,0 +1,168 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_H_
#include "webrtc/modules/audio_coding/neteq4/defines.h"
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BufferLevelFilter;
class DecoderDatabase;
class DelayManager;
class Expand;
class PacketBuffer;
class SyncBuffer;
struct RTPHeader;
// This is the base class for the decision tree implementations. Derived classes
// must implement the method GetDecisionSpecialized().
class DecisionLogic {
 public:
  // Static factory function which creates different types of objects depending
  // on the |playout_mode|.
  static DecisionLogic* Create(int fs_hz,
                               int output_size_samples,
                               NetEqPlayoutMode playout_mode,
                               DecoderDatabase* decoder_database,
                               const PacketBuffer& packet_buffer,
                               DelayManager* delay_manager,
                               BufferLevelFilter* buffer_level_filter);

  // Constructor.
  DecisionLogic(int fs_hz,
                int output_size_samples,
                NetEqPlayoutMode playout_mode,
                DecoderDatabase* decoder_database,
                const PacketBuffer& packet_buffer,
                DelayManager* delay_manager,
                BufferLevelFilter* buffer_level_filter);

  // Destructor.
  virtual ~DecisionLogic() {}

  // Resets object to a clean state.
  void Reset();

  // Resets parts of the state. Typically done when switching codecs.
  void SoftReset();

  // Sets the sample rate and the output block size.
  void SetSampleRate(int fs_hz, int output_size_samples);

  // Returns the operation that should be done next. |sync_buffer| and |expand|
  // are provided for reference. |decoder_frame_length| is the number of samples
  // obtained from the last decoded frame. If there is a packet available, the
  // packet header should be supplied in |packet_header|; otherwise it should
  // be NULL. The mode resulting from the last call to NetEqImpl::GetAudio is
  // supplied in |prev_mode|. If there is a DTMF event to play, |play_dtmf|
  // should be set to true. The output variable |reset_decoder| will be set to
  // true if a reset is required; otherwise it is left unchanged (i.e., it can
  // remain true if it was true before the call).
  // This method ends by calling GetDecisionSpecialized to get the actual
  // return value.
  Operations GetDecision(const SyncBuffer& sync_buffer,
                         const Expand& expand,
                         int decoder_frame_length,
                         const RTPHeader* packet_header,
                         Modes prev_mode,
                         bool play_dtmf,
                         bool* reset_decoder);

  // These methods test the |cng_state_| for different conditions.
  bool CngRfc3389On() const { return cng_state_ == kCngRfc3389On; }
  bool CngOff() const { return cng_state_ == kCngOff; }

  // Resets the |cng_state_| to kCngOff.
  void SetCngOff() { cng_state_ = kCngOff; }

  // Reports back to DecisionLogic whether the decision to do expand remains or
  // not. Note that this is necessary, since an expand decision can be changed
  // to kNormal in NetEqImpl::GetDecision if there is still enough data in the
  // sync buffer.
  void ExpandDecision(bool is_expand_decision);

  // Adds |value| to |sample_memory_|.
  void AddSampleMemory(int32_t value) {
    sample_memory_ += value;
  }

  // Accessors and mutators.
  void set_sample_memory(int32_t value) { sample_memory_ = value; }
  int generated_noise_samples() const { return generated_noise_samples_; }
  void set_generated_noise_samples(int value) {
    generated_noise_samples_ = value;
  }
  int packet_length_samples() const { return packet_length_samples_; }
  void set_packet_length_samples(int value) {
    packet_length_samples_ = value;
  }
  void set_prev_time_scale(bool value) { prev_time_scale_ = value; }
  NetEqPlayoutMode playout_mode() const { return playout_mode_; }

 protected:
  // The value 6 sets maximum time-stretch rate to about 100 ms/s.
  static const int kMinTimescaleInterval = 6;

  enum CngState {
    kCngOff,
    kCngRfc3389On,
    kCngInternalOn
  };

  // Returns the operation that should be done next. |sync_buffer| and |expand|
  // are provided for reference. |decoder_frame_length| is the number of samples
  // obtained from the last decoded frame. If there is a packet available, the
  // packet header should be supplied in |packet_header|; otherwise it should
  // be NULL. The mode resulting from the last call to NetEqImpl::GetAudio is
  // supplied in |prev_mode|. If there is a DTMF event to play, |play_dtmf|
  // should be set to true. The output variable |reset_decoder| will be set to
  // true if a reset is required; otherwise it is left unchanged (i.e., it can
  // remain true if it was true before the call).
  // Should be implemented by derived classes.
  virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
                                            const Expand& expand,
                                            int decoder_frame_length,
                                            const RTPHeader* packet_header,
                                            Modes prev_mode,
                                            bool play_dtmf,
                                            bool* reset_decoder) = 0;

  // Updates the |buffer_level_filter_| with the current buffer level. The
  // level is passed in as |buffer_size_samples| (a number of samples); it is
  // converted to packets using |packet_length_samples_| before the filter is
  // updated. The filter is not updated if |prev_mode| is one of the CNG modes.
  void FilterBufferLevel(int buffer_size_samples, Modes prev_mode);

  DecoderDatabase* decoder_database_;
  const PacketBuffer& packet_buffer_;
  DelayManager* delay_manager_;
  BufferLevelFilter* buffer_level_filter_;
  int fs_mult_;
  int output_size_samples_;
  CngState cng_state_;  // Remember if comfort noise is interrupted by other
                        // event (e.g., DTMF).
  int generated_noise_samples_;
  int packet_length_samples_;
  int sample_memory_;
  bool prev_time_scale_;
  int timescale_hold_off_;
  int num_consecutive_expands_;
  const NetEqPlayoutMode playout_mode_;

 private:
  DISALLOW_COPY_AND_ASSIGN(DecisionLogic);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_H_

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/decision_logic_fax.h"
#include <assert.h>
#include <algorithm>
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
namespace webrtc {
// Decision tree for the playout modes kPlayoutFax and kPlayoutOff.
// A comfort noise packet is played once its time has come. A regular packet
// is played (kNormal) when it is the expected one or when enough noise has
// been generated to reach it. In all other cases any ongoing comfort noise is
// continued; otherwise filler data is produced according to the playout mode.
// |expand|, |decoder_frame_length|, |prev_mode|, |play_dtmf| and
// |reset_decoder| are not used by this implementation.
Operations DecisionLogicFax::GetDecisionSpecialized(
    const SyncBuffer& sync_buffer,
    const Expand& expand,
    int decoder_frame_length,
    const RTPHeader* packet_header,
    Modes prev_mode,
    bool play_dtmf,
    bool* reset_decoder) {
  assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);

  const bool have_packet = (packet_header != NULL);
  const uint32_t target_timestamp = sync_buffer.end_timestamp();
  uint32_t available_timestamp = 0;
  int is_cng_packet = 0;
  if (have_packet) {
    available_timestamp = packet_header->timestamp;
    is_cng_packet =
        decoder_database_->IsComfortNoise(packet_header->payloadType);
  }

  // Signed distance from the available packet to the current playout
  // position, counting generated noise as elapsed time. Non-negative means
  // the packet is due.
  const int32_t timestamp_diff = static_cast<int32_t>(
      (generated_noise_samples_ + target_timestamp) - available_timestamp);

  if (is_cng_packet) {
    // Play the CNG packet now if it is due; otherwise wait.
    return (timestamp_diff >= 0) ? kRfc3389Cng : kRfc3389CngNoPacket;
  }

  if (have_packet &&
      (target_timestamp == available_timestamp || timestamp_diff >= 0)) {
    return kNormal;
  }

  // From here on there is either no packet at all, or a packet that is not
  // due yet. Ongoing comfort noise simply continues; in the latter case
  // |generated_noise_samples_| will be increased, so the timestamp is not.
  if (cng_state_ == kCngRfc3389On) {
    return kRfc3389CngNoPacket;
  }
  if (cng_state_ == kCngInternalOn) {
    return kCodecInternalCng;
  }

  // Nothing to play: generate filler data. While waiting for a future packet
  // the timestamp is increased as well.
  if (playout_mode_ == kPlayoutOff) {
    return have_packet ? kAlternativePlcIncreaseTimestamp : kAlternativePlc;
  }
  if (playout_mode_ == kPlayoutFax) {
    return have_packet ? kAudioRepetitionIncreaseTimestamp : kAudioRepetition;
  }
  assert(false);
  return kUndefined;
}
} // namespace webrtc

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_FAX_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_FAX_H_
#include "webrtc/modules/audio_coding/neteq4/decision_logic.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Implementation of the DecisionLogic class for playout modes kPlayoutFax and
// kPlayoutOff.
class DecisionLogicFax : public DecisionLogic {
public:
// Constructor. All arguments are forwarded unchanged to the DecisionLogic
// base class constructor.
DecisionLogicFax(int fs_hz,
int output_size_samples,
NetEqPlayoutMode playout_mode,
DecoderDatabase* decoder_database,
const PacketBuffer& packet_buffer,
DelayManager* delay_manager,
BufferLevelFilter* buffer_level_filter)
: DecisionLogic(fs_hz, output_size_samples, playout_mode,
decoder_database, packet_buffer, delay_manager,
buffer_level_filter) {
}
// Destructor.
virtual ~DecisionLogicFax() {}
protected:
// Returns the operation that should be done next. |sync_buffer| and |expand|
// are provided for reference. |decoder_frame_length| is the number of samples
// obtained from the last decoded frame. If there is a packet available, the
// packet header should be supplied in |packet_header|; otherwise it should
// be NULL. The mode resulting from the last call to NetEqImpl::GetAudio is
// supplied in |prev_mode|. If there is a DTMF event to play, |play_dtmf|
// should be set to true. The output variable |reset_decoder| will be set to
// true if a reset is required; otherwise it is left unchanged (i.e., it can
// remain true if it was true before the call).
virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
const Expand& expand,
int decoder_frame_length,
const RTPHeader* packet_header,
Modes prev_mode,
bool play_dtmf,
bool* reset_decoder);
private:
DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_FAX_H_

View File

@ -0,0 +1,236 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/decision_logic_normal.h"
#include <assert.h>
#include <algorithm>
#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h"
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/delay_manager.h"
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
// Decision tree for the playout modes kPlayoutOn and kPlayoutStreaming.
//
// The checks are made in this order:
//  1. If the previous mode was an error: expand when no packet is available,
//     otherwise return kUndefined to flag for a reset.
//  2. If the available packet is comfort noise: defer to CngOperation().
//  3. If no packet is available: defer to NoPacket().
//  4. If more than kReinitAfterExpands expands were done in a row:
//     set |*reset_decoder| and return kNormal.
//  5. Otherwise compare the available timestamp with the timestamp wanted
//     next (the end of |sync_buffer|): the expected packet, a future packet,
//     or an old packet (kUndefined, i.e., reset).
//
// |*reset_decoder| is only written in case 4; otherwise it is left unchanged.
Operations DecisionLogicNormal::GetDecisionSpecialized(
    const SyncBuffer& sync_buffer,
    const Expand& expand,
    int decoder_frame_length,
    const RTPHeader* packet_header,
    Modes prev_mode,
    bool play_dtmf,
    bool* reset_decoder) {
  assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming);
  // Guard for errors, to avoid getting stuck in error mode.
  if (prev_mode == kModeError) {
    if (!packet_header) {
      return kExpand;
    } else {
      return kUndefined;  // Use kUndefined to flag for a reset.
    }
  }

  uint32_t target_timestamp = sync_buffer.end_timestamp();
  uint32_t available_timestamp = 0;
  int is_cng_packet = 0;
  if (packet_header) {
    available_timestamp = packet_header->timestamp;
    is_cng_packet =
        decoder_database_->IsComfortNoise(packet_header->payloadType);
  }

  if (is_cng_packet) {
    return CngOperation(prev_mode, target_timestamp, available_timestamp);
  }

  // Handle the case with no packet at all available (except maybe DTMF).
  if (!packet_header) {
    return NoPacket(play_dtmf);
  }

  // If the expand period was very long, reset NetEQ since it is likely that the
  // sender was restarted.
  if (num_consecutive_expands_ > kReinitAfterExpands) {
    *reset_decoder = true;
    return kNormal;
  }

  // Check if the required packet is available.
  if (target_timestamp == available_timestamp) {
    return ExpectedPacketAvailable(prev_mode, play_dtmf);
  }

  // The timestamps are 32-bit counters that wrap around, so "later than" must
  // be decided from the signed difference rather than a plain unsigned
  // comparison. Otherwise a packet just after the wrap would look older than
  // the target and cause a needless reset.
  const int32_t timestamp_lead =
      static_cast<int32_t>(available_timestamp - target_timestamp);
  if (timestamp_lead > 0) {
    return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length,
                                 prev_mode, target_timestamp,
                                 available_timestamp, play_dtmf);
  }

  // The available packet is older than the target, which can happen when a
  // new stream or codec is received. Signal for a reset.
  return kUndefined;
}
// Decides what to do when the next available packet is a comfort noise
// packet: either start using it now (kRfc3389Cng), or keep generating noise
// from the previous CNG parameters for another round (kRfc3389CngNoPacket).
// May advance |generated_noise_samples_| to shorten an overly long wait.
Operations DecisionLogicNormal::CngOperation(Modes prev_mode,
                                             uint32_t target_timestamp,
                                             uint32_t available_timestamp) {
  // Signed difference between target and available timestamp; negative while
  // the packet is still in the future.
  int32_t samples_past_due =
      (generated_noise_samples_ + target_timestamp) - available_timestamp;
  // TargetLevel() is in Q8, hence the shift.
  const int32_t target_level_samples =
      (delay_manager_->TargetLevel() * packet_length_samples_) >> 8;
  const int32_t surplus_wait_samples =
      -samples_past_due - target_level_samples;

  if (surplus_wait_samples > target_level_samples / 2) {
    // The packet would have to wait more than 1.5 times the wanted buffer
    // delay. Advance the clock to cut the waiting time down to the optimal.
    generated_noise_samples_ += surplus_wait_samples;
    samples_past_due += surplus_wait_samples;
  }

  // Waiting is only an option while RFC 3389 comfort noise is already being
  // played; otherwise go for the CNG packet right away.
  const bool keep_waiting =
      (samples_past_due < 0) && (prev_mode == kModeRfc3389Cng);
  return keep_waiting ? kRfc3389CngNoPacket : kRfc3389Cng;
}
// Picks the operation when the packet buffer has nothing to offer: ongoing
// comfort noise takes precedence, then a pending DTMF event, and expand is
// the last resort.
Operations DecisionLogicNormal::NoPacket(bool play_dtmf) {
  switch (cng_state_) {
    case kCngRfc3389On:
      // Keep on playing comfort noise.
      return kRfc3389CngNoPacket;
    case kCngInternalOn:
      // Keep on playing codec internal comfort noise.
      return kCodecInternalCng;
    default:
      break;
  }
  // Nothing to play unless there is DTMF; fall back to expand.
  return play_dtmf ? kDtmf : kExpand;
}
// Picks the operation when the expected packet is at hand. Normal decoding
// is the default; accelerate or preemptive expand is chosen when the filtered
// buffer level is outside the delay manager's limits. No time-stretching is
// considered directly after an expand or while DTMF is to be played.
Operations DecisionLogicNormal::ExpectedPacketAvailable(Modes prev_mode,
                                                        bool play_dtmf) {
  if (prev_mode == kModeExpand || play_dtmf) {
    return kNormal;
  }

  // Check criterion for time-stretching.
  int low_limit, high_limit;
  delay_manager_->BufferLimits(&low_limit, &high_limit);
  const bool timescale_allowed = TimescaleAllowed();

  if ((buffer_level_filter_->filtered_current_level() >= high_limit &&
       timescale_allowed) ||
      buffer_level_filter_->filtered_current_level() >= high_limit << 2) {
    // Buffer level higher than limit and time-scaling allowed,
    // or buffer level really high.
    return kAccelerate;
  }
  if (buffer_level_filter_->filtered_current_level() < low_limit &&
      timescale_allowed) {
    return kPreemptiveExpand;
  }
  return kNormal;
}
// Picks the operation when the expected packet is missing but a packet
// further into the future is available. Possible outcomes: continue an
// ongoing expand (or play DTMF), continue comfort noise, play the packet as
// normal, merge it with the previous output, or start an expand.
Operations DecisionLogicNormal::FuturePacketAvailable(
    const SyncBuffer& sync_buffer,
    const Expand& expand,
    int decoder_frame_length,
    Modes prev_mode,
    uint32_t target_timestamp,
    uint32_t available_timestamp,
    bool play_dtmf) {
  // Distance from the wanted timestamp to the one that is available.
  const uint32_t timestamp_leap = available_timestamp - target_timestamp;

  // Continue an ongoing expand if the new packet is too far into the future.
  const bool continue_expand = (prev_mode == kModeExpand) &&
                               !ReinitAfterExpands(timestamp_leap) &&
                               !MaxWaitForPacket() &&
                               PacketTooEarly(timestamp_leap) &&
                               UnderTargetLevel();
  if (continue_expand) {
    // With DTMF left to play, do that instead of expanding.
    return play_dtmf ? kDtmf : kExpand;
  }

  const int samples_left = sync_buffer.FutureLength() -
      expand.overlap_length();
  const int cur_size_samples = samples_left +
      packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;

  // Coming from comfort noise, no merge is needed.
  const bool prev_was_cng = (prev_mode == kModeRfc3389Cng) ||
                            (prev_mode == kModeCodecInternalCng);
  if (prev_was_cng) {
    // Keep the same delay as before the CNG (or maximum 70 ms in buffer as
    // safety precaution), but make sure that the number of samples in buffer
    // is no higher than 4 times the optimal level. (Note that TargetLevel()
    // is in Q8.)
    int32_t timestamp_diff = (generated_noise_samples_ + target_timestamp) -
        available_timestamp;
    if (timestamp_diff >= 0 ||
        cur_size_samples >
        4 * ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8)) {
      // Time to play this new packet.
      return kNormal;
    }
    // Too early for the new packet; continue with the same kind of comfort
    // noise as before.
    return (prev_mode == kModeRfc3389Cng) ? kRfc3389CngNoPacket
                                          : kCodecInternalCng;
  }

  // Do not merge unless we have done an expand before.
  // (Convert kAllowMergeWithoutExpand from ms to samples by multiplying with
  // fs_mult_ * 8 = fs / 1000.)
  if (prev_mode == kModeExpand ||
      (decoder_frame_length < output_size_samples_ &&
       cur_size_samples > kAllowMergeWithoutExpandMs * fs_mult_ * 8)) {
    return kMerge;
  }
  // Play DTMF instead of expand, if there is any.
  return play_dtmf ? kDtmf : kExpand;
}
// Returns true if the filtered buffer level from |buffer_level_filter_| is
// less than or equal to the target level from |delay_manager_|.
bool DecisionLogicNormal::UnderTargetLevel() const {
return buffer_level_filter_->filtered_current_level() <=
delay_manager_->TargetLevel();
}
// Returns true if |timestamp_leap| is at least as long as
// kReinitAfterExpands output blocks.
bool DecisionLogicNormal::ReinitAfterExpands(uint32_t timestamp_leap) const {
  const uint32_t reinit_limit_samples =
      static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
  return timestamp_leap >= reinit_limit_samples;
}
// Returns true if |timestamp_leap| is longer than the number of samples
// covered by the consecutive expands done so far.
bool DecisionLogicNormal::PacketTooEarly(uint32_t timestamp_leap) const {
  const uint32_t expanded_samples =
      static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
  return timestamp_leap > expanded_samples;
}
// Returns true once the number of consecutive expands has reached
// kMaxWaitForPacket.
bool DecisionLogicNormal::MaxWaitForPacket() const {
  return !(num_consecutive_expands_ < kMaxWaitForPacket);
}
} // namespace webrtc

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_NORMAL_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_NORMAL_H_
#include "webrtc/modules/audio_coding/neteq4/decision_logic.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Implementation of the DecisionLogic class for playout modes kPlayoutOn and
// kPlayoutStreaming.
class DecisionLogicNormal : public DecisionLogic {
public:
// Constructor. All arguments are forwarded unchanged to the DecisionLogic
// base class constructor.
DecisionLogicNormal(int fs_hz,
int output_size_samples,
NetEqPlayoutMode playout_mode,
DecoderDatabase* decoder_database,
const PacketBuffer& packet_buffer,
DelayManager* delay_manager,
BufferLevelFilter* buffer_level_filter)
: DecisionLogic(fs_hz, output_size_samples, playout_mode,
decoder_database, packet_buffer, delay_manager,
buffer_level_filter) {
}
// Destructor.
virtual ~DecisionLogicNormal() {}
protected:
// Returns the operation that should be done next. |sync_buffer| and |expand|
// are provided for reference. |decoder_frame_length| is the number of samples
// obtained from the last decoded frame. If there is a packet available, the
// packet header should be supplied in |packet_header|; otherwise it should
// be NULL. The mode resulting from the last call to NetEqImpl::GetAudio is
// supplied in |prev_mode|. If there is a DTMF event to play, |play_dtmf|
// should be set to true. The output variable |reset_decoder| will be set to
// true if a reset is required; otherwise it is left unchanged (i.e., it can
// remain true if it was true before the call).
virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
const Expand& expand,
int decoder_frame_length,
const RTPHeader* packet_header,
Modes prev_mode, bool play_dtmf,
bool* reset_decoder);
private:
// Buffer size (in ms) above which a merge may be done without a preceding
// expand.
static const int kAllowMergeWithoutExpandMs = 20; // 20 ms.
// Number of consecutive expands after which a decoder reset is requested.
static const int kReinitAfterExpands = 100;
// Number of consecutive expands after which waiting for a future packet is
// given up.
static const int kMaxWaitForPacket = 10;
// Returns the operation given that the next available packet is a comfort
// noise payload (RFC 3389 only, not codec-internal).
Operations CngOperation(Modes prev_mode, uint32_t target_timestamp,
uint32_t available_timestamp);
// Returns the operation given that no packets are available (except maybe
// a DTMF event, flagged by setting |play_dtmf| true).
Operations NoPacket(bool play_dtmf);
// Returns the operation to do given that the expected packet is available.
Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
// Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand.
Operations FuturePacketAvailable(const SyncBuffer& sync_buffer,
const Expand& expand,
int decoder_frame_length, Modes prev_mode,
uint32_t target_timestamp,
uint32_t available_timestamp,
bool play_dtmf);
// Checks if enough time has elapsed since the last successful timescale
// operation was done (i.e., accelerate or preemptive expand).
bool TimescaleAllowed() const { return timescale_hold_off_ == 0; }
// Checks if the current (filtered) buffer level is under the target level.
bool UnderTargetLevel() const;
// Checks if |timestamp_leap| is so long into the future that a reset due
// to exceeding kReinitAfterExpands will be done.
bool ReinitAfterExpands(uint32_t timestamp_leap) const;
// Checks if we still have not done enough expands to cover the distance from
// the last decoded packet to the next available packet, the distance being
// conveyed in |timestamp_leap|.
bool PacketTooEarly(uint32_t timestamp_leap) const;
// Checks if num_consecutive_expands_ >= kMaxWaitForPacket.
bool MaxWaitForPacket() const;
DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_NORMAL_H_

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DecisionLogic class and derived classes.
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h"
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/decision_logic.h"
#include "webrtc/modules/audio_coding/neteq4/delay_manager.h"
#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h"
#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h"
namespace webrtc {
// Smoke test: the factory can create, and the result can be deleted, for
// each of the four playout modes.
TEST(DecisionLogic, CreateAndDestroy) {
  const int fs_hz = 8000;
  const int output_size_samples = fs_hz / 100;  // Samples per 10 ms.
  DecoderDatabase decoder_database;
  PacketBuffer packet_buffer(10, 1000);
  DelayPeakDetector delay_peak_detector;
  DelayManager delay_manager(240, &delay_peak_detector);
  BufferLevelFilter buffer_level_filter;

  // Same order as before: on, streaming, fax, off.
  const NetEqPlayoutMode playout_modes[] = {
    kPlayoutOn, kPlayoutStreaming, kPlayoutFax, kPlayoutOff
  };
  const int num_modes =
      static_cast<int>(sizeof(playout_modes) / sizeof(playout_modes[0]));
  for (int i = 0; i < num_modes; ++i) {
    DecisionLogic* logic = DecisionLogic::Create(fs_hz, output_size_samples,
                                                 playout_modes[i],
                                                 &decoder_database,
                                                 packet_buffer, &delay_manager,
                                                 &buffer_level_filter);
    delete logic;
  }
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,251 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include <assert.h>
#include <utility> // pair
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
namespace webrtc {
// Deletes |decoder| unless it is flagged as external.
DecoderDatabase::DecoderInfo::~DecoderInfo() {
  if (external) {
    return;
  }
  delete decoder;
}
// Removes all entries from the database and marks both the active decoder
// and the active CNG decoder as unset (-1).
void DecoderDatabase::Reset() {
decoders_.clear();
active_decoder_ = -1;
active_cng_decoder_ = -1;
}
int DecoderDatabase::RegisterPayload(uint8_t rtp_payload_type,
NetEqDecoder codec_type) {
if (rtp_payload_type > kMaxRtpPayloadType) {
return kInvalidRtpPayloadType;
}
if (!AudioDecoder::CodecSupported(codec_type)) {
return kCodecNotSupported;
}
int fs_hz = AudioDecoder::CodecSampleRateHz(codec_type);
std::pair<DecoderMap::iterator, bool> ret;
DecoderInfo info(codec_type, fs_hz, NULL, false);
ret = decoders_.insert(std::make_pair(rtp_payload_type, info));
if (ret.second == false) {
// Database already contains a decoder with type |rtp_payload_type|.
return kDecoderExists;
}
return kOK;
}
int DecoderDatabase::InsertExternal(uint8_t rtp_payload_type,
NetEqDecoder codec_type,
int fs_hz,
AudioDecoder* decoder) {
if (rtp_payload_type > 0x7F) {
return kInvalidRtpPayloadType;
}
if (!AudioDecoder::CodecSupported(codec_type)) {
return kCodecNotSupported;
}
if (fs_hz != 8000 && fs_hz != 16000 && fs_hz != 32000 && fs_hz != 48000) {
return kInvalidSampleRate;
}
if (!decoder) {
return kInvalidPointer;
}
decoder->Init();
std::pair<DecoderMap::iterator, bool> ret;
DecoderInfo info(codec_type, fs_hz, decoder, true);
ret = decoders_.insert(
std::pair<uint8_t, DecoderInfo>(rtp_payload_type, info));
if (ret.second == false) {
// Database already contains a decoder with type |rtp_payload_type|.
return kDecoderExists;
}
return kOK;
}
// Erases the entry for |rtp_payload_type|. Returns kDecoderNotFound when no
// such entry exists, otherwise kOK. If the removed payload type was the
// active speech or CNG decoder, the corresponding "active" marker is reset
// to -1.
int DecoderDatabase::Remove(uint8_t rtp_payload_type) {
  const bool erased = decoders_.erase(rtp_payload_type) != 0;
  if (!erased) {
    return kDecoderNotFound;
  }
  if (rtp_payload_type == active_cng_decoder_) {
    active_cng_decoder_ = -1;  // No active CNG decoder any more.
  }
  if (rtp_payload_type == active_decoder_) {
    active_decoder_ = -1;  // No active speech decoder any more.
  }
  return kOK;
}
// Looks up the database entry for |rtp_payload_type|. Returns a pointer to
// the stored DecoderInfo, or NULL when the payload type is not registered.
const DecoderDatabase::DecoderInfo* DecoderDatabase::GetDecoderInfo(
    uint8_t rtp_payload_type) const {
  DecoderMap::const_iterator entry = decoders_.find(rtp_payload_type);
  if (entry != decoders_.end()) {
    return &entry->second;
  }
  return NULL;
}
// Scans the database in key order and returns the first payload type that is
// registered with |codec_type|. Returns kRtpPayloadTypeError when no entry
// uses that codec.
uint8_t DecoderDatabase::GetRtpPayloadType(
    NetEqDecoder codec_type) const {
  for (DecoderMap::const_iterator entry = decoders_.begin();
       entry != decoders_.end(); ++entry) {
    if (entry->second.codec_type != codec_type) {
      continue;
    }
    return entry->first;
  }
  return kRtpPayloadTypeError;
}
// Returns the AudioDecoder object for |rtp_payload_type|, creating and
// initializing it on first use. Returns NULL when
//   - the payload type is registered as DTMF or RED (not real decoders),
//   - the payload type is not registered at all, or
//   - AudioDecoder::CreateAudioDecoder() fails to produce a decoder.
// The last case is flagged with an assert in debug builds; in builds where
// assert() is compiled out the function returns NULL instead of calling
// Init() through a NULL pointer.
AudioDecoder* DecoderDatabase::GetDecoder(uint8_t rtp_payload_type) {
  if (IsDtmf(rtp_payload_type) || IsRed(rtp_payload_type)) {
    // These are not real decoders.
    return NULL;
  }
  DecoderMap::iterator it = decoders_.find(rtp_payload_type);
  if (it == decoders_.end()) {
    // Decoder not found.
    return NULL;
  }
  DecoderInfo* info = &it->second;
  if (!info->decoder) {
    // Create the decoder object lazily.
    AudioDecoder* decoder = AudioDecoder::CreateAudioDecoder(info->codec_type);
    assert(decoder);  // Should not be able to have an unsupported codec here.
    if (!decoder) {
      // Leave the entry without a decoder; a later call will try again.
      return NULL;
    }
    decoder->Init();
    info->decoder = decoder;
  }
  return info->decoder;
}
bool DecoderDatabase::IsType(uint8_t rtp_payload_type,
NetEqDecoder codec_type) const {
DecoderMap::const_iterator it = decoders_.find(rtp_payload_type);
if (it == decoders_.end()) {
// Decoder not found.
return false;
}
return ((*it).second.codec_type == codec_type);
}
// Returns true if |rtp_payload_type| is registered as any of the four
// comfort-noise codec types (CNGnb, CNGwb, CNGswb32kHz, CNGswb48kHz).
bool DecoderDatabase::IsComfortNoise(uint8_t rtp_payload_type) const {
  return IsType(rtp_payload_type, kDecoderCNGnb) ||
         IsType(rtp_payload_type, kDecoderCNGwb) ||
         IsType(rtp_payload_type, kDecoderCNGswb32kHz) ||
         IsType(rtp_payload_type, kDecoderCNGswb48kHz);
}
// Returns true if |rtp_payload_type| is registered with codec type
// kDecoderAVT.
bool DecoderDatabase::IsDtmf(uint8_t rtp_payload_type) const {
  const bool registered_as_avt = IsType(rtp_payload_type, kDecoderAVT);
  return registered_as_avt;
}
// Returns true if |rtp_payload_type| is registered with codec type
// kDecoderRED.
bool DecoderDatabase::IsRed(uint8_t rtp_payload_type) const {
  const bool registered_as_red = IsType(rtp_payload_type, kDecoderRED);
  return registered_as_red;
}
int DecoderDatabase::SetActiveDecoder(uint8_t rtp_payload_type,
bool* new_decoder) {
// Check that |rtp_payload_type| exists in the database.
DecoderMap::const_iterator it = decoders_.find(rtp_payload_type);
if (it == decoders_.end()) {
// Decoder not found.
return kDecoderNotFound;
}
assert(new_decoder);
*new_decoder = false;
if (active_decoder_ < 0) {
// This is the first active decoder.
*new_decoder = true;
} else if (active_decoder_ != rtp_payload_type) {
// Moving from one active decoder to another. Delete the first one.
DecoderMap::iterator it = decoders_.find(active_decoder_);
if (it == decoders_.end()) {
// Decoder not found. This should not be possible.
assert(false);
return kDecoderNotFound;
}
if (!(*it).second.external) {
// Delete the AudioDecoder object, unless it is an externally created
// decoder.
delete (*it).second.decoder;
(*it).second.decoder = NULL;
}
*new_decoder = true;
}
active_decoder_ = rtp_payload_type;
return kOK;
}
// Returns the decoder object of the active speech decoder (created on demand
// by GetDecoder()), or NULL while no speech decoder is active.
AudioDecoder* DecoderDatabase::GetActiveDecoder() {
  const bool has_active_decoder = (active_decoder_ >= 0);
  if (has_active_decoder) {
    return GetDecoder(active_decoder_);
  }
  return NULL;
}
int DecoderDatabase::SetActiveCngDecoder(uint8_t rtp_payload_type) {
// Check that |rtp_payload_type| exists in the database.
DecoderMap::const_iterator it = decoders_.find(rtp_payload_type);
if (it == decoders_.end()) {
// Decoder not found.
return kDecoderNotFound;
}
if (active_cng_decoder_ >= 0 && active_cng_decoder_ != rtp_payload_type) {
// Moving from one active CNG decoder to another. Delete the first one.
DecoderMap::iterator it = decoders_.find(active_cng_decoder_);
if (it == decoders_.end()) {
// Decoder not found. This should not be possible.
assert(false);
return kDecoderNotFound;
}
if (!(*it).second.external) {
// Delete the AudioDecoder object, unless it is an externally created
// decoder.
delete (*it).second.decoder;
(*it).second.decoder = NULL;
}
}
active_cng_decoder_ = rtp_payload_type;
return kOK;
}
// Returns the decoder object of the active comfort-noise decoder (created on
// demand by GetDecoder()), or NULL while no CNG decoder is active.
AudioDecoder* DecoderDatabase::GetActiveCngDecoder() {
  const bool has_active_cng = (active_cng_decoder_ >= 0);
  if (has_active_cng) {
    return GetDecoder(active_cng_decoder_);
  }
  return NULL;
}
int DecoderDatabase::CheckPayloadTypes(const PacketList& packet_list) const {
PacketList::const_iterator it;
for (it = packet_list.begin(); it != packet_list.end(); ++it) {
if (decoders_.find((*it)->header.payloadType) == decoders_.end()) {
// Payload type is not found.
return kDecoderNotFound;
}
}
return kOK;
}
} // namespace webrtc

View File

@ -0,0 +1,161 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECODER_DATABASE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECODER_DATABASE_H_
#include <map>
#include "webrtc/common_types.h" // NULL
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq4/packet.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declaration.
class AudioDecoder;
// Maps RTP payload types to codec types and (lazily created or externally
// supplied) AudioDecoder objects, and keeps track of which payload type is
// the currently active speech decoder and comfort-noise decoder.
class DecoderDatabase {
 public:
  // Status codes returned by the methods below. kOK is zero; all error codes
  // are negative.
  enum DatabaseReturnCodes {
    kOK = 0,
    kInvalidRtpPayloadType = -1,
    kCodecNotSupported = -2,
    kInvalidSampleRate = -3,
    kDecoderExists = -4,
    kDecoderNotFound = -5,
    kInvalidPointer = -6
  };

  // Struct used to store decoder info in the database.
  //
  // NOTE: the destructor deletes |decoder| for non-external entries, yet the
  // struct is copyable (entries are stored by value in the map). Copying an
  // entry that already holds a non-external decoder would make both copies
  // delete the same object, so such copies must be avoided.
  struct DecoderInfo {
    // Constructors.
    DecoderInfo()
        : codec_type(kDecoderArbitrary),
          fs_hz(8000),
          decoder(NULL),
          external(false) {
    }
    DecoderInfo(NetEqDecoder ct, int fs, AudioDecoder* dec, bool ext)
        : codec_type(ct),
          fs_hz(fs),
          decoder(dec),
          external(ext) {
    }
    // Destructor. (Defined in decoder_database.cc.)
    ~DecoderInfo();

    NetEqDecoder codec_type;  // Codec registered for the payload type.
    int fs_hz;                // Sample rate in Hz.
    AudioDecoder* decoder;    // Decoder object; NULL until created/inserted.
    bool external;            // True if |decoder| came from InsertExternal().
  };

  static const uint8_t kMaxRtpPayloadType = 0x7F;  // Max for a 7-bit number.
  // Maximum value for 8 bits, and an invalid RTP payload type (since it is
  // only 7 bits).
  static const uint8_t kRtpPayloadTypeError = 0xFF;

  DecoderDatabase()
      : active_decoder_(-1),
        active_cng_decoder_(-1) {
  }

  virtual ~DecoderDatabase() {}

  // Returns true if the database is empty.
  virtual bool Empty() const { return decoders_.empty(); }

  // Returns the number of decoders registered in the database.
  virtual int Size() const { return static_cast<int>(decoders_.size()); }

  // Resets the database, erasing all registered payload types, and deleting
  // any AudioDecoder objects that were not externally created and inserted
  // using InsertExternal().
  virtual void Reset();

  // Registers |rtp_payload_type| as a decoder of type |codec_type|. Returns
  // kOK on success; otherwise an error code.
  virtual int RegisterPayload(uint8_t rtp_payload_type,
                              NetEqDecoder codec_type);

  // Registers an externally created AudioDecoder object, and associates it
  // as a decoder of type |codec_type| with |rtp_payload_type|. Returns kOK on
  // success; otherwise an error code.
  virtual int InsertExternal(uint8_t rtp_payload_type,
                             NetEqDecoder codec_type,
                             int fs_hz, AudioDecoder* decoder);

  // Removes the entry for |rtp_payload_type| from the database.
  // Returns kDecoderNotFound or kOK depending on the outcome of the operation.
  virtual int Remove(uint8_t rtp_payload_type);

  // Returns a pointer to the DecoderInfo struct for |rtp_payload_type|. If
  // no decoder is registered with that |rtp_payload_type|, NULL is returned.
  virtual const DecoderInfo* GetDecoderInfo(uint8_t rtp_payload_type) const;

  // Returns one RTP payload type associated with |codec_type|, or
  // kRtpPayloadTypeError if no entry exists for that value. Note that one
  // |codec_type| may be registered with several RTP payload types, and the
  // method may return any of them.
  virtual uint8_t GetRtpPayloadType(NetEqDecoder codec_type) const;

  // Returns a pointer to the AudioDecoder object associated with
  // |rtp_payload_type|, or NULL if none is registered. If the AudioDecoder
  // object does not exist for that decoder, the object is created.
  virtual AudioDecoder* GetDecoder(uint8_t rtp_payload_type);

  // Returns true if |rtp_payload_type| is registered as a |codec_type|.
  virtual bool IsType(uint8_t rtp_payload_type,
                      NetEqDecoder codec_type) const;

  // Returns true if |rtp_payload_type| is registered as comfort noise.
  virtual bool IsComfortNoise(uint8_t rtp_payload_type) const;

  // Returns true if |rtp_payload_type| is registered as DTMF.
  virtual bool IsDtmf(uint8_t rtp_payload_type) const;

  // Returns true if |rtp_payload_type| is registered as RED.
  virtual bool IsRed(uint8_t rtp_payload_type) const;

  // Sets the active decoder to be |rtp_payload_type|. If this call results in a
  // change of active decoder, |new_decoder| is set to true. The previous active
  // decoder's AudioDecoder object is deleted, unless it was externally
  // created.
  virtual int SetActiveDecoder(uint8_t rtp_payload_type, bool* new_decoder);

  // Returns the current active decoder, or NULL if no active decoder exists.
  virtual AudioDecoder* GetActiveDecoder();

  // Sets the active comfort noise decoder to be |rtp_payload_type|. If this
  // call results in a change of active comfort noise decoder, the previous
  // active decoder's AudioDecoder object is deleted, unless it was externally
  // created.
  virtual int SetActiveCngDecoder(uint8_t rtp_payload_type);

  // Returns the current active comfort noise decoder, or NULL if no active
  // comfort noise decoder exists.
  virtual AudioDecoder* GetActiveCngDecoder();

  // Returns kOK if all packets in |packet_list| carry payload types that are
  // registered in the database. Otherwise, returns kDecoderNotFound.
  virtual int CheckPayloadTypes(const PacketList& packet_list) const;

 private:
  typedef std::map<uint8_t, DecoderInfo> DecoderMap;

  DecoderMap decoders_;
  int active_decoder_;      // Active speech payload type, or -1 if none.
  int active_cng_decoder_;  // Active CNG payload type, or -1 if none.

  DISALLOW_COPY_AND_ASSIGN(DecoderDatabase);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECODER_DATABASE_H_

View File

@ -0,0 +1,226 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include <assert.h>
#include <stdlib.h>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_audio_decoder.h"
namespace webrtc {
// A freshly constructed database must report itself as empty.
TEST(DecoderDatabase, CreateAndDestroy) {
  DecoderDatabase database;
  EXPECT_TRUE(database.Empty());
  EXPECT_EQ(0, database.Size());
}
// Registering one payload type makes the database non-empty; removing it
// again brings the database back to the empty state.
TEST(DecoderDatabase, InsertAndRemove) {
  const uint8_t kPcmuPayloadType = 0;
  DecoderDatabase database;

  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPcmuPayloadType, kDecoderPCMu));
  EXPECT_FALSE(database.Empty());
  EXPECT_EQ(1, database.Size());

  EXPECT_EQ(DecoderDatabase::kOK, database.Remove(kPcmuPayloadType));
  EXPECT_TRUE(database.Empty());
  EXPECT_EQ(0, database.Size());
}
// GetDecoderInfo() must expose the registered codec type and sample rate,
// report a NULL decoder and external == false for a freshly registered
// payload, and return NULL for a payload type that was never registered.
TEST(DecoderDatabase, GetDecoderInfo) {
  const uint8_t kRegisteredType = 0;
  DecoderDatabase database;
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kRegisteredType, kDecoderPCMu));

  const DecoderDatabase::DecoderInfo* entry =
      database.GetDecoderInfo(kRegisteredType);
  ASSERT_TRUE(entry != NULL);
  EXPECT_FALSE(entry->external);
  EXPECT_EQ(8000, entry->fs_hz);
  EXPECT_EQ(NULL, entry->decoder);
  EXPECT_EQ(kDecoderPCMu, entry->codec_type);

  // A different payload type should not be found.
  const DecoderDatabase::DecoderInfo* missing =
      database.GetDecoderInfo(kRegisteredType + 1);
  EXPECT_TRUE(missing == NULL);
}
// The reverse lookup must return the registered payload type for a known
// codec and kRtpPayloadTypeError for a codec that was never registered.
TEST(DecoderDatabase, GetRtpPayloadType) {
  const uint8_t kPcmuPayloadType = 0;
  // Local copy so that EXPECT_EQ can bind a reference to the value.
  const uint8_t kNotFound = DecoderDatabase::kRtpPayloadTypeError;
  DecoderDatabase database;
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPcmuPayloadType, kDecoderPCMu));

  EXPECT_EQ(kPcmuPayloadType, database.GetRtpPayloadType(kDecoderPCMu));
  // iSAC is not registered.
  EXPECT_EQ(kNotFound, database.GetRtpPayloadType(kDecoderISAC));
}
// GetDecoder() must hand out a decoder object for a registered payload type.
TEST(DecoderDatabase, GetDecoder) {
  const uint8_t kIlbcPayloadType = 0;
  DecoderDatabase database;
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kIlbcPayloadType, kDecoderILBC));
  AudioDecoder* created_decoder = database.GetDecoder(kIlbcPayloadType);
  ASSERT_TRUE(created_decoder != NULL);
}
// Exercises IsType(), IsComfortNoise(), IsDtmf() and IsRed() against a
// database holding one PCMu, one CNG, one DTMF and one RED payload type, plus
// one payload type that is deliberately left unregistered.
TEST(DecoderDatabase, TypeTests) {
  const uint8_t kPcmuType = 0;
  const uint8_t kCngType = 13;
  const uint8_t kDtmfType = 100;
  const uint8_t kRedType = 101;
  const uint8_t kUnregisteredType = 102;

  // Populate the database.
  DecoderDatabase database;
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kPcmuType, kDecoderPCMu));
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kCngType, kDecoderCNGnb));
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kDtmfType, kDecoderAVT));
  EXPECT_EQ(DecoderDatabase::kOK,
            database.RegisterPayload(kRedType, kDecoderRED));
  EXPECT_EQ(4, database.Size());

  // An unregistered payload type matches no category.
  EXPECT_FALSE(database.IsComfortNoise(kUnregisteredType));
  EXPECT_FALSE(database.IsDtmf(kUnregisteredType));
  EXPECT_FALSE(database.IsRed(kUnregisteredType));

  // A speech payload type is neither CNG, DTMF nor RED.
  EXPECT_FALSE(database.IsComfortNoise(kPcmuType));
  EXPECT_FALSE(database.IsDtmf(kPcmuType));
  EXPECT_FALSE(database.IsRed(kPcmuType));
  EXPECT_FALSE(database.IsType(kPcmuType, kDecoderISAC));
  EXPECT_TRUE(database.IsType(kPcmuType, kDecoderPCMu));

  // Each special payload type matches its own category.
  EXPECT_TRUE(database.IsComfortNoise(kCngType));
  EXPECT_TRUE(database.IsDtmf(kDtmfType));
  EXPECT_TRUE(database.IsRed(kRedType));
}
// An externally created decoder must be returned as-is by the database and
// must not be destroyed when its entry is removed.
TEST(DecoderDatabase, ExternalDecoder) {
  const uint8_t kExternalPayloadType = 0;
  DecoderDatabase database;
  MockAudioDecoder external_decoder;

  // Insert the external decoder.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.InsertExternal(kExternalPayloadType, kDecoderPCMu, 8000,
                                    &external_decoder));
  EXPECT_EQ(1, database.Size());

  // The database must hand back exactly the object that was inserted.
  EXPECT_EQ(&external_decoder, database.GetDecoder(kExternalPayloadType));

  // The info struct must describe the external decoder as well.
  const DecoderDatabase::DecoderInfo* entry =
      database.GetDecoderInfo(kExternalPayloadType);
  ASSERT_TRUE(entry != NULL);
  EXPECT_TRUE(entry->external);
  EXPECT_EQ(8000, entry->fs_hz);
  EXPECT_EQ(&external_decoder, entry->decoder);
  EXPECT_EQ(kDecoderPCMu, entry->codec_type);

  // Removing the entry must not delete the decoder, since it was declared
  // externally.
  EXPECT_CALL(external_decoder, Die()).Times(0);
  EXPECT_EQ(DecoderDatabase::kOK, database.Remove(kExternalPayloadType));
  EXPECT_TRUE(database.Empty());

  // Exactly one Die() is expected during the remaining teardown of the test.
  EXPECT_CALL(external_decoder, Die()).Times(1);
}
// CheckPayloadTypes() must reject a packet list containing an unregistered
// payload type and accept the list once that packet has been taken out.
TEST(DecoderDatabase, CheckPayloadTypes) {
  // Register payload types 0, 1, ..., kNumPayloads - 1. They all map to the
  // same decoder type, which is irrelevant for this test.
  const int kNumPayloads = 10;
  DecoderDatabase database;
  for (uint8_t type = 0; type < kNumPayloads; ++type) {
    EXPECT_EQ(DecoderDatabase::kOK,
              database.RegisterPayload(type, kDecoderArbitrary));
  }

  // Build one packet per payload type 0 .. kNumPayloads. The final packet
  // therefore carries a payload type unknown to the database.
  PacketList packets;
  for (int type = 0; type <= kNumPayloads; ++type) {
    Packet* new_packet = new Packet;
    new_packet->header.payloadType = type;
    packets.push_back(new_packet);
  }

  // The unknown last packet must make the check fail.
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            database.CheckPayloadTypes(packets));

  // Drop the unknown packet; now the check must succeed.
  delete packets.back();
  packets.pop_back();
  EXPECT_EQ(DecoderDatabase::kOK, database.CheckPayloadTypes(packets));

  // Free the remaining packets, always consuming from the front of the list.
  for (PacketList::iterator it = packets.begin(); it != packets.end();
       it = packets.erase(it)) {
    delete *it;
  }
}
// Covers selecting, re-selecting, switching and removing the active speech
// decoder and the active CNG decoder.
TEST(DecoderDatabase, ActiveDecoders) {
  DecoderDatabase database;
  // Register the payload types used below.
  ASSERT_EQ(DecoderDatabase::kOK, database.RegisterPayload(0, kDecoderPCMu));
  ASSERT_EQ(DecoderDatabase::kOK, database.RegisterPayload(103, kDecoderISAC));
  ASSERT_EQ(DecoderDatabase::kOK, database.RegisterPayload(13, kDecoderCNGnb));

  // Nothing is active initially.
  EXPECT_EQ(NULL, database.GetActiveDecoder());
  EXPECT_EQ(NULL, database.GetActiveCngDecoder());

  // First activation of a speech decoder counts as a change.
  bool decoder_changed;  // Written by SetActiveDecoder() on success.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.SetActiveDecoder(0, &decoder_changed));
  EXPECT_TRUE(decoder_changed);
  AudioDecoder* active = database.GetActiveDecoder();
  ASSERT_FALSE(active == NULL);  // Should get a decoder here.
  EXPECT_EQ(kDecoderPCMu, active->codec_type());

  // Selecting the same payload type again is not a change.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.SetActiveDecoder(0, &decoder_changed));
  EXPECT_FALSE(decoder_changed);
  active = database.GetActiveDecoder();
  ASSERT_FALSE(active == NULL);  // Should get a decoder here.
  EXPECT_EQ(kDecoderPCMu, active->codec_type());

  // Switching to another payload type is a change.
  EXPECT_EQ(DecoderDatabase::kOK,
            database.SetActiveDecoder(103, &decoder_changed));
  EXPECT_TRUE(decoder_changed);
  active = database.GetActiveDecoder();
  ASSERT_FALSE(active == NULL);  // Should get a decoder here.
  EXPECT_EQ(kDecoderISAC, active->codec_type());

  // Removing the active decoder leaves no active decoder behind.
  EXPECT_EQ(DecoderDatabase::kOK, database.Remove(103));
  EXPECT_EQ(NULL, database.GetActiveDecoder());

  // Activate the CNG decoder.
  EXPECT_EQ(DecoderDatabase::kOK, database.SetActiveCngDecoder(13));
  active = database.GetActiveCngDecoder();
  ASSERT_FALSE(active == NULL);  // Should get a decoder here.
  EXPECT_EQ(kDecoderCNGnb, active->codec_type());

  // Removing the active CNG decoder leaves no active CNG decoder behind.
  EXPECT_EQ(DecoderDatabase::kOK, database.Remove(13));
  EXPECT_EQ(NULL, database.GetActiveCngDecoder());

  // Unregistered payload types cannot be made active.
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            database.SetActiveDecoder(17, &decoder_changed));
  EXPECT_EQ(DecoderDatabase::kDecoderNotFound,
            database.SetActiveCngDecoder(17));
}
} // namespace webrtc

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DEFINES_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DEFINES_H_
namespace webrtc {
// Operation identifiers. Values are consecutive starting at zero, with
// kUndefined as the only negative value.
enum Operations {
  kUndefined = -1,
  kNormal = 0,
  kMerge = 1,
  kExpand = 2,
  kAccelerate = 3,
  kPreemptiveExpand = 4,
  kRfc3389Cng = 5,
  kRfc3389CngNoPacket = 6,
  kCodecInternalCng = 7,
  kDtmf = 8,
  kAlternativePlc = 9,
  kAlternativePlcIncreaseTimestamp = 10,
  kAudioRepetition = 11,
  kAudioRepetitionIncreaseTimestamp = 12
};
// Mode identifiers. Values are consecutive starting at zero, with
// kModeUndefined as the only negative value.
enum Modes {
  kModeUndefined = -1,
  kModeNormal = 0,
  kModeExpand = 1,
  kModeMerge = 2,
  kModeAccelerateSuccess = 3,
  kModeAccelerateLowEnergy = 4,
  kModeAccelerateFail = 5,
  kModePreemptiveExpandSuccess = 6,
  kModePreemptiveExpandLowEnergy = 7,
  kModePreemptiveExpandFail = 8,
  kModeRfc3389Cng = 9,
  kModeCodecInternalCng = 10,
  kModeDtmf = 11,
  kModeError = 12
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DEFINES_H_

View File

@ -0,0 +1,369 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/delay_manager.h"
#include <assert.h>
#include <math.h>
#include <algorithm> // max, min
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
// Constructs a DelayManager for a packet buffer holding at most
// |max_packets_in_buffer| packets, using |peak_detector| for delay-peak
// detection. |peak_detector| must not be NULL.
//
// The histogram vector is created with kMaxIat + 1 zero-valued elements and
// is then filled in by Reset() -> ResetHistogram(), which also (re)assigns
// |base_target_level_| and |target_level_|.
//
// NOTE(review): |*peak_detector| is dereferenced in the initializer list,
// i.e. before the assert in the body runs, so the assert cannot catch a NULL
// argument ahead of the dereference.
// NOTE(review): the |target_level_| initializer reads |base_target_level_|;
// this presumes |base_target_level_| is declared first in the class — confirm
// in delay_manager.h. Reset() overwrites both values regardless.
DelayManager::DelayManager(int max_packets_in_buffer,
DelayPeakDetector* peak_detector)
: first_packet_received_(false),
max_packets_in_buffer_(max_packets_in_buffer),
iat_vector_(kMaxIat + 1, 0),
iat_factor_(0),
packet_iat_count_ms_(0),
base_target_level_(4), // In Q0 domain.
target_level_(base_target_level_ << 8), // In Q8 domain.
packet_len_ms_(0),
streaming_mode_(false),
last_seq_no_(0),
last_timestamp_(0),
extra_delay_ms_(0),
iat_cumulative_sum_(0),
max_iat_cumulative_sum_(0),
max_timer_ms_(0),
peak_detector_(*peak_detector),
last_pack_cng_or_dtmf_(1) {
assert(peak_detector); // Should never be NULL.
// Brings the histogram, target levels and counters to their start values.
Reset();
}
// Re-initializes the inter-arrival time histogram to an exponentially
// decaying distribution, iat_vector_[i] = 0.5^(i+1) for i = 0, 1, 2, ...,
// stored in Q30. Also restores the target level to its start value of 4
// packets (|target_level_| holds the same value in Q8).
void DelayManager::ResetHistogram() {
  // Start at (slightly more than) 1 in Q14: 16384 + 2 = 100000000000010
  // binary. The extra 2 makes the halved values add up to exactly 1.
  uint16_t prob_q14 = 0x4002;
  for (size_t i = 0; i < iat_vector_.size(); ++i) {
    prob_q14 >>= 1;                    // Halve for each histogram bin.
    iat_vector_[i] = prob_q14 << 16;   // Q14 -> Q30.
  }
  base_target_level_ = 4;
  target_level_ = base_target_level_ << 8;
}
// Feeds one packet arrival into the delay statistics.
//
// |sequence_number| and |timestamp| are those of the arriving packet, and
// |sample_rate_hz| is the sample rate used to convert timestamps to
// milliseconds. Returns -1 if |sample_rate_hz| is not positive, 0 otherwise.
//
// The first packet only records the sequence number and timestamp. For later
// packets, the packet length is derived from the timestamp and sequence
// number deltas, the inter-arrival time (IAT) histogram is updated, and a new
// |target_level_| is calculated and limited.
int DelayManager::Update(uint16_t sequence_number,
                         uint32_t timestamp,
                         int sample_rate_hz) {
  if (sample_rate_hz <= 0) {
    return -1;
  }

  if (first_packet_received_) {
    // Try calculating packet length from current and previous timestamps.
    // TODO(hlundin): Take care of wrap-around. Not done yet due to legacy
    // bit-exactness.
    // Default to the stored length; it is used whenever the timestamp or
    // sequence number did not advance.
    int packet_len_ms = packet_len_ms_;
    const bool strictly_in_order = (timestamp > last_timestamp_) &&
                                   (sequence_number > last_seq_no_);
    if (strictly_in_order) {
      // Calculate timestamps per packet and derive packet length in ms.
      const int packet_len_samp =
          static_cast<uint32_t>(timestamp - last_timestamp_) /
          static_cast<uint16_t>(sequence_number - last_seq_no_);
      packet_len_ms = (1000 * packet_len_samp) / sample_rate_hz;
    }

    // Statistics can only be updated with a valid packet length.
    if (packet_len_ms > 0) {
      // Inter-arrival time in whole "packet times" (rounded down). This is
      // the index into the histogram vector |iat_vector_|.
      int iat_packets = packet_iat_count_ms_ / packet_len_ms;

      if (streaming_mode_) {
        UpdateCumulativeSums(packet_len_ms, sequence_number);
      }

      // Check for discontinuous packet sequence and re-ordering.
      if (sequence_number > last_seq_no_ + 1) {
        // TODO(hlundin): Take care of wrap-around. Not done yet due to legacy
        // bit-exactness.
        // Gap in the sequence numbers: reduce the IAT by the expected extra
        // time due to lost packets, but never below zero.
        iat_packets -= sequence_number - last_seq_no_ - 1;
        if (iat_packets < 0) {
          iat_packets = 0;
        }
      } else if (sequence_number < last_seq_no_) {
        // TODO(hlundin): Take care of wrap-around.
        // Compensate for re-ordering.
        iat_packets += last_seq_no_ + 1 - sequence_number;
      }

      // Saturate IAT at maximum value.
      const int max_iat = kMaxIat;
      iat_packets = std::min(iat_packets, max_iat);
      UpdateHistogram(iat_packets);

      // Calculate new |target_level_| based on updated statistics.
      target_level_ = CalculateTargetLevel(iat_packets);
      if (streaming_mode_) {
        target_level_ = std::max(target_level_, max_iat_cumulative_sum_);
      }
      LimitTargetLevel();
    }
  } else {
    // Very first packet: nothing to compare against yet.
    first_packet_received_ = true;
  }

  // Prepare for next packet arrival.
  packet_iat_count_ms_ = 0;
  last_seq_no_ = sequence_number;
  last_timestamp_ = timestamp;
  return 0;
}
// Updates the cumulative inter-arrival time sum and its running maximum.
// Update() calls this only when |streaming_mode_| is set. |packet_len_ms|
// must be positive, since it is used as a divisor.
void DelayManager::UpdateCumulativeSums(int packet_len_ms,
                                        uint16_t sequence_number) {
  // IAT in Q8, including fractions of a packet (i.e., more accurate than
  // the integer |iat_packets| used for the histogram).
  const int iat_packets_q8 = (packet_iat_count_ms_ << 8) / packet_len_ms;
  // Number of sequence numbers advanced since the last packet, in Q8.
  const int seq_delta_q8 =
      static_cast<int>(sequence_number - last_seq_no_) << 8;

  // Accumulate IAT with sequence number compensation. The sum is zero if
  // there is no clock-drift.
  iat_cumulative_sum_ += (iat_packets_q8 - seq_delta_q8);
  // Subtract drift term.
  iat_cumulative_sum_ -= kCumulativeSumDrift;
  // Ensure not negative.
  if (iat_cumulative_sum_ < 0) {
    iat_cumulative_sum_ = 0;
  }

  if (iat_cumulative_sum_ > max_iat_cumulative_sum_) {
    // Found a new maximum; restart the timer that ages it.
    max_iat_cumulative_sum_ = iat_cumulative_sum_;
    max_timer_ms_ = 0;
  }
  if (max_timer_ms_ > kMaxStreamingPeakPeriodMs) {
    // Too long since the last maximum was observed; decrease max value.
    max_iat_cumulative_sum_ -= kCumulativeSumDrift;
  }
}
// Updates the inter-arrival time histogram with one observation.
//
// Every element is first scaled by the forgetting factor |iat_factor_| (Q15).
// The element selected by |iat_packets| is then increased by
// 1 - |iat_factor_|, so that the probability of |iat_packets| grows slightly
// while the histogram sum stays at 1 (in Q30).
// Fixed-point rounding can make the sum drift away from 1; the difference is
// compensated by adjusting the first element (or elements) of the vector.
// Finally |iat_factor_| itself is moved towards its steady-state value
// |kIatFactor_|. After a reset the factor is 0, which gives rapid convergence
// in the beginning.
void DelayManager::UpdateHistogram(size_t iat_packets) {
  assert(iat_packets < iat_vector_.size());
  const size_t num_bins = iat_vector_.size();

  // Scale each bin by |iat_factor_| and sum the bins as they are processed.
  int vector_sum = 0;
  for (size_t i = 0; i < num_bins; ++i) {
    iat_vector_[i] = (static_cast<int64_t>(iat_vector_[i]) * iat_factor_) >> 15;
    vector_sum += iat_vector_[i];
  }

  // Boost the observed bin by 1 - |iat_factor_|. The factor is in Q15 and
  // the histogram in Q30, hence the 15-step left shift.
  iat_vector_[iat_packets] += (32768 - iat_factor_) << 15;
  vector_sum += (32768 - iat_factor_) << 15;

  // The histogram should now sum to 1 in Q30; |vector_sum| becomes the error.
  vector_sum -= 1 << 30;
  if (vector_sum != 0) {
    // Spread the correction over the first few bins.
    const int flip_sign = vector_sum > 0 ? -1 : 1;
    for (size_t i = 0; i < num_bins && vector_sum != 0; ++i) {
      // Add/subtract 1/16 of the element, but not more than |vector_sum|.
      const int correction =
          flip_sign * std::min(abs(vector_sum), iat_vector_[i] >> 4);
      iat_vector_[i] += correction;
      vector_sum += correction;
    }
  }
  assert(vector_sum == 0);  // Verify that the above is correct.

  // Update |iat_factor_| (changes only during the first seconds after a
  // reset). The factor converges to |kIatFactor_|.
  iat_factor_ += (kIatFactor_ - iat_factor_ + 3) >> 2;
}
// Caps |target_level_| at 75% of the packet buffer capacity
// (|max_packets_in_buffer_|), which leaves some headroom for natural
// fluctuations around the target. When an extra delay is requested and the
// packet length is known, the capacity is first reduced by the number of
// packets that the extra delay corresponds to (but never below 1 packet).
// Note that in practice, this does not have any impact, since the target
// level is far below the buffer capacity in all reasonable cases.
// TODO(hlundin): Move this check to the buffer logistics class.
void DelayManager::LimitTargetLevel() {
  int capacity_packets = max_packets_in_buffer_;
  const bool reserve_extra_delay =
      (extra_delay_ms_ > 0) && (packet_len_ms_ > 0);
  if (reserve_extra_delay) {
    capacity_packets -= extra_delay_ms_ / packet_len_ms_;
    if (capacity_packets < 1) {
      capacity_packets = 1;  // Sanity check.
    }
  }
  // Shift to Q8, then take 75%.
  const int limit_q8 = (3 * (capacity_packets << 8)) / 4;
  if (target_level_ > limit_q8) {
    target_level_ = limit_q8;
  }
}
// Derives a new target buffer level from the inter-arrival time histogram,
// stores it in |target_level_| (Q8) and returns that value. The histogram
// based level before peak handling is stored in |base_target_level_| (Q0).
// |iat_packets| is the latest inter-arrival time in packets; it is passed on
// to the delay peak detector.
int DelayManager::CalculateTargetLevel(int iat_packets) {
  int limit_probability;
  if (streaming_mode_) {
    limit_probability = kLimitProbabilityStreaming;
  } else {
    limit_probability = kLimitProbability;
  }

  // Find the |index| for which the probability of observing an inter-arrival
  // time larger than or equal to |index| is less than or equal to
  // |limit_probability|. That probability is the sum of histogram elements
  // from |index| to the end. Since all elements sum to 1 (in Q30) by
  // definition, and the answer is often a low index, it is cheaper to start
  // from 1 and subtract elements from the start of the histogram.
  const size_t last_index = iat_vector_.size() - 1;
  // Subtracting element 0 up front ensures that the target level is >= 1.
  int tail_probability = (1 << 30) - iat_vector_[0];
  size_t index = 0;
  do {
    // Subtract the probabilities one by one until the remainder is no longer
    // greater than |limit_probability|.
    ++index;
    tail_probability -= iat_vector_[index];
  } while (tail_probability > limit_probability && index < last_index);

  // This is the base value for the target buffer level.
  int target_level = index;
  base_target_level_ = index;

  // Update detector for delay peaks; a detected peak may raise the target.
  if (peak_detector_.Update(iat_packets, target_level)) {
    target_level = std::max(target_level, peak_detector_.MaxPeakHeight());
  }

  // Sanity check. |target_level| must be strictly positive.
  if (target_level < 1) {
    target_level = 1;
  }

  // Scale to Q8 and assign to member variable.
  target_level_ = target_level << 8;
  return target_level_;
}
// Sets the audio duration of one packet to |length_ms|, forwards it to the
// peak detector and restarts the inter-arrival time counter. Returns 0 on
// success, or -1 (after logging an error) if |length_ms| is not positive.
int DelayManager::SetPacketAudioLength(int length_ms) {
  if (length_ms > 0) {
    packet_len_ms_ = length_ms;
    peak_detector_.SetPacketAudioLength(packet_len_ms_);
    packet_iat_count_ms_ = 0;
    last_pack_cng_or_dtmf_ = 1;  // TODO(hlundin): Legacy. Remove?
    return 0;
  }
  LOG_F(LS_ERROR) << "length_ms = " << length_ms;
  return -1;
}
// Brings the delay manager back to its start state: unknown packet length,
// streaming mode off, peak detector reset, histogram and target levels
// re-initialized, and all counters and cumulative sums cleared.
void DelayManager::Reset() {
  streaming_mode_ = false;
  packet_len_ms_ = 0;  // Packet size unknown.
  peak_detector_.Reset();
  ResetHistogram();  // Resets target levels too.
  // A zero forgetting factor makes the histogram adapt faster for the first
  // few packets.
  iat_factor_ = 0;
  // Timers and counters.
  max_timer_ms_ = 0;
  packet_iat_count_ms_ = 0;
  // Streaming-mode statistics.
  max_iat_cumulative_sum_ = 0;
  iat_cumulative_sum_ = 0;
  last_pack_cng_or_dtmf_ = 1;
}
// Returns the deviation of the average inter-arrival time from the nominal
// value of one packet, in parts-per-million. The average is the histogram's
// expected value, sum(i * iat_vector_[i]); 0 means packets arrive exactly one
// packet time apart on average.
//
// The histogram must have exactly 65 elements; the scaling below is
// hard-coded for that size.
int DelayManager::AverageIAT() const {
  assert(iat_vector_.size() == 65);  // Algorithm is hard-coded for this size.
  int32_t sum_q24 = 0;
  for (size_t i = 0; i < iat_vector_.size(); ++i) {
    // Shift 6 (Q30 -> Q24) to fit worst case: 2^30 * 64.
    sum_q24 += (iat_vector_[i] >> 6) * i;
  }
  // Subtract the nominal inter-arrival time 1 = 2^24 in Q24.
  sum_q24 -= (1 << 24);
  // Multiply with 1000000 / 2^24 = 15625 / 2^18 to get in parts-per-million.
  // Shift 7 to Q17 first, then multiply with 15625 and shift another 11.
  // The product is formed in 64 bits: in 32 bits it overflows as soon as the
  // average exceeds roughly two packet times, although the final ppm value
  // (at most about 63 million) always fits in an int.
  const int64_t scaled = static_cast<int64_t>(sum_q24 >> 7) * 15625;
  return static_cast<int>(scaled >> 11);
}
// Forwards the peak detector's current peak_found() state.
bool DelayManager::PeakFound() const {
  const bool peak_found = peak_detector_.peak_found();
  return peak_found;
}
// Advances all time-based counters by |elapsed_time_ms|: the inter-arrival
// time counter, the streaming-mode maximum timer, and the peak detector's
// own counter.
void DelayManager::UpdateCounters(int elapsed_time_ms) {
  max_timer_ms_ += elapsed_time_ms;
  packet_iat_count_ms_ += elapsed_time_ms;
  peak_detector_.IncrementCounter(elapsed_time_ms);
}
void DelayManager::BufferLimits(int* lower_limit, int* higher_limit) const {
if (!lower_limit || !higher_limit) {
LOG_F(LS_ERROR) << "NULL pointers supplied as input";
assert(false);
return;
}
int extra_delay_packets_q8 = 0;
int window_20ms = 0x7FFF; // Default large value for legacy bit-exactness.
if (packet_len_ms_ > 0) {
extra_delay_packets_q8 = (extra_delay_ms_ << 8) / packet_len_ms_;
window_20ms = (20 << 8) / packet_len_ms_;
}
// |lower_limit| is 75% of |target_level_| + extra delay.
// |target_level_| is in Q8 already.
*lower_limit = (target_level_ * 3) / 4 + extra_delay_packets_q8;
// |higher_limit| is equal to |target_level_| + extra delay, but should at
// least be 20 ms higher than |lower_limit_|.
*higher_limit = std::max(target_level_ + extra_delay_packets_q8,
*lower_limit + window_20ms);
}
int DelayManager::TargetLevel() const {
if (packet_len_ms_ > 0) {
// Add |extra_delay_ms_| converted to packets in Q8.
return target_level_ + (extra_delay_ms_ << 8) / packet_len_ms_;
} else {
// Cannot convert |extra_delay_ms_|; simply return |target_level_|.
return target_level_;
}
}
// Updates |last_pack_cng_or_dtmf_| from the type of the most recent decoder.
// AVT and any of the comfort-noise (CNG) decoders set the flag to 1. Any
// other decoder turns a non-zero flag into -1 and leaves a zero flag as is.
void DelayManager::LastDecoderType(NetEqDecoder decoder_type) {
  const bool is_cng_or_avt = (decoder_type == kDecoderAVT ||
                              decoder_type == kDecoderCNGnb ||
                              decoder_type == kDecoderCNGwb ||
                              decoder_type == kDecoderCNGswb32kHz ||
                              decoder_type == kDecoderCNGswb48kHz);
  if (is_cng_or_avt) {
    last_pack_cng_or_dtmf_ = 1;
    return;
  }
  if (last_pack_cng_or_dtmf_ != 0) {
    last_pack_cng_or_dtmf_ = -1;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,156 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_MANAGER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_MANAGER_H_
#include <cstring> // Provide access to size_t.
#include <vector>
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declaration.
class DelayPeakDetector;
// DelayManager keeps a histogram of packet inter-arrival times and derives a
// target buffer level from it. It cooperates with a DelayPeakDetector, which
// is supplied by the creator and consulted when the target level is
// calculated. Buffer levels are expressed in packets; the public level values
// (TargetLevel(), BufferLimits()) are in Q8. All public methods are virtual.
class DelayManager {
public:
typedef std::vector<int> IATVector;
// Create a DelayManager object. Notify the delay manager that the packet
// buffer can hold no more than |max_packets_in_buffer| packets (i.e., this
// is the number of packet slots in the buffer). Supply a PeakDetector
// object to the DelayManager.
DelayManager(int max_packets_in_buffer, DelayPeakDetector* peak_detector);
// The destructor is empty; in particular it does not delete the
// DelayPeakDetector that was supplied to the constructor.
virtual ~DelayManager() {}
// Read the inter-arrival time histogram. Mainly for testing purposes.
virtual const IATVector& iat_vector() const { return iat_vector_; }
// Updates the delay manager with a new incoming packet, with
// |sequence_number| and |timestamp| from the RTP header. This updates the
// inter-arrival time histogram and other statistics, as well as the
// associated DelayPeakDetector. A new target buffer level is calculated.
// Returns 0 on success, -1 on failure (invalid sample rate).
virtual int Update(uint16_t sequence_number,
uint32_t timestamp,
int sample_rate_hz);
// Calculates a new target buffer level. Called from the Update() method.
// Sets target_level_ (in Q8) and returns the same value. Also calculates
// and updates base_target_level_, which is the target buffer level before
// taking delay peaks into account.
virtual int CalculateTargetLevel(int iat_packets);
// Notifies the DelayManager of how much audio data is carried in each packet.
// The method updates the DelayPeakDetector too, and resets the inter-arrival
// time counter. Returns 0 on success, -1 on failure.
virtual int SetPacketAudioLength(int length_ms);
// Resets the DelayManager and the associated DelayPeakDetector.
virtual void Reset();
// Calculates the average inter-arrival time deviation from the histogram.
// The result is returned as parts-per-million deviation from the nominal
// inter-arrival time. That is, if the average inter-arrival time is equal to
// the nominal frame time, the return value is zero. A positive value
// corresponds to packet spacing being too large, while a negative value means
// that the packets arrive with less spacing than expected.
virtual int AverageIAT() const;
// Returns true if peak-mode is active. That is, delay peaks were observed
// recently. This method simply asks for the same information from the
// DelayPeakDetector object.
virtual bool PeakFound() const;
// Notifies the counters in DelayManager and DelayPeakDetector that
// |elapsed_time_ms| have elapsed.
virtual void UpdateCounters(int elapsed_time_ms);
// Reset the inter-arrival time counter to 0.
virtual void ResetPacketIatCount() { packet_iat_count_ms_ = 0; }
// Writes the lower and higher limits which the buffer level should stay
// within to the corresponding pointers. The values are in (fractions of)
// packets in Q8. Both pointers must be non-NULL.
virtual void BufferLimits(int* lower_limit, int* higher_limit) const;
// Gets the target buffer level, in (fractions of) packets in Q8. This value
// includes any extra delay set through the set_extra_delay_ms() method.
virtual int TargetLevel() const;
// Informs the DelayManager of the type of the decoder used for the last
// packet. For the AVT decoder and the CNG decoders, |last_pack_cng_or_dtmf_|
// is set to 1. For any other decoder type, a non-zero
// |last_pack_cng_or_dtmf_| is changed to -1, and a zero value is left as is.
virtual void LastDecoderType(NetEqDecoder decoder_type);
// Accessors and mutators.
// Sets the extra delay (in ms) that TargetLevel() and BufferLimits() add on
// top of the calculated target level.
virtual void set_extra_delay_ms(int16_t delay) { extra_delay_ms_ = delay; }
// Target buffer level in whole packets (Q0), before delay peaks are taken
// into account.
virtual int base_target_level() const { return base_target_level_; }
virtual void set_streaming_mode(bool value) { streaming_mode_ = value; }
virtual int last_pack_cng_or_dtmf() const { return last_pack_cng_or_dtmf_; }
virtual void set_last_pack_cng_or_dtmf(int value) {
last_pack_cng_or_dtmf_ = value;
}
private:
static const int kLimitProbability = 53687091; // 1/20 in Q30.
static const int kLimitProbabilityStreaming = 536871; // 1/2000 in Q30.
static const int kMaxStreamingPeakPeriodMs = 600000; // 10 minutes in ms.
static const int kCumulativeSumDrift = 2; // Drift term for cumulative sum
// |iat_cumulative_sum_|.
// Steady-state forgetting factor for |iat_vector_|, 0.9993 in Q15.
static const int kIatFactor_ = 32745;
static const int kMaxIat = 64; // Max inter-arrival time to register.
// Sets |iat_vector_| to the default start distribution and sets the
// |base_target_level_| and |target_level_| to the corresponding values.
void ResetHistogram();
// Updates |iat_cumulative_sum_| and |max_iat_cumulative_sum_|. (These are
// used by the streaming mode.) This method is called by Update().
void UpdateCumulativeSums(int packet_len_ms, uint16_t sequence_number);
// Updates the histogram |iat_vector_|. The probability for inter-arrival time
// equal to |iat_packets| (in integer packets) is increased slightly, while
// all other entries are decreased. This method is called by Update().
void UpdateHistogram(size_t iat_packets);
// Makes sure that |target_level_| is not too large, taking
// |max_packets_in_buffer_| and |extra_delay_ms_| into account. This method is
// called by Update().
void LimitTargetLevel();
// NOTE(review): presumably false until the first call to Update() -- confirm
// against the implementation.
bool first_packet_received_;
const int max_packets_in_buffer_; // Capacity of the packet buffer.
IATVector iat_vector_; // Histogram of inter-arrival times.
int iat_factor_; // Forgetting factor for updating the IAT histogram (Q15).
int packet_iat_count_ms_; // Milliseconds elapsed since last packet.
int base_target_level_; // Currently preferred buffer level before peak
// detection and streaming mode (Q0).
int target_level_; // Currently preferred buffer level in (fractions)
// of packets (Q8), before adding any extra delay.
int packet_len_ms_; // Length of audio in each incoming packet [ms].
bool streaming_mode_;
uint16_t last_seq_no_; // Sequence number for last received packet.
uint32_t last_timestamp_; // Timestamp for the last received packet.
int extra_delay_ms_; // Externally set extra delay.
int iat_cumulative_sum_; // Cumulative sum of delta inter-arrival times.
int max_iat_cumulative_sum_; // Max of |iat_cumulative_sum_|.
int max_timer_ms_; // Time elapsed since maximum was observed.
// The detector handed to the constructor, held by reference.
DelayPeakDetector& peak_detector_;
// Set to 1 by Reset(), SetPacketAudioLength() and by LastDecoderType() for
// AVT/CNG decoders; LastDecoderType() changes any other non-zero value to -1.
// Can also be written directly through set_last_pack_cng_or_dtmf().
int last_pack_cng_or_dtmf_;
DISALLOW_COPY_AND_ASSIGN(DelayManager);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_MANAGER_H_

View File

@ -0,0 +1,231 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DelayManager class.
#include "webrtc/modules/audio_coding/neteq4/delay_manager.h"
#include <math.h>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_delay_peak_detector.h"
namespace webrtc {
using ::testing::Return;
// Test fixture for DelayManager. Each test gets a fresh DelayManager (|dm_|)
// wired to a mock peak detector (|detector_|), plus helpers that simulate
// packet arrivals and the passing of time.
class DelayManagerTest : public ::testing::Test {
protected:
// Number of packet slots passed to the DelayManager constructor.
static const int kMaxNumberOfPackets = 240;
// Granularity, in ms, with which IncreaseTime() advances the counters.
static const int kTimeStepMs = 10;
// Sample rate (Hz) passed to DelayManager::Update().
static const int kFs = 8000;
static const int kFrameSizeMs = 20;
// RTP timestamp increment corresponding to one frame.
static const int kTsIncrement = kFrameSizeMs * kFs / 1000;
DelayManagerTest();
virtual void SetUp();
virtual void TearDown();
// Sets the packet length on |dm_| and expects it to be forwarded to the mock.
void SetPacketAudioLength(int lengt_ms);
// Feeds the next packet (current |seq_no_| and |ts_|) to |dm_| and then
// advances the sequence number by one and the timestamp by |kTsIncrement|.
void InsertNextPacket();
// Advances time in steps of |kTimeStepMs| until at least |inc_ms| has passed.
void IncreaseTime(int inc_ms);
DelayManager* dm_;
MockDelayPeakDetector detector_;
uint16_t seq_no_;
uint32_t ts_;
};
// Starts without a DelayManager (it is created in SetUp()) and with fixed,
// non-zero RTP sequence number and timestamp.
DelayManagerTest::DelayManagerTest()
    : dm_(NULL), seq_no_(0x1234), ts_(0x12345678) {}
void DelayManagerTest::SetUp() {
EXPECT_CALL(detector_, Reset())
.Times(1);
dm_ = new DelayManager(kMaxNumberOfPackets, &detector_);
}
// Sets the packet length on the DelayManager, and expects the same value to
// be passed on to the mock detector.
void DelayManagerTest::SetPacketAudioLength(int length_ms) {
  EXPECT_CALL(detector_, SetPacketAudioLength(length_ms));
  dm_->SetPacketAudioLength(length_ms);
}
// Hands the current packet to the DelayManager, expecting success, and then
// steps the RTP sequence number and timestamp forward to the next packet.
void DelayManagerTest::InsertNextPacket() {
  const int update_result = dm_->Update(seq_no_, ts_, kFs);
  EXPECT_EQ(0, update_result);
  ++seq_no_;
  ts_ += kTsIncrement;
}
// Lets |inc_ms| of time pass in steps of |kTimeStepMs|. For every step, the
// DelayManager is expected to forward the increment to the mock detector.
void DelayManagerTest::IncreaseTime(int inc_ms) {
  int elapsed_ms = 0;
  while (elapsed_ms < inc_ms) {
    EXPECT_CALL(detector_, IncrementCounter(kTimeStepMs)).Times(1);
    dm_->UpdateCounters(kTimeStepMs);
    elapsed_ms += kTimeStepMs;
  }
}
// Destroys the DelayManager under test and registers the expectation that the
// mock detector's Die() is called (presumably when the mock itself is
// destroyed together with the fixture).
void DelayManagerTest::TearDown() {
  // The DelayManager destructor is empty, so deleting it makes no calls on
  // the detector; the expectation can be registered afterwards.
  delete dm_;
  EXPECT_CALL(detector_, Die());
}
// Intentionally empty: SetUp() and TearDown() in the fixture already create
// and destroy the DelayManager object, which is all this test verifies.
TEST_F(DelayManagerTest, CreateAndDestroy) {}
// Verifies the start histogram: entry i should be 0.5^(i+1) in Q30, and all
// entries should sum to exactly 1 in Q30.
TEST_F(DelayManagerTest, VectorInitialization) {
  // Tolerance 65536 in Q30 corresponds to a delta of approximately 0.00006.
  const int kToleranceQ30 = 65536;
  const DelayManager::IATVector& histogram = dm_->iat_vector();
  double total = 0.0;
  for (size_t bin = 0; bin < histogram.size(); ++bin) {
    const double expected_q30 =
        ldexp(pow(0.5, static_cast<int>(bin + 1)), 30);
    EXPECT_NEAR(expected_q30, histogram[bin], kToleranceQ30);
    total += histogram[bin];
  }
  EXPECT_EQ(1 << 30, static_cast<int>(total));  // Should be 1 in Q30.
}
// A valid packet length must be accepted and forwarded to the detector
// exactly once; an illegal (negative) length must be rejected.
TEST_F(DelayManagerTest, SetPacketAudioLength) {
  const int kLengthMs = 30;
  EXPECT_CALL(detector_, SetPacketAudioLength(kLengthMs)).Times(1);

  const int valid_result = dm_->SetPacketAudioLength(kLengthMs);
  EXPECT_EQ(0, valid_result);

  const int invalid_result = dm_->SetPacketAudioLength(-1);
  EXPECT_EQ(-1, invalid_result);  // Illegal parameter value.
}
// PeakFound() must relay the detector's answer. The mock answers true on the
// first call and false on the second.
TEST_F(DelayManagerTest, PeakFound) {
  EXPECT_CALL(detector_, peak_found())
      .WillOnce(Return(true))
      .WillOnce(Return(false));

  const bool first_answer = dm_->PeakFound();
  const bool second_answer = dm_->PeakFound();
  EXPECT_TRUE(first_answer);
  EXPECT_FALSE(second_answer);
}
// UpdateCounters() must pass the elapsed time on to the detector, once.
TEST_F(DelayManagerTest, UpdateCounters) {
  const int elapsed_ms = kTimeStepMs;
  EXPECT_CALL(detector_, IncrementCounter(elapsed_ms)).Times(1);
  dm_->UpdateCounters(elapsed_ms);
}
// Two packets arriving exactly one frame apart: the inter-arrival time is one
// packet and the target level becomes one packet.
TEST_F(DelayManagerTest, UpdateNormal) {
  SetPacketAudioLength(kFrameSizeMs);
  InsertNextPacket();           // First packet arrival.
  IncreaseTime(kFrameSizeMs);   // One frame passes.

  // On the second arrival, the detector must be updated once with an
  // inter-arrival time of 1 packet and a (base) target level of 1. The mock
  // reports that no peak was found.
  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
  InsertNextPacket();

  const int expected_target_q8 = 1 << 8;
  EXPECT_EQ(expected_target_q8, dm_->TargetLevel());  // In Q8.
  EXPECT_EQ(1, dm_->base_target_level());

  int lower;
  int higher;
  dm_->BufferLimits(&lower, &higher);
  // |lower| is 75% of the target level. |higher| is the target level, but at
  // least 20 ms above |lower|; the 20 ms rule is the limiting case here.
  const int expected_lower = (1 << 8) * 3 / 4;
  EXPECT_EQ(expected_lower, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
// Two packets arriving two frames apart: the inter-arrival time is two
// packets and the target level becomes two packets.
TEST_F(DelayManagerTest, UpdateLongInterArrivalTime) {
  SetPacketAudioLength(kFrameSizeMs);
  InsertNextPacket();               // First packet arrival.
  IncreaseTime(2 * kFrameSizeMs);   // Two frames pass.

  // On the second arrival, the detector must be updated once with an
  // inter-arrival time of 2 packets and a (base) target level of 2. The mock
  // reports that no peak was found.
  EXPECT_CALL(detector_, Update(2, 2)).WillOnce(Return(false));
  InsertNextPacket();

  const int expected_target_q8 = 2 << 8;
  EXPECT_EQ(expected_target_q8, dm_->TargetLevel());  // In Q8.
  EXPECT_EQ(2, dm_->base_target_level());

  int lower;
  int higher;
  dm_->BufferLimits(&lower, &higher);
  // |lower| is 75% of the target level. |higher| is the target level, but at
  // least 20 ms above |lower|; the 20 ms rule is the limiting case here.
  const int expected_lower = (2 << 8) * 3 / 4;
  EXPECT_EQ(expected_lower, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
// When the detector reports peak-mode, the target level must be raised to the
// peak height, while the base target level stays unaffected.
TEST_F(DelayManagerTest, UpdatePeakFound) {
  SetPacketAudioLength(kFrameSizeMs);
  InsertNextPacket();           // First packet arrival.
  IncreaseTime(kFrameSizeMs);   // One frame passes.

  // On the second arrival, the detector must be updated once with an
  // inter-arrival time of 1 packet and a (base) target level of 1. This time
  // the mock reports that peaks were found, with a peak height of 5 packets.
  const int kPeakHeightPackets = 5;
  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(true));
  EXPECT_CALL(detector_, MaxPeakHeight())
      .WillOnce(Return(kPeakHeightPackets));
  InsertNextPacket();

  const int expected_target_q8 = kPeakHeightPackets << 8;
  EXPECT_EQ(expected_target_q8, dm_->TargetLevel());
  EXPECT_EQ(1, dm_->base_target_level());  // Base target level is w/o peaks.

  int lower;
  int higher;
  dm_->BufferLimits(&lower, &higher);
  // |lower| is 75% of the target level, and |higher| is the target level.
  const int expected_lower = (kPeakHeightPackets << 8) * 3 / 4;
  EXPECT_EQ(expected_lower, lower);
  EXPECT_EQ(expected_target_q8, higher);
}
// An externally set extra delay must be added to the reported target level
// and buffer limits, but must not affect the base target level.
TEST_F(DelayManagerTest, ExtraDelay) {
  const int kExtraDelayMs = 200;
  dm_->set_extra_delay_ms(kExtraDelayMs);
  SetPacketAudioLength(kFrameSizeMs);
  InsertNextPacket();           // First packet arrival.
  IncreaseTime(kFrameSizeMs);   // One frame passes.

  // On the second arrival, the detector must be updated once with an
  // inter-arrival time of 1 packet and a (base) target level of 1. The mock
  // reports that no peak was found.
  EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
  InsertNextPacket();

  const int extra_delay_packets = kExtraDelayMs / kFrameSizeMs;
  const int expected_target_q8 = (1 + extra_delay_packets) << 8;
  EXPECT_EQ(expected_target_q8, dm_->TargetLevel());  // In Q8.
  EXPECT_EQ(1, dm_->base_target_level());

  int lower;
  int higher;
  dm_->BufferLimits(&lower, &higher);
  // |lower| is 75% of the base target level plus the extra delay. |higher| is
  // the target level plus the extra delay, but at least 20 ms above |lower|.
  const int expected_lower =
      (1 << 8) * 3 / 4 + (kExtraDelayMs << 8) / kFrameSizeMs;
  EXPECT_EQ(expected_lower, lower);
  EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher);
}
// Invalid arguments must be rejected with -1.
TEST_F(DelayManagerTest, Failures) {
  // Wrong sample rate.
  const int bad_rate_result = dm_->Update(0, 0, -1);
  EXPECT_EQ(-1, bad_rate_result);

  // Wrong packet size: zero and negative lengths.
  const int zero_length_result = dm_->SetPacketAudioLength(0);
  EXPECT_EQ(-1, zero_length_result);
  const int negative_length_result = dm_->SetPacketAudioLength(-1);
  EXPECT_EQ(-1, negative_length_result);
}
} // namespace webrtc

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h"
#include <algorithm> // max
namespace webrtc {
// The DelayPeakDetector tracks severe inter-arrival times, called delay
// peaks. For each observed peak, its "height" (the inter-arrival time of the
// packet) and its "period" (the time since the previous peak) are recorded in
// a history. Once enough peaks have been recorded, peak-mode is engaged and
// the DelayManager asks the DelayPeakDetector for the worst peak height.
//
// A new detector is not in peak-mode, has a zero detection threshold, and has
// its period counter at -1 to mark that no peak has been seen yet.
DelayPeakDetector::DelayPeakDetector()
    : peak_found_(false),
      peak_detection_threshold_(0),
      peak_period_counter_ms_(-1) {}
// Forgets all recorded peaks and leaves peak-mode. The detection threshold is
// not touched.
void DelayPeakDetector::Reset() {
  peak_history_.clear();
  peak_found_ = false;
  peak_period_counter_ms_ = -1;  // Indicate that next peak is the first.
}
// Converts the peak height threshold from milliseconds to a number of
// packets, given that each packet holds |length_ms| of audio. Zero or
// negative lengths are ignored and leave the threshold unchanged.
void DelayPeakDetector::SetPacketAudioLength(int length_ms) {
  if (length_ms <= 0) {
    return;
  }
  peak_detection_threshold_ = kPeakHeightMs / length_ms;
}
int DelayPeakDetector::MaxPeakHeight() const {
int max_height = -1; // Returns -1 for an empty history.
std::list<Peak>::const_iterator it;
for (it = peak_history_.begin(); it != peak_history_.end(); ++it) {
max_height = std::max(max_height, it->peak_height_packets);
}
return max_height;
}
int DelayPeakDetector::MaxPeakPeriod() const {
int max_period = -1; // Returns -1 for an empty history.
std::list<Peak>::const_iterator it;
for (it = peak_history_.begin(); it != peak_history_.end(); ++it) {
max_period = std::max(max_period, it->period_ms);
}
return max_period;
}
bool DelayPeakDetector::Update(int inter_arrival_time, int target_level) {
if (inter_arrival_time > target_level + peak_detection_threshold_ ||
inter_arrival_time > 2 * target_level) {
// A delay peak is observed.
if (peak_period_counter_ms_ == -1) {
// This is the first peak. Reset the period counter.
peak_period_counter_ms_ = 0;
} else if (peak_period_counter_ms_ <= kMaxPeakPeriodMs) {
// This is not the first peak, and the period is valid.
// Store peak data in the vector.
Peak peak_data;
peak_data.period_ms = peak_period_counter_ms_;
peak_data.peak_height_packets = inter_arrival_time;
peak_history_.push_back(peak_data);
while (peak_history_.size() > kMaxNumPeaks) {
// Delete the oldest data point.
peak_history_.pop_front();
}
peak_period_counter_ms_ = 0;
} else if (peak_period_counter_ms_ <= 2 * kMaxPeakPeriodMs) {
// Invalid peak due to too long period. Reset period counter and start
// looking for next peak.
peak_period_counter_ms_ = 0;
} else {
// More than 2 times the maximum period has elapsed since the last peak
// was registered. It seams that the network conditions have changed.
// Reset the peak statistics.
Reset();
}
}
return CheckPeakConditions();
}
// Adds |inc_ms| to the peak period counter. A negative counter means that no
// peak has been observed yet, in which case it is left untouched.
void DelayPeakDetector::IncrementCounter(int inc_ms) {
  if (peak_period_counter_ms_ < 0) {
    return;
  }
  peak_period_counter_ms_ += inc_ms;
}
bool DelayPeakDetector::CheckPeakConditions() {
size_t s = peak_history_.size();
if (s >= kMinPeaksToTrigger &&
peak_period_counter_ms_ <= 2 * MaxPeakPeriod()) {
peak_found_ = true;
} else {
peak_found_ = false;
}
return peak_found_;
}
} // namespace webrtc

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_PEAK_DETECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_PEAK_DETECTOR_H_
#include <cstring> // size_t
#include <list>
#include "webrtc/system_wrappers/interface/constructor_magic.h"
namespace webrtc {
// Keeps a short history of delay peaks (unusually long packet inter-arrival
// times) and decides whether "peak-mode" is active. All public methods are
// virtual.
class DelayPeakDetector {
public:
// Creates a detector that is not in peak-mode and has an empty peak history.
DelayPeakDetector();
virtual ~DelayPeakDetector() {}
// Clears the peak history and leaves peak-mode.
virtual void Reset();
// Notifies the DelayPeakDetector of how much audio data is carried in each
// packet. Non-positive values of |length_ms| are ignored.
virtual void SetPacketAudioLength(int length_ms);
// Returns true if peak-mode is active. That is, delay peaks were observed
// recently.
virtual bool peak_found() { return peak_found_; }
// Calculates and returns the maximum delay peak height. Returns -1 if no
// delay peaks have been observed recently. The unit is number of packets.
virtual int MaxPeakHeight() const;
// Calculates and returns the maximum delay peak distance in ms.
// Returns -1 if no delay peaks have been observed recently.
virtual int MaxPeakPeriod() const;
// Updates the DelayPeakDetector with a new inter-arrival time (in packets)
// and the current target buffer level (needed to decide if a peak is observed
// or not). Returns true if peak-mode is active, false if not.
virtual bool Update(int inter_arrival_time, int target_level);
// Increments the |peak_period_counter_ms_| with |inc_ms|. Only increments
// the counter if it is non-negative. A negative denotes that no peak has
// been observed.
virtual void IncrementCounter(int inc_ms);
private:
// Maximum number of peaks kept in |peak_history_|; older ones are dropped.
static const size_t kMaxNumPeaks = 8;
// Number of peaks that must be in the history for peak-mode to be active.
static const size_t kMinPeaksToTrigger = 2;
// Peak height threshold in ms; converted to packets in
// SetPacketAudioLength().
static const int kPeakHeightMs = 78;
// Longest period, in ms, between two peaks for the new peak to be recorded.
static const int kMaxPeakPeriodMs = 10000;
// One recorded delay peak.
typedef struct {
int period_ms; // Time since the previous peak.
int peak_height_packets; // Inter-arrival time of the peak, in packets.
} Peak;
// Re-evaluates |peak_found_| from the history and the period counter, and
// returns the new value.
bool CheckPeakConditions();
std::list<Peak> peak_history_;
bool peak_found_;
// Threshold, in packets, derived from |kPeakHeightMs| and the packet length.
int peak_detection_threshold_;
// Time in ms since the last peak; -1 while no peak has been observed.
int peak_period_counter_ms_;
DISALLOW_COPY_AND_ASSIGN(DelayPeakDetector);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_PEAK_DETECTOR_H_

View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DelayPeakDetector class.
#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h"
#include "gtest/gtest.h"
namespace webrtc {
// A heap-allocated detector can be created and destroyed, and is not in
// peak-mode right after creation.
TEST(DelayPeakDetector, CreateAndDestroy) {
  DelayPeakDetector* const detector = new DelayPeakDetector();
  const bool peak_mode = detector->peak_found();
  EXPECT_FALSE(peak_mode);
  delete detector;
}
// With no peaks recorded, both maximum queries must return -1.
TEST(DelayPeakDetector, EmptyHistory) {
  DelayPeakDetector detector;
  const int max_height = detector.MaxPeakHeight();
  const int max_period = detector.MaxPeakPeriod();
  EXPECT_EQ(-1, max_height);
  EXPECT_EQ(-1, max_period);
}
// Inject a series of packet arrivals into the detector. Three of the packets
// have suffered delays. After the third delay peak, peak-mode is expected to
// start. This should then continue until it is disengaged due to lack of peaks.
TEST(DelayPeakDetector, TriggerPeakMode) {
  DelayPeakDetector detector;
  const int kPacketSizeMs = 30;
  detector.SetPacketAudioLength(kPacketSizeMs);

  // Load up normal arrival times; 0 ms, 30 ms, 60 ms, 90 ms, ...
  const int kNumPackets = 1000;
  int arrival_times_ms[kNumPackets];
  for (int i = 0; i < kNumPackets; ++i) {
    arrival_times_ms[i] = i * kPacketSizeMs;
  }

  // Delay three packets.
  const int kPeakDelayMs = 100;
  // First delay peak.
  arrival_times_ms[100] += kPeakDelayMs;
  // Second delay peak.
  arrival_times_ms[200] += kPeakDelayMs;
  // Third delay peak. Trigger peak-mode after this packet.
  arrival_times_ms[400] += kPeakDelayMs;
  // The second peak period is the longest, 200 packets.
  const int kWorstPeakPeriod = 200 * kPacketSizeMs;
  int peak_mode_start_ms = arrival_times_ms[400];
  // Expect to disengage after no peaks are observed for two period times.
  int peak_mode_end_ms = peak_mode_start_ms + 2 * kWorstPeakPeriod;

  // Load into detector.
  int time = 0;
  int next = 1;  // Start with the second packet to get a proper IAT.
  while (next < kNumPackets) {
    // |next| is advanced inside this loop, so it must be range-checked before
    // it is used as an index; otherwise the element one past the end of
    // |arrival_times_ms| is read once the last packet has been consumed.
    while (next < kNumPackets && arrival_times_ms[next] <= time) {
      int iat_packets = (arrival_times_ms[next] - arrival_times_ms[next - 1]) /
          kPacketSizeMs;
      const int kTargetBufferLevel = 1;  // Define peaks to be iat > 2.
      if (time < peak_mode_start_ms || time > peak_mode_end_ms) {
        EXPECT_FALSE(detector.Update(iat_packets, kTargetBufferLevel));
      } else {
        EXPECT_TRUE(detector.Update(iat_packets, kTargetBufferLevel));
        EXPECT_EQ(kWorstPeakPeriod, detector.MaxPeakPeriod());
        EXPECT_EQ(kPeakDelayMs / kPacketSizeMs + 1, detector.MaxPeakHeight());
      }
      ++next;
    }
    detector.IncrementCounter(10);
    time += 10;  // Increase time 10 ms.
  }
}
// Same test as TriggerPeakMode, but with base target buffer level increased to
// 2, in order to raise the bar for delay peaks to inter-arrival times > 4.
// The delay pattern has peaks with delay = 3, thus should not trigger.
TEST(DelayPeakDetector, DoNotTriggerPeakMode) {
  DelayPeakDetector detector;
  const int kPacketSizeMs = 30;
  detector.SetPacketAudioLength(kPacketSizeMs);

  // Load up normal arrival times; 0 ms, 30 ms, 60 ms, 90 ms, ...
  const int kNumPackets = 1000;
  int arrival_times_ms[kNumPackets];
  for (int i = 0; i < kNumPackets; ++i) {
    arrival_times_ms[i] = i * kPacketSizeMs;
  }

  // Delay three packets.
  const int kPeakDelayMs = 100;
  // First delay peak.
  arrival_times_ms[100] += kPeakDelayMs;
  // Second delay peak.
  arrival_times_ms[200] += kPeakDelayMs;
  // Third delay peak.
  arrival_times_ms[400] += kPeakDelayMs;

  // Load into detector.
  int time = 0;
  int next = 1;  // Start with the second packet to get a proper IAT.
  while (next < kNumPackets) {
    // |next| is advanced inside this loop, so it must be range-checked before
    // it is used as an index; otherwise the element one past the end of
    // |arrival_times_ms| is read once the last packet has been consumed.
    while (next < kNumPackets && arrival_times_ms[next] <= time) {
      int iat_packets = (arrival_times_ms[next] - arrival_times_ms[next - 1]) /
          kPacketSizeMs;
      const int kTargetBufferLevel = 2;  // Define peaks to be iat > 4.
      EXPECT_FALSE(detector.Update(iat_packets, kTargetBufferLevel));
      ++next;
    }
    detector.IncrementCounter(10);
    time += 10;  // Increase time 10 ms.
  }
}
} // namespace webrtc

View File

@ -0,0 +1,352 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h"
#include <assert.h>
#include <algorithm> // Access to min, max.
#include <cstring> // Access to memset.
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
// Table of constants used in method DspHelper::ParabolicFit().
// Each row describes one candidate position x on a parabola, for x going from
// 0.5 (first row) to 1.5 (last row), where x = 1 is the middle one of the
// three signal points used in the fit. The three columns are, rounded to
// integers:
//   [0] 240 * x    - used as a threshold when searching for the position,
//   [1] 128 * x^2  - multiplied with the second-order term |den|,
//   [2] 128 * x    - multiplied with the first-order term |num|.
const int16_t DspHelper::kParabolaCoefficients[17][3] = {
{ 120, 32, 64 },
{ 140, 44, 75 },
{ 150, 50, 80 },
{ 160, 57, 85 },
{ 180, 72, 96 },
{ 200, 89, 107 },
{ 210, 98, 112 },
{ 220, 108, 117 },
{ 240, 128, 128 },
{ 260, 150, 139 },
{ 270, 162, 144 },
{ 280, 174, 149 },
{ 300, 200, 160 },
{ 320, 228, 171 },
{ 330, 242, 176 },
{ 340, 257, 181 },
{ 360, 288, 192 } };
// Filter coefficients used when downsampling from the indicated sample rates
// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. The corresponding Q0
// values are provided in the comments before each array. All four filters are
// symmetric, and their coefficients sum to approximately 4096, i.e., 1.0 in
// Q12 (the exact sums are 4096, 4095, 4109 and 4112, respectively).
// Q0 values: {0.3, 0.4, 0.3}.
const int16_t DspHelper::kDownsample8kHzTbl[3] = { 1229, 1638, 1229 };
// Q0 values: {0.15, 0.2, 0.3, 0.2, 0.15}.
const int16_t DspHelper::kDownsample16kHzTbl[5] = { 614, 819, 1229, 819, 614 };
// Q0 values: {0.1425, 0.1251, 0.1525, 0.1628, 0.1525, 0.1251, 0.1425}.
const int16_t DspHelper::kDownsample32kHzTbl[7] = {
584, 512, 625, 667, 625, 512, 584 };
// Q0 values: {0.2487, 0.0952, 0.1042, 0.1074, 0.1042, 0.0952, 0.2487}.
const int16_t DspHelper::kDownsample48kHzTbl[7] = {
1019, 390, 427, 440, 427, 390, 1019 };
// Scales |length| samples from |input| with a gain that changes linearly from
// sample to sample, and writes the result to |output|. |factor| is the gain
// applied to the first sample, in Q14 (16384 corresponds to 1.0). After each
// sample, |increment| (in Q20) is added to the gain, which is kept within
// [0, 16384] in Q14. Returns the gain, in Q14, that would apply to the sample
// following the last one processed. |input| and |output| may be the same
// buffer.
int DspHelper::RampSignal(const int16_t* input,
                          size_t length,
                          int factor,
                          int increment,
                          int16_t* output) {
  // Track the gain in Q20 for precision; the added 32 is half a Q14 step.
  int factor_q20 = (factor << 6) + 32;
  // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14?
  for (size_t n = 0; n != length; ++n) {
    // Multiply in Q14 and round to Q0.
    const int scaled_q14 = factor * input[n] + 8192;
    output[n] = scaled_q14 >> 14;
    // Step the gain. It is never allowed to go negative, nor above 1.0.
    factor_q20 = std::max(factor_q20 + increment, 0);
    factor = std::min(factor_q20 >> 6, 16384);
  }
  return factor;
}
// In-place version of RampSignal(): the ramped samples overwrite |signal|.
// Returns the same value as the five-argument overload.
int DspHelper::RampSignal(int16_t* signal,
                          size_t length,
                          int factor,
                          int increment) {
  int16_t* const in_place_output = signal;
  return RampSignal(signal, length, factor, increment, in_place_output);
}
// Applies the same in-place ramp to every channel of |signal|, over the
// |length| samples that start at |start_index|. Each channel starts from the
// same |factor|. Returns the end factor from the last channel processed (0 if
// there are no channels). If the requested range extends beyond the end of
// |signal|, nothing is modified and |factor| is returned unaltered (after an
// assert in debug builds).
int DspHelper::RampSignal(AudioMultiVector<int16_t>* signal,
                          size_t start_index,
                          size_t length,
                          int factor,
                          int increment) {
  const size_t end_index = start_index + length;
  assert(end_index <= signal->Size());
  if (end_index > signal->Size()) {
    // Wrong parameters. Do nothing and return the scale factor unaltered.
    return factor;
  }

  int end_factor = 0;
  for (size_t ch = 0; ch < signal->Channels(); ++ch) {
    int16_t* const channel_start = &(*signal)[ch][start_index];
    end_factor = RampSignal(channel_start, length, factor, increment);
  }
  return end_factor;
}
// Finds the |num_peaks| largest peaks in |data|, one at a time, and writes
// their positions to |peak_index| and their values to |peak_value| (both
// arrays must hold at least |num_peaks| elements).
//
// For each peak, the maximum sample is located and the position is then
// refined with ParabolicFit() where possible. The reported index is scaled by
// 2 * |fs_mult| relative to the sample index in |data|.
//
// NOTE: |data| is modified. After each peak except the last one, the samples
// within two positions of the peak are set to zero, so that the next search
// finds a different peak.
// NOTE(review): |fs_mult| is presumably the sample rate in multiples of 8 kHz
// -- confirm with the callers.
void DspHelper::PeakDetection(int16_t* data, int data_length,
int num_peaks, int fs_mult,
int* peak_index, int16_t* peak_value) {
int16_t min_index = 0;
int16_t max_index = 0;
for (int i = 0; i <= num_peaks - 1; i++) {
if (num_peaks == 1) {
// Single peak. The parabola fit assumes that an extra point is
// available; worst case it gets a zero on the high end of the signal.
// TODO(hlundin): This can potentially get much worse. It breaks the
// API contract, that the length of |data| is |data_length|.
data_length++;
}
// The last element is left out of the search, so that a sample is always
// available to the right of the peak.
peak_index[i] = WebRtcSpl_MaxIndexW16(data, data_length - 1);
if (i != num_peaks - 1) {
// Remember the region to erase before |peak_index[i]| is overwritten with
// the refined (and rescaled) index below.
min_index = std::max(0, peak_index[i] - 2);
max_index = std::min(data_length - 1, peak_index[i] + 2);
}
if ((peak_index[i] != 0) && (peak_index[i] != (data_length - 2))) {
// Interior peak: fit a parabola to the peak and its two neighbors.
ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i],
&peak_value[i]);
} else {
if (peak_index[i] == data_length - 2) {
// Peak at the last searched position.
if (data[peak_index[i]] > data[peak_index[i] + 1]) {
ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i],
&peak_value[i]);
} else if (data[peak_index[i]] <= data[peak_index[i] + 1]) {
// Linear approximation: use the mean of the two samples as value and the
// point halfway between them as position.
peak_value[i] = (data[peak_index[i]] + data[peak_index[i] + 1]) >> 1;
peak_index[i] = (peak_index[i] * 2 + 1) * fs_mult;
}
} else {
// Peak at the very first sample; no left neighbor is available for a fit.
peak_value[i] = data[peak_index[i]];
peak_index[i] = peak_index[i] * 2 * fs_mult;
}
}
if (i != num_peaks - 1) {
// Erase the peak just found, so that the next iteration finds another one.
memset(&data[min_index], 0,
sizeof(data[0]) * (max_index - min_index + 1));
}
}
}
// Fits a parabola through the three samples signal_points[0..2] and estimates
// the position and value of its extreme point on a grid that is 2 * |fs_mult|
// times finer than the sample grid.
//
// On input, |*peak_index| is the sample index of the middle point
// (signal_points[1]). On output, it holds the refined position, expressed as
// (input index) * 2 * |fs_mult| plus or minus an offset of at most |fs_mult|
// grid steps. |*peak_value| receives the parabola's value at that position.
//
// NOTE(review): the |fit_index| table is only fully populated for |fs_mult|
// equal to 1, 2 and 4; all other values use the 13-entry table, which matches
// the indexing below only for |fs_mult| == 6. Assumes callers pass 1, 2, 4 or
// 6 -- TODO confirm.
void DspHelper::ParabolicFit(int16_t* signal_points, int fs_mult,
int* peak_index, int16_t* peak_value) {
// |fit_index| selects the rows of |kParabolaCoefficients| that correspond to
// the grid points for this |fs_mult|. Entry |fs_mult| is the center row (8).
uint16_t fit_index[13];
if (fs_mult == 1) {
fit_index[0] = 0;
fit_index[1] = 8;
fit_index[2] = 16;
} else if (fs_mult == 2) {
fit_index[0] = 0;
fit_index[1] = 4;
fit_index[2] = 8;
fit_index[3] = 12;
fit_index[4] = 16;
} else if (fs_mult == 4) {
fit_index[0] = 0;
fit_index[1] = 2;
fit_index[2] = 4;
fit_index[3] = 6;
fit_index[4] = 8;
fit_index[5] = 10;
fit_index[6] = 12;
fit_index[7] = 14;
fit_index[8] = 16;
} else {
fit_index[0] = 0;
fit_index[1] = 1;
fit_index[2] = 3;
fit_index[3] = 4;
fit_index[4] = 5;
fit_index[5] = 7;
fit_index[6] = 8;
fit_index[7] = 9;
fit_index[8] = 11;
fit_index[9] = 12;
fit_index[10] = 13;
fit_index[11] = 15;
fit_index[12] = 16;
}
// With the three points at x = 0, 1, 2, the parabola through them is
// y(x) = signal_points[0] + (num / 2) * x + (den / 2) * x^2, where
// num = -3 * signal_points[0] + 4 * signal_points[1] - signal_points[2];
// den = signal_points[0] - 2 * signal_points[1] + signal_points[2];
int32_t num = (signal_points[0] * -3) + (signal_points[1] * 4)
- signal_points[2];
int32_t den = signal_points[0] + (signal_points[1] * -2) + signal_points[2];
// The extreme point is at x = -num / (2 * den). The first table column holds
// 240 * x, so |temp| = 120 * num can be compared with -den times a table
// value without performing any division.
int32_t temp = num * 120;
int flag = 1;
// |stp| is the distance between neighboring grid points (in table units), and
// |strt| is the boundary halfway between the center and the grid point below.
int16_t stp = kParabolaCoefficients[fit_index[fs_mult]][0]
- kParabolaCoefficients[fit_index[fs_mult - 1]][0];
int16_t strt = (kParabolaCoefficients[fit_index[fs_mult]][0]
+ kParabolaCoefficients[fit_index[fs_mult - 1]][0]) / 2;
int16_t lmt;
if (temp < -den * strt) {
// The extreme point is below the center. Step downwards, one grid point at
// a time, until the right interval (or the lowest grid point) is reached.
lmt = strt - stp;
while (flag) {
if ((flag == fs_mult) || (temp > -den * lmt)) {
*peak_value = (den * kParabolaCoefficients[fit_index[fs_mult - flag]][1]
+ num * kParabolaCoefficients[fit_index[fs_mult - flag]][2]
+ signal_points[0] * 256) / 256;
*peak_index = *peak_index * 2 * fs_mult - flag;
flag = 0;
} else {
flag++;
lmt -= stp;
}
}
} else if (temp > -den * (strt + stp)) {
// The extreme point is above the center. Step upwards in the same manner.
lmt = strt + 2 * stp;
while (flag) {
if ((flag == fs_mult) || (temp < -den * lmt)) {
int32_t temp_term_1 =
den * kParabolaCoefficients[fit_index[fs_mult+flag]][1];
int32_t temp_term_2 =
num * kParabolaCoefficients[fit_index[fs_mult+flag]][2];
int32_t temp_term_3 = signal_points[0] * 256;
*peak_value = (temp_term_1 + temp_term_2 + temp_term_3) / 256;
*peak_index = *peak_index * 2 * fs_mult + flag;
flag = 0;
} else {
flag++;
lmt += stp;
}
}
} else {
// The extreme point is closest to the center grid point; use the middle
// sample as it is.
*peak_value = signal_points[1];
*peak_index = *peak_index * 2 * fs_mult;
}
}
// Searches the lags |min_lag| .. |max_lag| (inclusive) for the one that gives
// the smallest sum of absolute differences between |signal| and the same signal
// delayed by that lag, evaluated over |length| samples. The winning lag is
// returned and its distortion is written to |distortion_value|. When the lag
// range is empty, -1 is returned and the distortion is WEBRTC_SPL_WORD32_MAX.
// Note that samples at negative offsets (down to signal[-max_lag]) are read.
int DspHelper::MinDistortion(const int16_t* signal, int min_lag,
                             int max_lag, int length,
                             int32_t* distortion_value) {
  int winning_lag = -1;
  int32_t lowest_distortion = WEBRTC_SPL_WORD32_MAX;
  for (int lag = min_lag; lag <= max_lag; ++lag) {
    const int16_t* current = signal;
    const int16_t* delayed = signal - lag;
    // Accumulate the sum-abs-diff for this lag.
    int32_t distortion = 0;
    for (int k = 0; k < length; ++k) {
      distortion += WEBRTC_SPL_ABS_W32(current[k] - delayed[k]);
    }
    // Only a strictly smaller distortion replaces the current winner, so the
    // smallest lag wins in case of a tie.
    if (distortion >= lowest_distortion) {
      continue;
    }
    lowest_distortion = distortion;
    winning_lag = lag;
  }
  *distortion_value = lowest_distortion;
  return winning_lag;
}
// Cross-fades from |input1| to |input2| over |length| samples, writing the mix
// to |output|. The weight of |input1| starts at |*mix_factor| (Q14) and drops
// by |factor_decrement| per sample; the weight of |input2| is kept as the
// complement (16384 minus the |input1| weight). The |input1| weight that would
// apply to the next sample is stored back into |*mix_factor|.
void DspHelper::CrossFade(const int16_t* input1, const int16_t* input2,
                          int length, int16_t* mix_factor,
                          int16_t factor_decrement, int16_t* output) {
  int16_t gain1 = *mix_factor;
  int16_t gain2 = 16384 - gain1;
  int n = 0;
  while (n < length) {
    // Weighted sum in Q14; adding 8192 rounds before the shift back to Q0.
    const int mixed = gain1 * input1[n] + gain2 * input2[n] + 8192;
    output[n] = mixed >> 14;
    gain1 -= factor_decrement;
    gain2 += factor_decrement;
    ++n;
  }
  *mix_factor = gain1;
}
// Applies a ramping gain to |input| and writes the result to |output|. The
// first sample is scaled by |*factor| (Q14). After every sample the gain is
// advanced by |increment| in a Q20 accumulator, which is kept non-negative,
// and the Q14 gain derived from it is capped at 16384 (unity). The gain that
// would apply to the next sample is written back to |*factor|.
void DspHelper::UnmuteSignal(const int16_t* input, int length, int16_t* factor,
                             int16_t increment, int16_t* output) {
  uint16_t gain_q14 = *factor;
  // Q20 accumulator; the extra 32 is half an LSB of Q14, for rounding.
  int32_t gain_q20 = (static_cast<int32_t>(gain_q14) << 6) + 32;
  for (int n = 0; n < length; ++n) {
    output[n] = (gain_q14 * input[n] + 8192) >> 14;
    gain_q20 += increment;
    if (gain_q20 < 0) {
      gain_q20 = 0;
    }
    const int32_t next_gain = gain_q20 >> 6;
    gain_q14 = (next_gain > 16384) ? 16384 : next_gain;
  }
  *factor = gain_q14;
}
// Fades |signal| in place over |length| samples. The gain starts at unity
// (16384 in Q14) and is tracked in a Q20 accumulator from which |mute_slope|
// is subtracted after every sample. The accumulator is not clamped, so the
// caller must choose |mute_slope| and |length| such that it stays in range.
void DspHelper::MuteSignal(int16_t* signal, int16_t mute_slope, int length) {
  // Unity gain in Q20, plus half an LSB of Q14 for rounding.
  int32_t gain_q20 = (16384 << 6) + 32;
  int n = 0;
  while (n < length) {
    const int32_t gain_q14 = gain_q20 >> 6;
    signal[n] = (gain_q14 * signal[n] + 8192) >> 14;
    gain_q20 -= mute_slope;
    ++n;
  }
}
// Downsamples |input| (sampled at |input_rate_hz|) to 4 kHz, writing
// |output_length| samples to |output|. Supported input rates are 8000, 16000,
// 32000 and 48000 Hz; any other rate trips an assert and returns -1. When
// |compensate_delay| is false the filter delay passed to the resampler is 0.
// Otherwise the return value is that of WebRtcSpl_DownsampleFast.
int DspHelper::DownsampleTo4kHz(const int16_t* input, int input_length,
                                int output_length, int input_rate_hz,
                                bool compensate_delay, int16_t* output) {
  // NOTE: The phase delay values are wrong compared to the true phase delay
  // of the filters. However, the error is preserved (through the +1 term) for
  // consistency.
  const int16_t* taps;     // Filter coefficients.
  int16_t num_taps;        // Number of coefficients.
  int16_t phase_delay;     // Phase delay in samples.
  int16_t decimation;      // Conversion rate (input_rate_hz / 4000).
  if (input_rate_hz == 8000) {
    taps = kDownsample8kHzTbl;
    num_taps = 3;
    decimation = 2;
    phase_delay = 1 + 1;
  } else if (input_rate_hz == 16000) {
    taps = kDownsample16kHzTbl;
    num_taps = 5;
    decimation = 4;
    phase_delay = 2 + 1;
  } else if (input_rate_hz == 32000) {
    taps = kDownsample32kHzTbl;
    num_taps = 7;
    decimation = 8;
    phase_delay = 3 + 1;
  } else if (input_rate_hz == 48000) {
    taps = kDownsample48kHzTbl;
    num_taps = 7;
    decimation = 12;
    phase_delay = 3 + 1;
  } else {
    // Unsupported sample rate.
    assert(false);
    return -1;
  }

  if (!compensate_delay) {
    // Disregard delay compensation.
    phase_delay = 0;
  }

  // The resampler is handed the input starting (num_taps - 1) samples in, with
  // the length shortened by the same amount.
  const int skipped = num_taps - 1;
  // Returns -1 if input signal is too short; 0 otherwise.
  return WebRtcSpl_DownsampleFast(&input[skipped], input_length - skipped,
                                  output, output_length, taps, num_taps,
                                  decimation, phase_delay);
}
} // namespace webrtc

View File

@ -0,0 +1,136 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DSP_HELPER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DSP_HELPER_H_
#include <cstring> // Access to size_t.
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class contains various signal processing functions, all implemented as
// static methods.
// Collection of fixed-point signal processing helpers. Every member is static;
// the class carries no state and copying is disabled, so it is used purely as
// a namespace for these functions.
class DspHelper {
public:
// Filter coefficients used when downsampling from the indicated sample rates
// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12.
static const int16_t kDownsample8kHzTbl[3];
static const int16_t kDownsample16kHzTbl[5];
static const int16_t kDownsample32kHzTbl[7];
static const int16_t kDownsample48kHzTbl[7];
// Constants used to mute and unmute over 5 samples. The coefficients are
// in Q15. There is one start value and one per-sample increment for each
// supported sample rate.
static const int kMuteFactorStart8kHz = 27307;
static const int kMuteFactorIncrement8kHz = -5461;
static const int kUnmuteFactorStart8kHz = 5461;
static const int kUnmuteFactorIncrement8kHz = 5461;
static const int kMuteFactorStart16kHz = 29789;
static const int kMuteFactorIncrement16kHz = -2979;
static const int kUnmuteFactorStart16kHz = 2979;
static const int kUnmuteFactorIncrement16kHz = 2979;
static const int kMuteFactorStart32kHz = 31208;
static const int kMuteFactorIncrement32kHz = -1560;
static const int kUnmuteFactorStart32kHz = 1560;
static const int kUnmuteFactorIncrement32kHz = 1560;
static const int kMuteFactorStart48kHz = 31711;
static const int kMuteFactorIncrement48kHz = -1057;
static const int kUnmuteFactorStart48kHz = 1057;
static const int kUnmuteFactorIncrement48kHz = 1057;
// Multiplies the signal with a gradually changing factor.
// The first sample is multiplied with |factor| (in Q14). For each sample,
// |factor| is increased (additive) by the |increment| (in Q20), which can
// be negative. Returns the scale factor after the last increment.
static int RampSignal(const int16_t* input,
size_t length,
int factor,
int increment,
int16_t* output);
// Same as above, but with the samples of |signal| being modified in-place.
static int RampSignal(int16_t* signal,
size_t length,
int factor,
int increment);
// Same as above, but processes |length| samples from |signal|, starting at
// |start_index|.
static int RampSignal(AudioMultiVector<int16_t>* signal,
size_t start_index,
size_t length,
int factor,
int increment);
// Peak detection with parabolic fit. Looks for |num_peaks| maxima in |data|,
// having length |data_length| and sample rate multiplier |fs_mult|. The peak
// locations and values are written to the arrays |peak_index| and
// |peak_value|, respectively. Both arrays must hold at least |num_peaks|
// elements. Note that |data| is not const: the method takes a mutable buffer.
static void PeakDetection(int16_t* data, int data_length,
int num_peaks, int fs_mult,
int* peak_index, int16_t* peak_value);
// Estimates the height and location of a maximum. The three values in the
// array |signal_points| are used as basis for a parabolic fit, which is then
// used to find the maximum in an interpolated signal. The |signal_points| are
// assumed to be from a 4 kHz signal, while the maximum, written to
// |peak_index| and |peak_value| is given in the full sample rate, as
// indicated by the sample rate multiplier |fs_mult|. On input, |peak_index|
// must hold the index of the middle point in the 4 kHz signal.
static void ParabolicFit(int16_t* signal_points, int fs_mult,
int* peak_index, int16_t* peak_value);
// Calculates the sum-abs-diff for |signal| when compared to a displaced
// version of itself. Returns the displacement lag that results in the minimum
// distortion. The resulting distortion is written to |distortion_value|.
// The values of |min_lag| and |max_lag| are boundaries for the search
// (both inclusive). The displaced version is read from before |signal|, so
// at least |max_lag| samples must be available ahead of the pointer.
static int MinDistortion(const int16_t* signal, int min_lag,
int max_lag, int length, int32_t* distortion_value);
// Mixes |length| samples from |input1| and |input2| together and writes the
// result to |output|. The gain for |input1| starts at |mix_factor| (Q14) and
// is decreased by |factor_decrement| (Q14) for each sample. The gain for
// |input2| is the complement 16384 - mix_factor. On return, |mix_factor|
// holds the updated gain for |input1|.
static void CrossFade(const int16_t* input1, const int16_t* input2,
int length, int16_t* mix_factor,
int16_t factor_decrement, int16_t* output);
// Scales |input| with an increasing gain. Applies |factor| (Q14) to the first
// sample and increases the gain by |increment| (Q20) for each sample. The
// result is written to |output|. |length| samples are processed. On return,
// |factor| holds the updated gain.
static void UnmuteSignal(const int16_t* input, int length, int16_t* factor,
int16_t increment, int16_t* output);
// Starts at unity gain and gradually fades out |signal|. For each sample,
// the gain is reduced by |mute_slope|. The implementation subtracts
// |mute_slope| from a Q20 gain accumulator, i.e., one unit of |mute_slope|
// corresponds to 1/64 of a Q14 gain step. |length| samples are processed.
static void MuteSignal(int16_t* signal, int16_t mute_slope, int length);
// Downsamples |input| from |input_rate_hz| to 4 kHz sample rate. The input
// has |input_length| samples, and the method will write |output_length|
// samples to |output|. Compensates for the phase delay of the downsampling
// filters if |compensate_delay| is true. Returns -1 if the input is too short
// to produce |output_length| samples, otherwise 0.
static int DownsampleTo4kHz(const int16_t* input, int input_length,
int output_length, int input_rate_hz,
bool compensate_delay, int16_t* output);
private:
// Table of constants used in method DspHelper::ParabolicFit().
static const int16_t kParabolaCoefficients[17][3];
DISALLOW_COPY_AND_ASSIGN(DspHelper);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DSP_HELPER_H_

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Ramps a constant signal from gain 0 towards gain 1 and verifies both the
// copying and the in-place array overloads of DspHelper::RampSignal.
TEST(DspHelper, RampSignalArray) {
  static const int kLen = 100;
  int16_t input[kLen];
  int16_t output[kLen];
  // Constant input signal with all samples equal to 1000.
  for (int n = 0; n < kLen; ++n) {
    input[n] = 1000;
  }

  // The factor is in Q14 and the increment in Q20 (hence the shift by 6), so
  // this increment takes the gain from 0 to 1 over |kLen| samples.
  int start_factor = 0;
  int increment = (16384 << 6) / kLen;

  // Overload with separate input and output arrays.
  int stop_factor =
      DspHelper::RampSignal(input, kLen, start_factor, increment, output);
  EXPECT_EQ(16383, stop_factor);  // Almost reach 1 in Q14.
  for (int n = 0; n < kLen; ++n) {
    EXPECT_EQ(1000 * n / kLen, output[n]);
  }

  // In-place overload. (Note that this modifies |input|.)
  stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment);
  EXPECT_EQ(16383, stop_factor);  // Almost reach 1 in Q14.
  for (int n = 0; n < kLen; ++n) {
    EXPECT_EQ(1000 * n / kLen, input[n]);
  }
}
// Ramps the middle third of a multi-channel vector and verifies that only that
// part is modified, identically in every channel.
TEST(DspHelper, RampSignalAudioMultiVector) {
  static const int kLen = 100;
  static const int kChannels = 5;
  AudioMultiVector<int16_t> input(kChannels, kLen * 3);
  // Constant input signal with all samples equal to 1000.
  for (int i = 0; i < kLen * 3; ++i) {
    for (int channel = 0; channel < kChannels; ++channel) {
      input[channel][i] = 1000;
    }
  }

  // Ramp |kLen| samples, starting |kLen| samples into the vector. The factor
  // is in Q14 and the increment in Q20 (hence the shift by 6), so the gain
  // goes from 0 to 1 over the ramped section.
  int start_index = kLen;
  int start_factor = 0;
  int increment = (16384 << 6) / kLen;
  int stop_factor = DspHelper::RampSignal(&input, start_index, kLen,
                                          start_factor, increment);
  EXPECT_EQ(16383, stop_factor);  // Almost reach 1 in Q14.

  // Walk through all three sections: the first and last |kLen| samples must be
  // untouched, while the middle |kLen| samples must follow the ramp.
  for (int i = 0; i < 3 * kLen; ++i) {
    const bool in_ramp = (i >= kLen) && (i < 2 * kLen);
    const int expected = in_ramp ? 1000 * (i - kLen) / kLen : 1000;
    for (int channel = 0; channel < kChannels; ++channel) {
      EXPECT_EQ(expected, input[channel][i]);
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,226 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h"
#include <assert.h>
#include <algorithm> // max
// Modify the code to obtain backwards bit-exactness. Once bit-exactness is no
// longer required, this #define should be removed (and the code that it
// enables).
#define LEGACY_BITEXACT
namespace webrtc {
// The ParseEvent method parses 4 bytes from |payload| according to this format
// from RFC 4733:
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | event |E|R| volume | duration |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// Legend (adapted from RFC 4733)
// - event: The event field is a number between 0 and 255 identifying a
// specific telephony event. The buffer will not accept any event
// numbers larger than 15.
// - E: If set to a value of one, the "end" bit indicates that this
// packet contains the end of the event. For long-lasting events
// that have to be split into segments, only the final packet for
// the final segment will have the E bit set.
// - R: Reserved.
// - volume: For DTMF digits and other events representable as tones, this
// field describes the power level of the tone, expressed in dBm0
// after dropping the sign. Power levels range from 0 to -63 dBm0.
// Thus, larger values denote lower volume. The buffer discards
// values larger than 36 (i.e., lower than -36 dBm0).
// - duration: The duration field indicates the duration of the event or segment
// being reported, in timestamp units, expressed as an unsigned
// integer in network byte order. For a non-zero value, the event
// or segment began at the instant identified by the RTP timestamp
// and has so far lasted as long as indicated by this parameter.
// The event may or may not have ended. If the event duration
// exceeds the maximum representable by the duration field, the
// event is split into several contiguous segments. The buffer will
// discard zero-duration events.
//
// Decodes the 4-byte RFC 4733 telephone-event payload into |event| and stamps
// it with |rtp_timestamp|. Returns kInvalidPointer if |payload| or |event| is
// NULL, kPayloadTooShort if fewer than 4 bytes are available, and kOK
// otherwise. No range checking of the decoded fields is done here.
int DtmfBuffer::ParseEvent(uint32_t rtp_timestamp,
                           const uint8_t* payload,
                           int payload_length_bytes,
                           DtmfEvent* event) {
  if (!(payload && event)) {
    return kInvalidPointer;
  }
  if (payload_length_bytes < 4) {
    return kPayloadTooShort;
  }

  // Byte 0: event number.
  event->event_no = payload[0];
  // Byte 1: end bit (MSB), reserved bit (ignored), 6-bit volume.
  event->end_bit = ((payload[1] & 0x80) != 0);
  event->volume = (payload[1] & 0x3F);
  // Bytes 2-3: duration, most significant byte first.
  event->duration = (payload[2] << 8) | payload[3];
  event->timestamp = rtp_timestamp;
  return kOK;
}
// Inserts a DTMF event into the buffer. The event should be parsed from the
// bit stream using the ParseEvent method above before inserting it in the
// buffer.
// DTMF events can be quite long, and in most cases the duration of the event
// is not known when the first packet describing it is sent. To deal with that,
// the RFC 4733 specifies that multiple packets are sent for one and the same
// event as it is being created (typically, as the user is pressing the key).
// These packets will all share the same start timestamp and event number,
// while the duration will be the cumulative duration from the start. When
// inserting a new event, the InsertEvent method tries to find a matching event
// already in the buffer. If so, the new event is simply merged with the
// existing one.
// Validates |event| and adds it to the buffer. An event that matches one
// already in the buffer is merged into it; otherwise the event is appended and
// the buffer is re-sorted. Returns kInvalidEventParameters when the event
// number is outside 0..15, the volume is outside 0..36, or the duration is
// outside 1..65535; returns kOK otherwise.
int DtmfBuffer::InsertEvent(const DtmfEvent& event) {
  const bool event_no_ok = (event.event_no >= 0) && (event.event_no <= 15);
  const bool volume_ok = (event.volume >= 0) && (event.volume <= 36);
  const bool duration_ok = (event.duration > 0) && (event.duration <= 65535);
  if (!(event_no_ok && volume_ok && duration_ok)) {
    return kInvalidEventParameters;
  }

  // Try to merge with an event that is already queued.
  for (DtmfList::iterator it = buffer_.begin(); it != buffer_.end(); ++it) {
    if (MergeEvents(it, event)) {
      // A matching event was found and the new event was merged.
      return kOK;
    }
  }

  // No match: queue the event and restore the ordering given by CompareEvents.
  buffer_.push_back(event);
  buffer_.sort(CompareEvents);
  return kOK;
}
// Looks for an event that is active at |current_timestamp|. The buffer is
// scanned from the front. An event is considered active from its start
// timestamp up to and including its (possibly extrapolated) end. Events whose
// end lies before |current_timestamp| are erased during the scan. When an
// event is found, it is copied to |event| (if non-NULL) and true is returned;
// if the scan reaches the end of the buffer, false is returned.
//
// With LEGACY_BITEXACT defined, two extra rules apply:
// - An active event with the end bit set is erased right away if advancing
//   |current_timestamp| by one frame reaches the event's end.
// - An expired event is returned one last time (and erased) unless it lacks
//   the end bit and is followed by another event in the buffer.
//
// NOTE(review): the comparisons below are plain unsigned comparisons, so a
// wrap-around of |event_end| or of the timestamps is not handled here.
bool DtmfBuffer::GetEvent(uint32_t current_timestamp, DtmfEvent* event) {
DtmfList::iterator it = buffer_.begin();
while (it != buffer_.end()) {
// |event_end| is an estimate of where the current event ends. If the end
// bit is set, we know that the event ends at |timestamp| + |duration|.
uint32_t event_end = it->timestamp + it->duration;
#ifdef LEGACY_BITEXACT
// Set only when the event lacks the end bit AND a later event exists.
bool next_available = false;
#endif
if (!it->end_bit) {
// If the end bit is not set, we allow extrapolation of the event for
// some time.
event_end += max_extrapolation_samples_;
DtmfList::iterator next = it;
++next;
if (next != buffer_.end()) {
// If there is a next event in the buffer, we will not extrapolate over
// the start of that new event.
event_end = std::min(event_end, next->timestamp);
#ifdef LEGACY_BITEXACT
next_available = true;
#endif
}
}
if (current_timestamp >= it->timestamp
&& current_timestamp <= event_end) { // TODO(hlundin): Change to <.
// Found a matching event.
if (event) {
event->event_no = it->event_no;
event->end_bit = it->end_bit;
event->volume = it->volume;
event->duration = it->duration;
event->timestamp = it->timestamp;
}
#ifdef LEGACY_BITEXACT
if (it->end_bit &&
current_timestamp + frame_len_samples_ >= event_end) {
// We are done playing this. Erase the event.
buffer_.erase(it);
}
#endif
return true;
} else if (current_timestamp > event_end) { // TODO(hlundin): Change to >=.
// Erase old event. Operation returns a valid pointer to the next element
// in the list.
#ifdef LEGACY_BITEXACT
if (!next_available) {
// Legacy behavior: hand out the expired event one final time before it is
// removed from the buffer.
if (event) {
event->event_no = it->event_no;
event->end_bit = it->end_bit;
event->volume = it->volume;
event->duration = it->duration;
event->timestamp = it->timestamp;
}
it = buffer_.erase(it);
return true;
} else {
// A later event exists; drop this one and continue the scan with it.
it = buffer_.erase(it);
}
#else
it = buffer_.erase(it);
#endif
} else {
// The event starts after |current_timestamp|; look at the next one.
++it;
}
}
return false;
}
// Configures the buffer for sample rate |fs_hz|. Only 8000, 16000, 32000 and
// 48000 Hz are accepted; any other value returns kInvalidSampleRate and leaves
// the current settings untouched. On success the frame length is set to
// fs_hz / 100 samples (10 ms) and the maximum extrapolation to
// 7 * fs_hz / 100 samples (70 ms), and kOK is returned.
int DtmfBuffer::SetSampleRate(int fs_hz) {
  switch (fs_hz) {
    case 8000:
    case 16000:
    case 32000:
    case 48000:
      break;
    default:
      return kInvalidSampleRate;
  }
  max_extrapolation_samples_ = 7 * fs_hz / 100;
  frame_len_samples_ = fs_hz / 100;
  return kOK;
}
// The method returns true if the two events are considered to be the same.
// They are defined as equal if they share the same timestamp and event number.
// The special case with long-lasting events that have to be split into segments
// is not handled in this method. These will be treated as separate events in
// the buffer.
// Two events are the same if both the event number and the start timestamp
// match. Volume, duration and end bit are not compared.
bool DtmfBuffer::SameEvent(const DtmfEvent& a, const DtmfEvent& b) {
  if (a.event_no != b.event_no) {
    return false;
  }
  return a.timestamp == b.timestamp;
}
// Merges |event| into the buffered event at |it| if the two are the same event
// (as decided by SameEvent). On a match the buffered duration grows to the
// larger of the two durations, unless the buffered event already has its end
// bit set, and the end bit is set if |event| carries it. Returns true if the
// events matched, false otherwise.
bool DtmfBuffer::MergeEvents(DtmfList::iterator it, const DtmfEvent& event) {
  if (!SameEvent(*it, event)) {
    return false;
  }
  // Do not extend the duration of an event for which the end bit was already
  // received.
  if (!it->end_bit && event.duration > it->duration) {
    it->duration = event.duration;
  }
  // The end bit is sticky: once set it is never cleared by a later packet.
  if (event.end_bit) {
    it->end_bit = true;
  }
  return true;
}
// Returns true if |a| goes before |b| in the sorting order ("|a| < |b|").
// The events are ranked using their start timestamp (taking wrap-around into
// account). In the unlikely situation that two events share the same start
// timestamp, the event number is used to rank the two. Note that packets
// that belong to the same events, and therefore sharing the same start
// timestamp, have already been merged before the sort method is called.
// Ordering predicate for the event list: returns true if |a| sorts before |b|.
// Events with different timestamps are ordered by timestamp, with |a| being
// first when the unsigned (wrapping) distance from |a| to |b| is less than
// half the 32-bit range. Events with equal timestamps are ordered by event
// number.
bool DtmfBuffer::CompareEvents(const DtmfEvent& a, const DtmfEvent& b) {
  if (a.timestamp != b.timestamp) {
    // Unsigned subtraction wraps, which takes timestamp wrap-around into
    // account.
    const uint32_t distance_a_to_b = b.timestamp - a.timestamp;
    return distance_a_to_b < 0xFFFFFFFF / 2;
  }
  return a.event_no < b.event_no;
}
} // namespace webrtc

View File

@ -0,0 +1,116 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_BUFFER_H_
#include <list>
#include <string> // size_t
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Plain data holder for one DTMF (telephone-event) item.
struct DtmfEvent {
  uint32_t timestamp;  // Start timestamp of the event.
  int event_no;        // Event number.
  int volume;          // Volume field of the event.
  int duration;        // Duration of the event.
  bool end_bit;        // True once the end of the event has been signalled.

  // Creates an event with all fields zero and the end bit cleared.
  DtmfEvent()
      : timestamp(0), event_no(0), volume(0), duration(0), end_bit(false) {}

  // Creates an event from explicit field values.
  DtmfEvent(uint32_t ts, int ev, int vol, int dur, bool end)
      : timestamp(ts), event_no(ev), volume(vol), duration(dur), end_bit(end) {}
};
// This is the buffer holding DTMF events while waiting for them to be played.
// This is the buffer holding DTMF events while waiting for them to be played.
// Events are kept in a list sorted by CompareEvents; packets that describe the
// same event are merged on insertion.
class DtmfBuffer {
 public:
  // Return codes used by the methods of this class.
  enum BufferReturnCodes {
    kOK = 0,
    kInvalidPointer,
    kPayloadTooShort,
    kInvalidEventParameters,
    kInvalidSampleRate
  };

  // Set up the buffer for use at sample rate |fs_hz|. The return value of
  // SetSampleRate cannot be reported from a constructor, so the rate-dependent
  // members are zero-initialized first. If |fs_hz| is not supported they stay
  // at zero (no extrapolation) instead of holding indeterminate values.
  // Note that the call to SetSampleRate from the constructor always resolves
  // to DtmfBuffer::SetSampleRate, even for derived classes.
  explicit DtmfBuffer(int fs_hz)
      : max_extrapolation_samples_(0),
        frame_len_samples_(0) {
    SetSampleRate(fs_hz);
  }

  virtual ~DtmfBuffer() {}

  // Flushes the buffer.
  virtual void Flush() { buffer_.clear(); }

  // Static method to parse 4 bytes from |payload| as a DTMF event (RFC 4733)
  // and write the parsed information into the struct |event|. Input variable
  // |rtp_timestamp| is simply copied into the struct. Returns kOK on success,
  // kInvalidPointer if |payload| or |event| is NULL, and kPayloadTooShort if
  // |payload_length_bytes| is less than 4.
  static int ParseEvent(uint32_t rtp_timestamp,
                        const uint8_t* payload,
                        int payload_length_bytes,
                        DtmfEvent* event);

  // Inserts |event| into the buffer. The method looks for a matching event and
  // merges the two if a match is found. Returns kOK on success and
  // kInvalidEventParameters if any field of |event| is out of range.
  virtual int InsertEvent(const DtmfEvent& event);

  // Checks if a DTMF event should be played at time |current_timestamp|. If so,
  // the method returns true; otherwise false. The parameters of the event to
  // play will be written to |event|. |event| may be NULL, in which case nothing
  // is written.
  virtual bool GetEvent(uint32_t current_timestamp, DtmfEvent* event);

  // Number of events in the buffer.
  virtual size_t Length() const { return buffer_.size(); }

  // Returns true if the buffer holds no events.
  virtual bool Empty() const { return buffer_.empty(); }

  // Set a new sample rate. Returns kOK on success and kInvalidSampleRate if
  // |fs_hz| is not one of the supported rates.
  virtual int SetSampleRate(int fs_hz);

 private:
  typedef std::list<DtmfEvent> DtmfList;

  // How long (in samples) an event without end bit may be extrapolated beyond
  // its reported duration.
  int max_extrapolation_samples_;
  int frame_len_samples_;  // TODO(hlundin): Remove this later.

  // Compares two events and returns true if they are the same.
  static bool SameEvent(const DtmfEvent& a, const DtmfEvent& b);

  // Merges |event| to the event pointed out by |it|. The method checks that
  // the two events are the same (using the SameEvent method), and merges them
  // if that was the case, returning true. If the events are not the same, false
  // is returned.
  bool MergeEvents(DtmfList::iterator it, const DtmfEvent& event);

  // Method used by the sort algorithm to rank events in the buffer.
  static bool CompareEvents(const DtmfEvent& a, const DtmfEvent& b);

  DtmfList buffer_;

  DISALLOW_COPY_AND_ASSIGN(DtmfBuffer);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_BUFFER_H_

View File

@ -0,0 +1,307 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h"
#ifdef WIN32
#include <winsock2.h> // ntohl()
#else
#include <arpa/inet.h> // ntohl()
#endif
#include <iostream>
#include "gtest/gtest.h"
// Modify the tests so that they pass with the modifications done to DtmfBuffer
// for backwards bit-exactness. Once bit-exactness is no longer required, this
// #define should be removed (and the code that it enables).
#define LEGACY_BITEXACT
namespace webrtc {
// Sample rate (in Hz) passed to the DtmfBuffer constructor in the tests below.
static int sample_rate_hz = 8000;
// Builds a 4-byte telephone-event payload from the given fields and returns it
// as a 32-bit word whose in-memory byte order is the on-the-wire order:
//
//  0                   1                   2                   3
//  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |     event     |E|R| volume    |          duration             |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// |event| is truncated to 8 bits, |volume| to 6 bits and |duration| to 16
// bits. The reserved bit is always left cleared.
static uint32_t MakeDtmfPayload(int event, bool end, int volume, int duration) {
  const uint32_t event_bits = (event & 0x00FF) << 24;
  const uint32_t end_bits = (end ? 0x00800000 : 0x00000000);
  const uint32_t volume_bits = (volume & 0x003F) << 16;
  const uint32_t duration_bits = (duration & 0xFFFF);
  // Swap from host byte order so that the first byte in memory is |event|.
  return ntohl(event_bits | end_bits | volume_bits | duration_bits);
}
// Returns true if every field of |a| equals the corresponding field of |b|.
static bool EqualEvents(const DtmfEvent& a,
                        const DtmfEvent& b) {
  if (a.duration != b.duration) {
    return false;
  }
  if (a.end_bit != b.end_bit) {
    return false;
  }
  if (a.event_no != b.event_no) {
    return false;
  }
  if (a.timestamp != b.timestamp) {
    return false;
  }
  return a.volume == b.volume;
}
// Smoke test: a buffer can be allocated on the heap and destroyed again.
TEST(DtmfBuffer, CreateAndDestroy) {
  DtmfBuffer* const dtmf_buffer = new DtmfBuffer(sample_rate_hz);
  delete dtmf_buffer;
}
// Test the event parser.
// Round-trips one event through MakeDtmfPayload and DtmfBuffer::ParseEvent,
// then checks the error codes for NULL pointers and a too short payload.
TEST(DtmfBuffer, ParseEvent) {
  const int kEventNo = 7;
  const bool kEndBit = true;
  const int kVolume = 17;
  const int kDuration = 4711;
  const uint32_t kTimestamp = 0x12345678;

  uint32_t payload = MakeDtmfPayload(kEventNo, kEndBit, kVolume, kDuration);
  uint8_t* payload_ptr = reinterpret_cast<uint8_t*>(&payload);

  // A well-formed payload must parse into the original field values.
  DtmfEvent parsed;
  EXPECT_EQ(DtmfBuffer::kOK,
            DtmfBuffer::ParseEvent(kTimestamp, payload_ptr, sizeof(payload),
                                   &parsed));
  EXPECT_EQ(kDuration, parsed.duration);
  EXPECT_EQ(kEndBit, parsed.end_bit);
  EXPECT_EQ(kEventNo, parsed.event_no);
  EXPECT_EQ(kTimestamp, parsed.timestamp);
  EXPECT_EQ(kVolume, parsed.volume);

  // NULL payload or NULL output struct.
  EXPECT_EQ(DtmfBuffer::kInvalidPointer,
            DtmfBuffer::ParseEvent(kTimestamp, NULL, 4, &parsed));
  EXPECT_EQ(DtmfBuffer::kInvalidPointer,
            DtmfBuffer::ParseEvent(kTimestamp, payload_ptr, 4, NULL));

  // Fewer than 4 payload bytes.
  EXPECT_EQ(DtmfBuffer::kPayloadTooShort,
            DtmfBuffer::ParseEvent(kTimestamp, payload_ptr, 3, &parsed));
}
// Inserts a single, complete event (end bit set) and probes the buffer before,
// at the start of, in the middle of, and after the event.
TEST(DtmfBuffer, SimpleInsertAndGet) {
int event_no = 7;
bool end_bit = true;
int volume = 17;
int duration = 4711;
uint32_t timestamp = 0x12345678;
DtmfEvent event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
DtmfEvent out_event;
// Too early to get event.
EXPECT_FALSE(buffer.GetEvent(timestamp - 10, &out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Get the event at its starting timestamp.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(event, out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Get the event some time into the event.
EXPECT_TRUE(buffer.GetEvent(timestamp + duration / 2, &out_event));
EXPECT_TRUE(EqualEvents(event, out_event));
EXPECT_EQ(1u, buffer.Length());
EXPECT_FALSE(buffer.Empty());
// Give a "current" timestamp after the event has ended.
#ifdef LEGACY_BITEXACT
// In legacy mode the expired event is returned one last time while it is
// being erased from the buffer.
EXPECT_TRUE(buffer.GetEvent(timestamp + duration + 10, &out_event));
#endif
// The buffer must now be empty and report no event.
EXPECT_FALSE(buffer.GetEvent(timestamp + duration + 10, &out_event));
EXPECT_EQ(0u, buffer.Length());
EXPECT_TRUE(buffer.Empty());
}
// Inserts three packets describing one and the same event, with growing
// duration and the end bit set only in the last one. The buffer must merge
// them into a single event equal to the last packet.
TEST(DtmfBuffer, MergingPackets) {
  const uint32_t kStartTimestamp = 0x12345678;
  const int kSegmentDuration = 80;
  DtmfEvent packet(kStartTimestamp, 0, 17, kSegmentDuration, false);
  DtmfBuffer buffer(sample_rate_hz);

  // First packet: start of the event.
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(packet));

  // Second packet: same event, longer duration.
  packet.duration += kSegmentDuration;
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(packet));

  // Third packet: even longer, and marks the end of the event.
  packet.duration += kSegmentDuration;
  packet.end_bit = true;
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(packet));

  // All three packets must have ended up in one buffered event.
  EXPECT_EQ(1u, buffer.Length());
  DtmfEvent merged;
  EXPECT_TRUE(buffer.GetEvent(kStartTimestamp, &merged));
  EXPECT_TRUE(EqualEvents(packet, merged));
}
// This test case inserts one shorter event completely overlapped by one longer
// event. The expected outcome is that only the longer event is played.
TEST(DtmfBuffer, OverlappingEvents) {
int event_no = 0;
bool end_bit = true;
int volume = 1;
int duration = 80;
// The short event starts 80 samples after the long one and lasts 80 samples.
uint32_t timestamp = 0x12345678 + 80;
DtmfEvent short_event(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(short_event));
// The long event has another event number and an earlier start. It is
// inserted as three packets, ending up with a duration of 240 samples.
event_no = 10;
end_bit = false;
timestamp = 0x12345678;
DtmfEvent long_event(timestamp, event_no, volume, duration, end_bit);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
long_event.duration += 80;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
long_event.duration += 80;
long_event.end_bit = true;
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event));
// The two events differ in event number and timestamp and are not merged.
EXPECT_EQ(2u, buffer.Length());
DtmfEvent out_event;
// Expect to get the long event.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(long_event, out_event));
// Probe after both events have ended.
#ifdef LEGACY_BITEXACT
// In legacy mode each expired event is returned one last time as it is
// erased: first the long event, then the short one.
EXPECT_TRUE(buffer.GetEvent(timestamp + long_event.duration + 10,
&out_event));
EXPECT_TRUE(EqualEvents(long_event, out_event));
EXPECT_TRUE(buffer.GetEvent(timestamp + long_event.duration + 10,
&out_event));
EXPECT_TRUE(EqualEvents(short_event, out_event));
#else
// Expect no more events.
EXPECT_FALSE(buffer.GetEvent(timestamp + long_event.duration + 10,
&out_event));
#endif
EXPECT_TRUE(buffer.Empty());
}
// Verifies that an event without end bit is extrapolated beyond its reported
// duration, but not past the start of a following event, and that it is
// eventually erased once the extrapolation time has passed.
TEST(DtmfBuffer, ExtrapolationTime) {
int event_no = 0;
bool end_bit = false;
int volume = 1;
int duration = 80;
uint32_t timestamp = 0x12345678;
DtmfEvent event1(timestamp, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(1u, buffer.Length());
DtmfEvent out_event;
// Get the event at the start.
EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
// Also get the event 100 samples after the end of the event (since we're
// missing the end bit).
uint32_t timestamp_now = timestamp + duration + 100;
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
// Insert another event starting back-to-back with the previous event.
timestamp += duration;
event_no = 1;
DtmfEvent event2(timestamp, event_no, volume, duration, end_bit);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(2u, buffer.Length());
// Now we expect to get the new event when supplying |timestamp_now|.
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(EqualEvents(event2, out_event));
// Expect the first event to be erased now.
EXPECT_EQ(1u, buffer.Length());
// Move |timestamp_now| to more than 560 samples after the end of the second
// event. Expect that event to be erased.
timestamp_now = timestamp + duration + 600;
#ifdef LEGACY_BITEXACT
// In legacy mode the expired event is returned one last time while it is
// being erased.
EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event));
#endif
EXPECT_FALSE(buffer.GetEvent(timestamp_now, &out_event));
EXPECT_TRUE(buffer.Empty());
}
// Inserts one event just before the 32-bit timestamp wraps and one at
// timestamp 0, in both insertion orders, and verifies that the event before
// the wrap is always the first one returned.
TEST(DtmfBuffer, TimestampWraparound) {
int event_no = 0;
bool end_bit = true;
int volume = 1;
int duration = 80;
// |event1| ends exactly at the largest 32-bit timestamp.
uint32_t timestamp1 = 0xFFFFFFFF - duration;
DtmfEvent event1(timestamp1, event_no, volume, duration, end_bit);
// |event2| starts right after the wrap.
uint32_t timestamp2 = 0;
DtmfEvent event2(timestamp2, event_no, volume, duration, end_bit);
DtmfBuffer buffer(sample_rate_hz);
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(2u, buffer.Length());
DtmfEvent out_event;
EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
#ifdef LEGACY_BITEXACT
// In legacy mode |event1| is erased right away, since one frame (80 samples
// at 8 kHz) ahead of |timestamp1| reaches the end of the event.
EXPECT_EQ(1u, buffer.Length());
#else
EXPECT_EQ(2u, buffer.Length());
#endif
buffer.Flush();
// Reverse the insert order. Expect same results.
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2));
EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1));
EXPECT_EQ(2u, buffer.Length());
EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event));
EXPECT_TRUE(EqualEvents(event1, out_event));
#ifdef LEGACY_BITEXACT
EXPECT_EQ(1u, buffer.Length());
#else
EXPECT_EQ(2u, buffer.Length());
#endif
}
TEST(DtmfBuffer, InvalidEvents) {
  // Start from a fully valid event and corrupt one field at a time; every
  // corrupted variant must be rejected with kInvalidEventParameters.
  DtmfEvent event(0x12345678, 0, 1, 80, true);
  DtmfBuffer buffer(sample_rate_hz);

  // Event numbers just outside the accepted range.
  const int kBadEventNumbers[] = { -1, 16 };
  for (size_t i = 0; i < sizeof(kBadEventNumbers) / sizeof(int); ++i) {
    event.event_no = kBadEventNumbers[i];
    EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  }
  event.event_no = 0;  // Back to a valid value.

  // Volumes just outside the accepted range.
  const int kBadVolumes[] = { -1, 37 };
  for (size_t i = 0; i < sizeof(kBadVolumes) / sizeof(int); ++i) {
    event.volume = kBadVolumes[i];
    EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  }
  event.volume = 0;  // Back to a valid value.

  // Durations that are negative, zero, or one more than 0xFFFF.
  const int kBadDurations[] = { -1, 0, 0xFFFF + 1 };
  for (size_t i = 0; i < sizeof(kBadDurations) / sizeof(int); ++i) {
    event.duration = kBadDurations[i];
    EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event));
  }
  event.duration = 1;  // Back to a valid value.

  // With every field valid again the event must be accepted.
  EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event));
}
} // namespace webrtc

View File

@ -0,0 +1,192 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This class provides a generator for DTMF tones. The tone generation is based
// on a sinusoid recursion. Each sinusoid is generated using a recursion
// formula; x[n] = a * x[n-1] - x[n-2], where the coefficient
// a = 2*cos(2*pi*f/fs). The recursion is started with x[-1] = 0 and
// x[-2] = sin(2*pi*f/fs). (Note that with this initialization, the resulting
// sinusoid gets a "negative" rotation; x[n] = sin(-2*pi*f/fs * n + phi), but
// kept this way due to historical reasons.)
// TODO(hlundin): Change to positive rotation?
//
// Each key on the telephone keypad corresponds to an "event", 0-15. Each event
// is mapped to a tone pair, with a low and a high frequency. There are four
// low and four high frequencies, each corresponding to a row and column,
// respectively, on the keypad as illustrated below.
//
//            1209 Hz  1336 Hz  1477 Hz  1633 Hz
//   697 Hz      1        2        3       12
//   770 Hz      4        5        6       13
//   852 Hz      7        8        9       14
//   941 Hz     10        0       11       15
#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h"
#include <assert.h>
namespace webrtc {
// The filter coefficient a = 2*cos(2*pi*f/fs) for the low frequency tone, for
// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
// Values are in Q14.
// Layout: the first index selects the sample rate (0 = 8000 Hz, 1 = 16000 Hz,
// 2 = 32000 Hz, 3 = 48000 Hz); the second index is the event number.
const int DtmfToneGenerator::kCoeff1[4][16] = {
{ 24219, 27980, 27980, 27980, 26956, 26956, 26956, 25701, 25701, 25701,
24219, 24219, 27980, 26956, 25701, 24219 },
{ 30556, 31548, 31548, 31548, 31281, 31281, 31281, 30951, 30951, 30951,
30556, 30556, 31548, 31281, 30951, 30556 },
{ 32210, 32462, 32462, 32462, 32394, 32394, 32394, 32311, 32311, 32311,
32210, 32210, 32462, 32394, 32311, 32210 },
{ 32520, 32632, 32632, 32632, 32602, 32602, 32602, 32564, 32564, 32564,
32520, 32520, 32632, 32602, 32564, 32520 } };
// The filter coefficient a = 2*cos(2*pi*f/fs) for the high frequency tone, for
// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15.
// Values are in Q14.
// Layout: the first index selects the sample rate (0 = 8000 Hz, 1 = 16000 Hz,
// 2 = 32000 Hz, 3 = 48000 Hz); the second index is the event number.
const int DtmfToneGenerator::kCoeff2[4][16] = {
{ 16325, 19073, 16325, 13085, 19073, 16325, 13085, 19073, 16325, 13085,
19073, 13085, 9315, 9315, 9315, 9315},
{ 28361, 29144, 28361, 27409, 29144, 28361, 27409, 29144, 28361, 27409,
29144, 27409, 26258, 26258, 26258, 26258},
{ 31647, 31849, 31647, 31400, 31849, 31647, 31400, 31849, 31647, 31400,
31849, 31400, 31098, 31098, 31098, 31098},
{ 32268, 32359, 32268, 32157, 32359, 32268, 32157, 32359, 32268, 32157,
32359, 32157, 32022, 32022, 32022, 32022} };
// The initialization value x[-2] = sin(2*pi*f/fs) for the low frequency tone,
// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
// Values are in Q14.
// Layout: the first index selects the sample rate (0 = 8000 Hz, 1 = 16000 Hz,
// 2 = 32000 Hz, 3 = 48000 Hz); the second index is the event number.
const int DtmfToneGenerator::kInitValue1[4][16] = {
{ 11036, 8528, 8528, 8528, 9315, 9315, 9315, 10163, 10163, 10163, 11036,
11036, 8528, 9315, 10163, 11036},
{ 5918, 4429, 4429, 4429, 4879, 4879, 4879, 5380, 5380, 5380, 5918, 5918,
4429, 4879, 5380, 5918},
{ 3010, 2235, 2235, 2235, 2468, 2468, 2468, 2728, 2728, 2728, 3010, 3010,
2235, 2468, 2728, 3010},
{ 2013, 1493, 1493, 1493, 1649, 1649, 1649, 1823, 1823, 1823, 2013, 2013,
1493, 1649, 1823, 2013 } };
// The initialization value x[-2] = sin(2*pi*f/fs) for the high frequency tone,
// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15.
// Values are in Q14.
// Layout: the first index selects the sample rate (0 = 8000 Hz, 1 = 16000 Hz,
// 2 = 32000 Hz, 3 = 48000 Hz); the second index is the event number.
const int DtmfToneGenerator::kInitValue2[4][16] = {
{ 14206, 13323, 14206, 15021, 13323, 14206, 15021, 13323, 14206, 15021,
13323, 15021, 15708, 15708, 15708, 15708},
{ 8207, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8979,
9801, 9801, 9801, 9801},
{ 4249, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4685,
5164, 5164, 5164, 5164},
{ 2851, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 3148,
3476, 3476, 3476, 3476} };
// Amplitude multipliers for volume values 0 through 36, corresponding to
// 0 dBm0 through -36 dBm0. Values are in Q14.
// The table is indexed directly by the attenuation value, so entry 0 is the
// loudest level and entry 36 the quietest.
const int DtmfToneGenerator::kAmplitude[37] = {
16141, 14386, 12821, 11427, 10184, 9077, 8090, 7210, 6426, 5727, 5104, 4549,
4054, 3614, 3221, 2870, 2558, 2280, 2032, 1811, 1614, 1439, 1282, 1143,
1018, 908, 809, 721, 643, 573, 510, 455, 405, 361, 322, 287, 256 };
// Constructor. Creates a generator in the uninitialized state with zeroed
// coefficients and amplitude. The sample history arrays are not set here.
DtmfToneGenerator::DtmfToneGenerator()
: initialized_(false),
coeff1_(0),
coeff2_(0),
amplitude_(0) {
}
// Initialize the DTMF generator with sample rate fs Hz (8000, 16000, 32000,
// 48000), event (0-15) and attenuation (0-36 dB).
// Returns 0 on success. Returns kParameterError if |event| or |attenuation| is
// out of range; the generator is then left uninitialized.
// An unsupported sample rate trips an assert in debug builds. In builds where
// asserts are compiled out, the 8000 Hz tables are used as a fallback.
int DtmfToneGenerator::Init(int fs, int event, int attenuation) {
  // Invalidate first, so that any early return leaves the generator unusable.
  initialized_ = false;
  int fs_index;
  if (fs == 8000) {
    fs_index = 0;
  } else if (fs == 16000) {
    fs_index = 1;
  } else if (fs == 32000) {
    fs_index = 2;
  } else if (fs == 48000) {
    fs_index = 3;
  } else {
    assert(false);
    // Index 0 is the 8000 Hz row of the tables. (This used to be 1, which
    // selected the 16000 Hz row and contradicted the documented fallback.)
    fs_index = 0;  // Default to 8000 Hz.
  }
  if (event < 0 || event > 15) {
    return kParameterError;  // Invalid event number.
  }
  if (attenuation < 0 || attenuation > 36) {
    return kParameterError;  // Invalid attenuation.
  }
  // Look up oscillator coefficient for low and high frequencies.
  coeff1_ = kCoeff1[fs_index][event];
  coeff2_ = kCoeff2[fs_index][event];
  // Look up amplitude multiplier.
  amplitude_ = kAmplitude[attenuation];
  // Initialize sample history: element 0 holds x[-2] and element 1 holds
  // x[-1] = 0 for each oscillator.
  sample_history1_[0] = kInitValue1[fs_index][event];
  sample_history1_[1] = 0;
  sample_history2_[0] = kInitValue2[fs_index][event];
  sample_history2_[1] = 0;
  initialized_ = true;
  return 0;
}
// Reset tone generator to uninitialized state. Only the |initialized_| flag is
// cleared; Generate() will return kNotInitialized until Init() succeeds again.
void DtmfToneGenerator::Reset() {
initialized_ = false;
}
// Generate num_samples of DTMF signal and write to |output|.
// Returns |num_samples| on success. Error returns:
//   kNotInitialized     - Init() has not succeeded since construction/Reset().
//   kParameterError     - |num_samples| is negative or |output| is NULL.
//   kStereoNotSupported - |output| does not have exactly one channel (this
//                         also trips an assert in debug builds).
// On success |output| is resized to |num_samples| and the oscillator state is
// advanced, so consecutive calls produce a continuous tone.
int DtmfToneGenerator::Generate(int num_samples,
                                AudioMultiVector<int16_t>* output) {
  if (!initialized_) {
    return kNotInitialized;
  }
  if (num_samples < 0 || !output) {
    return kParameterError;
  }
  assert(output->Channels() == 1);  // Not adapted for multi-channel yet.
  if (output->Channels() != 1) {
    return kStereoNotSupported;
  }
  output->AssertSize(num_samples);
  for (int i = 0; i < num_samples; ++i) {
    // Use recursion formula y[n] = a * y[n - 1] - y[n - 2].
    int16_t temp_val_low = ((coeff1_ * sample_history1_[1] + 8192) >> 14)
        - sample_history1_[0];
    int16_t temp_val_high = ((coeff2_ * sample_history2_[1] + 8192) >> 14)
        - sample_history2_[0];
    // Update recursion memory.
    sample_history1_[0] = sample_history1_[1];
    sample_history1_[1] = temp_val_low;
    sample_history2_[0] = sample_history2_[1];
    sample_history2_[1] = temp_val_high;
    // Attenuate the low frequency tone 3 dB. The high tone is scaled by 2^15
    // with a multiplication rather than "<< 15": the sample is negative half
    // of the time, and left-shifting a negative signed value is undefined
    // behaviour before C++20. The numerical result is the same.
    int32_t temp_val = kAmpMultiplier * temp_val_low + temp_val_high * 32768;
    // Normalize the signal to Q14 with proper rounding.
    temp_val = (temp_val + 16384) >> 15;
    // Scale the signal to correct volume.
    (*output)[0][i] =
        static_cast<int16_t>((temp_val * amplitude_ + 8192) >> 14);
  }
  return num_samples;
}
} // namespace webrtc

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_TONE_GENERATOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_TONE_GENERATOR_H_
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class provides a generator for DTMF tones.
// Typical use: call Init() for a given sample rate, event and attenuation,
// then call Generate() repeatedly. Reset() returns the object to the
// uninitialized state. All public methods are virtual.
class DtmfToneGenerator {
public:
// Negative error codes returned by Init() and Generate().
enum ReturnCodes {
kNotInitialized = -1,
kParameterError = -2,
kStereoNotSupported = -3,
};
DtmfToneGenerator();
virtual ~DtmfToneGenerator() {}
// Sets up the generator for sample rate |fs| (Hz), DTMF |event| and
// |attenuation| (dB). Returns 0 on success, otherwise an error code.
virtual int Init(int fs, int event, int attenuation);
// Puts the generator back in the uninitialized state.
virtual void Reset();
// Writes |num_samples| samples of the tone to |output|. Returns the number
// of samples produced, or a negative ReturnCodes value on error.
virtual int Generate(int num_samples, AudioMultiVector<int16_t>* output);
// Returns true if the generator is currently initialized.
virtual bool initialized() const { return initialized_; }
private:
static const int kCoeff1[4][16]; // 1st oscillator model coefficient table.
static const int kCoeff2[4][16]; // 2nd oscillator model coefficient table.
static const int kInitValue1[4][16]; // Initialization for 1st oscillator.
static const int kInitValue2[4][16]; // Initialization for 2nd oscillator.
static const int kAmplitude[37]; // Amplitude for 0 through -36 dBm0.
static const int16_t kAmpMultiplier = 23171; // 3 dB attenuation (in Q15).
bool initialized_; // True if generator is initialized properly.
int coeff1_; // 1st oscillator coefficient for this event.
int coeff2_; // 2nd oscillator coefficient for this event.
int amplitude_; // Amplitude for this event.
int16_t sample_history1_[2]; // Last 2 samples for the 1st oscillator.
int16_t sample_history2_[2]; // Last 2 samples for the 2nd oscillator.
DISALLOW_COPY_AND_ASSIGN(DtmfToneGenerator);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_TONE_GENERATOR_H_

View File

@ -0,0 +1,142 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for DtmfToneGenerator class.
#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h"
#include <cmath>
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
namespace webrtc {
TEST(DtmfToneGenerator, CreateAndDestroy) {
  // Smoke test: construct a generator on the heap and free it right away.
  DtmfToneGenerator* generator = new DtmfToneGenerator();
  delete generator;
}
TEST(DtmfToneGenerator, TestErrors) {
  const int kNumSamples = 10;
  const int kValidFs = 16000;
  const int kValidEvent = 7;
  const int kValidAttenuation = 0;
  DtmfToneGenerator generator;
  AudioMultiVector<int16_t> output(1);  // Mono.

  // Generating before any Init() call must be refused.
  EXPECT_EQ(DtmfToneGenerator::kNotInitialized,
            generator.Generate(kNumSamples, &output));

  // Event numbers just below and just above the valid range.
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            generator.Init(kValidFs, -1, kValidAttenuation));
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            generator.Init(kValidFs, 16, kValidAttenuation));
  // Attenuations just below and just above the valid range.
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            generator.Init(kValidFs, kValidEvent, -1));
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            generator.Init(kValidFs, kValidEvent, 37));
  // None of the failed attempts may have initialized the generator.
  EXPECT_FALSE(generator.initialized());

  // A fully valid Init() is required for the remaining checks.
  ASSERT_EQ(0, generator.Init(kValidFs, kValidEvent, kValidAttenuation));
  EXPECT_TRUE(generator.initialized());

  // A negative sample count is a parameter error ...
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            generator.Generate(-1, &output));
  // ... and so is a NULL destination.
  EXPECT_EQ(DtmfToneGenerator::kParameterError,
            generator.Generate(kNumSamples, NULL));
}
TEST(DtmfToneGenerator, TestTones) {
  const int kAttenuation = 0;
  const int kNumSamples = 10;
  const double kPi = 3.14159265358979323846;
  // Expected tone pair for each event, indexed by event number 0..15.
  const double kLowFreqHz[] = { 941.0, 697.0, 697.0, 697.0, 770.0, 770.0,
      770.0, 852.0, 852.0, 852.0, 941.0, 941.0, 697.0, 770.0, 852.0, 941.0 };
  const double kHighFreqHz[] = { 1336.0, 1209.0, 1336.0, 1477.0, 1209.0,
      1336.0, 1477.0, 1209.0, 1336.0, 1477.0, 1209.0, 1477.0, 1633.0, 1633.0,
      1633.0, 1633.0 };
  // The low tone is 3 dB weaker than the high tone.
  const double kLowToneGain = 23171.0 / 32768;
  // Gain applied to every output sample at attenuation 0.
  const double kBaseGain = 16141.0 / 16384.0;
  const int kSampleRates[] = { 8000, 16000, 32000, 48000 };

  DtmfToneGenerator generator;
  AudioMultiVector<int16_t> output(1);  // Mono.
  for (int rate_ix = 0; rate_ix < 4; ++rate_ix) {
    const int fs = kSampleRates[rate_ix];
    for (int event = 0; event <= 15; ++event) {
      std::ostringstream ss;
      ss << "Checking event " << event << " at sample rate " << fs;
      SCOPED_TRACE(ss.str());
      ASSERT_EQ(0, generator.Init(fs, event, kAttenuation));
      EXPECT_TRUE(generator.initialized());
      EXPECT_EQ(kNumSamples, generator.Generate(kNumSamples, &output));

      const double low_hz = kLowFreqHz[event];
      const double high_hz = kHighFreqHz[event];
      for (int n = 0; n < kNumSamples; ++n) {
        // Reference signal: sum of the two sinusoids evaluated at phase index
        // -(n + 1), scaled to Q14.
        double expected =
            kLowToneGain * sin(2.0 * kPi * low_hz / fs * (-n - 1))
            + sin(2.0 * kPi * high_hz / fs * (-n - 1));
        expected *= kBaseGain;
        expected = ldexp(expected, 14);  // Scale to Q14.
        static const int kChannel = 0;
        EXPECT_NEAR(expected, static_cast<double>(output[kChannel][n]), 25);
      }

      // Reset() must take the generator back to the uninitialized state.
      generator.Reset();
      EXPECT_FALSE(generator.initialized());
    }
  }
}
TEST(DtmfToneGenerator, TestAmplitudes) {
  const int kNumSamples = 10;
  const int kSampleRates[] = { 8000, 16000, 32000, 48000 };
  // One event per sample rate is enough; pick a few different ones.
  const int kEvents[] = { 0, 4, 9, 13 };

  DtmfToneGenerator generator;
  AudioMultiVector<int16_t> attenuated(1);  // Mono.
  AudioMultiVector<int16_t> reference(1);   // Mono.
  for (int case_ix = 0; case_ix < 4; ++case_ix) {
    const int fs = kSampleRates[case_ix];
    const int event = kEvents[case_ix];
    // The reference is the same tone generated with 0 attenuation.
    ASSERT_EQ(0, generator.Init(fs, event, 0));
    EXPECT_EQ(kNumSamples, generator.Generate(kNumSamples, &reference));

    // Only every fifth attenuation step is checked, to save time.
    for (int attenuation = 1; attenuation <= 36; attenuation += 5) {
      std::ostringstream ss;
      ss << "Checking event " << event << " at sample rate " << fs;
      ss << "; attenuation " << attenuation;
      SCOPED_TRACE(ss.str());
      ASSERT_EQ(0, generator.Init(fs, event, attenuation));
      EXPECT_EQ(kNumSamples, generator.Generate(kNumSamples, &attenuated));

      // Linear gain corresponding to |attenuation| dB.
      const double attenuation_factor =
          pow(10, -static_cast<double>(attenuation)/20);
      static const int kChannel = 0;
      for (int n = 0; n < kNumSamples; ++n) {
        // Each sample must equal the scaled reference to within 2 units.
        EXPECT_NEAR(attenuation_factor * reference[kChannel][n],
                    attenuated[kChannel][n], 2);
      }
      generator.Reset();
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,860 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include <assert.h>
#include <algorithm> // min, max
#include <cstring> // memset
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h"
#include "webrtc/modules/audio_coding/neteq4/random_vector.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
namespace webrtc {
void Expand::Reset() {
first_expand_ = true;
consecutive_expands_ = 0;
max_lag_ = 0;
for (size_t ix = 0; ix < num_channels_; ++ix) {
channel_parameters_[ix].expand_vector0.Clear();
channel_parameters_[ix].expand_vector1.Clear();
}
}
// Produces one segment of expanded audio and writes it to |output|.
//
// On the first call after a reset, AnalyzeSignal() is run to estimate the
// expansion parameters (it also fills |random_vector|). On subsequent calls
// only a new random noise vector of |max_lag_| samples is drawn.
//
// For every channel the method then
//   1. builds the voiced part from |expand_vector0| / |expand_vector1|, mixed
//      according to |current_lag_index_|;
//   2. optionally overlap-adds the start of the voiced part into the last
//      |overlap_length_| samples of |sync_buffer_|;
//   3. builds the unvoiced part by scaling the random vector and running it
//      through the channel's AR filter;
//   4. cross-fades voiced into unvoiced and applies muting;
//   5. adds (un)muted background noise, if BackgroundNoise is initialized;
//   6. copies the resulting |current_lag| samples to |output|.
//
// |output| is resized to |current_lag| samples. |consecutive_expands_| is
// incremented and capped at |kMaxConsecutiveExpands|. Always returns 0.
int Expand::Process(AudioMultiVector<int16_t>* output) {
int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30];
int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
static const int kTempDataSize = 3600;
int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this.
// |voiced_vector_storage| aliases the start of |temp_data|; |voiced_vector|
// points past the first |overlap_length_| samples of that storage.
int16_t* voiced_vector_storage = temp_data;
int16_t* voiced_vector = &voiced_vector_storage[overlap_length_];
static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
// One scratch array serves both the unvoiced and the background noise
// filters; each pointer leaves room in front for its own filter state.
int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder;
int fs_mult = fs_hz_ / 8000;
if (first_expand_) {
// Perform initial setup if this is the first expansion since last reset.
AnalyzeSignal(random_vector);
first_expand_ = false;
} else {
// This is not the first expansion, parameters are already estimated.
// Extract a noise segment.
int16_t rand_length = max_lag_;
// TODO(hlundin): This if-statement should not be needed. Should be just
// as good to generate all of the vector in one call in either case.
if (rand_length <= RandomVector::kRandomTableSize) {
random_vector_->IncreaseSeedIncrement(2);
random_vector_->Generate(rand_length, random_vector);
} else {
// This only applies to SWB where length could be larger than 256.
assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30);
// Generate in two calls: first a full table length, then the remainder.
random_vector_->IncreaseSeedIncrement(2);
random_vector_->Generate(RandomVector::kRandomTableSize, random_vector);
random_vector_->IncreaseSeedIncrement(2);
random_vector_->Generate(rand_length - RandomVector::kRandomTableSize,
&random_vector[RandomVector::kRandomTableSize]);
}
}
// Generate signal.
UpdateLagIndex();
// Voiced part.
// Generate a weighted vector with the current lag.
size_t expansion_vector_length = max_lag_ + overlap_length_;
size_t current_lag = expand_lags_[current_lag_index_];
// Copy lag+overlap data.
// The data is taken from the end of the expansion vectors, i.e. the last
// |current_lag| + |overlap_length_| samples.
size_t expansion_vector_position = expansion_vector_length - current_lag -
overlap_length_;
size_t temp_length = current_lag + overlap_length_;
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
ChannelParameters& parameters = channel_parameters_[channel_ix];
if (current_lag_index_ == 0) {
// Use only expand_vector0.
assert(expansion_vector_position + temp_length <=
parameters.expand_vector0.Size());
memcpy(voiced_vector_storage,
&parameters.expand_vector0[expansion_vector_position],
sizeof(int16_t) * temp_length);
} else if (current_lag_index_ == 1) {
// Mix 3/4 of expand_vector0 with 1/4 of expand_vector1.
// NOTE(review): unlike the other two branches, this one has no size
// asserts on the expansion vectors.
WebRtcSpl_ScaleAndAddVectorsWithRound(
&parameters.expand_vector0[expansion_vector_position], 3,
&parameters.expand_vector1[expansion_vector_position], 1, 2,
voiced_vector_storage, temp_length);
} else if (current_lag_index_ == 2) {
// Mix 1/2 of expand_vector0 with 1/2 of expand_vector1.
assert(expansion_vector_position + temp_length <=
parameters.expand_vector0.Size());
assert(expansion_vector_position + temp_length <=
parameters.expand_vector1.Size());
WebRtcSpl_ScaleAndAddVectorsWithRound(
&parameters.expand_vector0[expansion_vector_position], 1,
&parameters.expand_vector1[expansion_vector_position], 1, 1,
voiced_vector_storage, temp_length);
}
// Get tapering window parameters. Values are in Q15.
int16_t muting_window, muting_window_increment;
int16_t unmuting_window, unmuting_window_increment;
if (fs_hz_ == 8000) {
muting_window = DspHelper::kMuteFactorStart8kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
} else if (fs_hz_ == 16000) {
muting_window = DspHelper::kMuteFactorStart16kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
} else if (fs_hz_ == 32000) {
muting_window = DspHelper::kMuteFactorStart32kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
} else { // Assumed to be fs_hz_ == 48000; any other rate also ends up here.
muting_window = DspHelper::kMuteFactorStart48kHz;
muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
}
// Smooth the expanded signal if it has not been muted to a low amplitude and
// |current_voice_mix_factor| is larger than 0.5 (8192 in Q14).
if ((parameters.mute_factor > 819) &&
(parameters.current_voice_mix_factor > 8192)) {
// The overlap region is the last |overlap_length_| samples of the sync
// buffer; it is modified in place.
size_t start_ix = sync_buffer_->Size() - overlap_length_;
for (size_t i = 0; i < overlap_length_; i++) {
// Do overlap add between new vector and overlap.
(*sync_buffer_)[channel_ix][start_ix + i] =
(((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) +
(((parameters.mute_factor * voiced_vector_storage[i]) >> 14) *
unmuting_window) + 16384) >> 15;
muting_window += muting_window_increment;
unmuting_window += unmuting_window_increment;
}
} else if (parameters.mute_factor == 0) {
// The expanded signal will consist of only comfort noise if
// mute_factor = 0. Set the output length to 15 ms for best noise
// production.
// TODO(hlundin): This has been disabled since the length of
// parameters.expand_vector0 and parameters.expand_vector1 no longer
// match with expand_lags_, causing invalid reads and writes. Is it a good
// idea to enable this again, and solve the vector size problem?
// max_lag_ = fs_mult * 120;
// expand_lags_[0] = fs_mult * 120;
// expand_lags_[1] = fs_mult * 120;
// expand_lags_[2] = fs_mult * 120;
}
// Unvoiced part.
// Filter |scaled_random_vector| through |parameters.ar_filter|.
// The saved filter state is placed immediately in front of the output
// vector before filtering.
memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state,
sizeof(int16_t) * kUnvoicedLpcOrder);
// Rounding constant for the right-shift by |ar_gain_scale|.
int32_t add_constant = 0;
if (parameters.ar_gain_scale > 0) {
add_constant = 1 << (parameters.ar_gain_scale - 1);
}
WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector,
parameters.ar_gain, add_constant,
parameters.ar_gain_scale, current_lag);
WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector,
parameters.ar_filter,
kUnvoicedLpcOrder + 1, current_lag);
// Save the last |kUnvoicedLpcOrder| output samples as state for next call.
memcpy(parameters.ar_filter_state,
&(unvoiced_vector[current_lag - kUnvoicedLpcOrder]),
sizeof(int16_t) * kUnvoicedLpcOrder);
// Combine voiced and unvoiced contributions.
// Set a suitable cross-fading slope.
// For lag =
// <= 31 * fs_mult => go from 1 to 0 in about 8 ms;
// (>= 31 .. <= 63) * fs_mult => go from 1 to 0 in about 16 ms;
// >= 64 * fs_mult => go from 1 to 0 in about 32 ms.
// temp_shift = getbits(max_lag_) - 5.
int temp_shift = (31 - WebRtcSpl_NormW32(max_lag_)) - 5;
int16_t mix_factor_increment = 256 >> temp_shift;
if (stop_muting_) {
mix_factor_increment = 0;
}
// Create combined signal by shifting in more and more of unvoiced part.
temp_shift = 8 - temp_shift; // = getbits(mix_factor_increment).
// Number of samples needed to reach the target mix factor, limited to the
// length of this segment. ("temp_lenght" is a legacy misspelling.)
size_t temp_lenght = (parameters.current_voice_mix_factor -
parameters.voice_mix_factor) >> temp_shift;
temp_lenght = std::min(temp_lenght, current_lag);
DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_lenght,
&parameters.current_voice_mix_factor,
mix_factor_increment, temp_data);
// End of cross-fading period was reached before end of expanded signal
// path. Mix the rest with a fixed mixing factor.
if (temp_lenght < current_lag) {
if (mix_factor_increment != 0) {
parameters.current_voice_mix_factor = parameters.voice_mix_factor;
}
// The two weights sum to 16384 (1.0 in Q14).
int temp_scale = 16384 - parameters.current_voice_mix_factor;
WebRtcSpl_ScaleAndAddVectorsWithRound(
voiced_vector + temp_lenght, parameters.current_voice_mix_factor,
unvoiced_vector + temp_lenght, temp_scale, 14,
temp_data + temp_lenght, current_lag - temp_lenght);
}
// Select muting slope depending on how many consecutive expands we have
// done.
if (consecutive_expands_ == 3) {
// Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms.
// mute_slope = 0.0010 / fs_mult in Q20.
parameters.mute_slope = std::max(parameters.mute_slope,
static_cast<int16_t>(1049 / fs_mult));
}
if (consecutive_expands_ == 7) {
// Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms.
// mute_slope = 0.0020 / fs_mult in Q20.
parameters.mute_slope = std::max(parameters.mute_slope,
static_cast<int16_t>(2097 / fs_mult));
}
// Mute segment according to slope value.
if ((consecutive_expands_ != 0) || !parameters.onset) {
// Mute to the previous level, then continue with the muting.
WebRtcSpl_AffineTransformVector(temp_data, temp_data,
parameters.mute_factor, 8192,
14, current_lag);
if (!stop_muting_) {
DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag);
// Shift by 6 to go from Q20 to Q14.
// TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong.
// Legacy.
int16_t gain = 16384 -
(((current_lag * parameters.mute_slope) + 8192) >> 6);
gain = ((gain * parameters.mute_factor) + 8192) >> 14;
// Guard against getting stuck with very small (but sometimes audible)
// gain.
if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) {
parameters.mute_factor = 0;
} else {
parameters.mute_factor = gain;
}
}
}
// Background noise part.
// TODO(hlundin): Move to separate method? In BackgroundNoise class?
if (background_noise_->initialized()) {
// Use background noise parameters.
memcpy(noise_vector - kNoiseLpcOrder,
background_noise_->FilterState(channel_ix),
sizeof(int16_t) * kNoiseLpcOrder);
// NOTE(review): rounding is enabled for shifts > 1 here, whereas the
// unvoiced part above uses > 0. Presumably legacy behaviour; confirm.
if (background_noise_->ScaleShift(channel_ix) > 1) {
add_constant = 1 << (background_noise_->ScaleShift(channel_ix) - 1);
} else {
add_constant = 0;
}
// Scale random vector to correct energy level.
WebRtcSpl_AffineTransformVector(
scaled_random_vector, random_vector,
background_noise_->Scale(channel_ix), add_constant,
background_noise_->ScaleShift(channel_ix), current_lag);
WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_vector,
background_noise_->Filter(channel_ix),
kNoiseLpcOrder + 1,
current_lag);
// Store the last |kNoiseLpcOrder| output samples as the new filter state.
background_noise_->SetFilterState(
channel_ix,
&(noise_vector[current_lag - kNoiseLpcOrder]),
kNoiseLpcOrder);
// Unmute the background noise.
int16_t bgn_mute_factor = background_noise_->MuteFactor(channel_ix);
BackgroundNoise::BackgroundNoiseMode bgn_mode = background_noise_->mode();
if (bgn_mode == BackgroundNoise::kBgnFade &&
consecutive_expands_ >= kMaxConsecutiveExpands &&
bgn_mute_factor > 0) {
// Fade BGN to zero.
// Calculate muting slope, approximately -2^18 / fs_hz.
int16_t mute_slope;
if (fs_hz_ == 8000) {
mute_slope = -32;
} else if (fs_hz_ == 16000) {
mute_slope = -16;
} else if (fs_hz_ == 32000) {
mute_slope = -8;
} else {
mute_slope = -5;
}
// Use UnmuteSignal function with negative slope.
// |bgn_mute_factor| is in Q14. |mute_slope| is in Q20.
DspHelper::UnmuteSignal(noise_vector, current_lag, &bgn_mute_factor,
mute_slope, noise_vector);
} else if (bgn_mute_factor < 16384) {
// If mode is kBgnOff, or if kBgnFade has started fading,
// Use regular |mute_slope|.
if (!stop_muting_ && bgn_mode != BackgroundNoise::kBgnOff &&
!(bgn_mode == BackgroundNoise::kBgnFade &&
consecutive_expands_ >= kMaxConsecutiveExpands)) {
DspHelper::UnmuteSignal(noise_vector, current_lag, &bgn_mute_factor,
parameters.mute_slope, noise_vector);
} else {
// kBgnOn and stop muting, or
// kBgnOff (mute factor is always 0), or
// kBgnFade has reached 0.
WebRtcSpl_AffineTransformVector(noise_vector, noise_vector,
bgn_mute_factor, 8192, 14,
current_lag);
}
}
// Update mute_factor in BackgroundNoise class.
background_noise_->SetMuteFactor(channel_ix, bgn_mute_factor);
} else {
// BGN parameters have not been initialized; use zero noise.
memset(noise_vector, 0, sizeof(int16_t) * current_lag);
}
// Add background noise to the combined voiced-unvoiced signal.
for (size_t i = 0; i < current_lag; i++) {
temp_data[i] = temp_data[i] + noise_vector[i];
}
// The first channel sets the output size; later channels must match it.
if (channel_ix == 0) {
output->AssertSize(current_lag);
} else {
assert(output->Size() == current_lag);
}
memcpy(&(*output)[channel_ix][0], temp_data,
sizeof(temp_data[0]) * current_lag);
}
// Increase call number and cap it.
++consecutive_expands_;
if (consecutive_expands_ > kMaxConsecutiveExpands) {
consecutive_expands_ = kMaxConsecutiveExpands;
}
return 0;
}
// Prepares the expander state for a normal operation following an expansion:
// muting is stopped, and the lag index and its direction are set to zero.
void Expand::SetParametersForNormalAfterExpand() {
  stop_muting_ = true;  // No further muting of the signal.
  lag_index_direction_ = 0;
  current_lag_index_ = 0;
}
// Prepares the expander state for a merge operation following an expansion:
// muting is stopped and the lag index is placed just before the first lag.
void Expand::SetParametersForMergeAfterExpand() {
  stop_muting_ = true;
  // Together with the index of -1 below, a direction of 1 makes sure we get
  // the "optimal" lag.
  lag_index_direction_ = 1;
  // -1 is outside the 3 possible lag indices.
  current_lag_index_ = -1;
}
// Analyzes the most recent 256 * (fs_hz_ / 8000) samples of the audio history
// in |sync_buffer_| and sets up every parameter needed to synthesize
// concealment audio: the three expansion lags, the per-channel expand vectors,
// the unvoiced (AR) filter with its state and gain, the voiced/unvoiced mix
// factor and the muting slope.
//
// The lag search (correlation peaks and distortion minima) is performed once,
// on channel 0. The remaining parameters are computed per channel, each from
// that channel's own history.
//
// |random_vector| receives the noise segment for the unvoiced part. It must
// have room for the noise length computed below, which is asserted to be at
// most kMaxSampleRate / 8000 * 120 + 30 samples.
void Expand::AnalyzeSignal(int16_t* random_vector) {
  int32_t auto_correlation[kUnvoicedLpcOrder + 1];
  int16_t reflection_coeff[kUnvoicedLpcOrder];
  int16_t correlation_vector[kMaxSampleRate / 8000 * 102];
  int best_correlation_index[kNumCorrelationCandidates];
  int16_t best_correlation[kNumCorrelationCandidates];
  int16_t best_distortion_index[kNumCorrelationCandidates];
  int16_t best_distortion[kNumCorrelationCandidates];
  int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1];
  int32_t best_distortion_w32[kNumCorrelationCandidates];
  static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
  int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
  // Leave room in front of |unvoiced_vector| for the filter state.
  int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;

  int fs_mult = fs_hz_ / 8000;
  // Pre-calculate common multiplications with fs_mult.
  int fs_mult_4 = fs_mult * 4;
  int fs_mult_20 = fs_mult * 20;
  int fs_mult_120 = fs_mult * 120;
  int fs_mult_dist_len = fs_mult * kDistortionLength;
  int fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength;

  const size_t signal_length = 256 * fs_mult;
  // Start with the history of channel 0; the lag search below uses this
  // channel only. The pointer is re-targeted per channel further down.
  const int16_t* audio_history =
      &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];

  // Initialize some member variables.
  lag_index_direction_ = 1;
  current_lag_index_ = -1;
  stop_muting_ = false;
  random_vector_->set_seed_increment(1);
  consecutive_expands_ = 0;
  for (size_t ix = 0; ix < num_channels_; ++ix) {
    channel_parameters_[ix].current_voice_mix_factor = 16384;  // 1.0 in Q14.
    channel_parameters_[ix].mute_factor = 16384;  // 1.0 in Q14.
    // Start with 0 gain for background noise.
    background_noise_->SetMuteFactor(ix, 0);
  }

  // Calculate correlation in downsampled domain (4 kHz sample rate).
  int16_t correlation_scale;
  int correlation_length = Correlation(audio_history, signal_length,
                                       correlation_vector, &correlation_scale);
  correlation_length = 51;  // TODO(hlundin): Legacy bit-exactness.

  // Find peaks in correlation vector.
  DspHelper::PeakDetection(correlation_vector, correlation_length,
                           kNumCorrelationCandidates, fs_mult,
                           best_correlation_index, best_correlation);

  // Adjust peak locations; cross-correlation lags start at 2.5 ms
  // (20 * fs_mult samples).
  best_correlation_index[0] += fs_mult_20;
  best_correlation_index[1] += fs_mult_20;
  best_correlation_index[2] += fs_mult_20;

  // Calculate distortion around the |kNumCorrelationCandidates| best lags.
  int distortion_scale = 0;
  for (int i = 0; i < kNumCorrelationCandidates; i++) {
    int16_t min_index = std::max(fs_mult_20,
                                 best_correlation_index[i] - fs_mult_4);
    int16_t max_index = std::min(fs_mult_120 - 1,
                                 best_correlation_index[i] + fs_mult_4);
    best_distortion_index[i] = DspHelper::MinDistortion(
        &(audio_history[signal_length - fs_mult_dist_len]), min_index,
        max_index, fs_mult_dist_len, &best_distortion_w32[i]);
    distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]),
                                distortion_scale);
  }
  // Shift the distortion values to fit in 16 bits.
  WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates,
                                   best_distortion_w32, distortion_scale);

  // Find the maximizing index |i| of the cost function
  // f[i] = best_correlation[i] / best_distortion[i].
  int32_t best_ratio = -1;
  int best_index = -1;
  for (int i = 0; i < kNumCorrelationCandidates; ++i) {
    int32_t ratio;
    if (best_distortion[i] > 0) {
      // Multiply instead of left-shifting: |best_correlation[i]| may be
      // negative, and shifting a negative value left is not well defined.
      ratio = (best_correlation[i] * (1 << 16)) / best_distortion[i];
    } else {
      assert(best_correlation[i] == 0);  // If one is zero, both must be.
      ratio = 0;  // Divide zero by zero => set result to zero.
    }
    // Always accept the first candidate. Otherwise |best_index| would remain
    // -1 when all ratios are <= -1 (negative correlation peaks), and the
    // lookups below would read outside the candidate arrays.
    if (best_index < 0 || ratio > best_ratio) {
      best_index = i;
      best_ratio = ratio;
    }
  }

  int distortion_lag = best_distortion_index[best_index];
  int correlation_lag = best_correlation_index[best_index];
  max_lag_ = std::max(distortion_lag, correlation_lag);

  // Calculate the exact best correlation in the range between
  // |correlation_lag| and |distortion_lag|.
  correlation_length = distortion_lag + 10;
  correlation_length = std::min(correlation_length, fs_mult_120);
  correlation_length = std::max(correlation_length, 60 * fs_mult);

  int start_index = std::min(distortion_lag, correlation_lag);
  int correlation_lags = WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag))
      + 1;
  assert(correlation_lags <= 99 * fs_mult + 1);  // Cannot be larger.

  for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
    ChannelParameters& parameters = channel_parameters_[channel_ix];
    if (channel_ix > 0) {
      // Analyze each channel's own history. Without this, every channel would
      // be given expand vectors and filters derived from channel 0.
      // Assumes |sync_buffer_| holds at least |num_channels_| channels.
      audio_history =
          &(*sync_buffer_)[channel_ix][sync_buffer_->Size() - signal_length];
    }
    // Calculate suitable scaling.
    int16_t signal_max = WebRtcSpl_MaxAbsValueW16(
        &audio_history[signal_length - correlation_length - start_index
                       - correlation_lags],
        correlation_length + start_index + correlation_lags - 1);
    correlation_scale = ((31 - WebRtcSpl_NormW32(signal_max * signal_max))
        + (31 - WebRtcSpl_NormW32(correlation_length))) - 31;
    correlation_scale = std::max(static_cast<int16_t>(0), correlation_scale);

    // Calculate the correlation, store in |correlation_vector2|.
    WebRtcSpl_CrossCorrelation(
        correlation_vector2,
        &(audio_history[signal_length - correlation_length]),
        &(audio_history[signal_length - correlation_length - start_index]),
        correlation_length, correlation_lags, correlation_scale, -1);

    // Find maximizing index.
    best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
    int32_t max_correlation = correlation_vector2[best_index];
    // Compensate index with start offset.
    best_index = best_index + start_index;

    // Calculate energies.
    int32_t energy1 = WebRtcSpl_DotProductWithScale(
        &(audio_history[signal_length - correlation_length]),
        &(audio_history[signal_length - correlation_length]),
        correlation_length, correlation_scale);
    int32_t energy2 = WebRtcSpl_DotProductWithScale(
        &(audio_history[signal_length - correlation_length - best_index]),
        &(audio_history[signal_length - correlation_length - best_index]),
        correlation_length, correlation_scale);

    // Calculate the correlation coefficient between the two portions of the
    // signal.
    int16_t corr_coefficient;
    if ((energy1 > 0) && (energy2 > 0)) {
      int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0);
      int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
      // Make sure total scaling is even (to simplify scale factor after sqrt).
      if ((energy1_scale + energy2_scale) & 1) {
        // If sum is odd, add 1 to make it even.
        energy1_scale += 1;
      }
      int16_t scaled_energy1 = energy1 >> energy1_scale;
      int16_t scaled_energy2 = energy2 >> energy2_scale;
      int16_t sqrt_energy_product = WebRtcSpl_SqrtFloor(
          scaled_energy1 * scaled_energy2);
      // Calculate max_correlation / sqrt(energy1 * energy2) in Q14.
      int cc_shift = 14 - (energy1_scale + energy2_scale) / 2;
      max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift);
      corr_coefficient = WebRtcSpl_DivW32W16(max_correlation,
                                             sqrt_energy_product);
      corr_coefficient = std::min(static_cast<int16_t>(16384),
                                  corr_coefficient);  // Cap at 1.0 in Q14.
    } else {
      corr_coefficient = 0;
    }

    // Extract the two vectors expand_vector0 and expand_vector1 from
    // |audio_history|.
    int16_t expansion_length = max_lag_ + overlap_length_;
    const int16_t* vector1 = &(audio_history[signal_length - expansion_length]);
    const int16_t* vector2 = vector1 - distortion_lag;
    // Normalize the second vector to the same energy as the first.
    energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length,
                                            correlation_scale);
    energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length,
                                            correlation_scale);
    // Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 - 2.0,
    // i.e., energy1 / energy2 is within 0.25 - 4.
    int16_t amplitude_ratio;
    if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) {
      // Energy constraint fulfilled. Use both vectors and scale them
      // accordingly.
      int16_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
      int16_t scaled_energy1 = scaled_energy2 - 13;
      // Calculate scaled_energy1 / scaled_energy2 in Q13.
      int32_t energy_ratio = WebRtcSpl_DivW32W16(
          WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1),
          WEBRTC_SPL_RSHIFT_W32(energy2, scaled_energy2));
      // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26).
      amplitude_ratio = WebRtcSpl_SqrtFloor(energy_ratio << 13);
      // Copy the two vectors and give them the same energy.
      parameters.expand_vector0.Clear();
      parameters.expand_vector0.PushBack(vector1, expansion_length);
      parameters.expand_vector1.Clear();
      if (parameters.expand_vector1.Size() <
          static_cast<size_t>(expansion_length)) {
        parameters.expand_vector1.Extend(
            expansion_length - parameters.expand_vector1.Size());
      }
      WebRtcSpl_AffineTransformVector(&parameters.expand_vector1[0],
                                      const_cast<int16_t*>(vector2),
                                      amplitude_ratio,
                                      4096,
                                      13,
                                      expansion_length);
    } else {
      // Energy change constraint not fulfilled. Only use last vector.
      parameters.expand_vector0.Clear();
      parameters.expand_vector0.PushBack(vector1, expansion_length);
      // Copy from expand_vector0 to expand_vector1.
      parameters.expand_vector0.CopyFrom(&parameters.expand_vector1);
      // Set |amplitude_ratio| since it is used by the muting slope below.
      if ((energy1 / 4 < energy2) || (energy2 == 0)) {
        amplitude_ratio = 4096;  // 0.5 in Q13.
      } else {
        amplitude_ratio = 16384;  // 2.0 in Q13.
      }
    }

    // Set the 3 lag values.
    int lag_difference = distortion_lag - correlation_lag;
    if (lag_difference == 0) {
      // |distortion_lag| and |correlation_lag| are equal.
      expand_lags_[0] = distortion_lag;
      expand_lags_[1] = distortion_lag;
      expand_lags_[2] = distortion_lag;
    } else {
      // |distortion_lag| and |correlation_lag| are not equal; use different
      // combinations of the two.
      // First lag is |distortion_lag| only.
      expand_lags_[0] = distortion_lag;
      // Second lag is the average of the two.
      expand_lags_[1] = (distortion_lag + correlation_lag) / 2;
      // Third lag is the average again, but rounding towards |correlation_lag|.
      if (lag_difference > 0) {
        expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;
      } else {
        expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;
      }
    }

    // Calculate the LPC and the gain of the filters.
    // Calculate scale value needed for auto-correlation.
    correlation_scale = WebRtcSpl_MaxAbsValueW16(
        &(audio_history[signal_length - fs_mult_lpc_analysis_len]),
        fs_mult_lpc_analysis_len);

    correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0);
    correlation_scale = std::max(correlation_scale * 2 + 7, 0);

    // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.
    size_t temp_index = signal_length - fs_mult_lpc_analysis_len -
        kUnvoicedLpcOrder;
    // Copy signal to temporary vector to be able to pad with leading zeros.
    int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len
                                       + kUnvoicedLpcOrder];
    memset(temp_signal, 0,
           sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder));
    memcpy(&temp_signal[kUnvoicedLpcOrder],
           &audio_history[temp_index + kUnvoicedLpcOrder],
           sizeof(int16_t) * fs_mult_lpc_analysis_len);
    WebRtcSpl_CrossCorrelation(auto_correlation,
                               &temp_signal[kUnvoicedLpcOrder],
                               &temp_signal[kUnvoicedLpcOrder],
                               fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1,
                               correlation_scale, -1);
    delete [] temp_signal;

    // Verify that variance is positive.
    if (auto_correlation[0] > 0) {
      // Estimate AR filter parameters using Levinson-Durbin algorithm;
      // kUnvoicedLpcOrder + 1 filter coefficients.
      int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation,
                                                   parameters.ar_filter,
                                                   reflection_coeff,
                                                   kUnvoicedLpcOrder);

      // Keep filter parameters only if filter is stable.
      if (stability != 1) {
        // Set first coefficient to 4096 (1.0 in Q12).
        parameters.ar_filter[0] = 4096;
        // Set remaining |kUnvoicedLpcOrder| coefficients to zero.
        WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder);
      }
    }

    // The noise segment is shared by all channels, so it is generated only
    // while processing the first one.
    if (channel_ix == 0) {
      // Extract a noise segment.
      int16_t noise_length;
      if (distortion_lag < 40) {
        noise_length = 2 * distortion_lag + 30;
      } else {
        noise_length = distortion_lag + 30;
      }
      if (noise_length <= RandomVector::kRandomTableSize) {
        memcpy(random_vector, RandomVector::kRandomTable,
               sizeof(int16_t) * noise_length);
      } else {
        // Only applies to SWB where length could be larger than
        // |kRandomTableSize|.
        memcpy(random_vector, RandomVector::kRandomTable,
               sizeof(int16_t) * RandomVector::kRandomTableSize);
        assert(noise_length <= kMaxSampleRate / 8000 * 120 + 30);
        random_vector_->IncreaseSeedIncrement(2);
        random_vector_->Generate(
            noise_length - RandomVector::kRandomTableSize,
            &random_vector[RandomVector::kRandomTableSize]);
      }
    }

    // Set up state vector and calculate scale factor for unvoiced filtering.
    memcpy(parameters.ar_filter_state,
           &(audio_history[signal_length - kUnvoicedLpcOrder]),
           sizeof(int16_t) * kUnvoicedLpcOrder);
    memcpy(unvoiced_vector - kUnvoicedLpcOrder,
           &(audio_history[signal_length - 128 - kUnvoicedLpcOrder]),
           sizeof(int16_t) * kUnvoicedLpcOrder);
    WebRtcSpl_FilterMAFastQ12(
        const_cast<int16_t*>(&audio_history[signal_length - 128]),
        unvoiced_vector, parameters.ar_filter, kUnvoicedLpcOrder + 1, 128);
    int16_t unvoiced_prescale;
    if (WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128) > 4000) {
      unvoiced_prescale = 4;
    } else {
      unvoiced_prescale = 0;
    }
    int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale(unvoiced_vector,
                                                            unvoiced_vector,
                                                            128,
                                                            unvoiced_prescale);

    // Normalize |unvoiced_energy| to 28 or 29 bits to preserve sqrt() accuracy.
    int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3;
    // Make sure we do an odd number of shifts since we already have 7 shifts
    // from dividing with 128 earlier. This will make the total scale factor
    // even, which is suitable for the sqrt.
    unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1);
    unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale);
    int32_t unvoiced_gain = WebRtcSpl_SqrtFloor(unvoiced_energy);
    parameters.ar_gain_scale = 13
        + (unvoiced_scale + 7 - unvoiced_prescale) / 2;
    parameters.ar_gain = unvoiced_gain;

    // Calculate voice_mix_factor from corr_coefficient.
    // Let x = corr_coefficient. Then, we compute:
    // if (x > 0.48)
    //   voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096;
    // else
    //   voice_mix_factor = 0;
    if (corr_coefficient > 7875) {
      int16_t x1, x2, x3;
      x1 = corr_coefficient;  // |corr_coefficient| is in Q14.
      x2 = (x1 * x1) >> 14;   // Shift 14 to keep result in Q14.
      x3 = (x1 * x2) >> 14;
      static const int kCoefficients[4] = { -5179, 19931, -16422, 5776 };
      int32_t temp_sum = kCoefficients[0] << 14;
      temp_sum += kCoefficients[1] * x1;
      temp_sum += kCoefficients[2] * x2;
      temp_sum += kCoefficients[3] * x3;
      parameters.voice_mix_factor = temp_sum / 4096;
      parameters.voice_mix_factor = std::min(parameters.voice_mix_factor,
                                             static_cast<int16_t>(16384));
      parameters.voice_mix_factor = std::max(parameters.voice_mix_factor,
                                             static_cast<int16_t>(0));
    } else {
      parameters.voice_mix_factor = 0;
    }

    // Calculate muting slope. Reuse value from earlier scaling of
    // |expand_vector0| and |expand_vector1|.
    int16_t slope = amplitude_ratio;
    if (slope > 12288) {
      // slope > 1.5.
      // Calculate (1 - (1 / slope)) / distortion_lag =
      // (slope - 1) / (distortion_lag * slope).
      // |slope| is in Q13, so 1 corresponds to 8192. Shift up to Q25 before
      // the division.
      // Shift the denominator from Q13 to Q5 before the division. The result of
      // the division will then be in Q20.
      int16_t temp_ratio = WebRtcSpl_DivW32W16((slope - 8192) << 12,
                                               (distortion_lag * slope) >> 8);
      if (slope > 14746) {
        // slope > 1.8.
        // Divide by 2, with proper rounding.
        parameters.mute_slope = (temp_ratio + 1) / 2;
      } else {
        // Divide by 8, with proper rounding.
        parameters.mute_slope = (temp_ratio + 4) / 8;
      }
      parameters.onset = true;
    } else {
      // Calculate (1 - slope) / distortion_lag.
      // Shift |slope| by 7 to Q20 before the division. The result is in Q20.
      parameters.mute_slope = WebRtcSpl_DivW32W16((8192 - slope) << 7,
                                                   distortion_lag);
      if (parameters.voice_mix_factor <= 13107) {
        // Make sure the mute factor decreases from 1.0 to 0.9 in no more than
        // 6.25 ms.
        // mute_slope >= 0.005 / fs_mult in Q20.
        parameters.mute_slope = std::max(static_cast<int16_t>(5243 / fs_mult),
                                         parameters.mute_slope);
      } else if (slope > 8028) {
        parameters.mute_slope = 0;
      }
      parameters.onset = false;
    }
  }
}
// Downsamples the tail of |input| (|input_length| samples) to 4 kHz,
// normalizes it, and cross-correlates its last kCorrelationLength samples
// against kNumCorrelationLags earlier positions, starting at lag
// kCorrelationStartLag. The 16-bit normalized correlation values are written
// to |output| and the accumulated number of right shifts to |output_scale|.
// Returns the number of values written to |output|.
int16_t Expand::Correlation(const int16_t* input, int16_t input_length,
                            int16_t* output, int16_t* output_scale) const {
  // Lag range, in the downsampled domain.
  // (Lag 10 corresponds to 20 samples for narrow-band, 40 for wide-band, and
  // so on.)
  static const int kCorrelationStartLag = 10;
  static const int kNumCorrelationLags = 54;
  static const int kCorrelationLength = 60;
  // Number of 4 kHz samples needed to cover all lags.
  static const int kDownsampledLength = kCorrelationStartLag
      + kNumCorrelationLags + kCorrelationLength;
  static const int kFilterDelay = 0;
  static const int kCorrelationShift = 6;

  // Select the downsampling filter and decimation factor for the current
  // sample rate.
  const int16_t* filter_coefficients;
  int16_t num_coefficients;
  int16_t downsampling_factor;
  switch (fs_hz_) {
    case 8000:
      filter_coefficients = DspHelper::kDownsample8kHzTbl;
      num_coefficients = 3;
      downsampling_factor = 2;
      break;
    case 16000:
      filter_coefficients = DspHelper::kDownsample16kHzTbl;
      num_coefficients = 5;
      downsampling_factor = 4;
      break;
    case 32000:
      filter_coefficients = DspHelper::kDownsample32kHzTbl;
      num_coefficients = 7;
      downsampling_factor = 8;
      break;
    default:  // fs_hz_ == 48000.
      filter_coefficients = DspHelper::kDownsample48kHzTbl;
      num_coefficients = 7;
      downsampling_factor = 12;
      break;
  }

  // Downsample the end of |input| to 4 kHz sample rate.
  int16_t downsampled_input[kDownsampledLength];
  const int full_rate_length = kDownsampledLength * downsampling_factor;
  WebRtcSpl_DownsampleFast(input + input_length - full_rate_length,
                           full_rate_length, downsampled_input,
                           kDownsampledLength, filter_coefficients,
                           num_coefficients, downsampling_factor,
                           kFilterDelay);

  // Normalize |downsampled_input| to using all 16 bits.
  int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input,
                                               kDownsampledLength);
  int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value);
  WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength,
                              downsampled_input, norm_shift);

  // Correlate the last |kCorrelationLength| samples with the preceding
  // signal, stepping backwards one sample per lag.
  int32_t correlation[kNumCorrelationLags];
  const int16_t* reference =
      &downsampled_input[kDownsampledLength - kCorrelationLength];
  const int16_t* lagged =
      &downsampled_input[kDownsampledLength - kCorrelationLength
                         - kCorrelationStartLag];
  WebRtcSpl_CrossCorrelation(correlation, reference, lagged,
                             kCorrelationLength, kNumCorrelationLags,
                             kCorrelationShift, -1);

  // Normalize and move data from 32-bit to 16-bit vector.
  int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
                                                     kNumCorrelationLags);
  int16_t norm_shift2 = std::max(18 - WebRtcSpl_NormW32(max_correlation), 0);
  WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,
                                   norm_shift2);

  // Total scale factor (right shifts) of correlation value.
  *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2;
  return kNumCorrelationLags;
}
// Steps |current_lag_index_| one position in the current direction and turns
// the direction around when either end of the range [0, kNumLags - 1] is
// reached, so that repeated calls sweep back and forth over the lags.
void Expand::UpdateLagIndex() {
  current_lag_index_ += lag_index_direction_;

  // Change direction if needed.
  const bool at_lower_end = (current_lag_index_ <= 0);
  const bool at_upper_end = (current_lag_index_ >= kNumLags - 1);
  if (at_lower_end) {
    lag_index_direction_ = 1;
  }
  if (at_upper_end) {
    lag_index_direction_ = -1;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,157 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_EXPAND_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_EXPAND_H_
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
class RandomVector;
class SyncBuffer;
// This class handles extrapolation of audio data from the sync_buffer to
// produce packet-loss concealment.
// TODO(hlundin): Refactor this class to divide the long methods into shorter
// ones.
class Expand {
 public:
  // Creates an Expand object operating at sample rate |fs| (8000, 16000, 32000
  // or 48000 Hz) on |num_channels| channels (at least one).
  // |background_noise|, |sync_buffer| and |random_vector| are stored as raw
  // pointers and are not deleted by this class.
  Expand(BackgroundNoise* background_noise,
         SyncBuffer* sync_buffer,
         RandomVector* random_vector,
         int fs,
         size_t num_channels)
      : background_noise_(background_noise),
        sync_buffer_(sync_buffer),
        random_vector_(random_vector),
        first_expand_(true),
        fs_hz_(fs),
        num_channels_(num_channels),
        overlap_length_(5 * fs / 8000),
        // Give these two a defined value here, so that max_lag() never
        // returns an indeterminate value regardless of what Reset() does.
        consecutive_expands_(0),
        max_lag_(0),
        lag_index_direction_(0),
        current_lag_index_(0),
        stop_muting_(false),
        channel_parameters_(new ChannelParameters[num_channels_]) {
    assert(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000);
    assert(fs <= kMaxSampleRate);  // Should not be possible.
    assert(num_channels_ > 0);
    memset(expand_lags_, 0, sizeof(expand_lags_));
    Reset();
  }

  virtual ~Expand() {}

  // Resets the object.
  void Reset();

  // The main method to produce concealment data. The data is appended to the
  // end of |output|.
  int Process(AudioMultiVector<int16_t>* output);

  // Prepare the object to do extra expansion during normal operation following
  // a period of expands.
  void SetParametersForNormalAfterExpand();

  // Prepare the object to do extra expansion during merge operation following
  // a period of expands.
  void SetParametersForMergeAfterExpand();

  // Sets the mute factor for |channel| to |value|.
  // |channel| must be less than the number of channels given at construction.
  void SetMuteFactor(int16_t value, size_t channel) {
    assert(channel < num_channels_);
    channel_parameters_[channel].mute_factor = value;
  }

  // Returns the mute factor for |channel|.
  // |channel| must be less than the number of channels given at construction.
  int16_t MuteFactor(size_t channel) {
    assert(channel < num_channels_);
    return channel_parameters_[channel].mute_factor;
  }

  // Accessors and mutators.
  size_t overlap_length() const { return overlap_length_; }
  int16_t max_lag() const { return max_lag_; }

 private:
  static const int kUnvoicedLpcOrder = 6;
  static const int kNumCorrelationCandidates = 3;
  static const int kDistortionLength = 20;
  static const int kLpcAnalysisLength = 160;
  static const int kMaxSampleRate = 48000;
  static const int kNumLags = 3;
  static const int kMaxConsecutiveExpands = 200;

  // Per-channel concealment state, filled in by AnalyzeSignal().
  struct ChannelParameters {
    // Constructor. Starts un-muted (mute factor 16384), with all filter
    // coefficients, filter state and gains set to zero.
    ChannelParameters()
        : mute_factor(16384),
          ar_gain(0),
          ar_gain_scale(0),
          voice_mix_factor(0),
          current_voice_mix_factor(0),
          onset(false),
          mute_slope(0) {
      memset(ar_filter, 0, sizeof(ar_filter));
      memset(ar_filter_state, 0, sizeof(ar_filter_state));
    }
    int16_t mute_factor;
    int16_t ar_filter[kUnvoicedLpcOrder + 1];
    int16_t ar_filter_state[kUnvoicedLpcOrder];
    int16_t ar_gain;
    int16_t ar_gain_scale;
    int16_t voice_mix_factor;  // Q14.
    int16_t current_voice_mix_factor;  // Q14.
    AudioVector<int16_t> expand_vector0;
    AudioVector<int16_t> expand_vector1;
    bool onset;
    int16_t mute_slope;  // Q20.
  };

  // Analyze the signal history in |sync_buffer_|, and set up all parameters
  // necessary to produce concealment data.
  void AnalyzeSignal(int16_t* random_vector);

  // Calculate the auto-correlation of |input|, with length |input_length|
  // samples. The correlation is calculated from a downsampled version of
  // |input|, and is written to |output|. The scale factor is written to
  // |output_scale|. Returns the length of the correlation vector.
  int16_t Correlation(const int16_t* input, int16_t input_length,
                      int16_t* output, int16_t* output_scale) const;

  // Advances |current_lag_index_| one step in |lag_index_direction_|,
  // reversing the direction at either end of the lag range.
  void UpdateLagIndex();

  BackgroundNoise* background_noise_;
  SyncBuffer* sync_buffer_;
  RandomVector* random_vector_;
  bool first_expand_;
  int fs_hz_;
  size_t num_channels_;
  size_t overlap_length_;
  int consecutive_expands_;
  int16_t max_lag_;
  size_t expand_lags_[kNumLags];
  int lag_index_direction_;
  int current_lag_index_;
  bool stop_muting_;
  scoped_array<ChannelParameters> channel_parameters_;

  DISALLOW_COPY_AND_ASSIGN(Expand);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_EXPAND_H_

View File

@ -0,0 +1,33 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Expand class.
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
#include "webrtc/modules/audio_coding/neteq4/random_vector.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
namespace webrtc {
// Smoke test: an Expand object can be constructed from freshly created
// collaborators and destroyed again without doing any processing.
TEST(Expand, CreateAndDestroy) {
  const int sample_rate_hz = 8000;
  const size_t num_channels = 1;

  // Collaborators handed to Expand by pointer; they are declared first and
  // therefore outlive it.
  BackgroundNoise background_noise(num_channels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;

  Expand expand(&background_noise, &sync_buffer, &random_vector,
                sample_rate_hz, num_channels);
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,144 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_AUDIO_DECODER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_AUDIO_DECODER_H_
#include <stdlib.h> // NULL
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Identifies a decoder (codec) type. A value of this type is given to the
// AudioDecoder constructor and to the static AudioDecoder helpers
// (CodecSupported(), CodecSampleRateHz(), CreateAudioDecoder()).
// The enumerators have no explicit values, so they are numbered consecutively
// from 0 in declaration order; inserting or reordering entries changes the
// numeric value of every entry that follows.
// NOTE(review): the "_2ch"/"_5ch" suffixes presumably denote the channel
// count, and "nb"/"wb"/"swb" the audio bandwidth -- confirm against the
// decoder implementations.
enum NetEqDecoder {
kDecoderPCMu,
kDecoderPCMa,
kDecoderPCMu_2ch,
kDecoderPCMa_2ch,
kDecoderILBC,
kDecoderISAC,
kDecoderISACswb,
kDecoderPCM16B,
kDecoderPCM16Bwb,
kDecoderPCM16Bswb32kHz,
kDecoderPCM16Bswb48kHz,
kDecoderPCM16B_2ch,
kDecoderPCM16Bwb_2ch,
kDecoderPCM16Bswb32kHz_2ch,
kDecoderPCM16Bswb48kHz_2ch,
kDecoderPCM16B_5ch,
kDecoderG722,
kDecoderG722_2ch,
kDecoderRED,
kDecoderAVT,
kDecoderCNGnb,
kDecoderCNGwb,
kDecoderCNGswb32kHz,
kDecoderCNGswb48kHz,
kDecoderArbitrary,
kDecoderOpus,
kDecoderOpus_2ch,
kDecoderCELT_32,
kDecoderCELT_32_2ch,
};
// This is the interface class for decoders in NetEQ. Each codec type will have
// an implementation of this class.
// Subclasses must implement Decode() and Init(); all other virtual methods
// have default implementations that a subclass may override.
class AudioDecoder {
public:
// Classification of the audio produced by Decode()/DecodeRedundant().
enum SpeechType {
kSpeech = 1,
kComfortNoise = 2
};
// Used by PacketDuration below. Save the value -1 for errors.
enum { kNotImplemented = -2 };
// Creates a decoder of type |type|. The object starts out with one channel
// and a NULL decoder state; both members are protected, so a subclass can
// change them.
explicit AudioDecoder(enum NetEqDecoder type)
: codec_type_(type),
channels_(1),
state_(NULL) {
}
virtual ~AudioDecoder() {}
// Decodes |encoded_len| bytes from |encoded| and writes the result in
// |decoded|. The number of samples produced is in the return value. If the
// decoder produced comfort noise, |speech_type| is set to kComfortNoise,
// otherwise it is kSpeech.
virtual int Decode(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type) = 0;
// Same as Decode(), but interfaces to the decoders redundant decode function.
// The default implementation simply calls the regular Decode() method.
virtual int DecodeRedundant(const uint8_t* encoded, size_t encoded_len,
int16_t* decoded, SpeechType* speech_type) {
return Decode(encoded, encoded_len, decoded, speech_type);
}
// Indicates if the decoder implements the DecodePlc method.
// The default implementation returns false.
virtual bool HasDecodePlc() const { return false; }
// Calls the packet-loss concealment of the decoder to update the state after
// one or several lost packets.
// The default implementation does nothing and returns -1.
virtual int DecodePlc(int num_frames, int16_t* decoded) { return -1; }
// Initializes the decoder.
virtual int Init() = 0;
// Notifies the decoder of an incoming packet to NetEQ.
// The default implementation ignores the packet and returns 0.
virtual int IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp) { return 0; }
// Returns the last error code from the decoder.
// The default implementation always returns 0.
virtual int ErrorCode() { return 0; }
// Returns the duration in samples of the payload in |encoded| which is
// |encoded_len| bytes long. Returns kNotImplemented if no duration estimate
// is available, or -1 in case of an error.
// The default implementation returns kNotImplemented.
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) {
return kNotImplemented;
}
// Returns the codec type given at construction.
virtual NetEqDecoder codec_type() const { return codec_type_; }
// Returns the underlying decoder state.
void* state() { return state_; }
// Returns true if |codec_type| is supported.
static bool CodecSupported(NetEqDecoder codec_type);
// Returns the sample rate for |codec_type|.
static int CodecSampleRateHz(NetEqDecoder codec_type);
// Creates an AudioDecoder object of type |codec_type|. Returns NULL for
// unsupported codecs, and when creating an AudioDecoder is not
// applicable (e.g., for RED and DTMF/AVT types).
static AudioDecoder* CreateAudioDecoder(NetEqDecoder codec_type);
// Returns the value of |channels_| (1 unless changed by a subclass).
size_t channels() { return channels_; }
protected:
// Maps a decoder's numeric speech-type indication to a SpeechType value.
// NOTE(review): the mapping itself is defined elsewhere -- confirm which
// values of |type| mean comfort noise.
static SpeechType ConvertSpeechType(int16_t type);
enum NetEqDecoder codec_type_;
size_t channels_;
void* state_;
private:
DISALLOW_COPY_AND_ASSIGN(AudioDecoder);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_AUDIO_DECODER_H_

View File

@ -0,0 +1,227 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_
#include <cstring> // Provide access to size_t.
#include <vector>
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
struct WebRtcRTPHeader;
// RTCP statistics.
// Output structure for NetEq::GetRtcpStatistics() and
// NetEq::GetRtcpStatisticsNoReset().
// NOTE(review): the fields presumably correspond to the RTCP receiver report
// fields of the same names (fraction lost, cumulative number of packets lost,
// extended highest sequence number, interarrival jitter) -- confirm the units
// and scaling against the code that fills them in.
struct RtcpStatistics {
uint16_t fraction_lost;
uint32_t cumulative_lost;
uint32_t extended_max;
uint32_t jitter;
};
// Network and jitter-buffer statistics, written by NetEq::NetworkStatistics().
// Fields marked Q14 are fixed-point fractions.
struct NetEqNetworkStatistics {
uint16_t current_buffer_size_ms; // Current jitter buffer size in ms.
uint16_t preferred_buffer_size_ms; // Target buffer size in ms.
uint16_t jitter_peaks_found; // 1 if adding extra delay due to peaky
// jitter; 0 otherwise.
uint16_t packet_loss_rate; // Loss rate (network + late) in Q14.
uint16_t packet_discard_rate; // Late loss rate in Q14.
uint16_t expand_rate; // Fraction (of original stream) of synthesized
// speech inserted through expansion (in Q14).
uint16_t preemptive_rate; // Fraction of data inserted through pre-emptive
// expansion (in Q14).
uint16_t accelerate_rate; // Fraction of data removed through acceleration
// (in Q14).
int32_t clockdrift_ppm; // Average clock-drift in parts-per-million
// (positive or negative).
int added_zero_samples; // Number of zero samples added in "off" mode.
};
// Type of audio delivered by NetEq::GetAudio(), reported through its |type|
// output argument. kOutputVADPassive is reported when post-decode VAD is
// enabled (NetEq::EnableVad()) and the signal contains no speech.
// NOTE(review): PLC and CNG presumably stand for packet-loss concealment and
// comfort noise generation -- confirm where the values are assigned.
enum NetEqOutputType {
kOutputNormal,
kOutputPLC,
kOutputCNG,
kOutputPLCtoCNG,
kOutputVADPassive
};
// Playout mode of NetEq; set with NetEq::SetPlayoutMode() and read back with
// NetEq::PlayoutMode().
// NOTE(review): the behavior of each mode is not described in this header --
// see the NetEq implementation.
enum NetEqPlayoutMode {
kPlayoutOn,
kPlayoutOff,
kPlayoutFax,
kPlayoutStreaming
};
// This is the interface class for NetEq.
class NetEq {
public:
enum ReturnCodes {
kOK = 0,
kFail = -1,
kNotImplemented = -2
};
enum ErrorCodes {
kNoError = 0,
kOtherError,
kInvalidRtpPayloadType,
kUnknownRtpPayloadType,
kCodecNotSupported,
kDecoderExists,
kDecoderNotFound,
kInvalidSampleRate,
kInvalidPointer,
kAccelerateError,
kPreemptiveExpandError,
kComfortNoiseErrorCode,
kDecoderErrorCode,
kOtherDecoderError,
kInvalidOperation,
kDtmfParameterError,
kDtmfParsingError,
kDtmfInsertError,
kStereoNotSupported,
kSampleUnderrun,
kDecodedTooMuch,
kFrameSplitError,
kRedundancySplitError,
kPacketBufferCorruption
};
static const int kMaxNumPacketsInBuffer = 240; // TODO(hlundin): Remove.
static const int kMaxBytesInBuffer = 113280; // TODO(hlundin): Remove.
// Creates a new NetEq object, starting at the sample rate |sample_rate_hz|.
// (Note that it will still change the sample rate depending on what payloads
// are being inserted; |sample_rate_hz| is just for startup configuration.)
static NetEq* Create(int sample_rate_hz);
virtual ~NetEq() {}
// Inserts a new packet into NetEq. The |receive_timestamp| is an indication
// of the time when the packet was received, and should be measured with
// the same tick rate as the RTP timestamp of the current payload.
// Returns 0 on success, -1 on failure.
virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
const uint8_t* payload,
int length_bytes,
uint32_t receive_timestamp) = 0;
// Instructs NetEq to deliver 10 ms of audio data. The data is written to
// |output_audio|, which can hold (at least) |max_length| elements.
// The number of channels that were written to the output is provided in
// the output variable |num_channels|, and each channel contains
// |samples_per_channel| elements. If more than one channel is written,
// the samples are interleaved.
// The speech type is written to |type|, if |type| is not NULL.
// Returns kOK on success, or kFail in case of an error.
virtual int GetAudio(size_t max_length, int16_t* output_audio,
int* samples_per_channel, int* num_channels,
NetEqOutputType* type) = 0;
// Associates |rtp_payload_type| with |codec| and stores the information in
// the codec database. Returns 0 on success, -1 on failure.
virtual int RegisterPayloadType(enum NetEqDecoder codec,
uint8_t rtp_payload_type) = 0;
// Provides an externally created decoder object |decoder| to insert in the
// decoder database. The decoder implements a decoder of type |codec| and
// associates it with |rtp_payload_type|. The decoder operates at the
// frequency |sample_rate_hz|. Returns kOK on success, kFail on failure.
virtual int RegisterExternalDecoder(AudioDecoder* decoder,
enum NetEqDecoder codec,
int sample_rate_hz,
uint8_t rtp_payload_type) = 0;
// Removes |rtp_payload_type| from the codec database. Returns 0 on success,
// -1 on failure.
virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
// Sets the desired extra delay on top of what NetEq already applies due to
// current network situation. Used for synchronization with video. Returns
// true if successful, otherwise false.
virtual bool SetExtraDelay(int extra_delay_ms) = 0;
// Not implemented.
virtual int SetTargetDelay() = 0;
// Not implemented.
virtual int TargetDelay() = 0;
// Not implemented.
virtual int CurrentDelay() = 0;
// Enables playout of DTMF tones.
virtual int EnableDtmf() = 0;
// Sets the playout mode to |mode|.
virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
// Returns the current playout mode.
virtual NetEqPlayoutMode PlayoutMode() const = 0;
// Writes the current network statistics to |stats|. The statistics are reset
// after the call.
virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
// Writes the last packet waiting times (in ms) to |waiting_times|. The number
// of values written is no more than 100, but may be smaller if the interface
// is polled again before 100 packets have arrived.
virtual void WaitingTimes(std::vector<int>* waiting_times) = 0;
// Writes the current RTCP statistics to |stats|. The statistics are reset
// and a new report period is started with the call.
virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
// Same as GetRtcpStatistics(), but does not reset anything.
virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
virtual void EnableVad() = 0;
// Disables post-decode VAD.
virtual void DisableVad() = 0;
// Returns the RTP timestamp for the last sample delivered by GetAudio().
virtual uint32_t PlayoutTimestamp() = 0;
// Not implemented.
virtual int SetTargetNumberOfChannels() = 0;
// Not implemented.
virtual int SetTargetSampleRate() = 0;
// Returns the error code for the last occurred error. If no error has
// occurred, 0 is returned.
virtual int LastError() = 0;
// Returns the error code last returned by a decoder (audio or comfort noise).
// When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
// this method to get the decoder's error code.
virtual int LastDecoderError() = 0;
// Flushes both the packet buffer and the sync buffer.
virtual void FlushBuffers() = 0;
protected:
NetEq() {}
private:
DISALLOW_COPY_AND_ASSIGN(NetEq);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_

View File

@ -0,0 +1,361 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/merge.h"
#include <assert.h>
#include <algorithm> // min, max
#include <cstring> // memmove, memcpy, memset, size_t
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h"
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
namespace webrtc {
// Stitches newly decoded audio onto the end of previously expanded audio.
//
// |input| holds |input_length| interleaved samples (all channels together).
// |external_mute_factor_array| holds one Q14 mute factor per channel; each
// entry is read and updated. |output| must be empty on entry; on return it
// holds the merged signal with the first |old_length| samples removed (those
// samples are written back into |sync_buffer_| instead).
//
// Returns the per-channel output length minus the number of samples that were
// borrowed from |sync_buffer_|.
int Merge::Process(int16_t* input, int input_length,
int16_t* external_mute_factor_array,
AudioMultiVector<int16_t>* output) {
// TODO(hlundin): Change to an enumerator and skip assert.
assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 ||
fs_hz_ == 48000);
assert(fs_hz_ <= kMaxSampleRate); // Should not be possible.
int old_length;
int expand_period;
// Get expansion data to overlap and mix with.
// This fills |expanded_| and sets |old_length| and |expand_period|.
int expanded_length = GetExpandedSignal(&old_length, &expand_period);
// Transfer input signal to an AudioMultiVector.
AudioMultiVector<int16_t> input_vector(num_channels_);
input_vector.PushBackInterleaved(input, input_length);
size_t input_length_per_channel = input_vector.Size();
assert(input_length_per_channel == input_length / num_channels_);
// The lag found for channel 0 is reused for all subsequent channels.
int16_t best_correlation_index = 0;
size_t output_length = 0;
for (size_t channel = 0; channel < num_channels_; ++channel) {
int16_t* input_channel = &input_vector[channel][0];
int16_t* expanded_channel = &expanded_[channel][0];
int16_t expanded_max, input_max;
int16_t new_mute_factor = SignalScaling(input_channel,
input_length_per_channel,
expanded_channel, &expanded_max,
&input_max);
// Adjust muting factor (product of "main" muting factor and expand muting
// factor).
// Both factors are Q14, hence the shift by 14 after the multiplication.
int16_t* external_mute_factor = &external_mute_factor_array[channel];
*external_mute_factor =
(*external_mute_factor * expand_->MuteFactor(channel)) >> 14;
// Update |external_mute_factor| if it is lower than |new_mute_factor|.
// The result is capped at 16384, i.e., 1.0 in Q14.
if (new_mute_factor > *external_mute_factor) {
*external_mute_factor = std::min(new_mute_factor,
static_cast<int16_t>(16384));
}
if (channel == 0) {
// Downsample, correlate, and find strongest correlation period for the
// master (i.e., first) channel only.
// Downsample to 4kHz sample rate.
Downsample(input_channel, input_length_per_channel, expanded_channel,
expanded_length);
// Calculate the lag of the strongest correlation period.
best_correlation_index = CorrelateAndPeakSearch(expanded_max,
input_max,
old_length,
input_length_per_channel,
expand_period);
}
// NOTE(review): |temp_data| has a fixed capacity and receives
// |best_correlation_index| + |input_length_per_channel| samples below; no
// bounds check is visible in this function -- confirm the callers guarantee
// that this sum never exceeds |kTempDataSize|.
static const int kTempDataSize = 3600;
int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this.
// The merged signal is laid out in |temp_data| as: the first
// |best_correlation_index| samples of expanded data, followed by the
// (cross-faded) new data starting at |decoded_output|.
int16_t* decoded_output = temp_data + best_correlation_index;
// Mute the new decoded data if needed (and unmute it linearly).
// This is the overlapping part of expanded_signal.
int interpolation_length = std::min(
kMaxCorrelationLength * fs_mult_,
expanded_length - best_correlation_index);
interpolation_length = std::min(interpolation_length,
static_cast<int>(input_length_per_channel));
if (*external_mute_factor < 16384) {
// Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB,
// and so on.
int increment = 4194 / fs_mult_;
// The overlapping part is ramped in place in |input_channel|; the remainder
// is unmuted directly into its final position in |decoded_output|.
*external_mute_factor = DspHelper::RampSignal(input_channel,
interpolation_length,
*external_mute_factor,
increment);
DspHelper::UnmuteSignal(&input_channel[interpolation_length],
input_length_per_channel - interpolation_length,
external_mute_factor, increment,
&decoded_output[interpolation_length]);
} else {
// No muting needed.
// Copy the non-overlapping tail of the input straight to its final position.
memmove(
&decoded_output[interpolation_length],
&input_channel[interpolation_length],
sizeof(int16_t) * (input_length_per_channel - interpolation_length));
}
// Do overlap and mix linearly.
int increment = 16384 / (interpolation_length + 1); // In Q14.
int16_t mute_factor = 16384 - increment;
// Leading part of the result: expanded data up to the chosen lag.
memmove(temp_data, expanded_channel,
sizeof(int16_t) * best_correlation_index);
// Overlapping part: cross-fade from expanded data to the new input.
DspHelper::CrossFade(&expanded_channel[best_correlation_index],
input_channel, interpolation_length,
&mute_factor, increment, decoded_output);
output_length = best_correlation_index + input_length_per_channel;
if (channel == 0) {
assert(output->Empty()); // Output should be empty at this point.
output->AssertSize(output_length);
} else {
assert(output->Size() == output_length);
}
memcpy(&(*output)[channel][0], temp_data,
sizeof(temp_data[0]) * output_length);
}
// Copy back the first part of the data to |sync_buffer_| and remove it from
// |output|.
sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index());
output->PopFront(old_length);
// Return new added length. |old_length| samples were borrowed from
// |sync_buffer_|.
return output_length - old_length;
}
int Merge::GetExpandedSignal(int* old_length, int* expand_period) {
// Check how much data that is left since earlier.
*old_length = sync_buffer_->FutureLength();
// Should never be less than overlap_length.
assert(*old_length >= static_cast<int>(expand_->overlap_length()));
// Generate data to merge the overlap with using expand.
expand_->SetParametersForMergeAfterExpand();
if (*old_length >= 210 * kMaxSampleRate / 8000) {
// TODO(hlundin): Write test case for this.
// The number of samples available in the sync buffer is more than what fits
// in expanded_signal. Keep the first 210 * kMaxSampleRate / 8000 samples,
// but shift them towards the end of the buffer. This is ok, since all of
// the buffer will be expand data anyway, so as long as the beginning is
// left untouched, we're fine.
int16_t length_diff = *old_length - 210 * kMaxSampleRate / 8000;
sync_buffer_->InsertZerosAtIndex(length_diff, sync_buffer_->next_index());
*old_length = 210 * kMaxSampleRate / 8000;
// This is the truncated length.
}
// This assert should always be true thanks to the if statement above.
assert(210 * kMaxSampleRate / 8000 - *old_length >= 0);
AudioMultiVector<int16_t> expanded_temp(num_channels_);
expand_->Process(&expanded_temp);
*expand_period = expanded_temp.Size(); // Samples per channel.
expanded_.Clear();
// Copy what is left since earlier into the expanded vector.
expanded_.PushBackFromIndex(*sync_buffer_, sync_buffer_->next_index());
assert(expanded_.Size() == static_cast<size_t>(*old_length));
assert(expanded_temp.Size() > 0);
// Do "ugly" copy and paste from the expanded in order to generate more data
// to correlate (but not interpolate) with.
const int required_length = (120 + 80 + 2) * fs_mult_;
if (expanded_.Size() < static_cast<size_t>(required_length)) {
while (expanded_.Size() < static_cast<size_t>(required_length)) {
// Append one more pitch period each time.
expanded_.PushBack(expanded_temp);
}
// Trim the length to exactly |required_length|.
expanded_.PopBack(expanded_.Size() - required_length);
}
assert(expanded_.Size() >= static_cast<size_t>(required_length));
return required_length;
}
// Compares the energy of the start of |input| with that of |expanded_signal|
// and derives a Q14 muting factor for the new data.
//
// The maximum absolute sample values found in the analyzed parts are written
// to |*expanded_max| and |*input_max|. The return value is 16384 (1.0 in Q14)
// unless |input| has more energy than |expanded_signal|, in which case it is
// sqrt(energy_expanded / energy_input) in Q14.
int16_t Merge::SignalScaling(const int16_t* input, int input_length,
                             const int16_t* expanded_signal,
                             int16_t* expanded_max, int16_t* input_max) const {
  // Analyze at most 64 * fs_mult_ samples of each signal.
  const int analysis_length = std::min(64 * fs_mult_, input_length);
  *expanded_max = WebRtcSpl_MaxAbsValueW16(expanded_signal, analysis_length);
  *input_max = WebRtcSpl_MaxAbsValueW16(input, analysis_length);

  // |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz.
  const int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_);

  // Energy of the expanded signal, scaled down by |shift_expanded| bits.
  const int shift_expanded = std::max(
      6 + log_fs_mult - WebRtcSpl_NormW32(*expanded_max * *expanded_max), 0);
  int32_t expanded_energy = WebRtcSpl_DotProductWithScale(
      expanded_signal, expanded_signal, analysis_length, shift_expanded);

  // Energy of the input signal, scaled down by |shift_input| bits.
  const int shift_input = std::max(
      6 + log_fs_mult - WebRtcSpl_NormW32(*input_max * *input_max), 0);
  int32_t input_energy = WebRtcSpl_DotProductWithScale(
      input, input, analysis_length, shift_input);

  // Bring both energies to the same Q-domain by shifting down the one that
  // was scaled the least.
  const int shift_diff = shift_input - shift_expanded;
  if (shift_diff > 0) {
    expanded_energy >>= shift_diff;
  } else {
    input_energy >>= -shift_diff;
  }

  if (input_energy <= expanded_energy) {
    // |expanded_signal| has at least as much energy as |input|: use 1 (Q14).
    return 16384;
  }

  // Normalize |input_energy| to 14 bits.
  const int16_t norm_shift = WebRtcSpl_NormW32(input_energy) - 17;
  input_energy = WEBRTC_SPL_SHIFT_W32(input_energy, norm_shift);
  // Put |expanded_energy| in a domain 14 higher, so that
  // expanded_energy / input_energy is in Q14.
  expanded_energy = WEBRTC_SPL_SHIFT_W32(expanded_energy, norm_shift + 14);
  // sqrt(expanded_energy / input_energy) in Q14.
  return WebRtcSpl_SqrtFloor((expanded_energy / input_energy) << 14);
}
// TODO(hlundin): There are some parameter values in this method that seem
// strange. Compare with Expand::Correlation.
// Downsamples |expanded_signal| and |input| to 4 kHz, writing the results to
// |expanded_downsampled_| and |input_downsampled_|, respectively.
//
// |input| holds |input_length| samples and |expanded_signal| holds
// |expanded_length| samples, both at |fs_hz_|. The first
// (num_coefficients - 1) samples of each signal are skipped when calling the
// downsampler. If |input| is too short to produce kInputDownsampLength
// samples, the remainder of |input_downsampled_| is zero-filled.
//
// NOTE(review): the return values of WebRtcSpl_DownsampleFast are ignored
// here, as in the original code -- confirm that a failed call cannot leave
// stale data in the output buffers for the lengths used.
void Merge::Downsample(const int16_t* input, int input_length,
                       const int16_t* expanded_signal, int expanded_length) {
  const int16_t* filter_coefficients;
  int num_coefficients;
  int decimation_factor = fs_hz_ / 4000;
  static const int kCompensateDelay = 0;
  int length_limit = fs_hz_ / 100;
  if (fs_hz_ == 8000) {
    filter_coefficients = DspHelper::kDownsample8kHzTbl;
    num_coefficients = 3;
  } else if (fs_hz_ == 16000) {
    filter_coefficients = DspHelper::kDownsample16kHzTbl;
    num_coefficients = 5;
  } else if (fs_hz_ == 32000) {
    filter_coefficients = DspHelper::kDownsample32kHzTbl;
    num_coefficients = 7;
  } else {  // fs_hz_ == 48000
    filter_coefficients = DspHelper::kDownsample48kHzTbl;
    num_coefficients = 7;
    // TODO(hlundin) Why is |length_limit| not 480 (legacy)?
    length_limit = 320;
  }
  int signal_offset = num_coefficients - 1;
  WebRtcSpl_DownsampleFast(&expanded_signal[signal_offset],
                           expanded_length - signal_offset,
                           expanded_downsampled_, kExpandDownsampLength,
                           filter_coefficients, num_coefficients,
                           decimation_factor, kCompensateDelay);
  if (input_length <= length_limit) {
    // Not quite long enough, so we have to cheat a bit.
    // Clamp to zero: if |input| is shorter than |signal_offset|, a negative
    // length would make the memset below start before |input_downsampled_|
    // and write out of bounds. With the clamp, such a pathological input
    // simply results in an all-zero |input_downsampled_|.
    int16_t temp_len =
        static_cast<int16_t>(std::max(0, input_length - signal_offset));
    // TODO(hlundin): Should |downsamp_temp_len| be corrected for round-off
    // errors? I.e., (temp_len + decimation_factor - 1) / decimation_factor?
    int16_t downsamp_temp_len = temp_len / decimation_factor;
    WebRtcSpl_DownsampleFast(&input[signal_offset], temp_len,
                             input_downsampled_, downsamp_temp_len,
                             filter_coefficients, num_coefficients,
                             decimation_factor, kCompensateDelay);
    // Zero-fill the part of the buffer that the short input could not cover.
    memset(&input_downsampled_[downsamp_temp_len], 0,
           sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len));
  } else {
    WebRtcSpl_DownsampleFast(&input[signal_offset],
                             input_length - signal_offset, input_downsampled_,
                             kInputDownsampLength, filter_coefficients,
                             num_coefficients, decimation_factor,
                             kCompensateDelay);
  }
}
int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
int start_position, int input_length,
int expand_period) const {
// Calculate correlation without any normalization.
const int max_corr_length = kMaxCorrelationLength;
int stop_position_downsamp = std::min(
max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1);
int16_t correlation_shift = 0;
if (expanded_max * input_max > 26843546) {
correlation_shift = 3;
}
int32_t correlation[kMaxCorrelationLength];
WebRtcSpl_CrossCorrelation(correlation, input_downsampled_,
expanded_downsampled_, kInputDownsampLength,
stop_position_downsamp, correlation_shift, 1);
// Normalize correlation to 14 bits and copy to a 16-bit array.
static const int kPadLength = 4;
int16_t correlation16[kPadLength + kMaxCorrelationLength + kPadLength] = {0};
int16_t* correlation_ptr = &correlation16[kPadLength];
int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
stop_position_downsamp);
int16_t norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation));
WebRtcSpl_VectorBitShiftW32ToW16(correlation_ptr, stop_position_downsamp,
correlation, norm_shift);
// Calculate allowed starting point for peak finding.
// The peak location bestIndex must fulfill two criteria:
// (1) w16_bestIndex + input_length <
// timestamps_per_call_ + expand_->overlap_length();
// (2) w16_bestIndex + input_length < start_position.
int start_index = timestamps_per_call_ + expand_->overlap_length();
start_index = std::max(start_position, start_index);
start_index = std::max(start_index - input_length, 0);
// Downscale starting index to 4kHz domain. (fs_mult_ * 2 = fs_hz_ / 4000.)
int start_index_downsamp = start_index / (fs_mult_ * 2);
// Calculate a modified |stop_position_downsamp| to account for the increased
// start index |start_index_downsamp| and the effective array length.
int16_t modified_stop_pos =
std::min(stop_position_downsamp,
kMaxCorrelationLength + kPadLength - start_index_downsamp);
int best_correlation_index;
int16_t best_correlation;
static const int kNumCorrelationCandidates = 1;
DspHelper::PeakDetection(&correlation_ptr[start_index_downsamp],
modified_stop_pos, kNumCorrelationCandidates,
fs_mult_, &best_correlation_index,
&best_correlation);
// Compensate for modified start index.
best_correlation_index += start_index;
// Ensure that underrun does not occur for 10ms case => we have to get at
// least 10ms + overlap . (This should never happen thanks to the above
// modification of peak-finding starting point.)
while ((best_correlation_index + input_length) <
static_cast<int>(timestamps_per_call_ + expand_->overlap_length()) ||
best_correlation_index + input_length < start_position) {
assert(false); // Should never happen.
best_correlation_index += expand_period; // Jump one lag ahead.
}
return best_correlation_index;
}
} // namespace webrtc

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class Expand;
class SyncBuffer;
// This class handles the transition from expansion to normal operation.
// When a packet is not available for decoding when needed, the expand operation
// is called to generate extrapolation data. If the missing packet arrives,
// i.e., it was just delayed, it can be decoded and appended directly to the
// end of the expanded data (thanks to how the Expand class operates). However,
// if a later packet arrives instead, the loss is a fact, and the new data must
// be stitched together with the end of the expanded data. This stitching is
// what the Merge class does.
class Merge {
 public:
  // |fs_hz| is the sample rate, |num_channels| (must be > 0) the number of
  // audio channels. |expand| and |sync_buffer| are stored as raw pointers and
  // used by Process(); no ownership is taken here.
  Merge(int fs_hz, size_t num_channels, Expand* expand, SyncBuffer* sync_buffer)
      : fs_hz_(fs_hz),
        fs_mult_(fs_hz_ / 8000),
        num_channels_(num_channels),
        timestamps_per_call_(fs_hz_ / 100),
        expand_(expand),
        sync_buffer_(sync_buffer),
        // Value-initialize (zero) the scratch buffers so that the correlation
        // never reads indeterminate values, should a downsampling call leave
        // part of a buffer unwritten.
        expanded_downsampled_(),
        input_downsampled_(),
        expanded_(num_channels_) {
    assert(num_channels_ > 0);
  }

  // The main method to produce the audio data. The decoded data is supplied in
  // |input|, having |input_length| samples in total for all channels
  // (interleaved). The result is written to |output|, which must be empty when
  // the method is called. The number of channels given at construction
  // (|num_channels_|) defines the number of channels that will be used when
  // de-interleaving |input|. The values in |external_mute_factor_array| (Q14)
  // will be used to scale the audio, and are updated in the process. The array
  // must have |num_channels_| elements. Returns the number of new samples per
  // channel, i.e., not counting the samples borrowed from |sync_buffer_|.
  int Process(int16_t* input, int input_length,
              int16_t* external_mute_factor_array,
              AudioMultiVector<int16_t>* output);

 private:
  static const int kMaxSampleRate = 48000;
  static const int kExpandDownsampLength = 100;
  static const int kInputDownsampLength = 40;
  static const int kMaxCorrelationLength = 60;

  // Calls |expand_| to get more expansion data to merge with. The data is
  // written to |expanded_|. Returns the length of the expanded data, while
  // |expand_period| will be the number of samples in one expansion period
  // (typically one pitch period). The value of |old_length| will be the number
  // of samples that were taken from the |sync_buffer_|.
  int GetExpandedSignal(int* old_length, int* expand_period);

  // Analyzes |input| and |expanded_signal| to find maximum values. Returns
  // a muting factor (Q14) to be used on the new data.
  int16_t SignalScaling(const int16_t* input, int input_length,
                        const int16_t* expanded_signal,
                        int16_t* expanded_max, int16_t* input_max) const;

  // Downsamples |input| (|input_length| samples) and |expanded_signal| to
  // 4 kHz sample rate. The downsampled signals are written to
  // |input_downsampled_| and |expanded_downsampled_|, respectively.
  void Downsample(const int16_t* input, int input_length,
                  const int16_t* expanded_signal, int expanded_length);

  // Calculates cross-correlation between |input_downsampled_| and
  // |expanded_downsampled_|, and finds the correlation maximum. The maximizing
  // lag is returned.
  int16_t CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
                                 int start_position, int input_length,
                                 int expand_period) const;

  const int fs_hz_;
  const int fs_mult_;  // fs_hz_ / 8000.
  const size_t num_channels_;
  const int timestamps_per_call_;  // fs_hz_ / 100.
  Expand* expand_;
  SyncBuffer* sync_buffer_;
  // Scratch buffers holding the 4 kHz versions of the expanded and input
  // signals; written by Downsample() and read by CorrelateAndPeakSearch().
  int16_t expanded_downsampled_[kExpandDownsampLength];
  int16_t input_downsampled_[kInputDownsampLength];
  // Expansion data to merge with; filled by GetExpandedSignal().
  AudioMultiVector<int16_t> expanded_;

  DISALLOW_COPY_AND_ASSIGN(Merge);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Merge class.
#include "webrtc/modules/audio_coding/neteq4/merge.h"
#include <vector>
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include "webrtc/modules/audio_coding/neteq4/random_vector.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
namespace webrtc {
// Smoke test: a Merge object, together with the objects it depends on, can be
// constructed and destroyed.
TEST(Merge, CreateAndDestroy) {
  const int kSampleRateHz = 8000;
  const size_t kNumChannels = 1;
  BackgroundNoise background_noise(kNumChannels);
  SyncBuffer sync_buffer(kNumChannels, 1000);
  RandomVector random_vector;
  Expand expand(&background_noise, &sync_buffer, &random_vector,
                kSampleRateHz, kNumChannels);
  Merge merge(kSampleRateHz, kNumChannels, &expand, &sync_buffer);
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,38 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_DECODER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_DECODER_H_
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include "gmock/gmock.h"
namespace webrtc {
// gmock-based mock of AudioDecoder for use in unit tests. Each MOCK_* line
// below declares a mocked method with the given name and signature.
class MockAudioDecoder : public AudioDecoder {
public:
// Constructs the AudioDecoder base with codec type kDecoderArbitrary.
MockAudioDecoder() : AudioDecoder(kDecoderArbitrary) {}
// The destructor calls the mocked Die(), which lets a test observe
// destruction by setting an expectation on Die().
virtual ~MockAudioDecoder() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_METHOD4(Decode, int(const uint8_t*, size_t, int16_t*,
AudioDecoder::SpeechType*));
MOCK_CONST_METHOD0(HasDecodePlc, bool());
MOCK_METHOD2(DecodePlc, int(int, int16_t*));
MOCK_METHOD0(Init, int());
MOCK_METHOD5(IncomingPacket, int(const uint8_t*, size_t, uint16_t, uint32_t,
uint32_t));
MOCK_METHOD0(ErrorCode, int());
MOCK_CONST_METHOD0(codec_type, NetEqDecoder());
MOCK_METHOD1(CodecSupported, bool(NetEqDecoder));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_DECODER_H_

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_VECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_VECTOR_H_
#include "webrtc/modules/audio_coding/neteq4/audio_vector.h"
#include "gmock/gmock.h"
namespace webrtc {
class MockAudioVector : public AudioVector {
public:
MOCK_METHOD0(Clear,
void());
MOCK_CONST_METHOD1(CopyFrom,
void(AudioVector<T>* copy_to));
MOCK_METHOD1(PushFront,
void(const AudioVector<T>& prepend_this));
MOCK_METHOD2(PushFront,
void(const T* prepend_this, size_t length));
MOCK_METHOD1(PushBack,
void(const AudioVector<T>& append_this));
MOCK_METHOD2(PushBack,
void(const T* append_this, size_t length));
MOCK_METHOD1(PopFront,
void(size_t length));
MOCK_METHOD1(PopBack,
void(size_t length));
MOCK_METHOD1(Extend,
void(size_t extra_length));
MOCK_METHOD3(InsertAt,
void(const T* insert_this, size_t length, size_t position));
MOCK_METHOD3(OverwriteAt,
void(const T* insert_this, size_t length, size_t position));
MOCK_CONST_METHOD0(Size,
size_t());
MOCK_CONST_METHOD0(Empty,
bool());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_VECTOR_H_

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_
#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h"
#include "gmock/gmock.h"
namespace webrtc {
// gmock-based mock of BufferLevelFilter for use in unit tests. Each MOCK_*
// line below declares a mocked method with the given name and signature.
class MockBufferLevelFilter : public BufferLevelFilter {
public:
// The destructor calls the mocked Die(), which lets a test observe
// destruction by setting an expectation on Die().
virtual ~MockBufferLevelFilter() { Die(); }
MOCK_METHOD0(Die,
void());
MOCK_METHOD0(Reset,
void());
MOCK_METHOD3(Update,
void(int buffer_size_packets, int time_stretched_samples,
int packet_len_samples));
MOCK_METHOD1(SetTargetBufferLevel,
void(int target_buffer_level));
MOCK_CONST_METHOD0(filtered_current_level,
int());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DECODER_DATABASE_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DECODER_DATABASE_H_
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "gmock/gmock.h"
namespace webrtc {
// gmock-based mock of DecoderDatabase for use in unit tests. Each MOCK_* line
// below declares a mocked method with the given name and signature.
class MockDecoderDatabase : public DecoderDatabase {
public:
// The destructor calls the mocked Die(), which lets a test observe
// destruction by setting an expectation on Die().
virtual ~MockDecoderDatabase() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_CONST_METHOD0(Empty,
bool());
MOCK_CONST_METHOD0(Size,
int());
MOCK_METHOD0(Reset,
void());
MOCK_METHOD2(RegisterPayload,
int(uint8_t rtp_payload_type, NetEqDecoder codec_type));
MOCK_METHOD4(InsertExternal,
int(uint8_t rtp_payload_type, NetEqDecoder codec_type, int fs_hz,
AudioDecoder* decoder));
MOCK_METHOD1(Remove,
int(uint8_t rtp_payload_type));
MOCK_CONST_METHOD1(GetDecoderInfo,
const DecoderInfo*(uint8_t rtp_payload_type));
MOCK_CONST_METHOD1(GetRtpPayloadType,
uint8_t(NetEqDecoder codec_type));
MOCK_METHOD1(GetDecoder,
AudioDecoder*(uint8_t rtp_payload_type));
MOCK_CONST_METHOD2(IsType,
bool(uint8_t rtp_payload_type, NetEqDecoder codec_type));
MOCK_CONST_METHOD1(IsComfortNoise,
bool(uint8_t rtp_payload_type));
MOCK_CONST_METHOD1(IsDtmf,
bool(uint8_t rtp_payload_type));
MOCK_CONST_METHOD1(IsRed,
bool(uint8_t rtp_payload_type));
MOCK_METHOD2(SetActiveDecoder,
int(uint8_t rtp_payload_type, bool* new_decoder));
MOCK_METHOD0(GetActiveDecoder,
AudioDecoder*());
MOCK_METHOD1(SetActiveCngDecoder,
int(uint8_t rtp_payload_type));
MOCK_METHOD0(GetActiveCngDecoder,
AudioDecoder*());
MOCK_CONST_METHOD1(CheckPayloadTypes,
int(const PacketList& packet_list));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DECODER_DATABASE_H_

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_MANAGER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_MANAGER_H_
#include "webrtc/modules/audio_coding/neteq4/delay_manager.h"
#include "gmock/gmock.h"
namespace webrtc {
// gmock-based mock of DelayManager for use in unit tests. Each MOCK_* line
// below declares a mocked method with the given name and signature.
class MockDelayManager : public DelayManager {
public:
// Forwards both arguments unchanged to the DelayManager constructor.
MockDelayManager(int max_packets_in_buffer, DelayPeakDetector* peak_detector)
: DelayManager(max_packets_in_buffer, peak_detector) {}
// The destructor calls the mocked Die(), which lets a test observe
// destruction by setting an expectation on Die().
virtual ~MockDelayManager() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_CONST_METHOD0(iat_vector,
const IATVector&());
MOCK_METHOD3(Update,
int(uint16_t sequence_number, uint32_t timestamp, int sample_rate_hz));
MOCK_METHOD1(CalculateTargetLevel,
int(int iat_packets));
MOCK_METHOD1(SetPacketAudioLength,
int(int length_ms));
MOCK_METHOD0(Reset,
void());
MOCK_CONST_METHOD0(AverageIAT,
int());
MOCK_CONST_METHOD0(PeakFound,
bool());
MOCK_METHOD1(UpdateCounters,
void(int elapsed_time_ms));
MOCK_METHOD0(ResetPacketIatCount,
void());
MOCK_CONST_METHOD2(BufferLimits,
void(int* lower_limit, int* higher_limit));
MOCK_CONST_METHOD0(TargetLevel,
int());
MOCK_METHOD1(LastDecoderType,
void(NetEqDecoder decoder_type));
MOCK_METHOD1(set_extra_delay_ms,
void(int16_t delay));
MOCK_CONST_METHOD0(base_target_level,
int());
MOCK_METHOD1(set_streaming_mode,
void(bool value));
MOCK_CONST_METHOD0(last_pack_cng_or_dtmf,
int());
MOCK_METHOD1(set_last_pack_cng_or_dtmf,
void(int value));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_MANAGER_H_

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_
#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h"
#include "gmock/gmock.h"
namespace webrtc {
// gmock-based mock of DelayPeakDetector for use in unit tests. Each MOCK_*
// line below declares a mocked method with the given name and signature.
class MockDelayPeakDetector : public DelayPeakDetector {
public:
// The destructor calls the mocked Die(), which lets a test observe
// destruction by setting an expectation on Die().
virtual ~MockDelayPeakDetector() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_METHOD0(Reset, void());
MOCK_METHOD1(SetPacketAudioLength, void(int length_ms));
MOCK_METHOD0(peak_found, bool());
MOCK_CONST_METHOD0(MaxPeakHeight, int());
MOCK_CONST_METHOD0(MaxPeakPeriod, int());
MOCK_METHOD2(Update, bool(int inter_arrival_time, int target_level));
MOCK_METHOD1(IncrementCounter, void(int inc_ms));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_

View File

@ -0,0 +1,38 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_BUFFER_H_
#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h"
#include "gmock/gmock.h"
namespace webrtc {
// gmock-based mock of DtmfBuffer for use in unit tests. Each MOCK_* line
// below declares a mocked method with the given name and signature.
class MockDtmfBuffer : public DtmfBuffer {
 public:
  // Forwards |fs| unchanged to the DtmfBuffer constructor. Marked explicit so
  // that an int is never implicitly converted into a mock object.
  explicit MockDtmfBuffer(int fs) : DtmfBuffer(fs) {}
  // The destructor calls the mocked Die(), which lets a test observe
  // destruction by setting an expectation on Die().
  virtual ~MockDtmfBuffer() { Die(); }
  MOCK_METHOD0(Die, void());
  MOCK_METHOD0(Flush,
      void());
  MOCK_METHOD1(InsertEvent,
      int(const DtmfEvent& event));
  MOCK_METHOD2(GetEvent,
      bool(uint32_t current_timestamp, DtmfEvent* event));
  MOCK_CONST_METHOD0(Length,
      size_t());
  MOCK_CONST_METHOD0(Empty,
      bool());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_BUFFER_H_

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_TONE_GENERATOR_H_
#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h"
#include "gmock/gmock.h"
namespace webrtc {
// gmock test double for DtmfToneGenerator.
class MockDtmfToneGenerator : public DtmfToneGenerator {
 public:
  // The destructor reports to the mocked Die(), which lets a test set
  // expectations on if and when the object is destroyed.
  virtual ~MockDtmfToneGenerator() {
    Die();
  }
  MOCK_METHOD0(Die, void());

  MOCK_METHOD3(Init, int(int fs, int event, int attenuation));
  MOCK_METHOD0(Reset, void());
  MOCK_METHOD2(Generate,
               int(int num_samples, AudioMultiVector<int16_t>* output));
  MOCK_CONST_METHOD0(initialized, bool());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_TONE_GENERATOR_H_

View File

@ -0,0 +1,99 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include "gmock/gmock.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
using ::testing::_;
using ::testing::Invoke;
// A PCM16b decoder intended to be registered with NetEq as an external
// decoder. The implementation is a copy of the one in
// audio_decoder_impl.{cc, h}.
class ExternalPcm16B : public AudioDecoder {
 public:
  explicit ExternalPcm16B(enum NetEqDecoder type) : AudioDecoder(type) {}

  // Decodes |encoded_len| bytes from |encoded| into |decoded| by calling
  // WebRtcPcm16b_DecodeW16(). The speech type reported by the codec is
  // converted with ConvertSpeechType() and written to |speech_type|. Returns
  // the codec function's return value.
  virtual int Decode(const uint8_t* encoded, size_t encoded_len,
                     int16_t* decoded, SpeechType* speech_type) {
    // The codec API takes a non-const int16_t pointer and an int16_t length.
    int16_t* const payload =
        reinterpret_cast<int16_t*>(const_cast<uint8_t*>(encoded));
    const int16_t payload_len = static_cast<int16_t>(encoded_len);
    int16_t codec_speech_type;
    const int16_t result = WebRtcPcm16b_DecodeW16(
        state_, payload, payload_len, decoded, &codec_speech_type);
    *speech_type = ConvertSpeechType(codec_speech_type);
    return result;
  }

  // Nothing to initialize; always returns 0.
  virtual int Init() {
    return 0;
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ExternalPcm16B);
};
// Mock of ExternalPcm16B. By default every mocked method forwards to an
// internal, real ExternalPcm16B instance, so decoding still works while gmock
// expectations can be used to track that the correct calls are being made.
class MockExternalPcm16B : public ExternalPcm16B {
 public:
  explicit MockExternalPcm16B(enum NetEqDecoder type)
      : ExternalPcm16B(type), real_(type) {
    // Install the default actions: each mocked method delegates to |real_|.
    ON_CALL(*this, Decode(_, _, _, _))
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::Decode));
    ON_CALL(*this, HasDecodePlc())
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::HasDecodePlc));
    ON_CALL(*this, DecodePlc(_, _))
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::DecodePlc));
    ON_CALL(*this, Init())
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::Init));
    ON_CALL(*this, IncomingPacket(_, _, _, _, _))
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::IncomingPacket));
    ON_CALL(*this, ErrorCode())
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::ErrorCode));
    ON_CALL(*this, codec_type())
        .WillByDefault(Invoke(&real_, &ExternalPcm16B::codec_type));
  }

  // The destructor reports to the mocked Die(), which lets a test set
  // expectations on if and when the object is destroyed.
  virtual ~MockExternalPcm16B() {
    Die();
  }
  MOCK_METHOD0(Die, void());

  MOCK_METHOD4(Decode, int(const uint8_t* encoded, size_t encoded_len,
                           int16_t* decoded, SpeechType* speech_type));
  MOCK_CONST_METHOD0(HasDecodePlc, bool());
  MOCK_METHOD2(DecodePlc, int(int num_frames, int16_t* decoded));
  MOCK_METHOD0(Init, int());
  MOCK_METHOD5(IncomingPacket,
               int(const uint8_t* payload, size_t payload_len,
                   uint16_t rtp_sequence_number, uint32_t rtp_timestamp,
                   uint32_t arrival_timestamp));
  MOCK_METHOD0(ErrorCode, int());
  MOCK_CONST_METHOD0(codec_type, NetEqDecoder());

 private:
  // The real decoder that receives all calls by default.
  ExternalPcm16B real_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PACKET_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PACKET_BUFFER_H_
#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h"
#include "gmock/gmock.h"
namespace webrtc {
class MockPacketBuffer : public PacketBuffer {
public:
MockPacketBuffer(size_t max_number_of_packets, size_t max_payload_memory)
: PacketBuffer(max_number_of_packets, max_payload_memory) {}
virtual ~MockPacketBuffer() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_METHOD0(Flush,
void());
MOCK_CONST_METHOD0(Empty,
bool());
MOCK_METHOD1(InsertPacket,
int(Packet* packet));
MOCK_METHOD4(InsertPacketList,
int(PacketList* packet_list,
const DecoderDatabase& decoder_database,
uint8_t* current_rtp_payload_type,
uint8_t* current_cng_rtp_payload_type));
MOCK_CONST_METHOD1(NextTimestamp,
int(uint32_t* next_timestamp));
MOCK_CONST_METHOD2(NextHigherTimestamp,
int(uint32_t timestamp, uint32_t* next_timestamp));
MOCK_CONST_METHOD0(NextRtpHeader,
const RTPHeader*());
MOCK_METHOD1(GetNextPacket,
Packet*(int* discard_count));
MOCK_METHOD0(DiscardNextPacket,
int());
MOCK_METHOD1(DiscardOldPackets,
int(uint32_t timestamp_limit));
MOCK_CONST_METHOD0(NumPacketsInBuffer,
int());
MOCK_METHOD1(IncrementWaitingTimes,
void(int));
MOCK_CONST_METHOD0(current_memory_bytes,
int());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PACKET_BUFFER_H_

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PAYLOAD_SPLITTER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PAYLOAD_SPLITTER_H_
#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h"
#include "gmock/gmock.h"
namespace webrtc {
// gmock test double for PayloadSplitter. Unlike the other NetEq4 mocks it has
// no Die() hook on destruction.
class MockPayloadSplitter : public PayloadSplitter {
 public:
  MOCK_METHOD1(SplitRed, int(PacketList* packet_list));
  MOCK_METHOD2(CheckRedPayloads,
               int(PacketList* packet_list,
                   const DecoderDatabase& decoder_database));
  MOCK_METHOD2(SplitAudio,
               int(PacketList* packet_list,
                   const DecoderDatabase& decoder_database));
  MOCK_METHOD4(SplitBySamples,
               void(const Packet* packet, int bytes_per_ms,
                    int timestamps_per_ms, PacketList* new_packets));
  MOCK_METHOD4(SplitByFrames,
               int(const Packet* packet, int bytes_per_frame,
                   int timestamps_per_frame, PacketList* new_packets));
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PAYLOAD_SPLITTER_H_

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h"
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/delay_manager.h"
#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h"
#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h"
#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h"
#include "webrtc/modules/audio_coding/neteq4/neteq_impl.h"
#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h"
#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h"
#include "webrtc/modules/audio_coding/neteq4/timestamp_scaler.h"
namespace webrtc {
// Factory method: creates every helper object that NetEqImpl needs, injects
// them into a new NetEqImpl object, and returns that object.
NetEq* NetEq::Create(int sample_rate_hz) {
  BufferLevelFilter* level_filter = new BufferLevelFilter;
  DecoderDatabase* decoders = new DecoderDatabase;

  // The peak detector must exist before the delay manager, which takes it as
  // a constructor argument.
  DelayPeakDetector* peak_detector = new DelayPeakDetector;
  DelayManager* delay_mgr =
      new DelayManager(kMaxNumPacketsInBuffer, peak_detector);

  DtmfBuffer* dtmf_events = new DtmfBuffer(sample_rate_hz);
  DtmfToneGenerator* tone_generator = new DtmfToneGenerator;
  PacketBuffer* packets =
      new PacketBuffer(kMaxNumPacketsInBuffer, kMaxBytesInBuffer);
  PayloadSplitter* splitter = new PayloadSplitter;

  // The timestamp scaler is constructed from the decoder database created
  // above.
  TimestampScaler* scaler = new TimestampScaler(*decoders);

  return new NetEqImpl(sample_rate_hz, level_filter, decoders, delay_mgr,
                       peak_detector, dtmf_events, tone_generator, packets,
                       splitter, scaler);
}
} // namespace webrtc

View File

@ -0,0 +1,228 @@
# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
# Build variables shared by the targets in this file.
'variables': {
# Libraries used as dependencies of the NetEq4 target; the same list is reused
# by the audio_decoder_unittests target.
'neteq_dependencies': [
'G711',
'G722',
'PCM16B',
'iLBC',
'iSAC',
'iSACFix',
'CNG',
'<(webrtc_root)/common_audio/common_audio.gyp:signal_processing',
'<(webrtc_root)/common_audio/common_audio.gyp:vad',
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
],
# Preprocessor defines for the same targets; empty unless a condition below
# contributes to it.
'neteq_defines': [],
'conditions': [
# Opus support: when include_opus==1, the webrtc_opus dependency and the
# WEBRTC_CODEC_OPUS define are contributed to the two lists above.
# NOTE(review): this relies on GYP merging (appending) same-named lists
# rather than replacing them -- confirm against the GYP documentation.
['include_opus==1', {
'neteq_dependencies': ['webrtc_opus',],
'neteq_defines': ['WEBRTC_CODEC_OPUS',],
}],
],
},
'targets': [
# The NetEq4 static library. Its dependencies and defines come from the
# 'neteq_dependencies' and 'neteq_defines' variables.
{
'target_name': 'NetEq4',
'type': 'static_library',
'dependencies': [
'<@(neteq_dependencies)',
],
'defines': [
'<@(neteq_defines)',
],
'include_dirs': [
'interface',
],
# Targets that depend directly on NetEq4 also get 'interface' on their include
# path.
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/audio_decoder.h',
'interface/neteq.h',
'accelerate.cc',
'accelerate.h',
'audio_decoder_impl.cc',
'audio_decoder_impl.h',
'audio_decoder.cc',
'audio_multi_vector.cc',
'audio_multi_vector.h',
'audio_vector.cc',
'audio_vector.h',
'background_noise.cc',
'background_noise.h',
'buffer_level_filter.cc',
'buffer_level_filter.h',
'comfort_noise.cc',
'comfort_noise.h',
'decision_logic.cc',
'decision_logic.h',
'decision_logic_fax.cc',
'decision_logic_fax.h',
'decision_logic_normal.cc',
'decision_logic_normal.h',
'decoder_database.cc',
'decoder_database.h',
'defines.h',
'delay_manager.cc',
'delay_manager.h',
'delay_peak_detector.cc',
'delay_peak_detector.h',
'dsp_helper.cc',
'dsp_helper.h',
'dtmf_buffer.cc',
'dtmf_buffer.h',
'dtmf_tone_generator.cc',
'dtmf_tone_generator.h',
'expand.cc',
'expand.h',
'merge.cc',
'merge.h',
'neteq_impl.cc',
'neteq_impl.h',
'neteq.cc',
'statistics_calculator.cc',
'statistics_calculator.h',
'normal.cc',
'normal.h',
'packet_buffer.cc',
'packet_buffer.h',
'payload_splitter.cc',
'payload_splitter.h',
'post_decode_vad.cc',
'post_decode_vad.h',
'preemptive_expand.cc',
'preemptive_expand.h',
'random_vector.cc',
'random_vector.h',
'rtcp.cc',
'rtcp.h',
'sync_buffer.cc',
'sync_buffer.h',
'timestamp_scaler.cc',
'timestamp_scaler.h',
'time_stretch.cc',
'time_stretch.h',
],
},
], # targets
'conditions': [
['include_tests==1', {
'includes': ['neteq_tests.gypi',],
'targets': [
# Unit test executable for NetEq4. Lists the *_unittest.cc sources and the
# gmock-based mock headers under mock/.
{
'target_name': 'neteq4_unittests',
'type': 'executable',
'dependencies': [
'NetEq4',
'NetEq4TestTools',
'neteq_unittest_tools',
'PCM16B',
'<(DEPTH)/testing/gmock.gyp:gmock',
'<(DEPTH)/testing/gtest.gyp:gtest',
'<(webrtc_root)/test/test.gyp:test_support_main',
],
'sources': [
'audio_multi_vector_unittest.cc',
'audio_vector_unittest.cc',
'background_noise_unittest.cc',
'buffer_level_filter_unittest.cc',
'comfort_noise_unittest.cc',
'decision_logic_unittest.cc',
'decoder_database_unittest.cc',
'delay_manager_unittest.cc',
'delay_peak_detector_unittest.cc',
'dsp_helper_unittest.cc',
'dtmf_buffer_unittest.cc',
'dtmf_tone_generator_unittest.cc',
'expand_unittest.cc',
'merge_unittest.cc',
'neteq_external_decoder_unittest.cc',
'neteq_impl_unittest.cc',
'neteq_stereo_unittest.cc',
'neteq_unittest.cc',
'normal_unittest.cc',
'packet_buffer_unittest.cc',
'payload_splitter_unittest.cc',
'post_decode_vad_unittest.cc',
'random_vector_unittest.cc',
'sync_buffer_unittest.cc',
'timestamp_scaler_unittest.cc',
'time_stretch_unittest.cc',
'mock/mock_audio_decoder.h',
'mock/mock_audio_vector.h',
'mock/mock_buffer_level_filter.h',
'mock/mock_decoder_database.h',
'mock/mock_delay_manager.h',
'mock/mock_delay_peak_detector.h',
'mock/mock_dtmf_buffer.h',
'mock/mock_dtmf_tone_generator.h',
'mock/mock_external_decoder_pcm16b.h',
'mock/mock_packet_buffer.h',
'mock/mock_payload_splitter.h',
],
}, # neteq4_unittests
# Unit test executable for the audio decoder wrappers. It does not depend on
# the NetEq4 target; instead it compiles audio_decoder_impl.cc and
# audio_decoder.cc directly, with its own set of WEBRTC_CODEC_* defines plus
# AUDIO_DECODER_UNITTEST.
{
'target_name': 'audio_decoder_unittests',
'type': 'executable',
'dependencies': [
'<@(neteq_dependencies)',
'<(DEPTH)/testing/gtest.gyp:gtest',
'<(webrtc_root)/common_audio/common_audio.gyp:resampler',
'<(webrtc_root)/test/test.gyp:test_support_main',
],
'defines': [
'AUDIO_DECODER_UNITTEST',
'WEBRTC_CODEC_G722',
'WEBRTC_CODEC_ILBC',
'WEBRTC_CODEC_ISACFX',
'WEBRTC_CODEC_ISAC',
'WEBRTC_CODEC_PCM16',
'<@(neteq_defines)',
],
'sources': [
'audio_decoder_impl.cc',
'audio_decoder_impl.h',
'audio_decoder_unittest.cc',
'audio_decoder.cc',
'interface/audio_decoder.h',
],
}, # audio_decoder_unittests
# Static library with helpers used by the unit tests (an input audio file
# reader and an RTP generator).
{
'target_name': 'neteq_unittest_tools',
'type': 'static_library',
'dependencies': [
'<(DEPTH)/testing/gmock.gyp:gmock',
'<(DEPTH)/testing/gtest.gyp:gtest',
'<(webrtc_root)/test/test.gyp:test_support_main',
],
# Targets that depend directly on this library also get 'tools' on their
# include path.
'direct_dependent_settings': {
'include_dirs': [
'tools',
],
},
'include_dirs': [
'tools',
],
'sources': [
'tools/input_audio_file.cc',
'tools/input_audio_file.h',
'tools/rtp_generator.cc',
'tools/rtp_generator.h',
],
}, # neteq_unittest_tools
], # targets
}], # include_tests
], # conditions
}

View File

@ -0,0 +1,208 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Test to verify correct operation for externally created decoders.
#include <string>
#include <list>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_external_decoder_pcm16b.h"
#include "webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h"
#include "webrtc/modules/audio_coding/neteq4/tools/rtp_generator.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
using ::testing::_;
// This test encodes a few packets of PCM16b 32 kHz data and inserts it into two
// different NetEq instances. The first instance uses the internal version of
// the decoder object, while the second one uses an externally created decoder
// object (ExternalPcm16B wrapped in MockExternalPcm16B, both defined in
// mock/mock_external_decoder_pcm16b.h).
// The test verifies that the output from both instances match.
class NetEqExternalDecoderTest : public ::testing::Test {
protected:
static const int kTimeStepMs = 10;
static const int kMaxBlockSize = 480; // 10 ms @ 48 kHz.
static const uint8_t kPayloadType = 95;
static const int kSampleRateHz = 32000;
// Creates both NetEq instances and the mock external decoder, and allocates
// the input buffer (one frame of samples) and the encoded buffer (two bytes
// per sample).
NetEqExternalDecoderTest()
: sample_rate_hz_(kSampleRateHz),
samples_per_ms_(sample_rate_hz_ / 1000),
frame_size_ms_(10),
frame_size_samples_(frame_size_ms_ * samples_per_ms_),
output_size_samples_(frame_size_ms_ * samples_per_ms_),
neteq_external_(NetEq::Create(sample_rate_hz_)),
neteq_(NetEq::Create(sample_rate_hz_)),
external_decoder_(new MockExternalPcm16B(kDecoderPCM16Bswb32kHz)),
rtp_generator_(samples_per_ms_),
payload_size_bytes_(0),
last_send_time_(0),
last_arrival_time_(0) {
input_ = new int16_t[frame_size_samples_];
encoded_ = new uint8_t[2 * frame_size_samples_];
}
// Deletes the NetEq instances first, while the Die() expectation from SetUp()
// (Times(0)) is still the active one, and only then the external decoder.
~NetEqExternalDecoderTest() {
delete neteq_external_;
delete neteq_;
// We will now delete the decoder ourselves, so expecting Die to be called.
EXPECT_CALL(*external_decoder_, Die()).Times(1);
delete external_decoder_;
delete [] input_;
delete [] encoded_;
}
// Opens the 32 kHz input file and registers the decoders: the mock decoder as
// an external decoder in |neteq_external_|, and the built-in decoder of the
// same type in |neteq_|, both under kPayloadType.
virtual void SetUp() {
const std::string file_name =
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
input_file_.reset(new test::InputAudioFile(file_name));
assert(sample_rate_hz_ == 32000);
NetEqDecoder decoder = kDecoderPCM16Bswb32kHz;
EXPECT_CALL(*external_decoder_, Init());
// NetEq is not allowed to delete the external decoder (hence Times(0)).
EXPECT_CALL(*external_decoder_, Die()).Times(0);
ASSERT_EQ(NetEq::kOK,
neteq_external_->RegisterExternalDecoder(external_decoder_,
decoder,
sample_rate_hz_,
kPayloadType));
ASSERT_EQ(NetEq::kOK,
neteq_->RegisterPayloadType(decoder, kPayloadType));
}
virtual void TearDown() {}
// Reads one frame from the input file into |input_|, encodes it as PCM16b
// into |encoded_| and fills in |rtp_header_| for the packet. Returns the value
// from RtpGenerator::GetRtpHeader() (used by the callers as the send time of
// the packet), or -1 if the file read fails or the encoded size is not two
// bytes per sample.
int GetNewPackets() {
if (!input_file_->Read(frame_size_samples_, input_)) {
return -1;
}
payload_size_bytes_ = WebRtcPcm16b_Encode(input_, frame_size_samples_,
encoded_);
if (frame_size_samples_ * 2 != payload_size_bytes_) {
return -1;
}
int next_send_time = rtp_generator_.GetRtpHeader(kPayloadType,
frame_size_samples_,
&rtp_header_);
return next_send_time;
}
// Asserts that the first |num_samples| samples of |output_| and
// |output_external_| are identical.
void VerifyOutput(size_t num_samples) {
for (size_t i = 0; i < num_samples; ++i) {
ASSERT_EQ(output_[i], output_external_[i]) <<
"Diff in sample " << i << ".";
}
}
// Returns the arrival time for a packet sent at |send_time|. The arrival time
// advances by exactly the same amount as the send time since the previous
// call, i.e., no jitter is added. Updates |last_send_time_| and
// |last_arrival_time_|.
virtual int GetArrivalTime(int send_time) {
int arrival_time = last_arrival_time_ + (send_time - last_send_time_);
last_send_time_ = send_time;
last_arrival_time_ = arrival_time;
return arrival_time;
}
// Tells whether the most recently generated packet should be treated as lost.
// Always false in this fixture.
virtual bool Lost() { return false; }
// Runs |num_loops| laps of kTimeStepMs each. In every lap, all packets that
// are due are inserted into both NetEq instances, audio is pulled from both,
// and the two outputs are compared.
void RunTest(int num_loops) {
// Get the first input packet.
int next_send_time;
int next_arrival_time;
do {
next_send_time = GetNewPackets();
ASSERT_NE(-1, next_send_time);
next_arrival_time = GetArrivalTime(next_send_time);
} while (Lost()); // If lost, immediately read the next packet.
// The external decoder is expected to decode exactly one payload per lap.
EXPECT_CALL(*external_decoder_, Decode(_, payload_size_bytes_, _, _))
.Times(num_loops);
int time_now = 0;
for (int k = 0; k < num_loops; ++k) {
while (time_now >= next_arrival_time) {
// Insert packet in regular instance.
ASSERT_EQ(NetEq::kOK,
neteq_->InsertPacket(rtp_header_, encoded_,
payload_size_bytes_,
next_arrival_time));
// Insert packet in external decoder instance.
EXPECT_CALL(*external_decoder_,
IncomingPacket(_, payload_size_bytes_,
rtp_header_.header.sequenceNumber,
rtp_header_.header.timestamp,
next_arrival_time));
ASSERT_EQ(NetEq::kOK,
neteq_external_->InsertPacket(rtp_header_, encoded_,
payload_size_bytes_,
next_arrival_time));
// Get next input packet.
do {
next_send_time = GetNewPackets();
ASSERT_NE(-1, next_send_time);
next_arrival_time = GetArrivalTime(next_send_time);
} while (Lost()); // If lost, immediately read the next packet.
}
NetEqOutputType output_type;
// Get audio from regular instance.
int samples_per_channel;
int num_channels;
EXPECT_EQ(NetEq::kOK,
neteq_->GetAudio(kMaxBlockSize, output_,
&samples_per_channel, &num_channels,
&output_type));
EXPECT_EQ(1, num_channels);
EXPECT_EQ(output_size_samples_, samples_per_channel);
// Get audio from external decoder instance.
ASSERT_EQ(NetEq::kOK,
neteq_external_->GetAudio(kMaxBlockSize, output_external_,
&samples_per_channel, &num_channels,
&output_type));
EXPECT_EQ(1, num_channels);
EXPECT_EQ(output_size_samples_, samples_per_channel);
// NOTE(review): std::ostringstream is used here, but <sstream> is not among
// the headers this file includes directly -- presumably it arrives
// transitively; confirm.
std::ostringstream ss;
ss << "Lap number " << k << ".";
SCOPED_TRACE(ss.str()); // Print out the lap number on failure.
// Compare the output of the regular and the external decoder instances.
ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_));
time_now += kTimeStepMs;
}
}
const int sample_rate_hz_;
const int samples_per_ms_;
const int frame_size_ms_;
const int frame_size_samples_;
const int output_size_samples_;
// Instance that uses the externally created (mock) decoder.
NetEq* neteq_external_;
// Instance that uses the built-in decoder.
NetEq* neteq_;
// Owned by the fixture; deleted in the destructor.
MockExternalPcm16B* external_decoder_;
test::RtpGenerator rtp_generator_;
// One frame of input samples and its PCM16b encoding, respectively.
int16_t* input_;
uint8_t* encoded_;
// Output from |neteq_| and |neteq_external_|, respectively.
int16_t output_[kMaxBlockSize];
int16_t output_external_[kMaxBlockSize];
WebRtcRTPHeader rtp_header_;
int payload_size_bytes_;
int last_send_time_;
int last_arrival_time_;
scoped_ptr<test::InputAudioFile> input_file_;
};
// Compares the regular and the external-decoder NetEq instances over 100
// laps of the test loop, 10 ms each.
TEST_F(NetEqExternalDecoderTest, RunTest) {
  const int kNumLaps = 100;
  RunTest(kNumLaps);
}
} // namespace webrtc

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,319 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NETEQ_IMPL_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NETEQ_IMPL_H_
#include <vector>
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq4/defines.h"
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/modules/audio_coding/neteq4/packet.h" // Declare PacketList.
#include "webrtc/modules/audio_coding/neteq4/random_vector.h"
#include "webrtc/modules/audio_coding/neteq4/rtcp.h"
#include "webrtc/modules/audio_coding/neteq4/statistics_calculator.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
class BufferLevelFilter;
class ComfortNoise;
class CriticalSectionWrapper;
class DecisionLogic;
class DecoderDatabase;
class DelayManager;
class DelayPeakDetector;
class DtmfBuffer;
class DtmfToneGenerator;
class Expand;
class PacketBuffer;
class PayloadSplitter;
class PostDecodeVad;
class RandomVector;
class SyncBuffer;
class TimestampScaler;
struct DtmfEvent;
class NetEqImpl : public webrtc::NetEq {
public:
// Creates a new NetEqImpl object. The object will assume ownership of all
// injected dependencies, and will delete them when done.
NetEqImpl(int fs,
BufferLevelFilter* buffer_level_filter,
DecoderDatabase* decoder_database,
DelayManager* delay_manager,
DelayPeakDetector* delay_peak_detector,
DtmfBuffer* dtmf_buffer,
DtmfToneGenerator* dtmf_tone_generator,
PacketBuffer* packet_buffer,
PayloadSplitter* payload_splitter,
TimestampScaler* timestamp_scaler);
virtual ~NetEqImpl();
// Inserts a new packet into NetEq. The |receive_timestamp| is an indication
// of the time when the packet was received, and should be measured with
// the same tick rate as the RTP timestamp of the current payload.
// Returns 0 on success, -1 on failure.
virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
const uint8_t* payload,
int length_bytes,
uint32_t receive_timestamp);
// Instructs NetEq to deliver 10 ms of audio data. The data is written to
// |output_audio|, which can hold (at least) |max_length| elements.
// The number of channels that were written to the output is provided in
// the output variable |num_channels|, and each channel contains
// |samples_per_channel| elements. If more than one channel is written,
// the samples are interleaved.
// The speech type is written to |type|, if |type| is not NULL.
// Returns kOK on success, or kFail in case of an error.
virtual int GetAudio(size_t max_length, int16_t* output_audio,
int* samples_per_channel, int* num_channels,
NetEqOutputType* type);
// Associates |rtp_payload_type| with |codec| and stores the information in
// the codec database. Returns kOK on success, kFail on failure.
virtual int RegisterPayloadType(enum NetEqDecoder codec,
uint8_t rtp_payload_type);
// Provides an externally created decoder object |decoder| to insert in the
// decoder database. The decoder implements a decoder of type |codec| and
// associates it with |rtp_payload_type|. The decoder operates at the
// frequency |sample_rate_hz|. Returns kOK on success, kFail on failure.
virtual int RegisterExternalDecoder(AudioDecoder* decoder,
enum NetEqDecoder codec,
int sample_rate_hz,
uint8_t rtp_payload_type);
// Removes |rtp_payload_type| from the codec database. Returns 0 on success,
// -1 on failure.
virtual int RemovePayloadType(uint8_t rtp_payload_type);
// Sets the desired extra delay on top of what NetEq already applies due to
// current network situation. Used for synchronization with video. Returns
// true if successful, otherwise false.
virtual bool SetExtraDelay(int extra_delay_ms);
virtual int SetTargetDelay() { return kNotImplemented; }
virtual int TargetDelay() { return kNotImplemented; }
virtual int CurrentDelay() { return kNotImplemented; }
// Enables playout of DTMF tones.
virtual int EnableDtmf();
// Sets the playout mode to |mode|.
virtual void SetPlayoutMode(NetEqPlayoutMode mode);
// Returns the current playout mode.
virtual NetEqPlayoutMode PlayoutMode() const;
// Writes the current network statistics to |stats|. The statistics are reset
// after the call.
virtual int NetworkStatistics(NetEqNetworkStatistics* stats);
// Writes the last packet waiting times (in ms) to |waiting_times|. The number
// of values written is no more than 100, but may be smaller if the interface
// is polled again before 100 packets has arrived.
virtual void WaitingTimes(std::vector<int>* waiting_times);
// Writes the current RTCP statistics to |stats|. The statistics are reset
// and a new report period is started with the call.
virtual void GetRtcpStatistics(RtcpStatistics* stats);
// Same as RtcpStatistics(), but does not reset anything.
virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats);
// Enables post-decode VAD. When enabled, GetAudio() will return
// kOutputVADPassive when the signal contains no speech.
virtual void EnableVad();
// Disables post-decode VAD.
virtual void DisableVad();
// Returns the RTP timestamp for the last sample delivered by GetAudio().
virtual uint32_t PlayoutTimestamp();
virtual int SetTargetNumberOfChannels() { return kNotImplemented; }
virtual int SetTargetSampleRate() { return kNotImplemented; }
// Returns the error code for the last occurred error. If no error has
// occurred, 0 is returned.
virtual int LastError();
// Returns the error code last returned by a decoder (audio or comfort noise).
// When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
// this method to get the decoder's error code.
virtual int LastDecoderError();
// Flushes both the packet buffer and the sync buffer.
virtual void FlushBuffers();
private:
static const int kOutputSizeMs = 10;
static const int kMaxFrameSize = 2880; // 60 ms @ 48 kHz.
// TODO(hlundin): Provide a better value for kSyncBufferSize.
static const int kSyncBufferSize = 2 * kMaxFrameSize;
// Inserts a new packet into NetEq. This is used by the InsertPacket method
// above. Returns 0 on success, otherwise an error code.
// TODO(hlundin): Merge this with InsertPacket above?
int InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
const uint8_t* payload,
int length_bytes,
uint32_t receive_timestamp);
// Delivers 10 ms of audio to |output|. The number of samples produced is
// written to |output_length|. Returns 0 on success, or an error code.
int GetAudioInternal(size_t max_length, int16_t* output,
int* samples_per_channel, int* num_channels);
// Provides a decision to the GetAudioInternal method. The decision what to
// do is written to |operation|. Packets to decode are written to
// |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
// DTMF should be played, |play_dtmf| is set to true by the method.
// Returns 0 on success, otherwise an error code.
int GetDecision(Operations* operation,
PacketList* packet_list,
DtmfEvent* dtmf_event,
bool* play_dtmf);
// Decodes the speech packets in |packet_list|, and writes the results to
// |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
// elements. The length of the decoded data is written to |decoded_length|.
// The speech type -- speech or (codec-internal) comfort noise -- is written
// to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
// comfort noise, those are not decoded.
int Decode(PacketList* packet_list, Operations* operation,
int* decoded_length, AudioDecoder::SpeechType* speech_type);
// Sub-method to Decode(). Performs the actual decoding.
int DecodeLoop(PacketList* packet_list, Operations* operation,
AudioDecoder* decoder, int* decoded_length,
AudioDecoder::SpeechType* speech_type);
// Sub-method which calls the Normal class to perform the normal operation.
void DoNormal(const int16_t* decoded_buffer, size_t decoded_length,
AudioDecoder::SpeechType speech_type, bool play_dtmf,
AudioMultiVector<int16_t>* algorithm_buffer);
// Sub-method which calls the Merge class to perform the merge operation.
void DoMerge(int16_t* decoded_buffer, size_t decoded_length,
AudioDecoder::SpeechType speech_type, bool play_dtmf,
AudioMultiVector<int16_t>* algorithm_buffer);
// Sub-method which calls the Expand class to perform the expand operation.
int DoExpand(bool play_dtmf, AudioMultiVector<int16_t>* algorithm_buffer);
// Sub-method which calls the Accelerate class to perform the accelerate
// operation.
int DoAccelerate(int16_t* decoded_buffer, size_t decoded_length,
AudioDecoder::SpeechType speech_type, bool play_dtmf,
AudioMultiVector<int16_t>* algorithm_buffer);
// Sub-method which calls the PreemptiveExpand class to perform the
// preemtive expand operation.
int DoPreemptiveExpand(int16_t* decoded_buffer, size_t decoded_length,
AudioDecoder::SpeechType speech_type, bool play_dtmf,
AudioMultiVector<int16_t>* algorithm_buffer);
// Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
// noise. |packet_list| can either contain one SID frame to update the
// noise parameters, or no payload at all, in which case the previously
// received parameters are used.
int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf,
AudioMultiVector<int16_t>* algorithm_buffer);
// Calls the audio decoder to generate codec-internal comfort noise when
// no packet was received.
void DoCodecInternalCng(AudioMultiVector<int16_t>* algorithm_buffer);
// Calls the DtmfToneGenerator class to generate DTMF tones.
int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf,
AudioMultiVector<int16_t>* algorithm_buffer);
// Produces packet-loss concealment using alternative methods. If the codec
// has an internal PLC, it is called to generate samples. Otherwise, the
// method performs zero-stuffing.
void DoAlternativePlc(bool increase_timestamp,
AudioMultiVector<int16_t>* algorithm_buffer);
// Overdub DTMF on top of |output|.
int DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels,
int16_t* output) const;
// Extracts packets from |packet_buffer_| to produce at least
// |required_samples| samples. The packets are inserted into |packet_list|.
// Returns the number of samples that the packets in the list will produce, or
// -1 in case of an error.
int ExtractPackets(int required_samples, PacketList* packet_list);
// Resets various variables and objects to new values based on the sample rate
// |fs_hz| and the number of audio channels |channels|.
void SetSampleRateAndChannels(int fs_hz, size_t channels);
// Returns the output type for the audio produced by the latest call to
// GetAudio().
NetEqOutputType LastOutputType();
BackgroundNoise* background_noise_;
scoped_ptr<BufferLevelFilter> buffer_level_filter_;
scoped_ptr<DecoderDatabase> decoder_database_;
scoped_ptr<DelayManager> delay_manager_;
scoped_ptr<DelayPeakDetector> delay_peak_detector_;
scoped_ptr<DtmfBuffer> dtmf_buffer_;
scoped_ptr<DtmfToneGenerator> dtmf_tone_generator_;
scoped_ptr<PacketBuffer> packet_buffer_;
scoped_ptr<PayloadSplitter> payload_splitter_;
scoped_ptr<TimestampScaler> timestamp_scaler_;
scoped_ptr<DecisionLogic> decision_logic_;
scoped_ptr<PostDecodeVad> vad_;
SyncBuffer* sync_buffer_;
Expand* expand_;
RandomVector random_vector_;
ComfortNoise* comfort_noise_;
Rtcp rtcp_;
StatisticsCalculator stats_;
int fs_hz_;
int fs_mult_;
int output_size_samples_;
int decoder_frame_length_;
Modes last_mode_;
scoped_array<int16_t> mute_factor_array_;
size_t decoded_buffer_length_;
scoped_array<int16_t> decoded_buffer_;
uint32_t playout_timestamp_;
bool new_codec_;
uint32_t timestamp_;
bool reset_decoder_;
uint8_t current_rtp_payload_type_;
uint8_t current_cng_rtp_payload_type_;
uint32_t ssrc_;
bool first_packet_;
bool dtmf_enabled_;
int error_code_; // Store last error code.
int decoder_error_code_;
CriticalSectionWrapper* crit_sect_;
DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NETEQ_IMPL_H_

View File

@ -0,0 +1,226 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/modules/audio_coding/neteq4/neteq_impl.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_buffer_level_filter.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_delay_manager.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_delay_peak_detector.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_buffer.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_tone_generator.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_packet_buffer.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_payload_splitter.h"
#include "webrtc/modules/audio_coding/neteq4/timestamp_scaler.h"
using ::testing::Return;
using ::testing::ReturnNull;
using ::testing::_;
using ::testing::SetArgPointee;
using ::testing::InSequence;
using ::testing::Invoke;
using ::testing::WithArg;
namespace webrtc {
// Action used by the mock packet buffer when a packet list is inserted.
// Every packet in |packet_list| is deleted, so that the test itself does not
// leak memory, and the packet buffer's success code is returned.
int DeletePacketsAndReturnOk(PacketList* packet_list) {
  const int kResult = PacketBuffer::kOK;
  PacketBuffer::DeleteAllPackets(packet_list);
  return kResult;
}
// Test fixture that builds a NetEqImpl from mock sub-components, so that each
// test can set expectations on the calls NetEqImpl makes to them. The only
// non-mock component is the TimestampScaler, which is constructed on top of
// the mock decoder database.
class NetEqImplTest : public ::testing::Test {
protected:
// Sample rate handed to both the DTMF buffer mock and NetEqImpl.
static const int kInitSampleRateHz = 8000;
// Creates all components and the NetEqImpl under test. Expectations that must
// hold during construction are registered before the objects that are
// expected to trigger them are created.
NetEqImplTest() {
buffer_level_filter_ = new MockBufferLevelFilter;
decoder_database_ = new MockDecoderDatabase;
delay_peak_detector_ = new MockDelayPeakDetector;
// Registered before the delay manager is created.
// NOTE(review): presumably the delay manager resets the peak detector on
// construction -- confirm against MockDelayManager/DelayManager.
EXPECT_CALL(*delay_peak_detector_, Reset()).Times(1);
delay_manager_ = new MockDelayManager(NetEq::kMaxNumPacketsInBuffer,
delay_peak_detector_);
dtmf_buffer_ = new MockDtmfBuffer(kInitSampleRateHz);
dtmf_tone_generator_ = new MockDtmfToneGenerator;
packet_buffer_ = new MockPacketBuffer(NetEq::kMaxNumPacketsInBuffer,
NetEq::kMaxBytesInBuffer);
payload_splitter_ = new MockPayloadSplitter;
timestamp_scaler_ = new TimestampScaler(*decoder_database_);
// Registered before NetEqImpl is created; no CNG decoder is active.
EXPECT_CALL(*decoder_database_, GetActiveCngDecoder())
.WillOnce(ReturnNull());
neteq_ = new NetEqImpl(kInitSampleRateHz,
buffer_level_filter_,
decoder_database_,
delay_manager_,
delay_peak_detector_,
dtmf_buffer_,
dtmf_tone_generator_,
packet_buffer_,
payload_splitter_,
timestamp_scaler_);
}
// Expects each listed mock to be destroyed exactly once and then deletes
// |neteq_|. The fixture never deletes the components itself.
// NOTE(review): this implies that NetEqImpl owns the components passed to its
// constructor -- confirm against NetEqImpl. No Die() expectation is set for
// |payload_splitter_|; verify whether MockPayloadSplitter provides one.
virtual ~NetEqImplTest() {
EXPECT_CALL(*buffer_level_filter_, Die()).Times(1);
EXPECT_CALL(*decoder_database_, Die()).Times(1);
EXPECT_CALL(*delay_manager_, Die()).Times(1);
EXPECT_CALL(*delay_peak_detector_, Die()).Times(1);
EXPECT_CALL(*dtmf_buffer_, Die()).Times(1);
EXPECT_CALL(*dtmf_tone_generator_, Die()).Times(1);
EXPECT_CALL(*packet_buffer_, Die()).Times(1);
delete neteq_;
}
// Object under test.
NetEqImpl* neteq_;
// Components handed to |neteq_|; kept as raw pointers so that tests can set
// expectations on them.
MockBufferLevelFilter* buffer_level_filter_;
MockDecoderDatabase* decoder_database_;
MockDelayPeakDetector* delay_peak_detector_;
MockDelayManager* delay_manager_;
MockDtmfBuffer* dtmf_buffer_;
MockDtmfToneGenerator* dtmf_tone_generator_;
MockPacketBuffer* packet_buffer_;
MockPayloadSplitter* payload_splitter_;
TimestampScaler* timestamp_scaler_;
};
// Exercises the interface class NetEq (not NetEqImpl): an instance obtained
// from the factory is deleted right away, without any other call being made.
// TODO(hlundin): Move to separate file?
TEST(NetEq, CreateAndDestroy) {
  delete NetEq::Create(8000);
}
// Registering a payload type must be forwarded to the decoder database. Note
// the argument order: the database is called with the payload type first,
// whereas NetEqImpl is called with the codec type first.
TEST_F(NetEqImplTest, RegisterPayloadType) {
  const uint8_t kRtpPayloadType = 0;
  const NetEqDecoder kCodecType = kDecoderPCMu;
  EXPECT_CALL(*decoder_database_,
              RegisterPayload(kRtpPayloadType, kCodecType));
  neteq_->RegisterPayloadType(kCodecType, kRtpPayloadType);
}
// Removing a payload type that the decoder database does not know about must
// be reported as a failure.
TEST_F(NetEqImplTest, RemovePayloadType) {
  const uint8_t kRtpPayloadType = 0;
  // Let the database answer kDecoderNotFound for this payload type...
  EXPECT_CALL(*decoder_database_, Remove(kRtpPayloadType))
      .WillOnce(Return(DecoderDatabase::kDecoderNotFound));
  // ...and check that NetEqImpl turns that answer into kFail.
  EXPECT_EQ(NetEq::kFail, neteq_->RemovePayloadType(kRtpPayloadType));
}
// Inserts two consecutive packets with the same payload type and verifies the
// calls that NetEqImpl makes to its sub-components for each of them. The
// second packet has sequence number +1, timestamp +160 and receive time +155
// relative to the first one.
TEST_F(NetEqImplTest, InsertPacket) {
const int kPayloadLength = 100;
const uint8_t kPayloadType = 0;
const uint16_t kFirstSequenceNumber = 0x1234;
const uint32_t kFirstTimestamp = 0x12345678;
const uint32_t kSsrc = 0x87654321;
const uint32_t kFirstReceiveTime = 17;
uint8_t payload[kPayloadLength] = {0};
// NOTE(review): only the four header fields below are assigned; any other
// field of |rtp_header| is left uninitialized. Confirm that InsertPacket()
// does not read them.
WebRtcRTPHeader rtp_header;
rtp_header.header.payloadType = kPayloadType;
rtp_header.header.sequenceNumber = kFirstSequenceNumber;
rtp_header.header.timestamp = kFirstTimestamp;
rtp_header.header.ssrc = kSsrc;
// Create a mock decoder object.
MockAudioDecoder mock_decoder;
// BWE update function called with first packet.
EXPECT_CALL(mock_decoder, IncomingPacket(_,
kPayloadLength,
kFirstSequenceNumber,
kFirstTimestamp,
kFirstReceiveTime));
// BWE update function called with second packet.
EXPECT_CALL(mock_decoder, IncomingPacket(_,
kPayloadLength,
kFirstSequenceNumber + 1,
kFirstTimestamp + 160,
kFirstReceiveTime + 155));
EXPECT_CALL(mock_decoder, Die()).Times(1); // Called when deleted.
// Expectations for decoder database.
EXPECT_CALL(*decoder_database_, IsRed(kPayloadType))
.WillRepeatedly(Return(false)); // This is not RED.
EXPECT_CALL(*decoder_database_, CheckPayloadTypes(_))
.Times(2)
.WillRepeatedly(Return(DecoderDatabase::kOK)); // Payload type is valid.
EXPECT_CALL(*decoder_database_, IsDtmf(kPayloadType))
.WillRepeatedly(Return(false)); // This is not DTMF.
EXPECT_CALL(*decoder_database_, GetDecoder(kPayloadType))
.Times(2)
.WillRepeatedly(Return(&mock_decoder));
EXPECT_CALL(*decoder_database_, IsComfortNoise(kPayloadType))
.WillRepeatedly(Return(false)); // This is not CNG.
// Decoder info returned by the database for |kPayloadType|; only the codec
// type is filled in.
DecoderDatabase::DecoderInfo info;
info.codec_type = kDecoderPCMu;
EXPECT_CALL(*decoder_database_, GetDecoderInfo(kPayloadType))
.WillRepeatedly(Return(&info));
// Expectations for packet buffer.
EXPECT_CALL(*packet_buffer_, NumPacketsInBuffer())
.WillOnce(Return(0)) // First packet.
.WillOnce(Return(1)) // Second packet.
.WillOnce(Return(2)); // Second packet, checking after it was inserted.
// The packet buffer is expected to be flushed exactly once over the two
// insertions.
EXPECT_CALL(*packet_buffer_, Flush())
.Times(1);
EXPECT_CALL(*packet_buffer_, InsertPacketList(_, _, _, _))
.Times(2)
.WillRepeatedly(DoAll(SetArgPointee<2>(kPayloadType),
WithArg<0>(Invoke(DeletePacketsAndReturnOk))));
// SetArgPointee<2>(kPayloadType) means that the third argument (zero-based
// index) is a pointer, and the variable pointed to is set to kPayloadType.
// Also invoke the function DeletePacketsAndReturnOk to properly delete all
// packets in the list (to avoid memory leaks in the test).
// Expectations for DTMF buffer.
EXPECT_CALL(*dtmf_buffer_, Flush())
.Times(1);
// Expectations for delay manager.
{
// All expectations within this block must be called in this specific order.
InSequence sequence; // Dummy variable.
// Expectations when the first packet is inserted.
EXPECT_CALL(*delay_manager_, LastDecoderType(kDecoderPCMu))
.Times(1);
EXPECT_CALL(*delay_manager_, last_pack_cng_or_dtmf())
.Times(2)
.WillRepeatedly(Return(-1));
EXPECT_CALL(*delay_manager_, set_last_pack_cng_or_dtmf(0))
.Times(1);
EXPECT_CALL(*delay_manager_, ResetPacketIatCount()).Times(1);
// Expectations when the second packet is inserted. Slightly different.
EXPECT_CALL(*delay_manager_, LastDecoderType(kDecoderPCMu))
.Times(1);
EXPECT_CALL(*delay_manager_, last_pack_cng_or_dtmf())
.WillOnce(Return(0));
}
// Expectations for payload splitter.
EXPECT_CALL(*payload_splitter_, SplitAudio(_, _))
.Times(2)
.WillRepeatedly(Return(PayloadSplitter::kOK));
// Insert first packet.
neteq_->InsertPacket(rtp_header, payload, kPayloadLength, kFirstReceiveTime);
// Insert second packet.
rtp_header.header.timestamp += 160;
rtp_header.header.sequenceNumber += 1;
neteq_->InsertPacket(rtp_header, payload, kPayloadLength,
kFirstReceiveTime + 155);
}
} // namespace webrtc

View File

@ -0,0 +1,417 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Test to verify correct stereo and multi-channel operation.
#include <string>
#include <list>
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h"
#include "webrtc/modules/audio_coding/neteq4/tools/rtp_generator.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
// One parameter set for the parameterized multi-channel tests.
struct TestParameters {
int frame_size;  // Frame (packet) length in milliseconds.
int sample_rate;  // Sample rate in Hz.
int num_channels;  // Number of channels in the multi-channel stream.
};
// This is a parameterized test. The test parameters are supplied through a
// TestParameters struct, which is obtained through the GetParam() method.
//
// The objective of the test is to create a mono input signal and a
// multi-channel input signal, where each channel is identical to the mono
// input channel. The two input signals are processed through their respective
// NetEq instances. After that, the output signals are compared. The expected
// result is that each channel in the multi-channel output is identical to the
// mono output.
// Fixture that feeds the same audio to a mono NetEq instance and to a
// multi-channel NetEq instance and compares their outputs sample by sample.
// Subclasses shape the network behavior by overriding GetArrivalTime() and
// Lost().
class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
protected:
// Simulated time advanced per lap in RunTest().
static const int kTimeStepMs = 10;
static const int kMaxBlockSize = 480; // 10 ms @ 48 kHz.
// RTP payload types under which the mono and multi-channel decoders are
// registered.
static const uint8_t kPayloadTypeMono = 95;
static const uint8_t kPayloadTypeMulti = 96;
// Derives all sizes from the test parameters, creates the two NetEq instances
// and allocates the working buffers. Encoded buffers hold two bytes per
// sample.
NetEqStereoTest()
: num_channels_(GetParam().num_channels),
sample_rate_hz_(GetParam().sample_rate),
samples_per_ms_(sample_rate_hz_ / 1000),
frame_size_ms_(GetParam().frame_size),
frame_size_samples_(frame_size_ms_ * samples_per_ms_),
output_size_samples_(10 * samples_per_ms_),
neteq_mono_(NetEq::Create(sample_rate_hz_)),
neteq_(NetEq::Create(sample_rate_hz_)),
rtp_generator_mono_(samples_per_ms_),
rtp_generator_(samples_per_ms_),
payload_size_bytes_(0),
multi_payload_size_bytes_(0),
last_send_time_(0),
last_arrival_time_(0) {
input_ = new int16_t[frame_size_samples_];
encoded_ = new uint8_t[2 * frame_size_samples_];
input_multi_channel_ = new int16_t[frame_size_samples_ * num_channels_];
encoded_multi_channel_ = new uint8_t[frame_size_samples_ * 2 *
num_channels_];
output_multi_channel_ = new int16_t[kMaxBlockSize * num_channels_];
}
// Releases the NetEq instances and the buffers allocated in the constructor.
~NetEqStereoTest() {
delete neteq_mono_;
delete neteq_;
delete [] input_;
delete [] encoded_;
delete [] input_multi_channel_;
delete [] encoded_multi_channel_;
delete [] output_multi_channel_;
}
// Opens the input audio file and registers one PCM16B decoder in each NetEq
// instance, selected from the sample rate and the number of channels. The
// same input file is opened regardless of the sample rate. Unsupported
// parameter combinations fail the test.
virtual void SetUp() {
const std::string file_name =
webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
input_file_.reset(new test::InputAudioFile(file_name));
NetEqDecoder mono_decoder;
NetEqDecoder multi_decoder;
switch (sample_rate_hz_) {
case 8000:
mono_decoder = kDecoderPCM16B;
if (num_channels_ == 2) {
multi_decoder = kDecoderPCM16B_2ch;
} else if (num_channels_ == 5) {
multi_decoder = kDecoderPCM16B_5ch;
} else {
FAIL() << "Only 2 and 5 channels supported for 8000 Hz.";
}
break;
case 16000:
mono_decoder = kDecoderPCM16Bwb;
if (num_channels_ == 2) {
multi_decoder = kDecoderPCM16Bwb_2ch;
} else {
FAIL() << "More than 2 channels is not supported for 16000 Hz.";
}
break;
case 32000:
mono_decoder = kDecoderPCM16Bswb32kHz;
if (num_channels_ == 2) {
multi_decoder = kDecoderPCM16Bswb32kHz_2ch;
} else {
FAIL() << "More than 2 channels is not supported for 32000 Hz.";
}
break;
case 48000:
mono_decoder = kDecoderPCM16Bswb48kHz;
if (num_channels_ == 2) {
multi_decoder = kDecoderPCM16Bswb48kHz_2ch;
} else {
FAIL() << "More than 2 channels is not supported for 48000 Hz.";
}
break;
default:
FAIL() << "We shouldn't get here.";
}
ASSERT_EQ(NetEq::kOK,
neteq_mono_->RegisterPayloadType(mono_decoder,
kPayloadTypeMono));
ASSERT_EQ(NetEq::kOK,
neteq_->RegisterPayloadType(multi_decoder,
kPayloadTypeMulti));
}
virtual void TearDown() {}
// Reads one frame from the input file and produces one mono packet and one
// multi-channel packet from it (payloads and RTP headers are stored in the
// member variables). Returns the value obtained from the mono RTP generator,
// which RunTest() uses as the send time, or -1 if reading or encoding failed.
int GetNewPackets() {
if (!input_file_->Read(frame_size_samples_, input_)) {
return -1;
}
payload_size_bytes_ = WebRtcPcm16b_Encode(input_, frame_size_samples_,
encoded_);
// The encoder is expected to produce exactly two bytes per sample.
if (frame_size_samples_ * 2 != payload_size_bytes_) {
return -1;
}
int next_send_time = rtp_generator_mono_.GetRtpHeader(kPayloadTypeMono,
frame_size_samples_,
&rtp_header_mono_);
// Build the multi-channel input from the mono frame.
test::InputAudioFile::DuplicateInterleaved(input_, frame_size_samples_,
num_channels_,
input_multi_channel_);
multi_payload_size_bytes_ = WebRtcPcm16b_Encode(
input_multi_channel_, frame_size_samples_ * num_channels_,
encoded_multi_channel_);
if (frame_size_samples_ * 2 * num_channels_ != multi_payload_size_bytes_) {
return -1;
}
// The return value of the multi-channel generator is ignored; only the mono
// generator's value is returned to the caller.
rtp_generator_.GetRtpHeader(kPayloadTypeMulti, frame_size_samples_,
&rtp_header_);
return next_send_time;
}
// Asserts that, for the first |num_samples| samples, every channel of the
// interleaved multi-channel output equals the mono output.
void VerifyOutput(size_t num_samples) {
for (size_t i = 0; i < num_samples; ++i) {
for (int j = 0; j < num_channels_; ++j) {
ASSERT_EQ(output_[i], output_multi_channel_[i * num_channels_ + j]) <<
"Diff in sample " << i << ", channel " << j << ".";
}
}
}
// Maps a packet's send time to its arrival time. The default keeps the
// spacing between arrivals equal to the spacing between sends.
virtual int GetArrivalTime(int send_time) {
int arrival_time = last_arrival_time_ + (send_time - last_send_time_);
last_send_time_ = send_time;
last_arrival_time_ = arrival_time;
return arrival_time;
}
// Returns true if the packet most recently produced should be dropped. The
// default never drops packets.
virtual bool Lost() { return false; }
// Runs |num_loops| laps. In each lap, all packets that are due at the current
// simulated time are inserted into both NetEq instances, audio is pulled from
// both, and the outputs are compared. Time then advances by kTimeStepMs.
void RunTest(int num_loops) {
// Get next input packets (mono and multi-channel).
int next_send_time;
int next_arrival_time;
do {
next_send_time = GetNewPackets();
ASSERT_NE(-1, next_send_time);
next_arrival_time = GetArrivalTime(next_send_time);
} while (Lost()); // If lost, immediately read the next packet.
int time_now = 0;
for (int k = 0; k < num_loops; ++k) {
while (time_now >= next_arrival_time) {
// Insert packet in mono instance.
ASSERT_EQ(NetEq::kOK,
neteq_mono_->InsertPacket(rtp_header_mono_, encoded_,
payload_size_bytes_,
next_arrival_time));
// Insert packet in multi-channel instance.
ASSERT_EQ(NetEq::kOK,
neteq_->InsertPacket(rtp_header_, encoded_multi_channel_,
multi_payload_size_bytes_,
next_arrival_time));
// Get next input packets (mono and multi-channel).
do {
next_send_time = GetNewPackets();
ASSERT_NE(-1, next_send_time);
next_arrival_time = GetArrivalTime(next_send_time);
} while (Lost()); // If lost, immediately read the next packet.
}
NetEqOutputType output_type;
// Get audio from mono instance.
int samples_per_channel;
int num_channels;
EXPECT_EQ(NetEq::kOK,
neteq_mono_->GetAudio(kMaxBlockSize, output_,
&samples_per_channel, &num_channels,
&output_type));
EXPECT_EQ(1, num_channels);
EXPECT_EQ(output_size_samples_, samples_per_channel);
// Get audio from multi-channel instance.
ASSERT_EQ(NetEq::kOK,
neteq_->GetAudio(kMaxBlockSize * num_channels_,
output_multi_channel_,
&samples_per_channel, &num_channels,
&output_type));
EXPECT_EQ(num_channels_, num_channels);
EXPECT_EQ(output_size_samples_, samples_per_channel);
// NOTE(review): std::ostringstream is used here, but <sstream> is not among
// the headers this file includes directly -- confirm it is pulled in
// transitively on all platforms.
std::ostringstream ss;
ss << "Lap number " << k << ".";
SCOPED_TRACE(ss.str()); // Print out the parameter values on failure.
// Compare mono and multi-channel.
ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_));
time_now += kTimeStepMs;
}
}
// Values derived from the test parameters (see the constructor).
const int num_channels_;
const int sample_rate_hz_;
const int samples_per_ms_;
const int frame_size_ms_;
const int frame_size_samples_;
const int output_size_samples_;
// NetEq instances under test; deleted in the destructor.
NetEq* neteq_mono_;
NetEq* neteq_;
test::RtpGenerator rtp_generator_mono_;
test::RtpGenerator rtp_generator_;
// Working buffers; the pointer members are allocated in the constructor.
int16_t* input_;
int16_t* input_multi_channel_;
uint8_t* encoded_;
uint8_t* encoded_multi_channel_;
int16_t output_[kMaxBlockSize];
int16_t* output_multi_channel_;
// Headers and payload sizes of the packets produced by the latest call to
// GetNewPackets().
WebRtcRTPHeader rtp_header_mono_;
WebRtcRTPHeader rtp_header_;
int payload_size_bytes_;
int multi_payload_size_bytes_;
// State for GetArrivalTime(): send and arrival time of the previous packet.
int last_send_time_;
int last_arrival_time_;
scoped_ptr<test::InputAudioFile> input_file_;
};
// Variant that keeps the base-class arrival model, i.e. packets arrive with
// the same spacing as they were sent and nothing is lost.
class NetEqStereoTestNoJitter : public NetEqStereoTest {
 protected:
  NetEqStereoTestNoJitter() {
    // Give the sender a 100 ms head start over the receiver so that the
    // buffer is pre-filled. This is to avoid doing preemptive expand early in
    // the test.
    // TODO(hlundin): Mock the decision making instead to control the modes.
    last_arrival_time_ = -100;
  }
};

// Runs eight laps for every parameter set.
TEST_P(NetEqStereoTestNoJitter, RunTest) {
  RunTest(8);
}
// Variant in which the interval between arrivals is the interval between
// sends scaled by |drift_factor| (0.9 here, so packets arrive more closely
// spaced than they were sent).
class NetEqStereoTestPositiveDrift : public NetEqStereoTest {
 protected:
  NetEqStereoTestPositiveDrift()
      : drift_factor(0.9) {
    // Give the sender a 100 ms head start over the receiver so that the
    // buffer is pre-filled. This is to avoid doing preemptive expand early in
    // the test.
    // TODO(hlundin): Mock the decision making instead to control the modes.
    last_arrival_time_ = -100;
  }

  // Scales the send interval by |drift_factor| and adds it to the previous
  // arrival time. The sum is computed in floating point and truncated when
  // converted back to int.
  virtual int GetArrivalTime(int send_time) {
    const int send_interval = send_time - last_send_time_;
    const int arrival_time =
        static_cast<int>(last_arrival_time_ + drift_factor * send_interval);
    last_arrival_time_ = arrival_time;
    last_send_time_ = send_time;
    return arrival_time;
  }

  // Ratio between arrival interval and send interval. Not const, because
  // subclasses assign a different value.
  double drift_factor;
};

// Runs 100 laps for every parameter set.
TEST_P(NetEqStereoTestPositiveDrift, RunTest) {
  RunTest(100);
}
// Same arrival model as the positive-drift variant, but with a drift factor
// above one, so that packets arrive more widely spaced than they were sent.
class NetEqStereoTestNegativeDrift : public NetEqStereoTestPositiveDrift {
 protected:
  NetEqStereoTestNegativeDrift() {
    // Replace the values set up by the parent constructor: no head start for
    // the sender, and a drift factor of 1.1.
    last_arrival_time_ = 0;
    drift_factor = 1.1;
  }
};

// Runs 100 laps for every parameter set.
TEST_P(NetEqStereoTestNegativeDrift, RunTest) {
  RunTest(100);
}
// Variant in which every kDelayInterval-th packet gets an extra kDelay added
// to its arrival time.
class NetEqStereoTestDelays : public NetEqStereoTest {
 protected:
  static const int kDelayInterval = 10;
  static const int kDelay = 1000;

  NetEqStereoTestDelays() : frame_index_(0) {}

  virtual int GetArrivalTime(int send_time) {
    // Deliver immediately, unless we have a back-log.
    // NOTE(review): taking the *minimum* of the previous arrival time and the
    // send time means the base arrival time never exceeds the send time, which
    // does not obviously match "unless we have a back-log" (that wording
    // suggests std::max). Confirm the intent before changing; it alters the
    // arrival pattern of the whole test.
    int arrival_time = std::min(last_arrival_time_, send_time);
    ++frame_index_;
    const bool delay_this_packet = (frame_index_ % kDelayInterval == 0);
    if (delay_this_packet) {
      arrival_time += kDelay;
    }
    last_arrival_time_ = arrival_time;
    last_send_time_ = send_time;
    return arrival_time;
  }

  // Number of packets for which an arrival time has been computed.
  int frame_index_;
};

// Runs 1000 laps for every parameter set.
TEST_P(NetEqStereoTestDelays, RunTest) {
  RunTest(1000);
}
// Variant in which every kLossInterval-th packet is reported as lost.
class NetEqStereoTestLosses : public NetEqStereoTest {
 protected:
  static const int kLossInterval = 10;

  NetEqStereoTestLosses() : frame_index_(0) {}

  // Counts the call and reports a loss on every kLossInterval-th call.
  virtual bool Lost() {
    ++frame_index_;
    return frame_index_ % kLossInterval == 0;
  }

  // Number of times Lost() has been called.
  int frame_index_;
};

// Runs 100 laps for every parameter set.
TEST_P(NetEqStereoTestLosses, RunTest) {
  RunTest(100);
}
// Creates a list of parameter sets.
std::list<TestParameters> GetTestParameters() {
std::list<TestParameters> l;
const int sample_rates[] = {8000, 16000, 32000};
const int num_rates = sizeof(sample_rates) / sizeof(sample_rates[0]);
// Loop through sample rates.
for (int rate_index = 0; rate_index < num_rates; ++rate_index) {
int sample_rate = sample_rates[rate_index];
// Loop through all frame sizes between 10 and 60 ms.
for (int frame_size = 10; frame_size <= 60; frame_size += 10) {
TestParameters p;
p.frame_size = frame_size;
p.sample_rate = sample_rate;
p.num_channels = 2;
l.push_back(p);
if (sample_rate == 8000) {
// Add a five-channel test for 8000 Hz.
p.num_channels = 5;
l.push_back(p);
}
}
}
return l;
}
// Writes a readable representation of |p| to |os|, so that a failing
// parameterized test shows the parameter values.
void PrintTo(const TestParameters& p, ::std::ostream* os) {
  ::std::ostream& out = *os;
  out << "{frame_size = " << p.frame_size;
  out << ", num_channels = " << p.num_channels;
  out << ", sample_rate = " << p.sample_rate << "}";
}
// Instantiate the tests. Each test is instantiated using the function above,
// so that all different parameter combinations are tested.
// All five fixtures share the instantiation prefix "MultiChannel" and the
// same parameter list.
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestNoJitter,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestPositiveDrift,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestNegativeDrift,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestDelays,
::testing::ValuesIn(GetTestParameters()));
INSTANTIATE_TEST_CASE_P(MultiChannel,
NetEqStereoTestLosses,
::testing::ValuesIn(GetTestParameters()));
} // namespace webrtc

View File

@ -0,0 +1,179 @@
# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
# Executable built from tools/neteq_rtpplay.cc. Depends on the NetEq4
# target, the NetEq4 test tools, the shared test main and google-gflags.
{
'target_name': 'neteq_rtpplay',
'type': 'executable',
'dependencies': [
'NetEq4',
'NetEq4TestTools',
'<(webrtc_root)/test/test.gyp:test_support_main',
'<(DEPTH)/third_party/google-gflags/google-gflags.gyp:google-gflags',
],
'sources': [
'tools/neteq_rtpplay.cc',
],
# No target-specific preprocessor defines.
'defines': [
],
}, # neteq_rtpplay
# Executable built from test/RTPencode.cc. It depends directly on the codec
# targets listed below, and the CODEC_* defines are passed to the compiler
# for this target.
{
'target_name': 'RTPencode',
'type': 'executable',
'dependencies': [
# TODO(hlundin): Make RTPencode use ACM to encode files.
'NetEq4TestTools',  # Test helpers
'G711',
'G722',
'PCM16B',
'iLBC',
'iSAC',
'CNG',
'<(webrtc_root)/common_audio/common_audio.gyp:vad',
],
# NOTE(review): presumably each define enables the corresponding codec in
# RTPencode.cc -- confirm against that source file.
'defines': [
'CODEC_ILBC',
'CODEC_PCM16B',
'CODEC_G711',
'CODEC_G722',
'CODEC_ISAC',
'CODEC_PCM16B_WB',
'CODEC_ISAC_SWB',
'CODEC_PCM16B_32KHZ',
'CODEC_CNGCODEC8',
'CODEC_CNGCODEC16',
'CODEC_CNGCODEC32',
'CODEC_ATEVENT_DECODE',
'CODEC_RED',
],
'include_dirs': [
'interface',
'test',
],
'sources': [
'test/RTPencode.cc',
],
},
# Small stand-alone executables, each built from a single source file in
# test/. All except RTPjitter depend on NetEq4TestTools; all except
# rtp_to_text depend on gtest.
# Built from test/RTPjitter.cc.
{
'target_name': 'RTPjitter',
'type': 'executable',
'dependencies': [
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPjitter.cc',
],
},
# Built from test/RTPanalyze.cc.
{
'target_name': 'RTPanalyze',
'type': 'executable',
'dependencies': [
'NetEq4TestTools',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPanalyze.cc',
],
},
# Built from test/RTPchange.cc.
{
'target_name': 'RTPchange',
'type': 'executable',
'dependencies': [
'NetEq4TestTools',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPchange.cc',
],
},
# Built from test/RTPtimeshift.cc.
{
'target_name': 'RTPtimeshift',
'type': 'executable',
'dependencies': [
'NetEq4TestTools',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPtimeshift.cc',
],
},
# Built from test/RTPcat.cc.
{
'target_name': 'RTPcat',
'type': 'executable',
'dependencies': [
'NetEq4TestTools',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
'sources': [
'test/RTPcat.cc',
],
},
# Built from test/rtp_to_text.cc; depends on system_wrappers instead of
# gtest.
{
'target_name': 'rtp_to_text',
'type': 'executable',
'dependencies': [
'NetEq4TestTools',
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
],
'sources': [
'test/rtp_to_text.cc',
],
},
# Static library with the RTP packet helper classes that the tool targets in
# this file depend on.
{
'target_name': 'NetEq4TestTools',
# Collection of useful functions used in other tests.
'type': 'static_library',
'variables': {
# Expects RTP packets without payloads when enabled.
# NOTE(review): the variable is given a default here, but nothing in this
# target refers to it -- confirm where it is consumed.
'neteq_dummy_rtp%': 0,
},
'dependencies': [
'G711',
'G722',
'PCM16B',
'iLBC',
'iSAC',
'CNG',
'<(DEPTH)/testing/gtest.gyp:gtest',
],
# Include directories exported to targets that depend directly on this one.
'direct_dependent_settings': {
'include_dirs': [
'interface',
'test',
],
},
# No target-specific preprocessor defines.
'defines': [
],
# Include directories used when building this target itself.
'include_dirs': [
'interface',
'test',
],
'sources': [
'test/NETEQTEST_DummyRTPpacket.cc',
'test/NETEQTEST_DummyRTPpacket.h',
'test/NETEQTEST_RTPpacket.cc',
'test/NETEQTEST_RTPpacket.h',
],
},
], # targets
}
# Local Variables:
# tab-width:2
# indent-tabs-mode:nil
# End:
# vim: set expandtab tabstop=2 shiftwidth=2:

View File

@ -0,0 +1,694 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file includes unit tests for NetEQ.
*/
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include <stdlib.h>
#include <string.h> // memset
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h"
#include "webrtc/test/testsupport/fileutils.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Helper for comparing test results against reference data stored in a file,
// and/or for writing the results to a new file.
//
// The constructor opens |input_file| for reading and |output_file| for
// writing; an empty file name means that the corresponding file is not used.
// The files are closed by the destructor, which also expects the input file
// to have been read to its end.
//
// The object owns the two FILE handles, so it must not be copied: a copy
// would close the same handles a second time. Copying is therefore disabled.
class RefFiles {
 public:
  RefFiles(const std::string& input_file, const std::string& output_file);
  ~RefFiles();

  // Writes |test_results| to the output file and then compares it with the
  // next record of the input file. Each step is skipped if the corresponding
  // file is not open.
  template<class T> void ProcessReference(const T& test_results);
  // Same as above, for the first |length| elements of an array.
  template<typename T, size_t n> void ProcessReference(
      const T (&test_results)[n],
      size_t length);

  // Writes the first |length| elements of |test_results| to the output file.
  template<typename T, size_t n> void WriteToFile(
      const T (&test_results)[n],
      size_t length);
  // Reads |length| elements from the input file and compares them with the
  // first |length| elements of |test_results|.
  template<typename T, size_t n> void ReadFromFileAndCompare(
      const T (&test_results)[n],
      size_t length);

  // Overloads for network statistics.
  void WriteToFile(const NetEqNetworkStatistics& stats);
  void ReadFromFileAndCompare(const NetEqNetworkStatistics& stats);

  // Overloads for RTCP statistics.
  void WriteToFile(const RtcpStatistics& stats);
  void ReadFromFileAndCompare(const RtcpStatistics& stats);

  FILE* input_fp_;   // Reference file to compare with; NULL if not used.
  FILE* output_fp_;  // File to write results to; NULL if not used.

 private:
  // Declared but not defined: copying is not allowed (see class comment).
  RefFiles(const RefFiles&);
  void operator=(const RefFiles&);
};
// Opens the reference file |input_file| for binary reading and |output_file|
// for binary writing. A file whose name is empty is not opened, and its
// handle stays NULL. Failing to open a requested file is a test failure.
RefFiles::RefFiles(const std::string &input_file,
                   const std::string &output_file)
    : input_fp_(NULL),
      output_fp_(NULL) {
  const bool use_input = !input_file.empty();
  if (use_input) {
    input_fp_ = fopen(input_file.c_str(), "rb");
    EXPECT_TRUE(input_fp_ != NULL);
  }

  const bool use_output = !output_file.empty();
  if (use_output) {
    output_fp_ = fopen(output_file.c_str(), "wb");
    EXPECT_TRUE(output_fp_ != NULL);
  }
}
// Closes whichever files are open. For the input file, it is first verified
// that no unread data remains, i.e. that the test consumed the whole
// reference file.
RefFiles::~RefFiles() {
  if (input_fp_ != NULL) {
    EXPECT_EQ(EOF, fgetc(input_fp_));  // Make sure that we reached the end.
    fclose(input_fp_);
  }
  if (output_fp_ != NULL) {
    fclose(output_fp_);
  }
}
// Writes |test_results| to the output file and then compares it with the
// input (reference) file, by forwarding to the WriteToFile() and
// ReadFromFileAndCompare() overloads for type T.
template<class T>
void RefFiles::ProcessReference(const T& test_results) {
WriteToFile(test_results);
ReadFromFileAndCompare(test_results);
}
// Array version of the above: processes the first |length| elements of
// |test_results|.
template<typename T, size_t n>
void RefFiles::ProcessReference(const T (&test_results)[n], size_t length) {
WriteToFile(test_results, length);
ReadFromFileAndCompare(test_results, length);
}
template<typename T, size_t n>
void RefFiles::WriteToFile(const T (&test_results)[n], size_t length) {
if (output_fp_) {
ASSERT_EQ(length, fwrite(&test_results, sizeof(T), length, output_fp_));
}
}
template<typename T, size_t n>
void RefFiles::ReadFromFileAndCompare(const T (&test_results)[n],
size_t length) {
if (input_fp_) {
// Read from ref file.
T* ref = new T[length];
ASSERT_EQ(length, fread(ref, sizeof(T), length, input_fp_));
// Compare
ASSERT_EQ(0, memcmp(&test_results, ref, sizeof(T) * length));
delete [] ref;
}
}
// Writes the raw bytes of |stats| to the output file as a single record.
// Does nothing if no output file is open.
void RefFiles::WriteToFile(const NetEqNetworkStatistics& stats) {
  if (output_fp_ == NULL) {
    return;
  }
  ASSERT_EQ(1u, fwrite(&stats, sizeof(stats), 1, output_fp_));
}
// Reads one NetEqNetworkStatistics record from the input (reference) file and
// compares it byte by byte with |stats|. Does nothing if no input file is
// open. A short read is a fatal failure; a mismatch is a non-fatal one.
// NOTE(review): the comparison covers every byte of the struct, including any
// padding bytes -- confirm that the struct has no padding.
void RefFiles::ReadFromFileAndCompare(
    const NetEqNetworkStatistics& stats) {
  if (input_fp_ == NULL) {
    return;
  }
  // Read from ref file.
  NetEqNetworkStatistics ref_stats;
  const size_t record_size = sizeof(ref_stats);
  ASSERT_EQ(1u, fread(&ref_stats, record_size, 1, input_fp_));
  // Compare
  EXPECT_EQ(0, memcmp(&stats, &ref_stats, record_size));
}
// Writes the fields of |stats| to the output file, one at a time, in the
// order fraction_lost, cumulative_lost, extended_max, jitter. Does nothing if
// no output file is open.
void RefFiles::WriteToFile(const RtcpStatistics& stats) {
  if (output_fp_ == NULL) {
    return;
  }
  ASSERT_EQ(1u, fwrite(&stats.fraction_lost, sizeof(stats.fraction_lost), 1,
                       output_fp_));
  ASSERT_EQ(1u, fwrite(&stats.cumulative_lost, sizeof(stats.cumulative_lost),
                       1, output_fp_));
  ASSERT_EQ(1u, fwrite(&stats.extended_max, sizeof(stats.extended_max), 1,
                       output_fp_));
  ASSERT_EQ(1u, fwrite(&stats.jitter, sizeof(stats.jitter), 1, output_fp_));
}
// Reads the fields fraction_lost, cumulative_lost, extended_max and jitter (in
// that order) from the input (reference) file and compares each of them with
// the corresponding field of |stats|. Does nothing if no input file is open.
// A short read is a fatal failure; a mismatching field is a non-fatal one.
void RefFiles::ReadFromFileAndCompare(
    const RtcpStatistics& stats) {
  if (input_fp_ == NULL) {
    return;
  }
  // Read from ref file.
  RtcpStatistics ref_stats;
  ASSERT_EQ(1u, fread(&ref_stats.fraction_lost,
                      sizeof(ref_stats.fraction_lost), 1, input_fp_));
  ASSERT_EQ(1u, fread(&ref_stats.cumulative_lost,
                      sizeof(ref_stats.cumulative_lost), 1, input_fp_));
  ASSERT_EQ(1u, fread(&ref_stats.extended_max,
                      sizeof(ref_stats.extended_max), 1, input_fp_));
  ASSERT_EQ(1u, fread(&ref_stats.jitter, sizeof(ref_stats.jitter), 1,
                      input_fp_));
  // Compare
  EXPECT_EQ(ref_stats.fraction_lost, stats.fraction_lost);
  EXPECT_EQ(ref_stats.cumulative_lost, stats.cumulative_lost);
  EXPECT_EQ(ref_stats.extended_max, stats.extended_max);
  EXPECT_EQ(ref_stats.jitter, stats.jitter);
}
// Fixture for tests that feed RTP packets read from a file into a NetEq
// instance and pull decoded audio from it in steps of kTimeStepMs.
class NetEqDecodingTest : public ::testing::Test {
protected:
// NetEQ must be polled for data once every 10 ms. Thus, neither of the
// constants below can be changed.
static const int kTimeStepMs = 10;
// Number of samples in one time step at 8, 16 and 32 kHz, respectively.
static const int kBlockSize8kHz = kTimeStepMs * 8;
static const int kBlockSize16kHz = kTimeStepMs * 16;
static const int kBlockSize32kHz = kTimeStepMs * 32;
// Size of |out_data_|.
static const int kMaxBlockSize = kBlockSize32kHz;
// Sample rate with which the NetEq instance is created.
static const int kInitSampleRateHz = 8000;
NetEqDecodingTest();
// Creates the NetEq instance and registers the decoders.
virtual void SetUp();
// Deletes the NetEq instance and closes the RTP file, if open.
virtual void TearDown();
// NOTE(review): no definition of SelectDecoders() is visible in this part of
// the file -- confirm that it is defined (or unused).
void SelectDecoders(NetEqDecoder* used_codec);
// Registers the payload types used by the tests.
void LoadDecoders();
// Opens |rtp_file| for reading and skips its file header.
void OpenInputFile(const std::string &rtp_file);
// Inserts all packets that are due at the current simulated time, pulls one
// block of audio into |out_data_| (its length is written to |out_len|) and
// advances the simulated time by kTimeStepMs.
void Process(NETEQTEST_RTPpacket* rtp_ptr, int* out_len);
void DecodeAndCompare(const std::string &rtp_file,
const std::string &ref_file);
void DecodeAndCheckStats(const std::string &rtp_file,
const std::string &stat_ref_file,
const std::string &rtcp_ref_file);
static void PopulateRtpInfo(int frame_index,
int timestamp,
WebRtcRTPHeader* rtp_info);
static void PopulateCng(int frame_index,
int timestamp,
WebRtcRTPHeader* rtp_info,
uint8_t* payload,
int* payload_len);
NetEq* neteq_;  // Instance under test; created in SetUp().
FILE* rtp_fp_;  // RTP input file; NULL until OpenInputFile() is called.
unsigned int sim_clock_;  // Simulated time, advanced by kTimeStepMs.
int16_t out_data_[kMaxBlockSize];  // Latest block of output audio.
int output_sample_rate_;  // In Hz; derived from the latest output length.
};
// Out-of-class definitions of the static const members (their values are
// given in the class body). Allocating the static const so that it can be
// passed by reference.
const int NetEqDecodingTest::kTimeStepMs;
const int NetEqDecodingTest::kBlockSize8kHz;
const int NetEqDecodingTest::kBlockSize16kHz;
const int NetEqDecodingTest::kBlockSize32kHz;
const int NetEqDecodingTest::kMaxBlockSize;
const int NetEqDecodingTest::kInitSampleRateHz;
// Starts with no NetEq instance, no RTP file, the simulated clock at zero, the
// output sample rate at its initial value, and an all-zero output buffer.
NetEqDecodingTest::NetEqDecodingTest()
    : neteq_(NULL),
      rtp_fp_(NULL),
      sim_clock_(0),
      output_sample_rate_(kInitSampleRateHz) {
  for (int i = 0; i < kMaxBlockSize; ++i) {
    out_data_[i] = 0;
  }
}
// Creates the NetEq instance at the initial sample rate and registers the
// decoders. The test is aborted if the instance could not be created.
void NetEqDecodingTest::SetUp() {
neteq_ = NetEq::Create(kInitSampleRateHz);
ASSERT_TRUE(neteq_);
LoadDecoders();
}
// Deletes the NetEq instance and closes the RTP input file, if one was opened.
void NetEqDecodingTest::TearDown() {
  delete neteq_;
  if (rtp_fp_ != NULL) {
    fclose(rtp_fp_);
  }
}
// Registers the decoders used by the tests, each with a fixed RTP payload
// type. The first registration that does not return 0 aborts the test; one
// assertion per codec keeps the failure location specific.
void NetEqDecodingTest::LoadDecoders() {
// Load PCMu.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCMu, 0));
// Load PCMa.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCMa, 8));
// Load iLBC.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderILBC, 102));
// Load iSAC.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderISAC, 103));
// Load iSAC SWB.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderISACswb, 104));
// Load PCM16B nb.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16B, 93));
// Load PCM16B wb.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16Bwb, 94));
// Load PCM16B swb32.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16Bswb32kHz, 95));
// Load CNG 8 kHz.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGnb, 13));
// Load CNG 16 kHz.
ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGwb, 98));
}
// Opens |rtp_file| for binary reading, stores the handle in |rtp_fp_| and
// skips past the file header. Failures are reported as fatal assertions.
void NetEqDecodingTest::OpenInputFile(const std::string &rtp_file) {
  FILE* const fp = fopen(rtp_file.c_str(), "rb");
  rtp_fp_ = fp;
  ASSERT_TRUE(fp != NULL);
  ASSERT_EQ(0, NETEQTEST_RTPpacket::skipFileHeader(fp));
}
// Runs one simulation step of |kTimeStepMs|: inserts every packet from the
// input file that is due at the current simulated time, pulls one block of
// audio into |out_data_|, and advances the clock. |rtp| holds the next packet
// to deliver and is refilled from |rtp_fp_|; |out_len| receives the number of
// samples produced.
void NetEqDecodingTest::Process(NETEQTEST_RTPpacket* rtp, int* out_len) {
  // Deliver packets until the next one is in the future or the reader reports
  // a negative data length.
  for (;;) {
    if ((sim_clock_ < rtp->time()) || (rtp->dataLen() < 0)) {
      break;
    }
    if (rtp->dataLen() > 0) {
      WebRtcRTPHeader rtp_header;
      rtp->parseHeader(&rtp_header);
      // The receive time is converted from ms to samples at the current
      // output rate.
      ASSERT_EQ(0, neteq_->InsertPacket(
          rtp_header,
          rtp->payload(),
          rtp->payloadLen(),
          rtp->time() * (output_sample_rate_ / 1000)));
    }
    // Fetch the next packet from the file.
    ASSERT_NE(-1, rtp->readFromFile(rtp_fp_));
  }

  // Pull one block of output.
  NetEqOutputType output_type;
  int channels;
  ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, out_len,
                                &channels, &output_type));
  const bool valid_block_size = (*out_len == kBlockSize8kHz) ||
                                (*out_len == kBlockSize16kHz) ||
                                (*out_len == kBlockSize32kHz);
  ASSERT_TRUE(valid_block_size);
  // Derive the output rate from the size of the delivered block.
  output_sample_rate_ = *out_len / 10 * 1000;

  // Advance the simulated clock.
  sim_clock_ += kTimeStepMs;
}
// Feeds the RTP dump |rtp_file| through NetEq, one Process() step at a time,
// and hands every output block to a RefFiles object constructed from
// |ref_file|. When |ref_file| is empty, an output file name under
// webrtc::test::OutputPath() is passed to RefFiles as well.
void NetEqDecodingTest::DecodeAndCompare(const std::string &rtp_file,
                                         const std::string &ref_file) {
  // OpenInputFile() reports problems through fatal assertions, which only
  // return from that function. Stop here as well, so that an unopened file is
  // never handed to readFromFile().
  ASSERT_NO_FATAL_FAILURE(OpenInputFile(rtp_file));

  std::string ref_out_file = "";
  if (ref_file.empty()) {
    ref_out_file = webrtc::test::OutputPath() + "neteq_out.pcm";
  }
  RefFiles ref_files(ref_file, ref_out_file);

  NETEQTEST_RTPpacket rtp;
  ASSERT_GT(rtp.readFromFile(rtp_fp_), 0);
  int i = 0;
  // Keep going until the packet reader reports a negative data length.
  while (rtp.dataLen() >= 0) {
    std::ostringstream ss;
    ss << "Lap number " << i++ << " in DecodeAndCompare while loop";
    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
    int out_len = 0;
    ASSERT_NO_FATAL_FAILURE(Process(&rtp, &out_len));
    ASSERT_NO_FATAL_FAILURE(ref_files.ProcessReference(out_data_, out_len));
  }
}
void NetEqDecodingTest::DecodeAndCheckStats(const std::string &rtp_file,
const std::string &stat_ref_file,
const std::string &rtcp_ref_file) {
OpenInputFile(rtp_file);
std::string stat_out_file = "";
if (stat_ref_file.empty()) {
stat_out_file = webrtc::test::OutputPath() +
"neteq_network_stats.dat";
}
RefFiles network_stat_files(stat_ref_file, stat_out_file);
std::string rtcp_out_file = "";
if (rtcp_ref_file.empty()) {
rtcp_out_file = webrtc::test::OutputPath() +
"neteq_rtcp_stats.dat";
}
RefFiles rtcp_stat_files(rtcp_ref_file, rtcp_out_file);
NETEQTEST_RTPpacket rtp;
ASSERT_GT(rtp.readFromFile(rtp_fp_), 0);
while (rtp.dataLen() >= 0) {
int out_len;
Process(&rtp, &out_len);
// Query the network statistics API once per second
if (sim_clock_ % 1000 == 0) {
// Process NetworkStatistics.
NetEqNetworkStatistics network_stats;
ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
network_stat_files.ProcessReference(network_stats);
// Process RTCPstat.
RtcpStatistics rtcp_stats;
neteq_->GetRtcpStatistics(&rtcp_stats);
rtcp_stat_files.ProcessReference(rtcp_stats);
}
}
}
// Fills in the RTP header fields of |rtp_info| for a speech packet: payload
// type 94 (PCM16b WB), a fixed SSRC, marker bit cleared, and the given
// sequence number (|frame_index|) and |timestamp|.
void NetEqDecodingTest::PopulateRtpInfo(int frame_index,
                                        int timestamp,
                                        WebRtcRTPHeader* rtp_info) {
  // Fixed stream properties.
  rtp_info->header.payloadType = 94;  // PCM16b WB codec.
  rtp_info->header.ssrc = 0x1234;  // Just an arbitrary SSRC.
  rtp_info->header.markerBit = 0;
  // Per-packet properties.
  rtp_info->header.sequenceNumber = frame_index;
  rtp_info->header.timestamp = timestamp;
}
void NetEqDecodingTest::PopulateCng(int frame_index,
int timestamp,
WebRtcRTPHeader* rtp_info,
uint8_t* payload,
int* payload_len) {
rtp_info->header.sequenceNumber = frame_index;
rtp_info->header.timestamp = timestamp;
rtp_info->header.ssrc = 0x1234; // Just an arbitrary SSRC.
rtp_info->header.payloadType = 98; // WB CNG.
rtp_info->header.markerBit = 0;
payload[0] = 64; // Noise level -64 dBov, quite arbitrarily chosen.
*payload_len = 1; // Only noise level, no spectral parameters.
}
// Decodes the universal RTP test vector and hands the output, together with
// the PCM reference file, to DecodeAndCompare().
TEST_F(NetEqDecodingTest, TestBitExactness) {
  const std::string rtp_path =
      webrtc::test::ProjectRootPath() + "resources/neteq_universal.rtp";
  const std::string reference_path =
      webrtc::test::ResourcePath("neteq_universal_ref", "pcm");
  DecodeAndCompare(rtp_path, reference_path);
}
// Decodes the universal RTP test vector and hands the network and RTCP
// statistics, together with their reference files, to DecodeAndCheckStats().
TEST_F(NetEqDecodingTest, TestNetworkStatistics) {
  const std::string rtp_path =
      webrtc::test::ProjectRootPath() + "resources/neteq_universal.rtp";
  const std::string network_stats_reference =
      webrtc::test::ResourcePath("neteq_network_stats", "dat");
  const std::string rtcp_stats_reference =
      webrtc::test::ResourcePath("neteq_rtcp_stats", "dat");
  DecodeAndCheckStats(rtp_path, network_stats_reference,
                      rtcp_stats_reference);
}
// TODO(hlundin): Re-enable test once the statistics interface is up again.
// Verifies the packet waiting-time statistics: the reported values, that
// reading them resets them, and that at most 100 values are kept.
TEST_F(NetEqDecodingTest, TestFrameWaitingTimeStatistics) {
  // Use fax mode to avoid time-scaling. This is to simplify the testing of
  // packet waiting times in the packet buffer.
  neteq_->SetPlayoutMode(kPlayoutFax);
  ASSERT_EQ(kPlayoutFax, neteq_->PlayoutMode());

  const int kSamples = 10 * 16;  // 10 ms of 16 kHz audio per packet.
  const int kPayloadBytes = kSamples * 2;

  // Part 1: insert 30 dummy packets at once.
  size_t num_frames = 30;
  for (size_t i = 0; i < num_frames; ++i) {
    uint16_t payload[kSamples] = {0};
    WebRtcRTPHeader rtp_info;
    // Sets SSRC 0x1234, payload type 94 (PCM16b WB) and a cleared marker bit.
    PopulateRtpInfo(static_cast<int>(i), static_cast<int>(i * kSamples),
                    &rtp_info);
    ASSERT_EQ(0, neteq_->InsertPacket(
        rtp_info,
        reinterpret_cast<uint8_t*>(payload),
        kPayloadBytes, 0));
  }

  // Pull out all data.
  for (size_t i = 0; i < num_frames; ++i) {
    NetEqOutputType type;
    int num_channels;
    int out_len;
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
  }

  std::vector<int> waiting_times;
  neteq_->WaitingTimes(&waiting_times);
  EXPECT_EQ(num_frames, waiting_times.size());
  // Since all frames are dumped into NetEQ at once, but pulled out with 10 ms
  // spacing (per definition), we expect the delay to increase with 10 ms for
  // each packet.
  for (size_t i = 0; i < waiting_times.size(); ++i) {
    EXPECT_EQ(static_cast<int>(i + 1) * 10, waiting_times[i]);
  }

  // Part 2: check statistics again and make sure it's been reset.
  neteq_->WaitingTimes(&waiting_times);
  EXPECT_EQ(0, static_cast<int>(waiting_times.size()));

  // Part 3: process > 100 frames, and make sure that we get statistics only
  // for 100 frames. Note the new SSRC, causing NetEQ to reset.
  num_frames = 110;
  for (size_t i = 0; i < num_frames; ++i) {
    uint16_t payload[kSamples] = {0};
    WebRtcRTPHeader rtp_info;
    PopulateRtpInfo(static_cast<int>(i), static_cast<int>(i * kSamples),
                    &rtp_info);
    rtp_info.header.ssrc = 0x1235;  // Just an arbitrary SSRC.
    ASSERT_EQ(0, neteq_->InsertPacket(
        rtp_info,
        reinterpret_cast<uint8_t*>(payload),
        kPayloadBytes, 0));

    // This time one block is pulled out for every inserted packet.
    NetEqOutputType type;
    int num_channels;
    int out_len;
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
  }

  neteq_->WaitingTimes(&waiting_times);
  EXPECT_EQ(100u, waiting_times.size());
}
// Inserts packets slightly faster than they are pulled out and checks the
// clock drift reported in the network statistics.
TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimeNegative) {
  const int kNumFrames = 3000;  // Needed for convergence.
  const int kSamples = 10 * 16;
  const int kPayloadBytes = kSamples * 2;

  for (int frame_index = 0; frame_index < kNumFrames; ) {
    // Insert one packet each time, except every 10th time where we insert two
    // packets at once. This will create a negative clock-drift of approx. 10%.
    // The burst size is decided before |frame_index| is advanced.
    const int burst_size = (frame_index % 10 == 0) ? 2 : 1;
    for (int n = 0; n < burst_size; ++n, ++frame_index) {
      uint8_t payload[kPayloadBytes] = {0};
      WebRtcRTPHeader rtp_info;
      PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
    }

    // Pull out data once.
    NetEqOutputType type;
    int num_channels;
    int out_len;
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
  }

  NetEqNetworkStatistics network_stats;
  ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
  EXPECT_EQ(-103196, network_stats.clockdrift_ppm);
}
// Inserts packets slightly slower than they are pulled out and checks the
// clock drift reported in the network statistics.
TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) {
  const int kNumFrames = 5000;  // Needed for convergence.
  const int kSamples = 10 * 16;
  const int kPayloadBytes = kSamples * 2;

  int frame_index = 0;
  for (int i = 0; i < kNumFrames; ++i) {
    // Insert one packet each time, except every 10th time where we don't insert
    // any packet. This will create a positive clock-drift of approx. 11%.
    const bool skip_insert = (i % 10 == 9);
    if (!skip_insert) {
      uint8_t payload[kPayloadBytes] = {0};
      WebRtcRTPHeader rtp_info;
      PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
      ++frame_index;
    }

    // Pull out data once.
    NetEqOutputType type;
    int num_channels;
    int out_len;
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
  }

  NetEqNetworkStatistics network_stats;
  ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
  EXPECT_EQ(110946, network_stats.clockdrift_ppm);
}
// Runs 5 seconds of speech, then 60 seconds of comfort noise, then speech
// again, with packets arriving slightly faster than they are played out.
// Checks that the delay (next timestamp to insert minus the playout
// timestamp) after the CNG period stays within 20 ms of the delay before it.
TEST_F(NetEqDecodingTest, LongCngWithClockDrift) {
uint16_t seq_no = 0;
uint32_t timestamp = 0;
const int kFrameSizeMs = 30;
// 16 samples per ms, i.e., 16 kHz.
const int kSamples = kFrameSizeMs * 16;
const int kPayloadBytes = kSamples * 2;
// Apply a clock drift of -25 ms / s (sender faster than receiver).
// The factor scales the nominal time between packet insertions.
const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
// Simulated time at which the next packet is to be inserted.
double next_input_time_ms = 0.0;
// Simulated time; shared by all three phases below.
double t_ms;
NetEqOutputType type;
// Insert speech for 5 seconds.
const int kSpeechDurationMs = 5000;
for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
// Each turn in this for loop is 10 ms.
while (next_input_time_ms <= t_ms) {
// Insert one 30 ms speech frame.
uint8_t payload[kPayloadBytes] = {0};
WebRtcRTPHeader rtp_info;
PopulateRtpInfo(seq_no, timestamp, &rtp_info);
ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
++seq_no;
timestamp += kSamples;
next_input_time_ms += static_cast<double>(kFrameSizeMs) * kDriftFactor;
}
// Pull out data once.
int out_len;
int num_channels;
ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
&num_channels, &type));
ASSERT_EQ(kBlockSize16kHz, out_len);
}
// The last block pulled during the speech phase must be normal output.
EXPECT_EQ(kOutputNormal, type);
// Delay in samples: next timestamp to insert minus the playout timestamp.
int32_t delay_before = timestamp - neteq_->PlayoutTimestamp();
// Insert CNG for 1 minute (= 60000 ms).
const int kCngPeriodMs = 100;
const int kCngPeriodSamples = kCngPeriodMs * 16; // Period in 16 kHz samples.
const int kCngDurationMs = 60000;
for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) {
// Each turn in this for loop is 10 ms.
while (next_input_time_ms <= t_ms) {
// Insert one CNG frame each 100 ms.
// Only the first |payload_len| bytes of |payload| are set by PopulateCng()
// and passed on to InsertPacket().
uint8_t payload[kPayloadBytes];
int payload_len;
WebRtcRTPHeader rtp_info;
PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, payload_len, 0));
++seq_no;
timestamp += kCngPeriodSamples;
next_input_time_ms += static_cast<double>(kCngPeriodMs) * kDriftFactor;
}
// Pull out data once.
int out_len;
int num_channels;
ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
&num_channels, &type));
ASSERT_EQ(kBlockSize16kHz, out_len);
}
// The last block pulled during the CNG phase must be comfort noise.
EXPECT_EQ(kOutputCNG, type);
// Insert speech again until output type is speech.
while (type != kOutputNormal) {
// Each turn in this while loop is 10 ms.
while (next_input_time_ms <= t_ms) {
// Insert one 30 ms speech frame.
uint8_t payload[kPayloadBytes] = {0};
WebRtcRTPHeader rtp_info;
PopulateRtpInfo(seq_no, timestamp, &rtp_info);
ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
++seq_no;
timestamp += kSamples;
next_input_time_ms += static_cast<double>(kFrameSizeMs) * kDriftFactor;
}
// Pull out data once.
int out_len;
int num_channels;
ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
&num_channels, &type));
ASSERT_EQ(kBlockSize16kHz, out_len);
// Increase clock.
t_ms += 10;
}
// Same delay measure as |delay_before|, taken after the CNG period.
int32_t delay_after = timestamp - neteq_->PlayoutTimestamp();
// Compare delay before and after, and make sure it differs less than 20 ms.
// (20 * 16 is 20 ms expressed in 16 kHz samples.)
EXPECT_LE(delay_after, delay_before + 20 * 16);
EXPECT_GE(delay_after, delay_before - 20 * 16);
}
// Inserting a packet whose payload type has no registered decoder must fail
// with the kUnknownRtpPayloadType error.
TEST_F(NetEqDecodingTest, UnknownPayloadType) {
  const int kPayloadBytes = 100;
  uint8_t payload[kPayloadBytes] = {0};

  WebRtcRTPHeader rtp_info;
  PopulateRtpInfo(0, 0, &rtp_info);
  rtp_info.header.payloadType = 1;  // Not registered as a decoder.

  const int insert_result =
      neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0);
  EXPECT_EQ(NetEq::kFail, insert_result);
  EXPECT_EQ(NetEq::kUnknownRtpPayloadType, neteq_->LastError());
}
// Inserts an invalid iSAC payload and verifies that GetAudio() fails, that the
// decoder error is reported, and that exactly the first 10 ms of the output
// buffer is zeroed.
TEST_F(NetEqDecodingTest, DecoderError) {
  const int kPayloadBytes = 100;
  uint8_t payload[kPayloadBytes] = {0};
  WebRtcRTPHeader rtp_info;
  PopulateRtpInfo(0, 0, &rtp_info);
  rtp_info.header.payloadType = 103;  // iSAC, but the payload is invalid.
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));

  // Set all of |out_data_| to 1, so that samples written by GetAudio can be
  // told apart from untouched ones.
  for (int k = 0; k < kMaxBlockSize; ++k) {
    out_data_[k] = 1;
  }

  NetEqOutputType type;
  int num_channels;
  int samples_per_channel;
  EXPECT_EQ(NetEq::kFail,
            neteq_->GetAudio(kMaxBlockSize, out_data_,
                             &samples_per_channel, &num_channels, &type));
  // Verify that there is a decoder error to check.
  EXPECT_EQ(NetEq::kDecoderErrorCode, neteq_->LastError());
  // Code 6730 is an iSAC error code.
  EXPECT_EQ(6730, neteq_->LastDecoderError());

  // Verify that the first 160 samples are set to 0, and that the remaining
  // samples are left unmodified.
  static const int kExpectedOutputLength = 160;  // 10 ms at 16 kHz sample rate.
  for (int i = 0; i < kMaxBlockSize; ++i) {
    std::ostringstream ss;
    ss << "i = " << i;
    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
    const int expected_sample = (i < kExpectedOutputLength) ? 0 : 1;
    EXPECT_EQ(expected_sample, out_data_[i]);
  }
}
// Calls GetAudio() before any packet has been inserted and verifies that the
// call succeeds and that the first 10 ms of output is all zeros.
TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
  // Set all of |out_data_| to 1, so that samples written by GetAudio can be
  // told apart from untouched ones.
  for (int k = 0; k < kMaxBlockSize; ++k) {
    out_data_[k] = 1;
  }

  NetEqOutputType type;
  int num_channels;
  int samples_per_channel;
  const int get_audio_result = neteq_->GetAudio(kMaxBlockSize, out_data_,
                                                &samples_per_channel,
                                                &num_channels, &type);
  EXPECT_EQ(0, get_audio_result);

  // Verify that the first block of samples is set to 0.
  static const int kExpectedOutputLength =
      kInitSampleRateHz / 100;  // 10 ms at initial sample rate.
  int i = 0;
  while (i < kExpectedOutputLength) {
    std::ostringstream ss;
    ss << "i = " << i;
    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
    EXPECT_EQ(0, out_data_[i]);
    ++i;
  }
}
} // namespace

View File

@ -0,0 +1,189 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/normal.h"
#include <algorithm> // min
#include <cstring> // memset, memcpy
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
namespace webrtc {
// Performs the "Normal" operation.
//
// |input| holds |length| interleaved samples (all channels together); they are
// de-interleaved into |output|, which must be empty on entry. Depending on
// |last_mode|, the start of the new data is post-processed:
//  - kModeExpand: the mute factors are updated, the new data is ramped up, and
//    its first part is cross-faded with freshly generated expand data.
//  - kModeRfc3389Cng: the mute factor is reset and the first part of the new
//    data is cross-faded with comfort noise (single channel only).
//  - otherwise: if the first channel's mute factor is below 1.0 (16384 in
//    Q14), all channels continue to ramp up.
// |external_mute_factor_array| holds one Q14 mute factor per channel and is
// updated in place. Returns |length|.
int Normal::Process(const int16_t* input,
                    size_t length,
                    Modes last_mode,
                    int16_t* external_mute_factor_array,
                    AudioMultiVector<int16_t>* output) {
  if (length == 0) {
    // Nothing to process.
    output->Clear();
    return length;
  }

  // Output should be empty at this point.
  assert(output->Empty());
  output->PushBackInterleaved(input, length);
  int16_t* signal = &(*output)[0][0];

  const unsigned fs_mult = fs_hz_ / 8000;
  assert(fs_mult > 0);
  // fs_shift = log2(fs_mult), rounded down.
  // Note that |fs_shift| is not "exact" for 48 kHz.
  // TODO(hlundin): Investigate this further.
  const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult);

  // Check if last RecOut call resulted in an Expand. If so, we have to take
  // care of some cross-fading and unmuting.
  if (last_mode == kModeExpand) {
    // Generate interpolation data using Expand.
    // First, set Expand parameters to appropriate values.
    expand_->SetParametersForNormalAfterExpand();

    // Call Expand.
    AudioMultiVector<int16_t> expanded(output->Channels());
    expand_->Process(&expanded);
    expand_->Reset();

    for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) {
      // Adjust muting factor (main muting factor times expand muting factor).
      external_mute_factor_array[channel_ix] = static_cast<int16_t>(
          WEBRTC_SPL_MUL_16_16_RSFT(external_mute_factor_array[channel_ix],
                                    expand_->MuteFactor(channel_ix), 14));

      int16_t* channel_signal = &(*output)[channel_ix][0];
      size_t length_per_channel = length / output->Channels();
      // Find largest absolute value in new data.
      int16_t decoded_max = WebRtcSpl_MaxAbsValueW16(channel_signal,
                                                     length_per_channel);
      // Adjust muting factor if needed (to BGN level).
      int energy_length = std::min(static_cast<size_t>(fs_mult * 64),
                                   length_per_channel);
      int scaling = 6 + fs_shift
          - WebRtcSpl_NormW32(decoded_max * decoded_max);
      scaling = std::max(scaling, 0);  // |scaling| should always be >= 0.
      int32_t energy = WebRtcSpl_DotProductWithScale(channel_signal,
                                                     channel_signal,
                                                     energy_length, scaling);
      // The scaled length can become zero for short inputs; treat that as
      // "no energy" rather than dividing by zero.
      const int scaled_energy_length = energy_length >> scaling;
      energy = (scaled_energy_length > 0) ? energy / scaled_energy_length : 0;

      int mute_factor;
      if ((energy != 0) &&
          (energy > background_noise_.Energy(channel_ix))) {
        // Normalize new frame energy to 15 bits.
        scaling = WebRtcSpl_NormW32(energy) - 16;
        // |scaling| is negative when |energy| needs more than 15 bits, and
        // |scaling| + 14 can then be negative too. Shifting by a negative
        // count is undefined, so shift right in those cases.
        const int bgn_shift = scaling + 14;
        const int32_t bgn_energy_raw = background_noise_.Energy(channel_ix);
        // We want background_noise_.energy() / energy in Q14.
        const int32_t bgn_energy = (bgn_shift >= 0) ?
            (bgn_energy_raw << bgn_shift) : (bgn_energy_raw >> -bgn_shift);
        const int16_t energy_scaled = static_cast<int16_t>(
            (scaling >= 0) ? (energy << scaling) : (energy >> -scaling));
        int16_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled);
        mute_factor = WebRtcSpl_SqrtFloor(static_cast<int32_t>(ratio) << 14);
      } else {
        mute_factor = 16384;  // 1.0 in Q14.
      }
      if (mute_factor > external_mute_factor_array[channel_ix]) {
        external_mute_factor_array[channel_ix] = std::min(mute_factor, 16384);
      }

      // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
      int16_t increment = 64 / fs_mult;
      for (size_t i = 0; i < length_per_channel; i++) {
        // Scale with mute factor.
        assert(channel_ix < output->Channels());
        assert(i < output->Size());
        int32_t scaled_signal = (*output)[channel_ix][i] *
            external_mute_factor_array[channel_ix];
        // Shift 14 with proper rounding.
        (*output)[channel_ix][i] = (scaled_signal + 8192) >> 14;
        // Increase mute_factor towards 16384.
        external_mute_factor_array[channel_ix] =
            std::min(external_mute_factor_array[channel_ix] + increment, 16384);
      }

      // Interpolate the expanded data into the new vector.
      // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
      assert(fs_shift < 3);  // Will always be 0, 1, or, 2.
      increment = 4 >> fs_shift;
      int fraction = increment;
      // Never cross-fade over more samples than are available in either the
      // new data or the expanded data.
      size_t interp_length = std::min<size_t>(8 * fs_mult, length_per_channel);
      interp_length = std::min<size_t>(interp_length, expanded.Size());
      for (size_t i = 0; i < interp_length; i++) {
        // TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8
        // now for legacy bit-exactness.
        assert(channel_ix < output->Channels());
        assert(i < output->Size());
        (*output)[channel_ix][i] =
            (fraction * (*output)[channel_ix][i] +
                (32 - fraction) * expanded[channel_ix][i] + 8) >> 5;
        fraction += increment;
      }
    }
  } else if (last_mode == kModeRfc3389Cng) {
    assert(output->Channels() == 1);  // Not adapted for multi-channel yet.
    static const int kCngLength = 32;
    int16_t cng_output[kCngLength];
    // Number of samples to cross-fade. Limited by the size of |cng_output|
    // (8 * fs_mult exceeds it when fs_mult > 4) and by the amount of new data.
    size_t interp_length = std::min<size_t>(8 * fs_mult, kCngLength);
    interp_length = std::min<size_t>(interp_length, length);
    // Reset mute factor and start up fresh.
    external_mute_factor_array[0] = 16384;
    AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();

    if (cng_decoder) {
      CNG_dec_inst* cng_inst = static_cast<CNG_dec_inst*>(cng_decoder->state());
      // Generate long enough for 32kHz.
      if (WebRtcCng_Generate(cng_inst, cng_output, kCngLength, 0) < 0) {
        // Error returned; set return vector to all zeros.
        memset(cng_output, 0, sizeof(cng_output));
      }
    } else {
      // If no CNG instance is defined, just copy from the decoded data.
      // (This will result in interpolating the decoded with itself.)
      memcpy(cng_output, signal, interp_length * sizeof(int16_t));
    }
    // Interpolate the CNG into the new vector.
    // (NB/WB/SWB32/SWB48 8/16/32/48 samples.)
    assert(fs_shift < 3);  // Will always be 0, 1, or, 2.
    int16_t increment = 4 >> fs_shift;
    int16_t fraction = increment;
    for (size_t i = 0; i < interp_length; i++) {
      // TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8 now
      // for legacy bit-exactness.
      signal[i] =
          (fraction * signal[i] + (32 - fraction) * cng_output[i] + 8) >> 5;
      fraction += increment;
    }
  } else if (external_mute_factor_array[0] < 16384) {
    // Previous was neither of Expand, FadeToBGN or RFC3389_CNG, but we are
    // still ramping up from previous muting.
    // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14).
    int16_t increment = 64 / fs_mult;
    size_t length_per_channel = length / output->Channels();
    for (size_t i = 0; i < length_per_channel; i++) {
      for (size_t channel_ix = 0; channel_ix < output->Channels();
          ++channel_ix) {
        // Scale with mute factor.
        assert(channel_ix < output->Channels());
        assert(i < output->Size());
        int32_t scaled_signal = (*output)[channel_ix][i] *
            external_mute_factor_array[channel_ix];
        // Shift 14 with proper rounding.
        (*output)[channel_ix][i] = (scaled_signal + 8192) >> 14;
        // Increase mute_factor towards 16384.
        external_mute_factor_array[channel_ix] =
            std::min(16384, external_mute_factor_array[channel_ix] + increment);
      }
    }
  }

  return length;
}
} // namespace webrtc

View File

@ -0,0 +1,67 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NORMAL_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NORMAL_H_
#include <cstring> // Access to size_t.
#include <vector>
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq4/defines.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
class DecoderDatabase;
class Expand;
// This class provides the "Normal" DSP operation, that is performed when
// there is no data loss, no need to stretch the timing of the signal, and
// no other "special circumstances" are at hand.
class Normal {
public:
// Stores the sample rate |fs_hz| and the collaborators used by Process().
// |decoder_database| and |expand| are kept as raw pointers and
// |background_noise| as a reference; none of them is deleted by this class,
// so they must remain valid for as long as Process() may be called.
Normal(int fs_hz, DecoderDatabase* decoder_database,
const BackgroundNoise& background_noise,
Expand* expand)
: fs_hz_(fs_hz),
decoder_database_(decoder_database),
background_noise_(background_noise),
expand_(expand) {
}
virtual ~Normal() {}
// Performs the "Normal" operation. The decoder data is supplied in |input|,
// having |length| samples in total for all channels (interleaved). The
// result is written to |output|. The number of channels allocated in
// |output| defines the number of channels that will be used when
// de-interleaving |input|. |last_mode| contains the mode used in the previous
// GetAudio call (i.e., not the current one), and |external_mute_factor_array|
// is a pointer to the mute factor in the NetEqImpl class.
int Process(const int16_t* input, size_t length,
Modes last_mode,
int16_t* external_mute_factor_array,
AudioMultiVector<int16_t>* output);
private:
// Sample rate in Hz, as given to the constructor.
int fs_hz_;
DecoderDatabase* decoder_database_;
const BackgroundNoise& background_noise_;
Expand* expand_;
DISALLOW_COPY_AND_ASSIGN(Normal);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NORMAL_H_

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for Normal class.
#include "webrtc/modules/audio_coding/neteq4/normal.h"
#include <vector>
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/random_vector.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
namespace webrtc {
// Smoke test: a Normal object can be constructed from its collaborators and
// destroyed again.
TEST(Normal, CreateAndDestroy) {
  const int kFsHz = 8000;
  const size_t kNumChannels = 1;

  // Collaborators needed to construct Expand and Normal.
  MockDecoderDatabase db;
  BackgroundNoise bgn(kNumChannels);
  SyncBuffer sync_buffer(1, 1000);
  RandomVector random_vector;
  Expand expand(&bgn, &sync_buffer, &random_vector, kFsHz, kNumChannels);

  Normal normal(kFsHz, &db, bgn, &expand);

  EXPECT_CALL(db, Die());  // Called when |db| goes out of scope.
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_H_
#include <list>
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Struct for holding RTP packets.
struct Packet {
RTPHeader header;
uint8_t* payload; // Datagram excluding RTP header and header extension.
int payload_length;
bool primary; // Primary, i.e., not redundant payload.
int waiting_time;
// Constructor.
Packet()
: payload(NULL),
payload_length(0),
primary(true),
waiting_time(0) {
}
// Comparison operators. Establish a packet ordering based on (1) timestamp,
// (2) sequence number, and (3) redundancy. Timestamp and sequence numbers
// are compared taking wrap-around into account. If both timestamp and
// sequence numbers are identical, a primary payload is considered "smaller"
// than a secondary.
bool operator==(const Packet& rhs) const {
return (this->header.timestamp == rhs.header.timestamp &&
this->header.sequenceNumber == rhs.header.sequenceNumber &&
this->primary == rhs.primary);
}
bool operator!=(const Packet& rhs) const { return !operator==(rhs); }
bool operator<(const Packet& rhs) const {
if (this->header.timestamp == rhs.header.timestamp) {
if (this->header.sequenceNumber == rhs.header.sequenceNumber) {
// Timestamp and sequence numbers are identical. Deem left hand side
// to be "smaller" (i.e., "earlier") if it is primary, and right hand
// side is not.
return (this->primary && !rhs.primary);
}
return (static_cast<uint16_t>(rhs.header.sequenceNumber
- this->header.sequenceNumber) < 0xFFFF / 2);
}
return (static_cast<uint32_t>(rhs.header.timestamp
- this->header.timestamp) < 0xFFFFFFFF / 2);
}
bool operator>(const Packet& rhs) const { return rhs.operator<(*this); }
bool operator<=(const Packet& rhs) const { return !operator>(rhs); }
bool operator>=(const Packet& rhs) const { return !operator<(rhs); }
};
// A list of packets. The list elements are pointers to Packet objects, not
// the objects themselves.
typedef std::list<Packet*> PacketList;
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_H_

View File

@ -0,0 +1,278 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This is the implementation of the PacketBuffer class. It is mostly based on
// an STL list. The list is kept sorted at all times so that the next packet to
// decode is at the beginning of the list.
#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h"
#include <algorithm> // find_if()
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
namespace webrtc {
// Predicate used to find the insert position for a new packet in the buffer
// list. Operator() returns true for a buffered |packet| that does not sort
// after |new_packet|, i.e., when |new_packet| is not "smaller" than |packet|.
class NewTimestampIsLarger {
 public:
  explicit NewTimestampIsLarger(const Packet* new_packet)
      : new_packet_(new_packet) {}

  bool operator()(Packet* packet) {
    return !(*new_packet_ < *packet);
  }

 private:
  const Packet* new_packet_;
};
// Constructor. |max_number_of_packets| is the maximum number of slots, and
// |max_memory_bytes| is the maximum amount of payload memory (excluding RTP
// headers) that the buffer will accept. The buffer starts out empty.
PacketBuffer::PacketBuffer(size_t max_number_of_packets,
                           size_t max_memory_bytes)
    : max_number_of_packets_(max_number_of_packets),
      max_memory_bytes_(max_memory_bytes),
      current_memory_bytes_(0) {}
// Destructor. Flushes the buffer, which destroys all packets still in it.
PacketBuffer::~PacketBuffer() {
  Flush();
}
// Flush the buffer. All packets in the buffer are handed to
// DeleteAllPackets(), and the memory bookkeeping is reset to zero.
void PacketBuffer::Flush() {
  DeleteAllPackets(&buffer_);
  current_memory_bytes_ = 0;
}
// Inserts |packet| at its sorted position in the buffer. If the buffer is
// full (too many packets or too much payload memory), it is flushed first and
// kFlushed is returned. Returns kInvalidPacket for a NULL packet or a packet
// without payload, and kOversizePacket if the packet does not fit even in the
// flushed buffer; a non-NULL packet is deleted in both error cases.
int PacketBuffer::InsertPacket(Packet* packet) {
  if (packet == NULL) {
    return kInvalidPacket;
  }
  if (packet->payload == NULL) {
    delete packet;
    return kInvalidPacket;
  }

  int status = kOK;

  bool too_many_packets = buffer_.size() >= max_number_of_packets_;
  bool too_much_memory = current_memory_bytes_ + packet->payload_length
      > static_cast<int>(max_memory_bytes_);
  if (too_many_packets || too_much_memory) {
    // Buffer is full. Flush it.
    Flush();
    status = kFlushed;

    // Check the limits again against the now flushed buffer.
    too_many_packets = buffer_.size() >= max_number_of_packets_;
    too_much_memory = current_memory_bytes_ + packet->payload_length
        > static_cast<int>(max_memory_bytes_);
    if (too_many_packets || too_much_memory) {
      // Buffer is still too small for the packet. Either the buffer limits are
      // really small, or the packet is really large. Delete the packet and
      // return an error.
      delete [] packet->payload;
      delete packet;
      return kOversizePacket;
    }
  }

  // Find the place in the buffer where the new packet should be inserted. The
  // list is searched from the back, since the most likely case is that the new
  // packet should be near the end of the list.
  PacketList::reverse_iterator insert_after = std::find_if(
      buffer_.rbegin(), buffer_.rend(), NewTimestampIsLarger(packet));
  buffer_.insert(insert_after.base(), packet);
  current_memory_bytes_ += packet->payload_length;

  return status;
}
// Moves all packets in |packet_list| into the buffer, front first. The buffer
// is flushed whenever a speech or comfort noise packet arrives with a payload
// type that differs from the current one; |current_rtp_payload_type| and
// |current_cng_rtp_payload_type| are updated accordingly (0xFF means "not
// set"). Returns kFlushed if the buffer was flushed at any point, kOK if not,
// or the error code from InsertPacket(), in which case the remaining packets
// in |packet_list| are deleted.
int PacketBuffer::InsertPacketList(PacketList* packet_list,
                                   const DecoderDatabase& decoder_database,
                                   uint8_t* current_rtp_payload_type,
                                   uint8_t* current_cng_rtp_payload_type) {
  int result = kOK;
  while (!packet_list->empty()) {
    Packet* const packet = packet_list->front();
    const uint8_t payload_type = packet->header.payloadType;

    if (decoder_database.IsComfortNoise(payload_type)) {
      const bool cng_type_changed =
          (*current_cng_rtp_payload_type != 0xFF) &&
          (*current_cng_rtp_payload_type != payload_type);
      if (cng_type_changed) {
        // New CNG payload type implies new codec type.
        *current_rtp_payload_type = 0xFF;
        Flush();
        result = kFlushed;
      }
      *current_cng_rtp_payload_type = payload_type;
    } else if (!decoder_database.IsDtmf(payload_type)) {
      // This must be speech.
      const bool speech_type_changed =
          (*current_rtp_payload_type != 0xFF) &&
          (*current_rtp_payload_type != payload_type);
      if (speech_type_changed) {
        *current_cng_rtp_payload_type = 0xFF;
        Flush();
        result = kFlushed;
      }
      *current_rtp_payload_type = payload_type;
    }

    const int insert_status = InsertPacket(packet);
    packet_list->pop_front();
    if (insert_status == kFlushed) {
      // The buffer flushed, but this is not an error. We can still continue.
      result = kFlushed;
    } else if (insert_status != kOK) {
      // An error occurred. Delete remaining packets in list and return.
      DeleteAllPackets(packet_list);
      return insert_status;
    }
  }
  return result;
}
int PacketBuffer::NextTimestamp(uint32_t* next_timestamp) const {
if (Empty()) {
return kBufferEmpty;
}
if (!next_timestamp) {
return kInvalidPointer;
}
*next_timestamp = buffer_.front()->header.timestamp;
return kOK;
}
int PacketBuffer::NextHigherTimestamp(uint32_t timestamp,
uint32_t* next_timestamp) const {
if (Empty()) {
return kBufferEmpty;
}
if (!next_timestamp) {
return kInvalidPointer;
}
PacketList::const_iterator it;
for (it = buffer_.begin(); it != buffer_.end(); ++it) {
if ((*it)->header.timestamp >= timestamp) {
// Found a packet matching the search.
*next_timestamp = (*it)->header.timestamp;
return kOK;
}
}
return kNotFound;
}
// Returns a read-only pointer to the RTP header of the first packet in the
// buffer, or NULL if the buffer is empty. The buffer keeps ownership.
const RTPHeader* PacketBuffer::NextRtpHeader() const {
  if (!Empty()) {
    return &(buffer_.front()->header);
  }
  return NULL;
}
// Removes the first packet from the buffer and returns it; the caller takes
// over ownership. Returns NULL (leaving |*discard_count| untouched) if the
// buffer is empty.
// Any packets that follow with the same timestamp as the extracted one are
// duplicates or redundant payloads; they are discarded and deleted. If
// |discard_count| is non-NULL, the number of such packets is written to it.
Packet* PacketBuffer::GetNextPacket(int* discard_count) {
  if (Empty()) {
    return NULL;
  }

  Packet* extracted = buffer_.front();
  // Assert that the packet sanity checks in InsertPacket method works.
  assert(extracted && extracted->payload);
  buffer_.pop_front();
  current_memory_bytes_ -= extracted->payload_length;
  assert(current_memory_bytes_ >= 0);  // Assert bookkeeping is correct.

  const uint32_t extracted_timestamp = extracted->header.timestamp;
  int num_discarded = 0;
  while (!Empty() &&
         buffer_.front()->header.timestamp == extracted_timestamp) {
    if (DiscardNextPacket() != kOK) {
      assert(false);  // Must be ok by design.
    }
    ++num_discarded;
  }

  if (discard_count) {
    *discard_count = num_discarded;
  }
  return extracted;
}
int PacketBuffer::DiscardNextPacket() {
if (Empty()) {
return kBufferEmpty;
}
Packet* temp_packet = buffer_.front();
// Assert that the packet sanity checks in InsertPacket method works.
assert(temp_packet && temp_packet->payload);
current_memory_bytes_ -= temp_packet->payload_length;
assert(current_memory_bytes_ >= 0); // Assert bookkeeping is correct.
DeleteFirstPacket(&buffer_);
return kOK;
}
// Discards packets from the front of the buffer for as long as the first
// packet is strictly older than |timestamp_limit|.
// "Older" is evaluated with 32-bit wrap-around: a packet is considered older
// when its timestamp differs from |timestamp_limit| and the unsigned
// difference (|timestamp_limit| - packet timestamp) is less than half the
// 32-bit range. The scan stops at the first packet that is not older.
// Returns the number of packets discarded (0 for an empty buffer).
int PacketBuffer::DiscardOldPackets(uint32_t timestamp_limit) {
  int discard_count = 0;
  while (!Empty() &&
         timestamp_limit != buffer_.front()->header.timestamp &&
         static_cast<uint32_t>(timestamp_limit -
                               buffer_.front()->header.timestamp) <
             0xFFFFFFFF / 2) {
    if (DiscardNextPacket() != kOK) {
      assert(false);  // Must be ok by design.
    }
    ++discard_count;
  }
  // Report the actual count, as documented in the class declaration. This
  // used to return a hard-coded 0, which left |discard_count| unused.
  return discard_count;
}
// Sums up the duration of every packet in the buffer.
// For each packet, the decoder registered for the packet's payload type in
// |decoder_database| is asked for the packet duration. If there is no such
// decoder, or the decoder reports a negative duration, |last_decoded_length|
// is used for that packet instead.
// |decoder_database| is dereferenced without a NULL check.
int PacketBuffer::NumSamplesInBuffer(DecoderDatabase* decoder_database,
                                     int last_decoded_length) const {
  int total_samples = 0;
  for (PacketList::const_iterator it = buffer_.begin(); it != buffer_.end();
       ++it) {
    Packet* packet = *it;
    int duration = -1;  // Negative means "unknown".
    AudioDecoder* decoder =
        decoder_database->GetDecoder(packet->header.payloadType);
    if (decoder) {
      duration = decoder->PacketDuration(packet->payload,
                                         packet->payload_length);
    }
    total_samples += (duration >= 0) ? duration : last_decoded_length;
  }
  return total_samples;
}
void PacketBuffer::IncrementWaitingTimes(int inc) {
PacketList::iterator it;
for (it = buffer_.begin(); it != buffer_.end(); ++it) {
(*it)->waiting_time += inc;
}
}
// Removes the first packet from |packet_list| and deletes both the packet and
// its payload array.
// Returns false if |packet_list| was already empty, true otherwise.
bool PacketBuffer::DeleteFirstPacket(PacketList* packet_list) {
  const bool has_packet = !packet_list->empty();
  if (has_packet) {
    Packet* doomed = packet_list->front();
    packet_list->pop_front();
    delete [] doomed->payload;
    delete doomed;
  }
  return has_packet;
}
// Deletes every packet (and payload array) in |packet_list|, leaving the list
// empty.
void PacketBuffer::DeleteAllPackets(PacketList* packet_list) {
  while (!packet_list->empty()) {
    DeleteFirstPacket(packet_list);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,139 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_BUFFER_H_
#include "webrtc/modules/audio_coding/neteq4/packet.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declaration.
class DecoderDatabase;
// This is the actual buffer holding the packets before decoding.
// The buffer stores pointers to Packet objects and owns the packets (and
// their payload arrays) for as long as they are in the buffer. All public
// methods are virtual.
class PacketBuffer {
public:
// Status codes returned by the methods of this class. kOK and kFlushed are
// not errors; the remaining values describe why an operation failed.
enum BufferReturnCodes {
kOK = 0,
kFlushed,
kNotFound,
kBufferEmpty,
kInvalidPacket,
kInvalidPointer,
kOversizePacket
};
// Constructor creates a buffer which can hold a maximum of
// |max_number_of_packets| packets and |max_payload_memory| bytes of payload,
// excluding RTP headers.
PacketBuffer(size_t max_number_of_packets, size_t max_payload_memory);
// Deletes all packets in the buffer before destroying the buffer.
virtual ~PacketBuffer();
// Flushes the buffer and deletes all packets in it.
virtual void Flush();
// Returns true for an empty buffer.
virtual bool Empty() const { return buffer_.empty(); }
// Inserts |packet| into the buffer. The buffer will take over ownership of
// the packet object.
// Returns PacketBuffer::kOK on success, PacketBuffer::kFlushed if the buffer
// was flushed due to overfilling (the packet is still inserted after the
// flush). Returns PacketBuffer::kOversizePacket if the packet does not fit
// even in the flushed buffer; the packet is then deleted.
// Returns PacketBuffer::kInvalidPacket if |packet| or its payload is NULL.
virtual int InsertPacket(Packet* packet);
// Inserts a list of packets into the buffer. The buffer will take over
// ownership of the packet objects.
// Returns PacketBuffer::kOK if all packets were inserted successfully.
// If the buffer was flushed, either due to overfilling or because the speech
// or comfort noise payload type changed, only a subset of the list remains in
// the buffer, and PacketBuffer::kFlushed is returned.
// If inserting a packet fails, the remaining packets in |packet_list| are
// deleted and the error code from InsertPacket is returned.
// The last three parameters are included for legacy compatibility.
// TODO(hlundin): Redesign to not use current_*_payload_type and
// decoder_database.
virtual int InsertPacketList(PacketList* packet_list,
const DecoderDatabase& decoder_database,
uint8_t* current_rtp_payload_type,
uint8_t* current_cng_rtp_payload_type);
// Gets the timestamp for the first packet in the buffer and writes it to the
// output variable |next_timestamp|.
// Returns PacketBuffer::kBufferEmpty if the buffer is empty,
// PacketBuffer::kInvalidPointer if |next_timestamp| is NULL,
// PacketBuffer::kOK otherwise.
virtual int NextTimestamp(uint32_t* next_timestamp) const;
// Gets the timestamp for the first packet in the buffer with a timestamp no
// lower than the input limit |timestamp|. The result is written to the output
// variable |next_timestamp|.
// Returns PacketBuffer::kBufferEmpty if the buffer is empty,
// PacketBuffer::kInvalidPointer if |next_timestamp| is NULL,
// PacketBuffer::kNotFound if no packet in the buffer has a timestamp at or
// above the limit, PacketBuffer::kOK otherwise.
virtual int NextHigherTimestamp(uint32_t timestamp,
uint32_t* next_timestamp) const;
// Returns a (constant) pointer to the RTP header of the first packet in the
// buffer. Returns NULL if the buffer is empty.
virtual const RTPHeader* NextRtpHeader() const;
// Extracts the first packet in the buffer and returns a pointer to it.
// Returns NULL if the buffer is empty. The caller is responsible for deleting
// the packet.
// Subsequent packets with the same timestamp as the one extracted will be
// discarded and properly deleted. The number of discarded packets will be
// written to the output variable |discard_count|, which may be NULL if the
// caller is not interested in the count.
virtual Packet* GetNextPacket(int* discard_count);
// Discards the first packet in the buffer. The packet is deleted.
// Returns PacketBuffer::kBufferEmpty if the buffer is empty,
// PacketBuffer::kOK otherwise.
virtual int DiscardNextPacket();
// Discards all packets that are (strictly) older than |timestamp_limit|.
// The age comparison takes 32-bit timestamp wrap-around into account.
// Returns number of packets discarded.
virtual int DiscardOldPackets(uint32_t timestamp_limit);
// Returns the number of packets in the buffer, including duplicates and
// redundant packets.
virtual int NumPacketsInBuffer() const {
return static_cast<int>(buffer_.size());
}
// Returns the number of samples in the buffer, including samples carried in
// duplicate and redundant packets. The duration of each packet is obtained
// from the decoder that |decoder_database| holds for the packet's payload
// type; |last_decoded_length| is used for packets where no decoder is found
// or the decoder cannot report a duration.
virtual int NumSamplesInBuffer(DecoderDatabase* decoder_database,
int last_decoded_length) const;
// Increase the waiting time counter for every packet in the buffer by |inc|.
// The default value for |inc| is 1.
virtual void IncrementWaitingTimes(int inc = 1);
// Returns the total payload length, in bytes, of the packets currently in the
// buffer.
virtual int current_memory_bytes() const { return current_memory_bytes_; }
// Static method that properly deletes the first packet, and its payload
// array, in |packet_list|. Returns false if |packet_list| already was empty,
// otherwise true.
static bool DeleteFirstPacket(PacketList* packet_list);
// Static method that properly deletes all packets, and their payload arrays,
// in |packet_list|.
static void DeleteAllPackets(PacketList* packet_list);
private:
// Upper limit on the number of packets held in |buffer_|.
size_t max_number_of_packets_;
// Upper limit on the sum of payload lengths held in |buffer_|.
size_t max_memory_bytes_;
// Running sum of the payload lengths of the packets in |buffer_|.
int current_memory_bytes_;
// The stored packets; the first element is the next one to be extracted.
PacketList buffer_;
DISALLOW_COPY_AND_ASSIGN(PacketBuffer);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_BUFFER_H_

View File

@ -0,0 +1,560 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PacketBuffer class.
#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/packet.h"
using ::testing::Return;
using ::testing::_;
namespace webrtc {
// Helper class to generate packets. Packets must be deleted by the user.
// Each generated packet gets the current sequence number and timestamp, after
// which the sequence number is advanced by one and the timestamp by the frame
// size.
class PacketGenerator {
public:
// |seq_no|, |ts| and |pt| are the sequence number, timestamp and payload type
// of the first generated packet. |frame_size| is the timestamp increment
// between consecutive packets.
PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size);
virtual ~PacketGenerator() {}
// Allocates and returns a new packet with a payload array of
// |payload_size_bytes| bytes, then advances the generator state.
Packet* NextPacket(int payload_size_bytes);
// Advances the generator state as if a packet had been generated, without
// creating one.
void SkipPacket();
uint16_t seq_no_;  // Sequence number of the next packet.
uint32_t ts_;  // Timestamp of the next packet.
uint8_t pt_;  // Payload type written to every packet.
int frame_size_;  // Timestamp increment per packet.
};
// Stores the starting sequence number, timestamp, payload type and the
// per-packet timestamp increment.
PacketGenerator::PacketGenerator(uint16_t seq_no,
                                 uint32_t ts,
                                 uint8_t pt,
                                 int frame_size)
    : seq_no_(seq_no), ts_(ts), pt_(pt), frame_size_(frame_size) {}
// Creates a primary packet carrying the generator's current sequence number,
// timestamp and payload type, with a freshly allocated (uninitialized) payload
// of |payload_size_bytes| bytes. The generator is then advanced to the next
// packet. The caller owns the returned packet and its payload.
Packet* PacketGenerator::NextPacket(int payload_size_bytes) {
  Packet* generated = new Packet;

  RTPHeader& header = generated->header;
  header.sequenceNumber = seq_no_;
  header.timestamp = ts_;
  header.payloadType = pt_;
  header.markerBit = false;
  header.ssrc = 0x12345678;
  header.numCSRCs = 0;
  header.paddingLength = 0;

  generated->payload_length = payload_size_bytes;
  generated->primary = true;
  generated->payload = new uint8_t[payload_size_bytes];

  // Advance sequence number and timestamp for the next packet.
  SkipPacket();
  return generated;
}
// Advances the timestamp by one frame and the sequence number by one, without
// producing a packet.
void PacketGenerator::SkipPacket() {
  ts_ += frame_size_;
  ++seq_no_;
}
// Start of test definitions.
// A newly created buffer must be empty, and must be destructible right away.
TEST(PacketBuffer, CreateAndDestroy) {
  // Limits: 10 packets, 1000 bytes.
  PacketBuffer* packet_buffer = new PacketBuffer(10, 1000);
  EXPECT_TRUE(packet_buffer->Empty());
  delete packet_buffer;
}
// Inserts a single packet and verifies that all accessors reflect it.
TEST(PacketBuffer, InsertPacket) {
  PacketBuffer buffer(10, 1000);  // 10 packets, 1000 bytes.
  PacketGenerator gen(17u, 4711u, 0, 10);

  const int kPayloadLength = 100;
  Packet* inserted = gen.NextPacket(kPayloadLength);
  EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(inserted));

  EXPECT_FALSE(buffer.Empty());
  EXPECT_EQ(1, buffer.NumPacketsInBuffer());
  EXPECT_EQ(kPayloadLength, buffer.current_memory_bytes());

  uint32_t first_timestamp;
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&first_timestamp));
  EXPECT_EQ(4711u, first_timestamp);

  // The header pointer must refer to the very packet that was inserted.
  const RTPHeader* first_header = buffer.NextRtpHeader();
  EXPECT_EQ(&(inserted->header), first_header);

  // Do not explicitly flush buffer or delete packet to test that it is deleted
  // with the buffer. (Tested with Valgrind or similar tool.)
}
// Fills the buffer up to its packet limit, flushes it explicitly, and verifies
// that it is empty afterwards.
TEST(PacketBuffer, FlushBuffer) {
  PacketBuffer buffer(10, 1000);  // 10 packets, 1000 bytes.
  PacketGenerator gen(0, 0, 0, 10);
  const int kNumPackets = 10;
  const int kPayloadLength = 10;

  // All of these fit within the buffer limits, so each insert should be ok.
  int num_inserted = 0;
  while (num_inserted < kNumPackets) {
    Packet* packet = gen.NextPacket(kPayloadLength);
    EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet));
    ++num_inserted;
  }
  EXPECT_EQ(kNumPackets, buffer.NumPacketsInBuffer());
  EXPECT_FALSE(buffer.Empty());
  EXPECT_EQ(kNumPackets * kPayloadLength, buffer.current_memory_bytes());

  // The buffer is expected to delete the packets and payloads itself.
  buffer.Flush();
  EXPECT_EQ(0, buffer.NumPacketsInBuffer());
  EXPECT_TRUE(buffer.Empty());
  EXPECT_EQ(0, buffer.current_memory_bytes());
}
// Exceeds first the packet-count limit and then the payload-memory limit, and
// verifies that the buffer flushes itself and keeps only the newest packet.
TEST(PacketBuffer, OverfillBuffer) {
  PacketBuffer buffer(10, 1000);  // 10 packets, 1000 bytes.
  PacketGenerator gen(0, 0, 0, 10);
  const int kSmallPayload = 10;
  const int kLargePayload = 500;

  // Part 1: packet-count limit. Ten small packets fit.
  for (int count = 0; count < 10; ++count) {
    Packet* small_packet = gen.NextPacket(kSmallPayload);
    EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(small_packet));
  }
  EXPECT_EQ(10, buffer.NumPacketsInBuffer());
  EXPECT_EQ(10 * kSmallPayload, buffer.current_memory_bytes());
  uint32_t front_timestamp;
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&front_timestamp));
  // The packet inserted first (timestamp 0) is first in line.
  EXPECT_EQ(0u, front_timestamp);

  // The 11th packet forces a flush; afterwards it is alone in the buffer.
  Packet* newest = gen.NextPacket(kSmallPayload);
  EXPECT_EQ(PacketBuffer::kFlushed, buffer.InsertPacket(newest));
  EXPECT_EQ(1, buffer.NumPacketsInBuffer());
  EXPECT_EQ(kSmallPayload, buffer.current_memory_bytes());
  EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&front_timestamp));
  EXPECT_EQ(newest->header.timestamp, front_timestamp);

  // Part 2: memory limit. The first large packet fits next to the small one.
  newest = gen.NextPacket(kLargePayload);
  EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(newest));
  EXPECT_EQ(2, buffer.NumPacketsInBuffer());
  EXPECT_EQ(kSmallPayload + kLargePayload, buffer.current_memory_bytes());

  // The second large packet exceeds 1000 bytes in total and forces a flush.
  newest = gen.NextPacket(kLargePayload);
  EXPECT_EQ(PacketBuffer::kFlushed, buffer.InsertPacket(newest));
  EXPECT_EQ(1, buffer.NumPacketsInBuffer());
  EXPECT_EQ(kLargePayload, buffer.current_memory_bytes());

  // Flush buffer to delete remaining packets.
  buffer.Flush();
}
// Inserts a list of ten packets of the same (speech) payload type and verifies
// that the list is drained into the buffer without any flush.
TEST(PacketBuffer, InsertPacketList) {
  PacketBuffer buffer(10, 1000);  // 10 packets, 1000 bytes.
  PacketGenerator gen(0, 0, 0, 10);
  const int kNumPackets = 10;
  const int kPayloadLength = 10;

  // Build the input list.
  PacketList packets;
  for (int n = 0; n < kNumPackets; ++n) {
    packets.push_back(gen.NextPacket(kPayloadLength));
  }

  // Payload type 0 is neither comfort noise nor DTMF, i.e., it is speech.
  MockDecoderDatabase decoder_database;
  EXPECT_CALL(decoder_database, IsComfortNoise(0))
      .WillRepeatedly(Return(false));
  EXPECT_CALL(decoder_database, IsDtmf(0))
      .WillRepeatedly(Return(false));

  uint8_t current_pt = 0xFF;
  uint8_t current_cng_pt = 0xFF;
  const int result = buffer.InsertPacketList(&packets,
                                             decoder_database,
                                             &current_pt,
                                             &current_cng_pt);
  EXPECT_EQ(PacketBuffer::kOK, result);
  EXPECT_TRUE(packets.empty());  // The buffer should have depleted the list.
  EXPECT_EQ(kNumPackets, buffer.NumPacketsInBuffer());
  EXPECT_EQ(kNumPackets * kPayloadLength, buffer.current_memory_bytes());
  EXPECT_EQ(0, current_pt);  // Current payload type changed to 0.
  EXPECT_EQ(0xFF, current_cng_pt);  // CNG payload type not changed.

  buffer.Flush();  // Clean up.
  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
// Inserts a list of packets where the last packet has a different payload type
// than the others. The payload type change is expected to flush the buffer, so
// that only the last packet remains.
// TODO(hlundin): Remove this test when legacy operation is no longer needed.
TEST(PacketBuffer, InsertPacketListChangePayloadType) {
  PacketBuffer buffer(10, 1000);  // 10 packets, 1000 bytes.
  PacketGenerator gen(0, 0, 0, 10);
  const int kPayloadLength = 10;

  // Ten packets with payload type 0 ...
  PacketList packets;
  for (int n = 0; n < 10; ++n) {
    packets.push_back(gen.NextPacket(kPayloadLength));
  }
  // ... followed by an 11th packet of another payload type (not CNG).
  Packet* odd_one_out = gen.NextPacket(kPayloadLength);
  odd_one_out->header.payloadType = 1;
  packets.push_back(odd_one_out);

  // Every payload type is treated as speech.
  MockDecoderDatabase decoder_database;
  EXPECT_CALL(decoder_database, IsComfortNoise(_))
      .WillRepeatedly(Return(false));
  EXPECT_CALL(decoder_database, IsDtmf(_))
      .WillRepeatedly(Return(false));

  uint8_t current_pt = 0xFF;
  uint8_t current_cng_pt = 0xFF;
  const int result = buffer.InsertPacketList(&packets,
                                             decoder_database,
                                             &current_pt,
                                             &current_cng_pt);
  EXPECT_EQ(PacketBuffer::kFlushed, result);
  EXPECT_TRUE(packets.empty());  // The buffer should have depleted the list.
  EXPECT_EQ(1, buffer.NumPacketsInBuffer());  // Only the last packet.
  EXPECT_EQ(1 * kPayloadLength, buffer.current_memory_bytes());
  EXPECT_EQ(1, current_pt);  // Current payload type changed to 1.
  EXPECT_EQ(0xFF, current_cng_pt);  // CNG payload type not changed.

  buffer.Flush();  // Clean up.
  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
// Test inserting a number of packets, and verifying correct extraction order.
// Each packet (except the first) carries a primary payload and a secondary
// (redundant) payload that repeats the previous packet's timestamp. Both the
// sequence number and the timestamp wrap around during the test.
// The packets inserted are as follows:
//   Packet no.  Seq. no.  Primary TS   Secondary TS
//   0           0xFFFD    0xFFFFFFD7   -
//   1           0xFFFE    0xFFFFFFE1   0xFFFFFFD7
//   2           0xFFFF    0xFFFFFFEB   0xFFFFFFE1
//   3           0x0000    0xFFFFFFF5   0xFFFFFFEB
//   4           0x0001    0xFFFFFFFF   0xFFFFFFF5
//   5           0x0002    0x0000000A   0xFFFFFFFF
//   6 (MISSING) 0x0003    0x00000014   0x0000000A
//   7           0x0004    0x0000001E   0x00000014
//   8           0x0005    0x00000028   0x0000001E
//   9           0x0006    0x00000032   0x00000028
// Packet 6 is never inserted, so its primary timestamp (0x00000014) is only
// available as the secondary payload of packet 7, and the secondary copy of
// timestamp 0x0000000A is lost with it.
TEST(PacketBuffer, ExtractOrderRedundancy) {
PacketBuffer buffer(100, 1000); // 100 packets, 1000 bytes.
const uint32_t ts_increment = 10; // Samples per packet.
const uint16_t start_seq_no = 0xFFFF - 2; // Wraps after 3 packets.
const uint32_t start_ts = 0xFFFFFFFF -
4 * ts_increment; // Wraps after 5 packets.
const uint8_t primary_pt = 0;
const uint8_t secondary_pt = 1;
PacketGenerator gen(start_seq_no, start_ts, primary_pt, ts_increment);
// Insert secondary payloads too. (Simulating RED.)
// The redundancy generator starts one sequence number ahead but at the same
// timestamp, since a secondary payload travels in the packet that follows the
// one carrying the same timestamp as primary.
PacketGenerator red_gen(start_seq_no + 1, start_ts, secondary_pt,
ts_increment);
// Insert 9 small packets (skip one).
for (int i = 0; i < 10; ++i) {
const int payload_len = 10;
if (i == 6) {
// Skip this packet.
gen.SkipPacket();
red_gen.SkipPacket();
continue;
}
// Primary payload.
Packet* packet = gen.NextPacket(payload_len);
EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet));
if (i >= 1) {
// Secondary payload.
packet = red_gen.NextPacket(payload_len);
packet->primary = false;
EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet));
}
}
EXPECT_EQ(17, buffer.NumPacketsInBuffer()); // 9 primary + 8 secondary
// Extract one packet per timestamp and check that the primary payload is
// delivered whenever it exists, with the redundant copy being dropped.
uint16_t current_seq_no = start_seq_no;
uint32_t current_ts = start_ts;
for (int i = 0; i < 10; ++i) {
// Extract packets.
int drop_count = 0;
Packet* packet = buffer.GetNextPacket(&drop_count);
ASSERT_FALSE(packet == NULL);
if (i == 6) {
// Special case for the dropped primary payload.
// Expect secondary payload, and one step higher sequence number.
EXPECT_EQ(current_seq_no + 1, packet->header.sequenceNumber);
EXPECT_EQ(current_ts, packet->header.timestamp);
EXPECT_FALSE(packet->primary);
EXPECT_EQ(1, packet->header.payloadType);
EXPECT_EQ(0, drop_count);
} else {
EXPECT_EQ(current_seq_no, packet->header.sequenceNumber);
EXPECT_EQ(current_ts, packet->header.timestamp);
EXPECT_TRUE(packet->primary);
EXPECT_EQ(0, packet->header.payloadType);
if (i == 5 || i == 9) {
// No duplicate TS for the primary payload whose redundant copy was in the
// skipped packet (i == 5), or for the last primary payload (i == 9).
EXPECT_EQ(0, drop_count);
} else {
EXPECT_EQ(1, drop_count);
}
}
++current_seq_no;
current_ts += ts_increment;
// Extracted packets are owned by the caller.
delete [] packet->payload;
delete packet;
}
}
// Inserts ten packets and discards them one by one, verifying after each step
// that the expected packet is at the front of the buffer.
TEST(PacketBuffer, DiscardPackets) {
  PacketBuffer buffer(100, 1000);  // 100 packets, 1000 bytes.
  const uint16_t start_seq_no = 17;
  const uint32_t start_ts = 4711;
  const uint32_t ts_increment = 10;
  PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
  const int payload_len = 10;
  // Insert 10 small packets. Each insertion is verified, so that a failure
  // here is reported at its source rather than by the checks further down.
  for (int i = 0; i < 10; ++i) {
    Packet* packet = gen.NextPacket(payload_len);
    EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet));
  }
  EXPECT_EQ(10, buffer.NumPacketsInBuffer());
  EXPECT_EQ(10 * payload_len, buffer.current_memory_bytes());
  // Discard them one by one and make sure that the right packets are at the
  // front of the buffer.
  uint32_t current_ts = start_ts;
  for (int i = 0; i < 10; ++i) {
    uint32_t ts;
    EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts));
    EXPECT_EQ(current_ts, ts);
    EXPECT_EQ(PacketBuffer::kOK, buffer.DiscardNextPacket());
    current_ts += ts_increment;
  }
  EXPECT_TRUE(buffer.Empty());
  // Discarding must also release the payload memory bookkeeping.
  EXPECT_EQ(0, buffer.current_memory_bytes());
}
// Inserts packets in a scrambled order and verifies that the buffer delivers
// them sorted by timestamp.
TEST(PacketBuffer, Reordering) {
  PacketBuffer buffer(100, 1000);  // 100 packets, 1000 bytes.
  const uint16_t kStartSeqNo = 17;
  const uint32_t kStartTimestamp = 4711;
  const uint32_t kTimestampIncrement = 10;
  const int kNumPackets = 10;
  const int kPayloadLength = 10;
  PacketGenerator gen(kStartSeqNo, kStartTimestamp, 0, kTimestampIncrement);

  // Generate 10 small packets and put them in a PacketList. Every odd packet
  // goes to the front and every even packet to the back, which creates a
  // (rather strange) reordering.
  PacketList scrambled;
  for (int index = 0; index < kNumPackets; ++index) {
    Packet* packet = gen.NextPacket(kPayloadLength);
    const bool is_odd = (index % 2) != 0;
    if (is_odd) {
      scrambled.push_front(packet);
    } else {
      scrambled.push_back(packet);
    }
  }

  MockDecoderDatabase decoder_database;
  EXPECT_CALL(decoder_database, IsComfortNoise(0))
      .WillRepeatedly(Return(false));
  EXPECT_CALL(decoder_database, IsDtmf(0))
      .WillRepeatedly(Return(false));

  uint8_t current_pt = 0xFF;
  uint8_t current_cng_pt = 0xFF;
  const int result = buffer.InsertPacketList(&scrambled,
                                             decoder_database,
                                             &current_pt,
                                             &current_cng_pt);
  EXPECT_EQ(PacketBuffer::kOK, result);
  EXPECT_EQ(kNumPackets, buffer.NumPacketsInBuffer());
  EXPECT_EQ(kNumPackets * kPayloadLength, buffer.current_memory_bytes());

  // Extract the packets and make sure that they come out in timestamp order.
  uint32_t expected_timestamp = kStartTimestamp;
  for (int index = 0; index < kNumPackets; ++index) {
    Packet* extracted = buffer.GetNextPacket(NULL);
    ASSERT_FALSE(extracted == NULL);
    EXPECT_EQ(expected_timestamp, extracted->header.timestamp);
    expected_timestamp += kTimestampIncrement;
    // Extracted packets are owned by the caller.
    delete [] extracted->payload;
    delete extracted;
  }
  EXPECT_TRUE(buffer.Empty());
  EXPECT_CALL(decoder_database, Die());  // Called when object is deleted.
}
// Exercises the error paths of PacketBuffer: packets that cannot fit, invalid
// packets, operations on an empty buffer, NULL output pointers, and a packet
// list containing an invalid packet.
TEST(PacketBuffer, Failures) {
const uint16_t start_seq_no = 17;
const uint32_t start_ts = 4711;
const uint32_t ts_increment = 10;
int payload_len = 100;
PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment);
// A buffer with room for zero packets cannot hold any packet.
PacketBuffer* buffer = new PacketBuffer(0, 1000); // 0 packets, 1000 bytes.
Packet* packet = gen.NextPacket(payload_len);
EXPECT_EQ(PacketBuffer::kOversizePacket, buffer->InsertPacket(packet));
delete buffer;
// A buffer with less payload memory than the packet needs.
buffer = new PacketBuffer(100, 10); // 100 packets, 10 bytes.
packet = gen.NextPacket(payload_len);
EXPECT_EQ(PacketBuffer::kOversizePacket, buffer->InsertPacket(packet));
delete buffer;
// Invalid packets: first a NULL packet, then a packet without payload.
buffer = new PacketBuffer(100, 10000); // 100 packets, 10000 bytes.
packet = NULL;
EXPECT_EQ(PacketBuffer::kInvalidPacket, buffer->InsertPacket(packet));
packet = gen.NextPacket(payload_len);
delete [] packet->payload;
packet->payload = NULL;
EXPECT_EQ(PacketBuffer::kInvalidPacket, buffer->InsertPacket(packet));
// Packet is deleted by the PacketBuffer.
// Buffer should still be empty. Test all empty-checks.
uint32_t temp_ts;
EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->NextTimestamp(&temp_ts));
EXPECT_EQ(PacketBuffer::kBufferEmpty,
buffer->NextHigherTimestamp(0, &temp_ts));
EXPECT_EQ(NULL, buffer->NextRtpHeader());
EXPECT_EQ(NULL, buffer->GetNextPacket(NULL));
EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->DiscardNextPacket());
EXPECT_EQ(0, buffer->DiscardOldPackets(0)); // 0 packets discarded.
// Insert one packet to make the buffer non-empty.
// The NULL-pointer checks below are only reached for a non-empty buffer.
packet = gen.NextPacket(payload_len);
EXPECT_EQ(PacketBuffer::kOK, buffer->InsertPacket(packet));
EXPECT_EQ(PacketBuffer::kInvalidPointer, buffer->NextTimestamp(NULL));
EXPECT_EQ(PacketBuffer::kInvalidPointer,
buffer->NextHigherTimestamp(0, NULL));
delete buffer;
// Insert packet list of three packets, where the second packet has an invalid
// payload. Expect first packet to be inserted, and the remaining two to be
// discarded.
buffer = new PacketBuffer(100, 1000); // 100 packets, 1000 bytes.
PacketList list;
list.push_back(gen.NextPacket(payload_len)); // Valid packet.
packet = gen.NextPacket(payload_len);
delete [] packet->payload;
packet->payload = NULL; // Invalid.
list.push_back(packet);
list.push_back(gen.NextPacket(payload_len)); // Valid packet.
MockDecoderDatabase decoder_database;
EXPECT_CALL(decoder_database, IsComfortNoise(0))
.WillRepeatedly(Return(false));
EXPECT_CALL(decoder_database, IsDtmf(0))
.WillRepeatedly(Return(false));
uint8_t current_pt = 0xFF;
uint8_t current_cng_pt = 0xFF;
EXPECT_EQ(PacketBuffer::kInvalidPacket,
buffer->InsertPacketList(&list,
decoder_database,
&current_pt,
&current_cng_pt));
EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list.
EXPECT_EQ(1, buffer->NumPacketsInBuffer());
delete buffer;
EXPECT_CALL(decoder_database, Die()); // Called when object is deleted.
}
// Test the packet comparison operators (==, !=, <, >, <=, >=).
// Operator < should return true if the first packet "goes before" the second.
// The same two packets are modified step by step, so the order of the sections
// below matters.
TEST(PacketBuffer, ComparePackets) {
PacketGenerator gen(0, 0, 0, 10);
Packet* a = gen.NextPacket(10); // SN = 0, TS = 0.
Packet* b = gen.NextPacket(10); // SN = 1, TS = 10.
EXPECT_FALSE(*a == *b);
EXPECT_TRUE(*a != *b);
EXPECT_TRUE(*a < *b);
EXPECT_FALSE(*a > *b);
EXPECT_TRUE(*a <= *b);
EXPECT_FALSE(*a >= *b);
// Testing wrap-around case; 'a' is earlier but has a larger timestamp value.
a->header.timestamp = 0xFFFFFFFF - 10;
EXPECT_FALSE(*a == *b);
EXPECT_TRUE(*a != *b);
EXPECT_TRUE(*a < *b);
EXPECT_FALSE(*a > *b);
EXPECT_TRUE(*a <= *b);
EXPECT_FALSE(*a >= *b);
// Test equal packets.
EXPECT_TRUE(*a == *a);
EXPECT_FALSE(*a != *a);
EXPECT_FALSE(*a < *a);
EXPECT_FALSE(*a > *a);
EXPECT_TRUE(*a <= *a);
EXPECT_TRUE(*a >= *a);
// Test equal timestamps but different sequence numbers (0 and 1).
a->header.timestamp = b->header.timestamp;
EXPECT_FALSE(*a == *b);
EXPECT_TRUE(*a != *b);
EXPECT_TRUE(*a < *b);
EXPECT_FALSE(*a > *b);
EXPECT_TRUE(*a <= *b);
EXPECT_FALSE(*a >= *b);
// Test equal timestamps but different sequence numbers (0xFFFF and 1), i.e.,
// the sequence number wrap-around case; 'a' is still expected to go first.
a->header.sequenceNumber = 0xFFFF;
EXPECT_FALSE(*a == *b);
EXPECT_TRUE(*a != *b);
EXPECT_TRUE(*a < *b);
EXPECT_FALSE(*a > *b);
EXPECT_TRUE(*a <= *b);
EXPECT_FALSE(*a >= *b);
// Test equal timestamps and sequence numbers, but only 'b' is primary.
// The primary packet is expected to go before the non-primary one.
a->header.sequenceNumber = b->header.sequenceNumber;
a->primary = false;
b->primary = true;
EXPECT_FALSE(*a == *b);
EXPECT_TRUE(*a != *b);
EXPECT_FALSE(*a < *b);
EXPECT_TRUE(*a > *b);
EXPECT_FALSE(*a <= *b);
EXPECT_TRUE(*a >= *b);
// The packets were never handed to a buffer, so they are deleted here.
delete [] a->payload;
delete a;
delete [] b->payload;
delete b;
}
// Test the static DeleteFirstPacket and DeleteAllPackets methods.
TEST(PacketBuffer, DeleteAllPackets) {
  PacketGenerator gen(0, 0, 0, 10);
  const int kPayloadLength = 10;

  // Build a list of 10 small packets.
  PacketList packets;
  for (int n = 0; n < 10; ++n) {
    packets.push_back(gen.NextPacket(kPayloadLength));
  }

  // Deleting the first packet shrinks the list by exactly one.
  EXPECT_TRUE(PacketBuffer::DeleteFirstPacket(&packets));
  EXPECT_EQ(9u, packets.size());

  // Deleting all packets leaves an empty list ...
  PacketBuffer::DeleteAllPackets(&packets);
  EXPECT_TRUE(packets.empty());
  // ... from which nothing more can be deleted.
  EXPECT_FALSE(PacketBuffer::DeleteFirstPacket(&packets));
}
} // namespace webrtc

View File

@ -0,0 +1,368 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h"
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
namespace webrtc {
// Splits every RED packet (RFC 2198) in |packet_list| into one packet per
// payload block.
// The method loops through a list of packets {A, B, C, ...}. Each packet is
// split into its corresponding RED payloads, {A1, A2, ...}, which are
// temporarily held in the list |new_packets|.
// When a packet in |packet_list| has been processed, the original packet is
// replaced by the new ones in |new_packets|, so that |packet_list| becomes:
// {A1, A2, ..., B, C, ...}. The method then continues with B, and C, until all
// the original packets have been replaced by their split payloads. Within each
// group of split payloads, the primary payload comes first.
// Returns kOK on success. If the RED headers of a packet are truncated, or the
// block lengths do not match the packet length, the affected payloads are
// discarded and kRedLengthMismatch is returned; the remaining packets in the
// list are still processed.
int PayloadSplitter::SplitRed(PacketList* packet_list) {
  int ret = kOK;
  PacketList::iterator it = packet_list->begin();
  while (it != packet_list->end()) {
    PacketList new_packets;  // An empty list to store the split packets in.
    Packet* red_packet = (*it);
    assert(red_packet->payload);
    const uint8_t* payload_ptr = red_packet->payload;
    // Number of bytes in the RED packet that have not been consumed yet. All
    // bounds checks are made against this counter, so that no pointer is ever
    // formed or dereferenced outside of the payload array.
    int bytes_left = red_packet->payload_length;

    // Read RED headers (according to RFC 2198):
    //
    //    0                   1                   2                   3
    //    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    //   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    //   |F|   block PT  |  timestamp offset         |   block length    |
    //   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    // Last RED header:
    //    0 1 2 3 4 5 6 7
    //   +-+-+-+-+-+-+-+-+
    //   |0|   Block PT  |
    //   +-+-+-+-+-+-+-+-+
    bool last_block = false;
    bool headers_ok = true;
    int sum_length = 0;  // Header and payload bytes of the redundant blocks.
    while (!last_block) {
      // The packet content comes from the network and cannot be trusted. Make
      // sure that the header is fully contained in the packet before reading.
      if (bytes_left < 1) {
        headers_ok = false;
        break;
      }
      // Check the F bit. If F == 0, this was the last block.
      last_block = ((payload_ptr[0] & 0x80) == 0);
      const int header_size = last_block ? 1 : 4;
      if (bytes_left < header_size) {
        headers_ok = false;
        break;
      }

      Packet* new_packet = new Packet;
      new_packet->header = red_packet->header;
      // Bits 1 through 7 are payload type.
      new_packet->header.payloadType = payload_ptr[0] & 0x7F;
      if (last_block) {
        // No more header data to read. The last block has no length field; it
        // gets whatever is left once all headers and redundant payloads have
        // been accounted for. The result is negative if the redundant blocks
        // claim more data than the packet holds; that is detected below.
        new_packet->payload_length =
            red_packet->payload_length - (sum_length + header_size);
        new_packet->primary = true;  // Last block is always primary.
      } else {
        // Bits 8 through 21 are timestamp offset.
        int timestamp_offset = (payload_ptr[1] << 6) +
            ((payload_ptr[2] & 0xFC) >> 2);
        new_packet->header.timestamp = red_packet->header.timestamp -
            timestamp_offset;
        // Bits 22 through 31 are payload length.
        new_packet->payload_length = ((payload_ptr[2] & 0x03) << 8) +
            payload_ptr[3];
        new_packet->primary = false;
        sum_length += header_size + new_packet->payload_length;
      }
      // Advance to the next RED header, or to the first payload byte if this
      // was the last header.
      payload_ptr += header_size;
      bytes_left -= header_size;
      // Store in new list of packets.
      new_packets.push_back(new_packet);
    }

    // Populate the new packets with payload data.
    // |payload_ptr| now points at the first payload byte (if the headers were
    // read successfully).
    PacketList::iterator new_it = new_packets.begin();
    if (headers_ok) {
      for (; new_it != new_packets.end(); ++new_it) {
        const int payload_length = (*new_it)->payload_length;
        if (payload_length < 0 || payload_length > bytes_left) {
          // The block lengths in the RED headers do not match the overall
          // packet length. Something is corrupt.
          break;
        }
        (*new_it)->payload = new uint8_t[payload_length];
        memcpy((*new_it)->payload, payload_ptr, payload_length);
        payload_ptr += payload_length;
        bytes_left -= payload_length;
      }
    }
    if (!headers_ok || new_it != new_packets.end()) {
      // Discard the payload that failed and all remaining payloads from this
      // packet. With truncated headers, that is every payload in the packet.
      while (new_it != new_packets.end()) {
        // Payload should not have been allocated yet.
        assert(!(*new_it)->payload);
        delete (*new_it);
        new_it = new_packets.erase(new_it);
      }
      ret = kRedLengthMismatch;
    }

    // Reverse the order of the new packets, so that the primary payload is
    // always first.
    new_packets.reverse();
    // Insert new packets into original list, before the element pointed to by
    // iterator |it|.
    packet_list->splice(it, new_packets, new_packets.begin(),
                        new_packets.end());
    // Delete old packet payload.
    delete [] (*it)->payload;
    delete (*it);
    // Remove |it| from the packet list. This operation effectively moves the
    // iterator |it| to the next packet in the list. Thus, we do not have to
    // increment it manually.
    it = packet_list->erase(it);
  }
  return ret;
}
// Removes payloads whose type differs from the main payload type of the list.
// The main payload type is taken from the first packet in |packet_list| that
// is neither DTMF nor comfort noise. Every later packet that is also neither
// DTMF nor comfort noise, but has a different payload type, is deleted and
// removed from the list. DTMF and comfort noise packets are always kept.
// Returns the number of packets that were deleted.
int PayloadSplitter::CheckRedPayloads(PacketList* packet_list,
                                      const DecoderDatabase& decoder_database) {
  int main_payload_type = -1;  // -1 until the first speech packet is seen.
  int num_deleted_packets = 0;
  PacketList::iterator it = packet_list->begin();
  while (it != packet_list->end()) {
    const uint8_t this_payload_type = (*it)->header.payloadType;
    const bool is_speech =
        !decoder_database.IsDtmf(this_payload_type) &&
        !decoder_database.IsComfortNoise(this_payload_type);
    if (is_speech) {
      if (main_payload_type == -1) {
        // This is the first packet in the list which is non-DTMF non-CNG.
        main_payload_type = this_payload_type;
      } else if (this_payload_type != main_payload_type) {
        // We do not allow redundant payloads of a different type.
        // Discard this payload.
        delete [] (*it)->payload;
        delete (*it);
        // erase() returns the iterator following the removed element, so the
        // iterator must not be incremented again in this round.
        it = packet_list->erase(it);
        ++num_deleted_packets;
        continue;
      }
    }
    ++it;
  }
  return num_deleted_packets;
}
// Replaces every splittable audio packet in |packet_list| with smaller packets.
// The codec type of each packet is looked up in |decoder_database|:
//  - PCMu/PCMa, G722 and the PCM16B variants are divided by SplitBySamples();
//  - iLBC payloads are divided frame by frame by SplitByFrames();
//  - packets of any other codec type stay in the list untouched.
// Returns 0 once the whole list has been processed. Returns
// kUnknownPayloadType, kTooLargePayload, kFrameSplitError (or any negative
// value coming from SplitByFrames()) as soon as a packet cannot be handled;
// that packet and all packets after it are left unmodified in the list.
int PayloadSplitter::SplitAudio(PacketList* packet_list,
                                const DecoderDatabase& decoder_database) {
  for (PacketList::iterator it = packet_list->begin();
       it != packet_list->end();
       /* |it| is advanced inside the loop body */) {
    Packet* const original = *it;
    const DecoderDatabase::DecoderInfo* const decoder_info =
        decoder_database.GetDecoderInfo(original->header.payloadType);
    if (!decoder_info) {
      return kUnknownPayloadType;
    }

    PacketList parts;
    // Cleared when the packet must stay in the list as it is.
    bool replace_original = true;
    switch (decoder_info->codec_type) {
      // Sample-based codecs. The two numbers passed to SplitBySamples() are
      // payload bytes per ms and RTP timestamps per ms, in that order.
      case kDecoderPCMu:
      case kDecoderPCMa:
        SplitBySamples(original, 8, 8, &parts);
        break;
      case kDecoderPCMu_2ch:
      case kDecoderPCMa_2ch:
        SplitBySamples(original, 2 * 8, 8, &parts);
        break;
      case kDecoderG722:
        SplitBySamples(original, 8, 16, &parts);
        break;
      case kDecoderPCM16B:
        SplitBySamples(original, 16, 8, &parts);
        break;
      case kDecoderPCM16Bwb:
        SplitBySamples(original, 32, 16, &parts);
        break;
      case kDecoderPCM16Bswb32kHz:
        SplitBySamples(original, 64, 32, &parts);
        break;
      case kDecoderPCM16Bswb48kHz:
        SplitBySamples(original, 96, 48, &parts);
        break;
      case kDecoderPCM16B_2ch:
        SplitBySamples(original, 2 * 16, 8, &parts);
        break;
      case kDecoderPCM16Bwb_2ch:
        SplitBySamples(original, 2 * 32, 16, &parts);
        break;
      case kDecoderPCM16Bswb32kHz_2ch:
        SplitBySamples(original, 2 * 64, 32, &parts);
        break;
      case kDecoderPCM16Bswb48kHz_2ch:
        SplitBySamples(original, 2 * 96, 48, &parts);
        break;
      case kDecoderPCM16B_5ch:
        SplitBySamples(original, 5 * 16, 8, &parts);
        break;
      case kDecoderILBC: {
        const int length = original->payload_length;
        // 950 is the smallest length divisible by both 38 and 50, i.e., the
        // first length for which the frame size is ambiguous. Everything from
        // there on is rejected.
        if (length >= 950) {
          return kTooLargePayload;
        }
        int frame_bytes = 0;
        int frame_timestamps = 0;
        if (length % 38 == 0) {
          // 20 ms frames.
          frame_bytes = 38;
          frame_timestamps = 160;
        } else if (length % 50 == 0) {
          // 30 ms frames.
          frame_bytes = 50;
          frame_timestamps = 240;
        } else {
          return kFrameSplitError;
        }
        const int result =
            SplitByFrames(original, frame_bytes, frame_timestamps, &parts);
        if (result < 0) {
          return result;
        }
        if (result == kNoSplit) {
          // Single frame: nothing was produced, keep the packet.
          replace_original = false;
        }
        break;
      }
      default:
        // Not a codec that gets split; keep the packet.
        replace_original = false;
        break;
    }

    if (!replace_original) {
      ++it;
      continue;
    }

    // Put the new packets in front of the original one, then dispose of the
    // original. erase() hands back the iterator to the following packet.
    packet_list->splice(it, parts);
    delete [] original->payload;
    delete original;
    it = packet_list->erase(it);
  }
  return 0;
}
// Splits the payload of |packet| into chunks and appends one newly allocated
// Packet per chunk to |new_packets|. |packet| itself is not modified.
//
// The payload is assumed to come from a sample-based codec that produces
// |bytes_per_ms| payload bytes and |timestamps_per_ms| RTP timestamp ticks per
// millisecond; |bytes_per_ms| must be positive. The chunk size is found by
// halving the payload length until it is smaller than 40 ms worth of bytes,
// so payloads shorter than 40 ms end up as a single chunk. Every new packet
// gets a copy of the original header with the timestamp advanced by the
// duration of the chunks before it, and inherits the |primary| flag.
//
// A payload length of zero or less produces no packets at all.
void PayloadSplitter::SplitBySamples(const Packet* packet,
                                     int bytes_per_ms,
                                     int timestamps_per_ms,
                                     PacketList* new_packets) {
  assert(packet);
  assert(new_packets);
  assert(bytes_per_ms > 0);  // Used as divisor below.

  int remaining_bytes = packet->payload_length;
  if (remaining_bytes <= 0) {
    // Nothing to split. This guard is required for termination: with an empty
    // payload the chunk size would be 0 and the copy loop below could never
    // make progress.
    return;
  }

  // Find a "chunk size" >= 20 ms and < 40 ms.
  const int min_chunk_size = bytes_per_ms * 20;
  int split_size_bytes = remaining_bytes;
  // Halve the split size as long as it is at least twice the minimum chunk
  // size, so that the result never drops below the minimum chunk size.
  while (split_size_bytes >= 2 * min_chunk_size) {
    split_size_bytes >>= 1;
  }
  const int timestamps_per_chunk =
      split_size_bytes * timestamps_per_ms / bytes_per_ms;

  uint32_t timestamp = packet->header.timestamp;
  const uint8_t* payload_ptr = packet->payload;
  while (remaining_bytes > 0) {
    // Emit full-size chunks while at least two of them remain; whatever is
    // left after that (less than two chunks) goes into one final packet.
    const int chunk_bytes = (remaining_bytes >= 2 * split_size_bytes) ?
        split_size_bytes : remaining_bytes;
    Packet* new_packet = new Packet;
    new_packet->payload_length = chunk_bytes;
    new_packet->header = packet->header;
    new_packet->header.timestamp = timestamp;
    new_packet->primary = packet->primary;
    new_packet->payload = new uint8_t[chunk_bytes];
    memcpy(new_packet->payload, payload_ptr, chunk_bytes);
    new_packets->push_back(new_packet);
    timestamp += timestamps_per_chunk;
    payload_ptr += chunk_bytes;
    remaining_bytes -= chunk_bytes;
  }
}
int PayloadSplitter::SplitByFrames(const Packet* packet,
int bytes_per_frame,
int timestamps_per_frame,
PacketList* new_packets) {
if (packet->payload_length % bytes_per_frame != 0) {
return kFrameSplitError;
}
int num_frames = packet->payload_length / bytes_per_frame;
if (num_frames == 1) {
// Special case. Do not split the payload.
return kNoSplit;
}
uint32_t timestamp = packet->header.timestamp;
uint8_t* payload_ptr = packet->payload;
int len = packet->payload_length;
while (len > 0) {
assert(len >= bytes_per_frame);
Packet* new_packet = new Packet;
new_packet->payload_length = bytes_per_frame;
new_packet->header = packet->header;
new_packet->header.timestamp = timestamp;
timestamp += timestamps_per_frame;
new_packet->primary = packet->primary;
new_packet->payload = new uint8_t[bytes_per_frame];
memcpy(new_packet->payload, payload_ptr, bytes_per_frame);
payload_ptr += bytes_per_frame;
new_packets->push_back(new_packet);
len -= bytes_per_frame;
}
return kOK;
}
} // namespace webrtc

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PAYLOAD_SPLITTER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PAYLOAD_SPLITTER_H_
#include "webrtc/modules/audio_coding/neteq4/packet.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
namespace webrtc {
// Forward declarations.
class DecoderDatabase;
// This class handles splitting of payloads into smaller parts.
// The class does not have any member variables, and the methods could have
// been made static. The reason for not making them static is testability.
// With this design, the splitting functionality can be mocked during testing
// of the NetEqImpl class.
class PayloadSplitter {
public:
// Return codes used by the methods of this class. Non-negative values signal
// success, negative values signal an error.
enum SplitterReturnCodes {
kOK = 0,  // The operation succeeded.
kNoSplit = 1,  // The payload holds exactly one frame and was left as is.
kTooLargePayload = -1,  // iLBC payload too long to resolve its frame size.
kFrameSplitError = -2,  // Payload length is not a whole number of frames.
kUnknownPayloadType = -3,  // Decoder database has no info for the type.
kRedLengthMismatch = -4  // RED block lengths exceed the packet length.
};
PayloadSplitter() {}
virtual ~PayloadSplitter() {}
// Splits each packet in |packet_list| into its separate RED payloads. Each
// RED payload is packetized into a Packet. The original elements in
// |packet_list| are properly deleted, and replaced by the new packets.
// Note that all packets in |packet_list| must be RED payloads, i.e., have
// RED headers according to RFC 2198 at the very beginning of the payload.
// Returns kOK or an error.
virtual int SplitRed(PacketList* packet_list);
// Checks all packets in |packet_list|. Packets that are DTMF events or
// comfort noise payloads are always kept. Apart from those, only a single
// payload type is accepted: the type of the first packet in the list that is
// neither DTMF nor comfort noise. Any packet with another payload type is
// deleted and removed from the list.
// Returns the number of discarded packets.
virtual int CheckRedPayloads(PacketList* packet_list,
const DecoderDatabase& decoder_database);
// Iterates through |packet_list| and, if possible, splits each audio payload
// into suitable size chunks. The result is written back to |packet_list| as
// new packets. The decoder database is needed to get information about which
// payload type each packet contains. Packets with a codec type that is not
// handled by the splitter are left untouched.
// Returns 0 on success. Returns kUnknownPayloadType, kTooLargePayload or
// kFrameSplitError as soon as a packet cannot be processed; the iteration
// stops at that packet.
virtual int SplitAudio(PacketList* packet_list,
const DecoderDatabase& decoder_database);
private:
// Splits the payload in |packet|. The payload is assumed to be from a
// sample-based codec, producing |bytes_per_ms| payload bytes and
// |timestamps_per_ms| RTP timestamps per millisecond. The resulting chunks
// are appended to |new_packets| as newly allocated packets; |packet| itself
// is not modified.
virtual void SplitBySamples(const Packet* packet,
int bytes_per_ms,
int timestamps_per_ms,
PacketList* new_packets);
// Splits the payload in |packet|. The payload will be split into chunks of
// size |bytes_per_frame|, corresponding to a |timestamps_per_frame|
// RTP timestamps. The chunks are appended to |new_packets| as newly allocated
// packets; |packet| itself is not modified.
// Returns kOK after splitting, kNoSplit if the payload holds exactly one
// frame (nothing is appended in that case), or kFrameSplitError if the
// payload length is not a multiple of |bytes_per_frame|.
virtual int SplitByFrames(const Packet* packet,
int bytes_per_frame,
int timestamps_per_frame,
PacketList* new_packets);
DISALLOW_COPY_AND_ASSIGN(PayloadSplitter);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PAYLOAD_SPLITTER_H_

View File

@ -0,0 +1,694 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PayloadSplitter class.
#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h"
#include <assert.h>
#include <utility> // pair
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/packet.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
using ::testing::Return;
using ::testing::ReturnNull;
namespace webrtc {
// RTP payload type given to the RED packets built by CreateRedPayload().
static const int kRedPayloadType = 100;
// Size in bytes of each payload block inside a generated RED packet. Also
// used as the payload size for plain packets in some of the tests.
static const int kPayloadLength = 10;
static const int kRedHeaderLength = 4; // 4 bytes RED header.
// Sequence number and RTP timestamp assigned to every packet created by the
// helper functions in this file.
static const uint16_t kSequenceNumber = 0;
static const uint32_t kBaseTimestamp = 0x12345678;
// RED headers (according to RFC 2198):
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// |F| block PT | timestamp offset | block length |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// Last RED header:
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |0| Block PT |
// +-+-+-+-+-+-+-+-+
// Creates a RED packet with |num_payloads| payloads, with payload types given
// by the values in array |payload_types| (which must be of length
// |num_payloads|). Each redundant payload is |timestamp_offset| samples
// "behind" the previous payload.
// Builds a RED packet (RFC 2198 layout) holding |num_payloads| blocks of
// kPayloadLength bytes each. Block |i| gets payload type |payload_types[i]|
// and is filled with the byte value |i|; the last block is the primary one.
// Block |i| is announced with a timestamp offset of
// (num_payloads - i - 1) * |timestamp_offset|.
//
// Preconditions (checked with assert): |num_payloads| >= 1, |payload_types|
// is non-NULL, |timestamp_offset| is non-negative, and the largest offset fits
// in the 14-bit timestamp offset field of the RED header.
//
// The caller owns the returned Packet and its payload buffer.
Packet* CreateRedPayload(int num_payloads,
                         uint8_t* payload_types,
                         int timestamp_offset) {
  assert(num_payloads >= 1);
  assert(payload_types);
  assert(timestamp_offset >= 0);
  // Without this check, a too large offset would silently be truncated when
  // it is written into the header bytes below.
  assert((num_payloads - 1) * timestamp_offset <= 0x3FFF);
  assert(kPayloadLength <= 1023);  // Max length described by 10 bits.

  Packet* packet = new Packet;
  packet->header.payloadType = kRedPayloadType;
  packet->header.timestamp = kBaseTimestamp;
  packet->header.sequenceNumber = kSequenceNumber;
  // One 1-byte header plus payload for the last block; a full 4-byte header
  // plus payload for each of the other blocks.
  packet->payload_length = (kPayloadLength + 1) +
      (num_payloads - 1) * (kPayloadLength + kRedHeaderLength);
  uint8_t* payload = new uint8_t[packet->payload_length];
  uint8_t* payload_ptr = payload;

  // Write the 4-byte RED headers of all blocks but the last one.
  for (int i = 0; i < num_payloads - 1; ++i) {
    // F = 1 (more headers follow) and the 7-bit block payload type.
    *payload_ptr++ = (payload_types[i] & 0x7F) | 0x80;
    const int this_offset = (num_payloads - i - 1) * timestamp_offset;
    // 14-bit timestamp offset: upper 8 bits, then lower 6 bits sharing a
    // byte with the upper 2 bits of the 10-bit block length.
    *payload_ptr++ = this_offset >> 6;
    *payload_ptr++ = ((this_offset & 0x3F) << 2) | (kPayloadLength >> 8);
    *payload_ptr++ = kPayloadLength & 0xFF;
  }
  // The last header is a single byte: F = 0 and the payload type.
  *payload_ptr++ = payload_types[num_payloads - 1] & 0x7F;

  for (int i = 0; i < num_payloads; ++i) {
    // Write |i| to all bytes in each payload.
    memset(payload_ptr, i, kPayloadLength);
    payload_ptr += kPayloadLength;
  }
  packet->payload = payload;
  return packet;
}
// Create a packet with all payload bytes set to |payload_value|.
// Allocates a Packet carrying |payload_length| bytes, each set to
// |payload_value|. The header gets |payload_type| together with the default
// timestamp (kBaseTimestamp) and sequence number (kSequenceNumber).
// The caller owns the returned Packet and its payload buffer.
Packet* CreatePacket(uint8_t payload_type, int payload_length,
                     uint8_t payload_value) {
  Packet* const result = new Packet;
  result->header.sequenceNumber = kSequenceNumber;
  result->header.timestamp = kBaseTimestamp;
  result->header.payloadType = payload_type;
  result->payload_length = payload_length;
  uint8_t* const buffer = new uint8_t[result->payload_length];
  memset(buffer, payload_value, payload_length);
  result->payload = buffer;
  return result;
}
// Checks that |packet| has the attributes given in the remaining parameters.
void VerifyPacket(const Packet* packet,
int payload_length,
uint8_t payload_type,
uint16_t sequence_number,
uint32_t timestamp,
uint8_t payload_value,
bool primary = true) {
EXPECT_EQ(payload_length, packet->payload_length);
EXPECT_EQ(payload_type, packet->header.payloadType);
EXPECT_EQ(sequence_number, packet->header.sequenceNumber);
EXPECT_EQ(timestamp, packet->header.timestamp);
EXPECT_EQ(primary, packet->primary);
ASSERT_FALSE(packet->payload == NULL);
for (int i = 0; i < packet->payload_length; ++i) {
EXPECT_EQ(payload_value, packet->payload[i]);
}
}
// Start of test definitions.
// Constructing and destroying a splitter on the heap must work on its own.
TEST(PayloadSplitter, CreateAndDestroy) {
  PayloadSplitter* const instance = new PayloadSplitter();
  delete instance;
}
// Packet A is split into A1 and A2.
// One RED packet with two payloads must come out as two packets: the primary
// payload first, followed by the redundant one with an earlier timestamp.
TEST(RedPayloadSplitter, OnePacketTwoPayloads) {
  const int kTimestampOffset = 160;
  uint8_t payload_types[] = {0, 0};
  PacketList packets;
  packets.push_back(CreateRedPayload(2, payload_types, kTimestampOffset));

  PayloadSplitter splitter;
  EXPECT_EQ(PayloadSplitter::kOK, splitter.SplitRed(&packets));
  ASSERT_EQ(2u, packets.size());

  // The first packet in the list should always be the primary payload, which
  // is the last block of the RED packet (filled with the value 1).
  Packet* const primary = packets.front();
  packets.pop_front();
  VerifyPacket(primary, kPayloadLength, payload_types[1], kSequenceNumber,
               kBaseTimestamp, 1, true);
  delete [] primary->payload;
  delete primary;

  // The redundant payload is the first block of the RED packet (filled with
  // the value 0) and lies one offset back in time.
  Packet* const redundant = packets.front();
  packets.pop_front();
  VerifyPacket(redundant, kPayloadLength, payload_types[0], kSequenceNumber,
               kBaseTimestamp - kTimestampOffset, 0, false);
  delete [] redundant->payload;
  delete redundant;
}
// Packets A and B are not split at all. Only the RED header in each packet is
// removed.
// Two RED packets with a single payload each must come out as two primary
// packets with unchanged timestamps and sequence numbers.
TEST(RedPayloadSplitter, TwoPacketsOnePayload) {
  const int kTimestampOffset = 160;
  uint8_t payload_types[] = {0};
  PacketList packets;

  // First input packet, with a single RED payload.
  packets.push_back(CreateRedPayload(1, payload_types, kTimestampOffset));

  // Second input packet, identical except for a later timestamp and the next
  // sequence number.
  Packet* const second_input =
      CreateRedPayload(1, payload_types, kTimestampOffset);
  second_input->header.timestamp += kTimestampOffset;
  second_input->header.sequenceNumber++;
  packets.push_back(second_input);

  PayloadSplitter splitter;
  EXPECT_EQ(PayloadSplitter::kOK, splitter.SplitRed(&packets));
  ASSERT_EQ(2u, packets.size());

  // First output packet.
  Packet* const first_output = packets.front();
  packets.pop_front();
  VerifyPacket(first_output, kPayloadLength, payload_types[0], kSequenceNumber,
               kBaseTimestamp, 0, true);
  delete [] first_output->payload;
  delete first_output;

  // Second output packet.
  Packet* const second_output = packets.front();
  packets.pop_front();
  VerifyPacket(second_output, kPayloadLength, payload_types[0],
               kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset, 0,
               true);
  delete [] second_output->payload;
  delete second_output;
}
// Packets A and B are split into packets A1, A2, A3, B1, B2, B3, with
// attributes as follows:
//
// A1* A2 A3 B1* B2 B3
// Payload type 0 1 2 0 1 2
// Timestamp b b-o b-2o b+o b b-o
// Sequence number 0 0 0 1 1 1
//
// b = kBaseTimestamp, o = kTimestampOffset, * = primary.
// Two RED packets with three payloads each must come out as six packets in
// the order A1, A2, A3, B1, B2, B3, where A1 and B1 are the primary payloads.
TEST(RedPayloadSplitter, TwoPacketsThreePayloads) {
  const int kTimestampOffset = 160;
  uint8_t payload_types[] = {2, 1, 0};  // Primary is the last one.
  PacketList packets;

  // Packet A, with 3 RED payloads.
  packets.push_back(CreateRedPayload(3, payload_types, kTimestampOffset));

  // Packet B, with 3 RED payloads, a later timestamp and the next sequence
  // number.
  Packet* const packet_b = CreateRedPayload(3, payload_types, kTimestampOffset);
  packet_b->header.timestamp += kTimestampOffset;
  packet_b->header.sequenceNumber++;
  packets.push_back(packet_b);

  PayloadSplitter splitter;
  EXPECT_EQ(PayloadSplitter::kOK, splitter.SplitRed(&packets));
  ASSERT_EQ(6u, packets.size());

  // Expected attributes of the output packets, in list order.
  struct ExpectedPacket {
    uint8_t payload_type;
    uint16_t sequence_number;
    uint32_t timestamp;
    uint8_t payload_value;
    bool primary;
  };
  const ExpectedPacket expected[] = {
    // A1, A2, A3.
    {payload_types[2], kSequenceNumber, kBaseTimestamp, 2, true},
    {payload_types[1], kSequenceNumber, kBaseTimestamp - kTimestampOffset, 1,
     false},
    {payload_types[0], kSequenceNumber, kBaseTimestamp - 2 * kTimestampOffset,
     0, false},
    // B1, B2, B3.
    {payload_types[2], kSequenceNumber + 1, kBaseTimestamp + kTimestampOffset,
     2, true},
    {payload_types[1], kSequenceNumber + 1, kBaseTimestamp, 1, false},
    {payload_types[0], kSequenceNumber + 1, kBaseTimestamp - kTimestampOffset,
     0, false},
  };

  // The list size was asserted above, so front() is valid in every round.
  for (size_t i = 0; i < sizeof(expected) / sizeof(expected[0]); ++i) {
    Packet* const output = packets.front();
    packets.pop_front();
    VerifyPacket(output, kPayloadLength, expected[i].payload_type,
                 expected[i].sequence_number, expected[i].timestamp,
                 expected[i].payload_value, expected[i].primary);
    delete [] output->payload;
    delete output;
  }
}
// Creates a list with 4 packets with these payload types:
// 0 = CNGnb
// 1 = PCMu
// 2 = DTMF (AVT)
// 3 = iLBC
// We expect the method CheckRedPayloads to discard the iLBC packet, since it
// is a non-CNG, non-DTMF payload of another type than the first speech payload
// found in the list (which is PCMu).
// Feeds CheckRedPayloads() four packets with payload types 0 (CNGnb),
// 1 (PCMu), 2 (AVT) and 3 (iLBC). PCMu is the first payload that is neither
// comfort noise nor DTMF, so only the iLBC packet is expected to be dropped,
// and the method is expected to report exactly one discarded packet.
TEST(RedPayloadSplitter, CheckRedPayloads) {
  PacketList packet_list;
  for (int i = 0; i <= 3; ++i) {
    // Create packet with payload type |i|, payload length 10 bytes, all 0.
    Packet* packet = CreatePacket(i, 10, 0);
    packet_list.push_back(packet);
  }
  // Use a real DecoderDatabase object here instead of a mock, since it is
  // easier to just register the payload types and let the actual implementation
  // do its job.
  DecoderDatabase decoder_database;
  decoder_database.RegisterPayload(0, kDecoderCNGnb);
  decoder_database.RegisterPayload(1, kDecoderPCMu);
  decoder_database.RegisterPayload(2, kDecoderAVT);
  decoder_database.RegisterPayload(3, kDecoderILBC);
  PayloadSplitter splitter;
  // The return value is the number of discarded packets; verify it as well as
  // the resulting list.
  EXPECT_EQ(1, splitter.CheckRedPayloads(&packet_list, decoder_database));
  ASSERT_EQ(3u, packet_list.size());  // Should have dropped the last packet.
  // Verify packets. The loop verifies that payload types 0, 1, and 2 are in the
  // list.
  for (int i = 0; i <= 2; ++i) {
    Packet* packet = packet_list.front();
    VerifyPacket(packet, 10, i, kSequenceNumber, kBaseTimestamp, 0, true);
    delete [] packet->payload;
    delete packet;
    packet_list.pop_front();
  }
  EXPECT_TRUE(packet_list.empty());
}
// Packet A is split into A1, A2 and A3. But the length parameter is off, so
// the last payloads should be discarded.
// A RED packet announcing three payloads is truncated so that the second
// payload no longer fits. SplitRed() must report kRedLengthMismatch and leave
// only the first (oldest, redundant) payload in the list.
TEST(RedPayloadSplitter, WrongPayloadLength) {
  const int kTimestampOffset = 160;
  uint8_t payload_types[] = {0, 0, 0};
  Packet* const red_packet =
      CreateRedPayload(3, payload_types, kTimestampOffset);
  // Tamper with the total length: after this, the packet is one byte too
  // short to hold the second payload (out of three).
  red_packet->payload_length -= kPayloadLength + 1;

  PacketList packets;
  packets.push_back(red_packet);
  PayloadSplitter splitter;
  EXPECT_EQ(PayloadSplitter::kRedLengthMismatch, splitter.SplitRed(&packets));
  ASSERT_EQ(1u, packets.size());

  // The survivor is the first block of the RED packet: value 0, two offsets
  // back in time, and not primary.
  Packet* const survivor = packets.front();
  packets.pop_front();
  VerifyPacket(survivor, kPayloadLength, payload_types[0], kSequenceNumber,
               kBaseTimestamp - 2 * kTimestampOffset, 0, false);
  delete [] survivor->payload;
  delete survivor;
}
// Test that iSAC, iSAC-swb, RED, DTMF, CNG, and "Arbitrary" payloads do not
// get split.
// Packets whose codec type is iSAC, iSAC-swb, RED, AVT (DTMF), CNG or
// "Arbitrary" must pass through SplitAudio() unchanged.
TEST(AudioPayloadSplitter, NonSplittable) {
  // One packet per payload type 0..5. The actual type values do not matter,
  // since the decoder database is mocked. The payload of packet |type| is
  // filled with the value 10 * |type|.
  PacketList packets;
  for (int type = 0; type < 6; ++type) {
    packets.push_back(CreatePacket(type, kPayloadLength, 10 * type));
  }

  // Let the mock database answer each payload type with a different codec
  // type. The DecoderInfo objects are held by scoped pointers so that they do
  // not have to be deleted manually.
  MockDecoderDatabase decoder_database;
  scoped_ptr<DecoderDatabase::DecoderInfo> isac_info(
      new DecoderDatabase::DecoderInfo(kDecoderISAC, 16000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(0))
      .WillRepeatedly(Return(isac_info.get()));
  scoped_ptr<DecoderDatabase::DecoderInfo> isac_swb_info(
      new DecoderDatabase::DecoderInfo(kDecoderISACswb, 32000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(1))
      .WillRepeatedly(Return(isac_swb_info.get()));
  scoped_ptr<DecoderDatabase::DecoderInfo> red_info(
      new DecoderDatabase::DecoderInfo(kDecoderRED, 8000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(2))
      .WillRepeatedly(Return(red_info.get()));
  scoped_ptr<DecoderDatabase::DecoderInfo> avt_info(
      new DecoderDatabase::DecoderInfo(kDecoderAVT, 8000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(3))
      .WillRepeatedly(Return(avt_info.get()));
  scoped_ptr<DecoderDatabase::DecoderInfo> cng_info(
      new DecoderDatabase::DecoderInfo(kDecoderCNGnb, 8000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(4))
      .WillRepeatedly(Return(cng_info.get()));
  scoped_ptr<DecoderDatabase::DecoderInfo> arbitrary_info(
      new DecoderDatabase::DecoderInfo(kDecoderArbitrary, 8000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(5))
      .WillRepeatedly(Return(arbitrary_info.get()));

  PayloadSplitter splitter;
  EXPECT_EQ(0, splitter.SplitAudio(&packets, decoder_database));
  EXPECT_EQ(6u, packets.size());

  // Every payload must be intact and in the original order.
  uint8_t expected_type = 0;
  PacketList::iterator it = packets.begin();
  while (it != packets.end()) {
    Packet* const current = *it;
    VerifyPacket(current, kPayloadLength, expected_type, kSequenceNumber,
                 kBaseTimestamp, 10 * expected_type);
    delete [] current->payload;
    delete current;
    it = packets.erase(it);
    ++expected_type;
  }

  // The destructor is called when decoder_database goes out of scope.
  EXPECT_CALL(decoder_database, Die());
}
// Test unknown payload type.
// A payload type that the decoder database does not know must make
// SplitAudio() return kUnknownPayloadType and leave the list untouched.
TEST(AudioPayloadSplitter, UnknownPayloadType) {
  static const uint8_t kPayloadType = 17;  // Just a random number.
  const int kPayloadLengthBytes = 4711;  // Random number.
  PacketList packets;
  packets.push_back(CreatePacket(kPayloadType, kPayloadLengthBytes, 0));

  // A NULL decoder info signals that the payload type is not recognized.
  MockDecoderDatabase decoder_database;
  EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType))
      .WillRepeatedly(ReturnNull());

  PayloadSplitter splitter;
  EXPECT_EQ(PayloadSplitter::kUnknownPayloadType,
            splitter.SplitAudio(&packets, decoder_database));
  EXPECT_EQ(1u, packets.size());

  // Release the packets and payloads so that the test does not leak memory.
  for (PacketList::iterator it = packets.begin(); it != packets.end();
       it = packets.erase(it)) {
    delete [] (*it)->payload;
    delete (*it);
  }

  // The destructor is called when decoder_database goes out of scope.
  EXPECT_CALL(decoder_database, Die());
}
// Fixture for the sample-based splitting tests. The test parameter is the
// decoder type; SetUp() derives the matching number of payload bytes and RTP
// timestamps (samples) per millisecond from it.
class SplitBySamplesTest : public ::testing::TestWithParam<NetEqDecoder> {
 protected:
  virtual void SetUp() {
    decoder_type_ = GetParam();
    switch (decoder_type_) {
      case kDecoderPCMu:
      case kDecoderPCMa:
        SetRates(8, 8);
        break;
      case kDecoderPCMu_2ch:
      case kDecoderPCMa_2ch:
        SetRates(2 * 8, 8);
        break;
      case kDecoderG722:
        SetRates(8, 16);
        break;
      case kDecoderPCM16B:
        SetRates(16, 8);
        break;
      case kDecoderPCM16Bwb:
        SetRates(32, 16);
        break;
      case kDecoderPCM16Bswb32kHz:
        SetRates(64, 32);
        break;
      case kDecoderPCM16Bswb48kHz:
        SetRates(96, 48);
        break;
      case kDecoderPCM16B_2ch:
        SetRates(2 * 16, 8);
        break;
      case kDecoderPCM16Bwb_2ch:
        SetRates(2 * 32, 16);
        break;
      case kDecoderPCM16Bswb32kHz_2ch:
        SetRates(2 * 64, 32);
        break;
      case kDecoderPCM16Bswb48kHz_2ch:
        SetRates(2 * 96, 48);
        break;
      case kDecoderPCM16B_5ch:
        SetRates(5 * 16, 8);
        break;
      default:
        // Only the decoder types listed above are supported by this fixture.
        assert(false);
        break;
    }
  }

  // Stores the per-millisecond rates for the decoder type under test.
  void SetRates(int bytes_per_ms, int samples_per_ms) {
    bytes_per_ms_ = bytes_per_ms;
    samples_per_ms_ = samples_per_ms;
  }

  int bytes_per_ms_;
  int samples_per_ms_;
  NetEqDecoder decoder_type_;
};
// Test splitting sample-based payloads.
// Feeds SplitAudio() with payloads of 10, 20, ..., 60 ms for the decoder type
// under test, and verifies size, timestamp and content of every resulting
// packet.
TEST_P(SplitBySamplesTest, PayloadSizes) {
  PacketList packet_list;
  static const uint8_t kPayloadType = 17;  // Just a random number.
  for (int payload_size_ms = 10; payload_size_ms <= 60; payload_size_ms += 10) {
    // The payload values are set to be the same as the payload_size, so that
    // one can distinguish from which packet the split payloads come from.
    int payload_size_bytes = payload_size_ms * bytes_per_ms_;
    packet_list.push_back(CreatePacket(kPayloadType, payload_size_bytes,
                                       payload_size_ms));
  }
  MockDecoderDatabase decoder_database;
  // Tell the mock decoder database to return a DecoderInfo struct with the
  // codec type under test.
  // Use a scoped pointer to avoid having to delete it later.
  // (Sample rate is set to 8000 Hz, but does not matter.)
  scoped_ptr<DecoderDatabase::DecoderInfo> info(
      new DecoderDatabase::DecoderInfo(decoder_type_, 8000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType))
      .WillRepeatedly(Return(info.get()));
  PayloadSplitter splitter;
  EXPECT_EQ(0, splitter.SplitAudio(&packet_list, decoder_database));
  // The payloads are expected to be split as follows:
  // 10 ms -> 10 ms
  // 20 ms -> 20 ms
  // 30 ms -> 30 ms
  // 40 ms -> 20 + 20 ms
  // 50 ms -> 25 + 25 ms
  // 60 ms -> 30 + 30 ms
  const int expected_size_ms[] = {10, 20, 30, 20, 20, 25, 25, 30, 30};
  const int expected_payload_value[] = {10, 20, 30, 40, 40, 50, 50, 60, 60};
  const int expected_timestamp_offset_ms[] = {0, 0, 0, 0, 20, 0, 25, 0, 30};
  const size_t expected_num_packets =
      sizeof(expected_size_ms) / sizeof(expected_size_ms[0]);
  EXPECT_EQ(expected_num_packets, packet_list.size());
  PacketList::iterator it = packet_list.begin();
  size_t i = 0;
  while (it != packet_list.end()) {
    // The size check above is non-fatal, so the list may hold more packets
    // than there are expectations. Only index the expectation tables while
    // inside their bounds; surplus packets are just released.
    if (i < expected_num_packets) {
      int length_bytes = expected_size_ms[i] * bytes_per_ms_;
      uint32_t expected_timestamp = kBaseTimestamp +
          expected_timestamp_offset_ms[i] * samples_per_ms_;
      VerifyPacket((*it), length_bytes, kPayloadType, kSequenceNumber,
                   expected_timestamp, expected_payload_value[i]);
    }
    delete [] (*it)->payload;
    delete (*it);
    it = packet_list.erase(it);
    ++i;
  }
  // The destructor is called when decoder_database goes out of scope.
  EXPECT_CALL(decoder_database, Die());
}
// Runs the SplitBySamplesTest tests once for each decoder type listed below.
// The list matches the decoder types handled in SplitBySamplesTest::SetUp().
INSTANTIATE_TEST_CASE_P(
PayloadSplitter, SplitBySamplesTest,
::testing::Values(kDecoderPCMu, kDecoderPCMa, kDecoderPCMu_2ch,
kDecoderPCMa_2ch, kDecoderG722, kDecoderPCM16B,
kDecoderPCM16Bwb, kDecoderPCM16Bswb32kHz,
kDecoderPCM16Bswb48kHz, kDecoderPCM16B_2ch,
kDecoderPCM16Bwb_2ch, kDecoderPCM16Bswb32kHz_2ch,
kDecoderPCM16Bswb48kHz_2ch, kDecoderPCM16B_5ch));
// Fixture for the iLBC splitting tests. The test parameter is a pair of
// (number of frames in the payload, frame length in ms).
class SplitIlbcTest : public ::testing::TestWithParam<std::pair<int, int> > {
 protected:
  virtual void SetUp() {
    const std::pair<int, int> frames_and_length = GetParam();
    num_frames_ = frames_and_length.first;
    frame_length_ms_ = frames_and_length.second;
    // 20 ms frames take 38 bytes; any other frame length is treated as 30 ms,
    // which takes 50 bytes.
    if (frame_length_ms_ == 20) {
      frame_length_bytes_ = 38;
    } else {
      frame_length_bytes_ = 50;
    }
  }

  size_t num_frames_;
  int frame_length_ms_;
  int frame_length_bytes_;
};
// Test splitting frame-based (iLBC) payloads.
// Builds one iLBC payload holding |num_frames_| frames, lets SplitAudio()
// split it, and verifies that each resulting packet holds exactly one frame
// with the right timestamp and the right slice of the original payload.
TEST_P(SplitIlbcTest, NumFrames) {
  static const uint8_t kPayloadType = 17;  // Just a random number.
  const int samples_per_frame = frame_length_ms_ * 8;
  int total_length_bytes = frame_length_bytes_ * num_frames_;

  // Fill the payload with increasing integers {0, 1, 2, ...} (wrapping at
  // 256), so that every frame has distinguishable content.
  Packet* const input = CreatePacket(kPayloadType, total_length_bytes, 0);
  for (int offset = 0; offset < input->payload_length; ++offset) {
    input->payload[offset] = static_cast<uint8_t>(offset);
  }
  PacketList packets;
  packets.push_back(input);

  // Let the mock decoder database report iLBC for the payload type. The
  // DecoderInfo is held by a scoped pointer so that it is deleted
  // automatically.
  MockDecoderDatabase decoder_database;
  scoped_ptr<DecoderDatabase::DecoderInfo> ilbc_info(
      new DecoderDatabase::DecoderInfo(kDecoderILBC, 8000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType))
      .WillRepeatedly(Return(ilbc_info.get()));

  PayloadSplitter splitter;
  EXPECT_EQ(0, splitter.SplitAudio(&packets, decoder_database));
  EXPECT_EQ(num_frames_, packets.size());

  uint8_t expected_byte = 0;  // Continues counting across frame boundaries.
  int frame_index = 0;
  PacketList::iterator it = packets.begin();
  while (it != packets.end()) {
    Packet* const frame = *it;
    EXPECT_EQ(kBaseTimestamp + samples_per_frame * frame_index,
              frame->header.timestamp);
    EXPECT_EQ(frame_length_bytes_, frame->payload_length);
    EXPECT_EQ(kPayloadType, frame->header.payloadType);
    EXPECT_EQ(kSequenceNumber, frame->header.sequenceNumber);
    EXPECT_TRUE(frame->primary);
    ASSERT_FALSE(frame->payload == NULL);
    for (int offset = 0; offset < frame->payload_length; ++offset) {
      EXPECT_EQ(expected_byte, frame->payload[offset]);
      ++expected_byte;
    }
    delete [] frame->payload;
    delete frame;
    it = packets.erase(it);
    ++frame_index;
  }

  // The destructor is called when decoder_database goes out of scope.
  EXPECT_CALL(decoder_database, Die());
}
// Test 1 through 5 frames of 20 and 30 ms size.
// Also test the maximum number of frames in one packet for 20 and 30 ms.
// The maximum is defined by the largest payload length that can be uniquely
// resolved to a frame size of either 38 bytes (20 ms) or 50 bytes (30 ms).
// Each parameter is (number of frames, frame length in ms).
INSTANTIATE_TEST_CASE_P(
    PayloadSplitter, SplitIlbcTest,
    ::testing::Values(std::make_pair(1, 20),   // 1 frame, 20 ms.
                      std::make_pair(2, 20),   // 2 frames, 20 ms.
                      std::make_pair(3, 20),   // And so on.
                      std::make_pair(4, 20),
                      std::make_pair(5, 20),
                      std::make_pair(24, 20),  // Maximum for 20 ms frames.
                      std::make_pair(1, 30),
                      std::make_pair(2, 30),
                      std::make_pair(3, 30),
                      std::make_pair(4, 30),
                      std::make_pair(5, 30),
                      std::make_pair(18, 30)));  // Maximum for 30 ms frames.
// Test too large payload size.
// An iLBC payload of 950 bytes must be rejected with kTooLargePayload, and the
// packet must remain in the list.
TEST(IlbcPayloadSplitter, TooLargePayload) {
  static const uint8_t kPayloadType = 17;  // Just a random number.
  const int kPayloadLengthBytes = 950;
  PacketList packets;
  packets.push_back(CreatePacket(kPayloadType, kPayloadLengthBytes, 0));

  MockDecoderDatabase decoder_database;
  scoped_ptr<DecoderDatabase::DecoderInfo> ilbc_info(
      new DecoderDatabase::DecoderInfo(kDecoderILBC, 8000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType))
      .WillRepeatedly(Return(ilbc_info.get()));

  PayloadSplitter splitter;
  EXPECT_EQ(PayloadSplitter::kTooLargePayload,
            splitter.SplitAudio(&packets, decoder_database));
  EXPECT_EQ(1u, packets.size());

  // Release the packets and payloads so that the test does not leak memory.
  for (PacketList::iterator it = packets.begin(); it != packets.end();
       it = packets.erase(it)) {
    delete [] (*it)->payload;
    delete (*it);
  }

  // The destructor is called when decoder_database goes out of scope.
  EXPECT_CALL(decoder_database, Die());
}
// Verifies that SplitAudio() reports kFrameSplitError when the iLBC payload
// length is not an integer number of frames, and leaves the single packet in
// the list.
TEST(IlbcPayloadSplitter, UnevenPayload) {
  static const uint8_t kPayloadType = 17;  // Just a random number.
  const int kPayloadLengthBytes = 39;  // Not an even number of frames.

  PacketList packet_list;
  packet_list.push_back(CreatePacket(kPayloadType, kPayloadLengthBytes, 0));

  // The mock database maps |kPayloadType| to an 8 kHz iLBC decoder.
  MockDecoderDatabase decoder_database;
  scoped_ptr<DecoderDatabase::DecoderInfo> info(
      new DecoderDatabase::DecoderInfo(kDecoderILBC, 8000, NULL, false));
  EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType))
      .WillRepeatedly(Return(info.get()));

  PayloadSplitter splitter;
  EXPECT_EQ(PayloadSplitter::kFrameSplitError,
            splitter.SplitAudio(&packet_list, decoder_database));
  EXPECT_EQ(1u, packet_list.size());

  // Delete the packets and payloads to avoid having the test leak memory.
  for (PacketList::iterator it = packet_list.begin();
       it != packet_list.end();
       it = packet_list.erase(it)) {
    delete [] (*it)->payload;
    delete (*it);
  }

  // The destructor is called when decoder_database goes out of scope.
  EXPECT_CALL(decoder_database, Die());
}
} // namespace webrtc

View File

@ -0,0 +1,82 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/post_decode_vad.h"
namespace webrtc {
// Enables the post-decode VAD. A VAD instance is created if none exists yet;
// if creation fails the VAD is left disabled.
void PostDecodeVad::Enable() {
  if (!vad_instance_ && WebRtcVad_Create(&vad_instance_) != 0) {
    // Failed to create instance.
    Disable();
    return;
  }
  Init();
  enabled_ = true;
}
// Disables the post-decode VAD. The VAD instance itself is kept.
void PostDecodeVad::Disable() {
  running_ = false;
  enabled_ = false;
}
// (Re-)initializes the VAD instance and sets its mode to |kVadMode|. The VAD
// is marked as running only if an instance exists.
void PostDecodeVad::Init() {
  if (!vad_instance_) {
    running_ = false;
    return;
  }
  WebRtcVad_Init(vad_instance_);
  WebRtcVad_set_mode(vad_instance_, kVadMode);
  running_ = true;
}
// Updates the post-decode VAD with the |length| samples in |signal|, decoded
// at the sample rate |fs_hz|. |speech_type| is the decoder's classification of
// the data and |sid_frame| tells whether the data came from a SID frame.
//
// Does nothing unless the VAD is enabled and an instance exists. Comfort
// noise, SID frames and sample rates above 16000 Hz stop the VAD and force
// the decision to "active speech". After |kVadAutoEnable| consecutive calls
// without any of those while the VAD is stopped, the VAD is re-initialized
// and started again. While running, the signal is processed in chunks of 30,
// 20 and 10 ms; trailing samples shorter than 10 ms are not analyzed.
void PostDecodeVad::Update(int16_t* signal, size_t length,
                           AudioDecoder::SpeechType speech_type,
                           bool sid_frame,
                           int fs_hz) {
  if (!vad_instance_ || !enabled_) {
    return;
  }

  if (speech_type == AudioDecoder::kComfortNoise || sid_frame ||
      fs_hz > 16000) {
    // TODO(hlundin): Remove restriction on fs_hz.
    running_ = false;
    active_speech_ = true;
    sid_interval_counter_ = 0;
  } else if (!running_) {
    ++sid_interval_counter_;
  }

  if (sid_interval_counter_ >= kVadAutoEnable) {
    Init();
    // Restart the count. The counter is only incremented while the VAD is
    // stopped, so without this reset the condition above would stay true and
    // the VAD state would be wiped by Init() on every subsequent call.
    sid_interval_counter_ = 0;
  }

  // A non-positive |fs_hz| would give a frame size of zero (or a bogus size),
  // and a zero frame size would make the inner loop below spin forever.
  if (length > 0 && running_ && fs_hz > 0) {
    size_t vad_sample_index = 0;
    active_speech_ = false;
    // Loop through frame sizes 30, 20, and 10 ms.
    for (size_t vad_frame_size_ms = 30; vad_frame_size_ms >= 10;
         vad_frame_size_ms -= 10) {
      size_t vad_frame_size_samples = vad_frame_size_ms * fs_hz / 1000;
      // Consume as many frames of this size as fit in the remaining signal.
      while (length - vad_sample_index >= vad_frame_size_samples) {
        int vad_return = WebRtcVad_Process(vad_instance_, fs_hz,
                                           &signal[vad_sample_index],
                                           vad_frame_size_samples);
        // Any frame classified as active makes the whole update active.
        active_speech_ |= (vad_return == 1);
        vad_sample_index += vad_frame_size_samples;
      }
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_POST_DECODE_VAD_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_POST_DECODE_VAD_H_
#include <string> // size_t
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/common_types.h" // NULL
#include "webrtc/modules/audio_coding/neteq4/defines.h"
#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h"
#include "webrtc/modules/audio_coding/neteq4/packet.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Wraps a WebRtcVad instance that is run on decoded audio. The latest
// decision is available through active_speech().
class PostDecodeVad {
 public:
  // Starts out disabled, not running, with the decision set to active speech
  // and without a VAD instance.
  PostDecodeVad()
      : enabled_(false),
        running_(false),
        active_speech_(true),
        sid_interval_counter_(0),
        vad_instance_(NULL) {
  }

  // Frees the VAD instance, if one was created.
  virtual ~PostDecodeVad() {
    if (vad_instance_ != NULL) {
      WebRtcVad_Free(vad_instance_);
    }
  }

  // Enables post-decode VAD.
  void Enable();

  // Disables post-decode VAD.
  void Disable();

  // Initializes post-decode VAD.
  void Init();

  // Updates post-decode VAD with the audio data in |signal| having |length|
  // samples. The data is of type |speech_type|, at the sample rate |fs_hz|.
  // |sid_frame| indicates that the data originates from a SID frame.
  void Update(int16_t* signal, size_t length,
              AudioDecoder::SpeechType speech_type, bool sid_frame, int fs_hz);

  // Accessors.
  bool enabled() const { return enabled_; }
  bool running() const { return running_; }
  bool active_speech() const { return active_speech_; }

 private:
  static const int kVadMode = 0;  // Sets aggressiveness to "Normal".
  // Number of Update() calls without CNG/SID before re-enabling VAD.
  static const int kVadAutoEnable = 3000;

  bool enabled_;
  bool running_;
  bool active_speech_;
  int sid_interval_counter_;
  ::VadInst* vad_instance_;

  DISALLOW_COPY_AND_ASSIGN(PostDecodeVad);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_POST_DECODE_VAD_H_

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for PostDecodeVad class.
#include "webrtc/modules/audio_coding/neteq4/post_decode_vad.h"
#include "gtest/gtest.h"
namespace webrtc {
// Smoke test: constructs a PostDecodeVad and lets it go out of scope, so only
// the constructor and the destructor are exercised.
TEST(PostDecodeVad, CreateAndDestroy) {
PostDecodeVad vad;
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,100 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/preemptive_expand.h"
#include <algorithm> // min, max
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
// Entry point for the preemptive expand operation. Reads |input_length|
// interleaved samples from |input| and appends the result to |output|.
// |old_data_length| is stored in |old_data_length_per_channel_| for use by
// the criteria check and the passive-speech parameter setup.
// If the input is too short, or too little of it is new data, the input is
// appended to |output| unmodified and kError is returned (in that path
// |length_change_samples| is not written by this method). Otherwise the work
// is delegated to TimeStretch::Process() and its result is returned.
PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(
const int16_t* input,
int input_length,
int old_data_length,
AudioMultiVector<int16_t>* output,
int16_t* length_change_samples) {
old_data_length_per_channel_ = old_data_length;
// Input length must be (almost) 30 ms.
// Also, the new part must be at least |overlap_samples_| elements.
static const int k15ms = 120; // 15 ms = 120 samples at 8 kHz sample rate.
// The |num_channels_| == 0 test comes first so that the divisions below are
// never evaluated with a zero divisor.
if (num_channels_ == 0 ||
input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_ ||
old_data_length >= input_length / num_channels_ - overlap_samples_) {
// Length of input data too short to do preemptive expand. Simply move all
// data from input to output.
output->PushBackInterleaved(input, input_length);
return kError;
}
return TimeStretch::Process(input, input_length, output,
length_change_samples);
}
// Chooses |best_correlation| and |peak_index| for a signal without active
// speech. |len| is the signal length; the part of it beyond
// |old_data_length_per_channel_| is the new data.
void PreemptiveExpand::SetParametersForPassiveSpeech(int len,
                                                     int16_t* best_correlation,
                                                     int* peak_index) const {
  // For low energy expansion, the new data can be less than 15 ms, so make
  // sure that |peak_index| is not larger than the length of the new data.
  const int new_data_length = len - old_data_length_per_channel_;
  if (new_data_length < *peak_index) {
    *peak_index = new_data_length;
  }
  // When the signal does not contain any active speech, the correlation does
  // not matter. Simply set it to zero.
  *best_correlation = 0;
}
// Decides whether the signal may be stretched and, if so, performs the
// stretch. Stretching is done when the signal is not active speech, or when
// |best_correlation| exceeds |kCorrelationThreshold| and the old data is at
// most 15 ms long. The stretched output is |peak_index| samples per channel
// longer than the input: the first part of the input is copied up to and
// including one extra period of |peak_index| samples, that period is
// cross-faded with the |peak_index| samples preceding it, and then the rest
// of the input is appended.
// Returns kSuccess (active speech) or kSuccessLowEnergy (passive speech) when
// the signal was stretched; otherwise the input is copied unmodified to
// |output| and kNoStretch is returned.
PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
const WebRtc_Word16 *input, int input_length, size_t peak_index,
int16_t best_correlation, bool active_speech,
AudioMultiVector<int16_t>* output) const {
// Pre-calculate common multiplication with |fs_mult_|.
// 120 corresponds to 15 ms.
int fs_mult_120 = fs_mult_ * 120;
assert(old_data_length_per_channel_ >= 0); // Make sure it's been set.
// Check for strong correlation (>0.9 in Q14) and at least 15 ms new data,
// or passive speech.
if (((best_correlation > kCorrelationThreshold) &&
(old_data_length_per_channel_ <= fs_mult_120)) ||
!active_speech) {
// Do the preemptive expand operation by overlap add.
// Set length of the first part, not to be modified.
size_t unmodified_length = std::max(old_data_length_per_channel_,
fs_mult_120);
// Copy first part, including cross-fade region.
output->PushBackInterleaved(
input, (unmodified_length + peak_index) * num_channels_);
// Copy the last |peak_index| samples up to 15 ms to |temp_vector|.
AudioMultiVector<int16_t> temp_vector(num_channels_);
temp_vector.PushBackInterleaved(
&input[(unmodified_length - peak_index) * num_channels_],
peak_index * num_channels_);
// Cross-fade |temp_vector| onto the end of |output|.
output->CrossFade(temp_vector, peak_index);
// Copy the last unmodified part, 15 ms + pitch period until the end.
output->PushBackInterleaved(
&input[unmodified_length * num_channels_],
input_length - unmodified_length * num_channels_);
if (active_speech) {
return kSuccess;
} else {
return kSuccessLowEnergy;
}
} else {
// Preemptive expand not allowed. Simply move all data from |input| to
// |output|.
output->PushBackInterleaved(input, input_length);
return kNoStretch;
}
}
} // namespace webrtc

View File

@ -0,0 +1,74 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PREEMPTIVE_EXPAND_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PREEMPTIVE_EXPAND_H_
#include <assert.h>
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq4/time_stretch.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class BackgroundNoise;
// This class implements the PreemptiveExpand operation. Most of the work is
// done in the base class TimeStretch, which is shared with the Accelerate
// operation. In the PreemptiveExpand class, the operations that are specific to
// PreemptiveExpand are implemented.
// PreemptiveExpand time-stretch operation. The shared machinery lives in the
// base class TimeStretch; this class only supplies the parts that are
// specific to preemptive expand.
class PreemptiveExpand : public TimeStretch {
 public:
  // |old_data_length_per_channel_| starts at -1, meaning "not set"; it is set
  // by Process(). |overlap_samples_| is 5 samples at 8 kHz, scaled with
  // |sample_rate_hz|.
  PreemptiveExpand(int sample_rate_hz, size_t num_channels,
                   const BackgroundNoise& background_noise)
      : TimeStretch(sample_rate_hz, num_channels, background_noise),
        old_data_length_per_channel_(-1),
        overlap_samples_(5 * sample_rate_hz / 8000) {
  }

  virtual ~PreemptiveExpand() {}

  // This method performs the actual PreemptiveExpand operation. The samples
  // are read from |input|, of length |input_length| elements, and are written
  // to |output|. |old_data_length| is remembered by the object and used as
  // the per-channel length of the leading part of |input| that is old data
  // (presumably samples that were already played out -- confirm with the
  // caller). The number of samples added through time-stretching is provided
  // in the output |length_change_samples|. The method returns the outcome of
  // the operation as an enumerator value. If the input is too short, it is
  // copied to |output| unmodified and kError is returned.
  ReturnCodes Process(const WebRtc_Word16* input,
                      int input_length,
                      int old_data_length,
                      AudioMultiVector<int16_t>* output,
                      int16_t* length_change_samples);

 protected:
  // Sets the parameters |best_correlation| and |peak_index| to suitable
  // values when the signal contains no active speech. |len| is the length of
  // the signal; |peak_index| is limited to the length of the new data.
  virtual void SetParametersForPassiveSpeech(int len,
                                             int16_t* best_correlation,
                                             int* peak_index) const;

  // Checks the criteria for performing the time-stretching operation and,
  // if possible, performs the time-stretching. The result is appended to
  // |output|.
  virtual ReturnCodes CheckCriteriaAndStretch(
      const WebRtc_Word16* input, int input_length, size_t peak_index,
      int16_t best_correlation, bool active_speech,
      AudioMultiVector<int16_t>* output) const;

 private:
  int old_data_length_per_channel_;  // Set by Process(); -1 until then.
  int overlap_samples_;  // Minimum amount of new data required by Process().

  DISALLOW_COPY_AND_ASSIGN(PreemptiveExpand);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PREEMPTIVE_EXPAND_H_

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/random_vector.h"
namespace webrtc {
// Fixed table of |kRandomTableSize| sample values. Generate() reads from it
// at the position given by the low bits of the running seed.
const int16_t RandomVector::kRandomTable[RandomVector::kRandomTableSize] = {
2680, 5532, 441, 5520, 16170, -5146, -1024, -8733, 3115, 9598, -10380,
-4959, -1280, -21716, 7133, -1522, 13458, -3902, 2789, -675, 3441, 5016,
-13599, -4003, -2739, 3922, -7209, 13352, -11617, -7241, 12905, -2314, 5426,
10121, -9702, 11207, -13542, 1373, 816, -5934, -12504, 4798, 1811, 4112,
-613, 201, -10367, -2960, -2419, 3442, 4299, -6116, -6092, 1552, -1650,
-480, -1237, 18720, -11858, -8303, -8212, 865, -2890, -16968, 12052, -5845,
-5912, 9777, -5665, -6294, 5426, -4737, -6335, 1652, 761, 3832, 641, -8552,
-9084, -5753, 8146, 12156, -4915, 15086, -1231, -1869, 11749, -9319, -6403,
11407, 6232, -1683, 24340, -11166, 4017, -10448, 3153, -2936, 6212, 2891,
-866, -404, -4807, -2324, -1917, -2388, -6470, -3895, -10300, 5323, -5403,
2205, 4640, 7022, -21186, -6244, -882, -10031, -3395, -12885, 7155, -5339,
5079, -2645, -9515, 6622, 14651, 15852, 359, 122, 8246, -3502, -6696, -3679,
-13535, -1409, -704, -7403, -4007, 1798, 279, -420, -12796, -14219, 1141,
3359, 11434, 7049, -6684, -7473, 14283, -4115, -9123, -8969, 4152, 4117,
13792, 5742, 16168, 8661, -1609, -6095, 1881, 14380, -5588, 6758, -6425,
-22969, -7269, 7031, 1119, -1611, -5850, -11281, 3559, -8952, -10146, -4667,
-16251, -1538, 2062, -1012, -13073, 227, -3142, -5265, 20, 5770, -7559,
4740, -4819, 992, -8208, -7130, -4652, 6725, 7369, -1036, 13144, -1588,
-5304, -2344, -449, -5705, -8894, 5205, -17904, -11188, -1022, 4852, 10101,
-5255, -4200, -752, 7941, -1543, 5959, 14719, 13346, 17045, -15605, -1678,
-1600, -9230, 68, 23348, 1172, 7750, 11212, -18227, 9956, 4161, 883, 3947,
4341, 1014, -4889, -2603, 1246, -5630, -3596, -870, -1298, 2784, -3317,
-6612, -20541, 4166, 4181, -8625, 3562, 12890, 4761, 3205, -12259, -8579 };
// Restores the generator to its initial state (same values as the
// constructor uses).
void RandomVector::Reset() {
  seed_increment_ = 1;
  seed_ = 777;
}
// Writes |length| pseudo-random samples to |output|. For every sample the
// seed is advanced by the seed increment, and the low bits of the seed select
// the entry of |kRandomTable| to output.
void RandomVector::Generate(size_t length, int16_t* output) {
  for (size_t remaining = length; remaining > 0; --remaining) {
    seed_ += seed_increment_;
    const size_t table_index = seed_ & (kRandomTableSize - 1);
    *output++ = kRandomTable[table_index];
  }
}
// Adds |increase_by| to the seed increment and keeps only the bits selected
// by the mask |kRandomTableSize| - 1.
void RandomVector::IncreaseSeedIncrement(int16_t increase_by) {
  seed_increment_ += increase_by;
  seed_increment_ = seed_increment_ & (kRandomTableSize - 1);
}
}

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RANDOM_VECTOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RANDOM_VECTOR_H_
#include <cstring> // size_t
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class generates pseudo-random samples.
// This class generates pseudo-random samples by stepping through a fixed
// table of values.
class RandomVector {
 public:
  // Number of entries in |kRandomTable|. The implementation masks indices with
  // |kRandomTableSize| - 1, so the size must remain a power of two.
  static const int kRandomTableSize = 256;
  static const int16_t kRandomTable[kRandomTableSize];

  RandomVector()
      : seed_(777),
        seed_increment_(1) {
  }

  // Restores the seed and the seed increment to their initial values.
  void Reset();

  // Writes |length| pseudo-random samples to |output|.
  void Generate(size_t length, int16_t* output);

  // Adds |increase_by| to the seed increment. The result is masked with
  // |kRandomTableSize| - 1.
  void IncreaseSeedIncrement(int16_t increase_by);

  // Accessors and mutators.
  int16_t seed_increment() const { return seed_increment_; }
  void set_seed_increment(int16_t value) { seed_increment_ = value; }

 private:
  uint32_t seed_;  // Running seed; its low bits select the table entry.
  int16_t seed_increment_;  // Added to |seed_| for every generated sample.

  DISALLOW_COPY_AND_ASSIGN(RandomVector);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RANDOM_VECTOR_H_

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Unit tests for RandomVector class.
#include "webrtc/modules/audio_coding/neteq4/random_vector.h"
#include "gtest/gtest.h"
namespace webrtc {
// Smoke test: constructs a RandomVector and lets it go out of scope, so only
// the constructor and the destructor are exercised.
TEST(RandomVector, CreateAndDestroy) {
RandomVector random_vector;
}
// TODO(hlundin): Write more tests.
} // namespace webrtc

View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/rtcp.h"
#include <string.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
// Clears all statistics and uses |start_sequence_number| as both the base and
// the highest sequence number seen so far.
void Rtcp::Init(uint16_t start_sequence_number) {
  // Sequence number tracking.
  base_seq_no_ = start_sequence_number;
  max_seq_no_ = start_sequence_number;
  cycles_ = 0;

  // Packet counters.
  received_packets_ = 0;
  received_packets_prior_ = 0;
  expected_prior_ = 0;

  // Jitter estimation.
  jitter_ = 0;
  transit_ = 0;
}
// Updates the statistics with a newly received packet described by
// |rtp_header|, received at |receive_timestamp|. The packet count, the
// highest sequence number (with wrap-around count) and the jitter estimate
// are updated.
void Rtcp::Update(const RTPHeader& rtp_header, uint32_t receive_timestamp) {
// Update number of received packets, and largest packet number received.
received_packets_++;
// The difference is stored in a signed 16-bit variable, so that a packet that
// is ahead of |max_seq_no_| gives a non-negative value also when the
// sequence number has wrapped around.
int16_t sn_diff = rtp_header.sequenceNumber - max_seq_no_;
if (sn_diff >= 0) {
if (rtp_header.sequenceNumber < max_seq_no_) {
// Wrap-around detected.
cycles_++;
}
max_seq_no_ = rtp_header.sequenceNumber;
}
// Calculate jitter according to RFC 3550, and update previous timestamps.
// Note that the value in |jitter_| is in Q4.
// The first packet has no predecessor to compare with, so it only sets
// |transit_| below.
if (received_packets_ > 1) {
// |transit_| holds (RTP timestamp - receive timestamp) of the previous
// packet, so this is the change in transit time between the two packets.
int32_t ts_diff = receive_timestamp - (rtp_header.timestamp - transit_);
ts_diff = WEBRTC_SPL_ABS_W32(ts_diff);
int32_t jitter_diff = (ts_diff << 4) - jitter_;
// Calculate 15 * jitter_ / 16 + jitter_diff / 16 (with proper rounding).
jitter_ = jitter_ + ((jitter_diff + 8) >> 4);
}
transit_ = rtp_header.timestamp - receive_timestamp;
}
// Fills in |stats| with the extended highest sequence number, the cumulative
// number of lost packets (capped at 0xFFFFFF), the fraction of packets lost
// since the previous report (in Q8, capped at 0xFF) and the jitter. Unless
// |no_reset| is true, the current packet counts become the reference point
// for the next report's fraction-lost calculation.
void Rtcp::GetStatistics(bool no_reset, RtcpStatistics* stats) {
// Extended highest sequence number received.
// NOTE(review): |cycles_| is shifted as a signed int, which overflows once
// |cycles_| reaches 0x8000 -- confirm whether that range needs handling.
stats->extended_max = (static_cast<int>(cycles_) << 16) + max_seq_no_;
// Calculate expected number of packets and compare it with the number of
// packets that were actually received. The cumulative number of lost packets
// can be extracted.
uint32_t expected_packets = stats->extended_max - base_seq_no_ + 1;
if (received_packets_ == 0) {
// No packets received, assume none lost.
stats->cumulative_lost = 0;
} else if (expected_packets > received_packets_) {
stats->cumulative_lost = expected_packets - received_packets_;
if (stats->cumulative_lost > 0xFFFFFF) {
stats->cumulative_lost = 0xFFFFFF;
}
} else {
stats->cumulative_lost = 0;
}
// Fraction lost since last report.
uint32_t expected_since_last = expected_packets - expected_prior_;
uint32_t received_since_last = received_packets_ - received_packets_prior_;
if (!no_reset) {
expected_prior_ = expected_packets;
received_packets_prior_ = received_packets_;
}
// |lost| is signed: it is negative when more packets were received than
// expected in the interval, which is reported as zero loss below.
int32_t lost = expected_since_last - received_since_last;
if (expected_since_last == 0 || lost <= 0 || received_packets_ == 0) {
stats->fraction_lost = 0;
} else {
// Fraction in Q8: 256 * lost / expected.
stats->fraction_lost = (lost << 8) / expected_since_last;
}
if (stats->fraction_lost > 0xFF) {
stats->fraction_lost = 0xFF;
}
stats->jitter = jitter_ >> 4; // Scaling from Q4.
}
} // namespace webrtc

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RTCP_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RTCP_H_
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declaration.
struct RTPHeader;
// Keeps track of the statistics that are reported through RtcpStatistics:
// the extended highest sequence number, cumulative and fractional packet
// loss, and jitter.
class Rtcp {
public:
// Constructs the object in the same state as after Init(0).
Rtcp() {
Init(0);
}
~Rtcp() {}
// Resets the RTCP statistics, and sets the first received sequence number.
void Init(uint16_t start_sequence_number);
// Updates the RTCP statistics with a new received packet. |rtp_header| is the
// header of the packet and |receive_timestamp| is the time it was received.
void Update(const RTPHeader& rtp_header, uint32_t receive_timestamp);
// Returns the current RTCP statistics. If |no_reset| is true, the statistics
// are not reset, otherwise they are.
void GetStatistics(bool no_reset, RtcpStatistics* stats);
private:
uint16_t cycles_; // The number of wrap-arounds for the sequence number.
uint16_t max_seq_no_; // The maximum sequence number received. Starts over
// from 0 after wrap-around.
uint16_t base_seq_no_; // The sequence number of the first received packet.
uint32_t received_packets_; // The number of packets that have been received.
uint32_t received_packets_prior_; // Number of packets received when last
// report was generated.
uint32_t expected_prior_; // Expected number of packets, at the time of the
// last report.
uint32_t jitter_; // Current jitter value, in Q4.
int32_t transit_; // Clock difference for previous packet, stored as RTP
// timestamp minus receive timestamp.
DISALLOW_COPY_AND_ASSIGN(Rtcp);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RTCP_H_

View File

@ -0,0 +1,171 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/neteq4/statistics_calculator.h"
#include <assert.h>
#include <cstring> // memset
#include "webrtc/modules/audio_coding/neteq4/decision_logic.h"
#include "webrtc/modules/audio_coding/neteq4/delay_manager.h"
namespace webrtc {
// Creates a calculator with all counters at zero and an empty, zero-filled
// waiting-time buffer.
StatisticsCalculator::StatisticsCalculator()
    : preemptive_samples_(0),
      accelerate_samples_(0),
      added_zero_samples_(0),
      expanded_voice_samples_(0),
      expanded_noise_samples_(0),
      discarded_packets_(0),
      lost_timestamps_(0),
      last_report_timestamp_(0),
      len_waiting_times_(0),
      next_waiting_time_index_(0) {
  // |waiting_times_| is an array member, so sizeof covers the whole buffer.
  memset(waiting_times_, 0, sizeof(waiting_times_));
}
// Clears the counters for time-stretching, expansion and inserted zeros.
// The counters handled by ResetMcu() and the waiting times are left alone.
void StatisticsCalculator::Reset() {
  expanded_noise_samples_ = 0;
  expanded_voice_samples_ = 0;
  added_zero_samples_ = 0;
  accelerate_samples_ = 0;
  preemptive_samples_ = 0;
}
// Clears the report-interval counter together with the loss and discard
// counters.
void StatisticsCalculator::ResetMcu() {
  last_report_timestamp_ = 0;
  lost_timestamps_ = 0;
  discarded_packets_ = 0;
}
// Empties the waiting-time buffer: zeroes its contents and rewinds both the
// element count and the write position.
void StatisticsCalculator::ResetWaitingTimeStatistics() {
  next_waiting_time_index_ = 0;
  len_waiting_times_ = 0;
  // |waiting_times_| is an array member, so sizeof covers the whole buffer.
  memset(waiting_times_, 0, sizeof(waiting_times_));
}
// Adds |num_samples| to the count of samples produced by expansion that was
// not pure noise.
void StatisticsCalculator::ExpandedVoiceSamples(int num_samples) {
  expanded_voice_samples_ = expanded_voice_samples_ + num_samples;
}
// Adds |num_samples| to the count of noise-only samples produced by
// expansion.
void StatisticsCalculator::ExpandedNoiseSamples(int num_samples) {
  expanded_noise_samples_ = expanded_noise_samples_ + num_samples;
}
// Adds |num_samples| to the count of samples produced by preemptive
// expansion.
void StatisticsCalculator::PreemptiveExpandedSamples(int num_samples) {
  preemptive_samples_ = preemptive_samples_ + num_samples;
}
// Adds |num_samples| to the count of samples removed by accelerate.
void StatisticsCalculator::AcceleratedSamples(int num_samples) {
  accelerate_samples_ = accelerate_samples_ + num_samples;
}
// Adds |num_samples| to the count of zeros inserted into the output.
void StatisticsCalculator::AddZeros(int num_samples) {
  added_zero_samples_ = added_zero_samples_ + num_samples;
}
// Adds |num_packets| to the count of discarded packets.
void StatisticsCalculator::PacketsDiscarded(int num_packets) {
  discarded_packets_ = discarded_packets_ + num_packets;
}
// Adds |num_samples| to the count of lost samples (timestamps).
void StatisticsCalculator::LostSamples(int num_samples) {
  lost_timestamps_ = lost_timestamps_ + num_samples;
}
// Advances the report-interval counter by |num_samples|. When the interval
// exceeds |kMaxReportPeriod| seconds at the sample rate |fs_hz|, the interval
// counter and the loss and discard counters are cleared.
void StatisticsCalculator::IncreaseCounter(int num_samples, int fs_hz) {
  last_report_timestamp_ += num_samples;
  const uint32_t max_period_samples =
      static_cast<uint32_t>(fs_hz * kMaxReportPeriod);
  if (last_report_timestamp_ <= max_period_samples) {
    return;
  }
  discarded_packets_ = 0;
  last_report_timestamp_ = 0;
  lost_timestamps_ = 0;
}
// Stores |waiting_time_ms| at the current write position of the circular
// waiting-time buffer, overwriting the entry that was there.
void StatisticsCalculator::StoreWaitingTime(int waiting_time_ms) {
  assert(next_waiting_time_index_ < kLenWaitingTimes);
  waiting_times_[next_waiting_time_index_] = waiting_time_ms;
  // Advance the write position, wrapping around at the end of the buffer.
  if (++next_waiting_time_index_ >= kLenWaitingTimes) {
    next_waiting_time_index_ = 0;
  }
  // The number of valid entries stops growing at the buffer capacity.
  if (len_waiting_times_ < kLenWaitingTimes) {
    ++len_waiting_times_;
  }
}
void StatisticsCalculator::GetNetworkStatistics(
int fs_hz,
int num_samples_in_buffers,
int samples_per_packet,
const DelayManager& delay_manager,
const DecisionLogic& decision_logic,
NetEqNetworkStatistics *stats) {
if (fs_hz <= 0 || !stats) {
assert(false);
return;
}
stats->added_zero_samples = added_zero_samples_;
stats->current_buffer_size_ms = num_samples_in_buffers * 1000 / fs_hz;
const int ms_per_packet = decision_logic.packet_length_samples() /
(fs_hz / 1000);
stats->preferred_buffer_size_ms = (delay_manager.TargetLevel() >> 8) *
ms_per_packet;
stats->jitter_peaks_found = delay_manager.PeakFound();
stats->clockdrift_ppm = delay_manager.AverageIAT();
stats->packet_loss_rate = CalculateQ14Ratio(lost_timestamps_,
last_report_timestamp_);
const unsigned discarded_samples = discarded_packets_ * samples_per_packet;
stats->packet_discard_rate = CalculateQ14Ratio(discarded_samples,
last_report_timestamp_);
stats->accelerate_rate = CalculateQ14Ratio(accelerate_samples_,
last_report_timestamp_);
stats->preemptive_rate = CalculateQ14Ratio(preemptive_samples_,
last_report_timestamp_);
stats->expand_rate = CalculateQ14Ratio(expanded_voice_samples_ +
expanded_noise_samples_,
last_report_timestamp_);
// Reset counters.
ResetMcu();
Reset();
}
// Replaces the contents of |waiting_times| with the stored waiting times, in
// buffer order, and then clears the stored values. Does nothing if
// |waiting_times| is NULL.
void StatisticsCalculator::WaitingTimes(std::vector<int>* waiting_times) {
  if (waiting_times) {
    waiting_times->assign(waiting_times_,
                          waiting_times_ + len_waiting_times_);
    ResetWaitingTimeStatistics();
  }
}
// Returns |numerator| / |denominator| in Q14, i.e., scaled by 2^14.
// The result is 0 when |numerator| is 0, and is capped at 1.0 (1 << 14) when
// |numerator| >= |denominator|; the latter also covers a zero denominator, so
// no division by zero can occur.
int StatisticsCalculator::CalculateQ14Ratio(uint32_t numerator,
                                            uint32_t denominator) {
  if (numerator == 0) {
    return 0;
  } else if (numerator < denominator) {
    // Widen before shifting: in 32 bits, |numerator| << 14 wraps around as
    // soon as |numerator| reaches 2^18, which would give a bogus ratio.
    const uint64_t ratio_q14 =
        (static_cast<uint64_t>(numerator) << 14) / denominator;
    // Ratio must be smaller than 1 in Q14.
    assert(ratio_q14 < (1u << 14));
    return static_cast<int>(ratio_q14);
  } else {
    // Will not produce a ratio larger than 1, since this is probably an error.
    return 1 << 14;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,109 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_STATISTICS_CALCULATOR_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_STATISTICS_CALCULATOR_H_
#include <vector>
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Forward declarations.
class DecisionLogic;
class DelayManager;
// This class handles various network statistics in NetEq.
// This class handles various network statistics in NetEq.
class StatisticsCalculator {
public:
StatisticsCalculator();
virtual ~StatisticsCalculator() {}
// Resets most of the counters: the preemptive expand, accelerate, added
// zeros and expand (voice and noise) counters.
void Reset();
// Resets the counters that are not handled by Reset(): discarded packets,
// lost samples and the report interval counter.
void ResetMcu();
// Resets the waiting time statistics.
void ResetWaitingTimeStatistics();
// Reports that |num_samples| samples were produced through expansion, and
// that the expansion produced other than just noise samples.
void ExpandedVoiceSamples(int num_samples);
// Reports that |num_samples| samples were produced through expansion, and
// that the expansion produced only noise samples.
void ExpandedNoiseSamples(int num_samples);
// Reports that |num_samples| samples were produced through preemptive
// expansion.
void PreemptiveExpandedSamples(int num_samples);
// Reports that |num_samples| samples were removed through accelerate.
void AcceleratedSamples(int num_samples);
// Reports that |num_samples| zeros were inserted into the output.
void AddZeros(int num_samples);
// Reports that |num_packets| packets were discarded.
void PacketsDiscarded(int num_packets);
// Reports that |num_samples| were lost.
void LostSamples(int num_samples);
// Increases the report interval counter with |num_samples| at a sample rate
// of |fs_hz|. When the interval grows beyond |kMaxReportPeriod| seconds, the
// interval counter and the loss and discard counters are reset.
void IncreaseCounter(int num_samples, int fs_hz);
// Stores new packet waiting time in waiting time statistics.
void StoreWaitingTime(int waiting_time_ms);
// Returns the current network statistics in |stats|. The current sample rate
// is |fs_hz|, the total number of samples in packet buffer and sync buffer
// yet to play out is |num_samples_in_buffers|, and the number of samples per
// packet is |samples_per_packet|. The counters handled by Reset() and
// ResetMcu() are reset after the statistics have been written.
void GetNetworkStatistics(int fs_hz,
int num_samples_in_buffers,
int samples_per_packet,
const DelayManager& delay_manager,
const DecisionLogic& decision_logic,
NetEqNetworkStatistics *stats);
// Copies the stored waiting times to |waiting_times| and then resets the
// waiting time statistics. Does nothing if |waiting_times| is NULL.
void WaitingTimes(std::vector<int>* waiting_times);
private:
static const int kMaxReportPeriod = 60; // Seconds before auto-reset.
static const int kLenWaitingTimes = 100;
// Calculates numerator / denominator, and returns the value in Q14.
static int CalculateQ14Ratio(uint32_t numerator, uint32_t denominator);
uint32_t preemptive_samples_;
uint32_t accelerate_samples_;
int added_zero_samples_;
uint32_t expanded_voice_samples_;
uint32_t expanded_noise_samples_;
int discarded_packets_;
uint32_t lost_timestamps_;
// Number of samples counted by IncreaseCounter() in the current report
// interval; used as the denominator for the reported rates.
uint32_t last_report_timestamp_;
int waiting_times_[kLenWaitingTimes]; // Used as a circular buffer.
int len_waiting_times_; // Number of valid entries in |waiting_times_|.
int next_waiting_time_index_; // Position of the next write.
DISALLOW_COPY_AND_ASSIGN(StatisticsCalculator);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_STATISTICS_CALCULATOR_H_

Some files were not shown because too many files have changed in this diff Show More