Make an AudioEncoder subclass for comfort noise
BUG=3926 R=bjornv@webrtc.org, kjellander@webrtc.org, kwiberg@webrtc.org Review URL: https://webrtc-codereview.appspot.com/31129004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7857 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@ -33,9 +33,9 @@ class Vad {
|
||||
|
||||
virtual ~Vad();
|
||||
|
||||
enum Activity VoiceActivity(const int16_t* audio,
|
||||
size_t num_samples,
|
||||
int sample_rate_hz);
|
||||
virtual Activity VoiceActivity(const int16_t* audio,
|
||||
size_t num_samples,
|
||||
int sample_rate_hz);
|
||||
|
||||
private:
|
||||
VadInst* handle_;
|
||||
|
@ -19,7 +19,7 @@ namespace webrtc {
|
||||
|
||||
class MockVad : public Vad {
|
||||
public:
|
||||
explicit MockVad(enum Aggressiveness mode) {}
|
||||
explicit MockVad(enum Aggressiveness mode) : Vad(mode) {}
|
||||
virtual ~MockVad() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
|
||||
|
@ -131,8 +131,10 @@ config("cng_config") {
|
||||
|
||||
source_set("cng") {
|
||||
sources = [
|
||||
"codecs/cng/audio_encoder_cng.cc",
|
||||
"codecs/cng/cng_helpfuns.c",
|
||||
"codecs/cng/cng_helpfuns.h",
|
||||
"codecs/cng/include/audio_encoder_cng.h",
|
||||
"codecs/cng/include/webrtc_cng.h",
|
||||
"codecs/cng/webrtc_cng.c",
|
||||
]
|
||||
|
209
webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
Normal file
209
webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc
Normal file
@ -0,0 +1,209 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h"
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Fills in the default configuration. |speech_encoder| defaults to NULL, which
// IsOk() rejects, so a caller has to supply an encoder before the Config can
// be used to build an AudioEncoderCng.
AudioEncoderCng::Config::Config()
    : sample_rate_hz(8000),
      num_channels(1),
      payload_type(13),
      speech_encoder(NULL),  // Must be set by the caller.
      vad_mode(Vad::kVadNormal),
      sid_frame_interval_ms(100),
      num_cng_coefficients(8),
      vad(NULL) {}  // Optional externally supplied Vad.
|
||||
|
||||
bool AudioEncoderCng::Config::IsOk() const {
|
||||
if (sample_rate_hz != 8000 && sample_rate_hz != 16000)
|
||||
return false;
|
||||
if (num_channels != 1)
|
||||
return false;
|
||||
if (!speech_encoder)
|
||||
return false;
|
||||
if (num_channels != speech_encoder->num_channels())
|
||||
return false;
|
||||
if (sid_frame_interval_ms < speech_encoder->Max10MsFramesInAPacket() * 10)
|
||||
return false;
|
||||
if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER ||
|
||||
num_cng_coefficients <= 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Builds a CNG-wrapping encoder from |config|.
//
// |config.speech_encoder| is borrowed (the caller keeps ownership). If
// |config.vad| is non-NULL, this object takes ownership of it; otherwise a Vad
// is created internally using |config.vad_mode|. Only the Vad that will
// actually be used is constructed, so no throw-away default Vad is allocated
// when the caller supplies one.
//
// Terminates via CHECK if |config| is invalid or if the CNG encoder instance
// cannot be created or initialized.
AudioEncoderCng::AudioEncoderCng(const Config& config)
    : speech_encoder_(config.speech_encoder),
      sample_rate_hz_(config.sample_rate_hz),
      num_channels_(config.num_channels),
      cng_payload_type_(config.payload_type),
      num_cng_coefficients_(config.num_cng_coefficients),
      first_timestamp_in_buffer_(0),
      frames_in_buffer_(0),
      // Start as "active" so that the first passive frame forces a SID.
      last_frame_active_(true),
      vad_(config.vad ? config.vad : new Vad(config.vad_mode)) {
  CHECK(config.IsOk()) << "Invalid configuration.";

  CNG_enc_inst* cng_inst = NULL;
  CHECK_EQ(WebRtcCng_CreateEnc(&cng_inst), 0) << "WebRtcCng_CreateEnc failed.";
  cng_inst_.reset(cng_inst);  // Transfer ownership to scoped_ptr.

  CHECK_EQ(WebRtcCng_InitEnc(cng_inst_.get(), sample_rate_hz_,
                             config.sid_frame_interval_ms,
                             config.num_cng_coefficients),
           0)
      << "WebRtcCng_InitEnc failed";
}
|
||||
|
||||
AudioEncoderCng::~AudioEncoderCng() {
|
||||
}
|
||||
|
||||
int AudioEncoderCng::sample_rate_hz() const {
|
||||
return sample_rate_hz_;
|
||||
}
|
||||
|
||||
int AudioEncoderCng::num_channels() const {
|
||||
return num_channels_;
|
||||
}
|
||||
|
||||
int AudioEncoderCng::Num10MsFramesInNextPacket() const {
|
||||
return speech_encoder_->Num10MsFramesInNextPacket();
|
||||
}
|
||||
|
||||
int AudioEncoderCng::Max10MsFramesInAPacket() const {
|
||||
return speech_encoder_->Max10MsFramesInAPacket();
|
||||
}
|
||||
|
||||
// Accepts one 10 ms block of audio per call and buffers it until the speech
// encoder's next packet is complete. The buffered audio is then classified by
// the VAD and encoded either as comfort noise (passive) or by the speech
// encoder (active).
//
// Returns false if |max_encoded_bytes| is smaller than
// |num_cng_coefficients_| + 1, if the VAD reports an error, or if the selected
// encoder fails. |*encoded_bytes| is 0 while audio is still being buffered.
bool AudioEncoderCng::EncodeInternal(uint32_t timestamp,
                                     const int16_t* audio,
                                     size_t max_encoded_bytes,
                                     uint8_t* encoded,
                                     size_t* encoded_bytes,
                                     EncodedInfo* info) {
  const size_t min_output_bytes =
      static_cast<size_t>(num_cng_coefficients_ + 1);
  DCHECK_GE(max_encoded_bytes, min_output_bytes);
  if (max_encoded_bytes < min_output_bytes)
    return false;
  *encoded_bytes = 0;

  // Remember the timestamp of the first block in a new packet.
  if (speech_buffer_.empty()) {
    CHECK_EQ(frames_in_buffer_, 0);
    first_timestamp_in_buffer_ = timestamp;
  }

  // Append this 10 ms block to the pending packet.
  const int num_samples = sample_rate_hz() / 100 * num_channels();
  speech_buffer_.insert(speech_buffer_.end(), audio, audio + num_samples);
  ++frames_in_buffer_;
  if (frames_in_buffer_ < speech_encoder_->Num10MsFramesInNextPacket())
    return true;  // Packet not complete yet; keep buffering.

  CHECK_LE(frames_in_buffer_, 6)
      << "Frame size cannot be larger than 60 ms when using VAD/CNG.";
  const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000;
  CHECK_EQ(speech_buffer_.size(),
           static_cast<size_t>(frames_in_buffer_) * samples_per_10ms_frame);

  // Group several 10 ms blocks per VAD call. Call VAD once or twice using the
  // following split sizes:
  // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms;
  // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms.
  int first_vad_blocks;
  if (frames_in_buffer_ == 4) {
    first_vad_blocks = 2;
  } else if (frames_in_buffer_ > 3) {
    first_vad_blocks = 3;
  } else {
    first_vad_blocks = frames_in_buffer_;
  }
  const int second_vad_blocks = frames_in_buffer_ - first_vad_blocks;
  CHECK_GE(second_vad_blocks, 0);

  // The packet is passive only if every part of it is passive, so the second
  // part is examined only when the first part was passive.
  const size_t first_vad_samples = samples_per_10ms_frame * first_vad_blocks;
  Vad::Activity activity = vad_->VoiceActivity(
      &speech_buffer_[0], first_vad_samples, sample_rate_hz_);
  if (activity == Vad::kPassive && second_vad_blocks > 0) {
    activity = vad_->VoiceActivity(
        &speech_buffer_[first_vad_samples],
        samples_per_10ms_frame * second_vad_blocks, sample_rate_hz_);
  }
  DCHECK_NE(activity, Vad::kError);

  bool success = true;
  switch (activity) {
    case Vad::kPassive: {
      success = EncodePassive(encoded, encoded_bytes);
      info->encoded_timestamp = first_timestamp_in_buffer_;
      info->payload_type = cng_payload_type_;
      last_frame_active_ = false;
      break;
    }
    case Vad::kActive: {
      success = EncodeActive(max_encoded_bytes, encoded, encoded_bytes, info);
      last_frame_active_ = true;
      break;
    }
    case Vad::kError: {
      success = false;
      break;
    }
  }

  // The packet has been consumed (or dropped on error); start a new one.
  speech_buffer_.clear();
  frames_in_buffer_ = 0;
  return success;
}
|
||||
|
||||
// Feeds every buffered 10 ms block to the CNG encoder. At most one of the
// blocks may produce output, whose size is written to |*encoded_bytes|. When
// the previous packet was active speech, a SID frame is forced on the first
// block. Returns false if the CNG encoder reports an error.
bool AudioEncoderCng::EncodePassive(uint8_t* encoded, size_t* encoded_bytes) {
  const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000;
  bool sid_pending = last_frame_active_;  // Force a SID right after speech.
  bool have_output = false;
  for (int block = 0; block < frames_in_buffer_; ++block) {
    int16_t bytes_this_block = 0;
    const int status = WebRtcCng_Encode(
        cng_inst_.get(), &speech_buffer_[block * samples_per_10ms_frame],
        static_cast<int16_t>(samples_per_10ms_frame), encoded,
        &bytes_this_block, sid_pending);
    if (status < 0)
      return false;
    if (bytes_this_block > 0) {
      // Only a single block per packet is allowed to emit a frame.
      CHECK(!have_output);
      *encoded_bytes = static_cast<size_t>(bytes_this_block);
      have_output = true;
      sid_pending = false;
    }
    CHECK(!sid_pending) << "SID frame not produced despite being forced.";
  }
  return true;
}
|
||||
|
||||
// Hands every buffered 10 ms block to the wrapped speech encoder, in order.
// The speech encoder is expected to emit data only on the last block of the
// packet; earlier output triggers a CHECK failure. Returns false as soon as
// the speech encoder reports a failure.
bool AudioEncoderCng::EncodeActive(size_t max_encoded_bytes,
                                   uint8_t* encoded,
                                   size_t* encoded_bytes,
                                   EncodedInfo* info) {
  const size_t samples_per_10ms_frame = 10 * sample_rate_hz_ / 1000;
  const int last_block = frames_in_buffer_ - 1;
  for (int block = 0; block < frames_in_buffer_; ++block) {
    const int16_t* block_audio =
        &speech_buffer_[block * samples_per_10ms_frame];
    const bool encoded_ok = speech_encoder_->Encode(
        first_timestamp_in_buffer_, block_audio, samples_per_10ms_frame,
        max_encoded_bytes, encoded, encoded_bytes, info);
    if (!encoded_ok)
      return false;
    if (block < last_block) {
      CHECK_EQ(*encoded_bytes, 0u) << "Encoder delivered data too early.";
    }
  }
  return true;
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,465 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/common_audio/vad/mock/mock_vad.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
using ::testing::Return;
|
||||
using ::testing::_;
|
||||
using ::testing::SetArgPointee;
|
||||
using ::testing::InSequence;
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// File-local constants shared by the tests below. The anonymous namespace
// already gives them internal linkage.
namespace {
// Capacity of the output buffer handed to Encode().
const size_t kMaxEncodedBytes = 1000;
// Capacity of the input buffer: 10 ms @ 48 kHz stereo.
const size_t kMaxNumSamples = 48 * 10 * 2;
// Byte count the mocked speech encoder reports when it "produces" a packet.
const size_t kMockReturnEncodedBytes = 17;
// Payload type configured for CNG packets in these tests.
const int kCngPayloadType = 18;
}  // namespace
|
||||
|
||||
class AudioEncoderCngTest : public ::testing::Test {
|
||||
protected:
|
||||
AudioEncoderCngTest()
|
||||
: mock_vad_(new MockVad(Vad::kVadNormal)),
|
||||
timestamp_(4711),
|
||||
num_audio_samples_10ms_(0),
|
||||
encoded_bytes_(0) {
|
||||
memset(encoded_, 0, kMaxEncodedBytes);
|
||||
memset(audio_, 0, kMaxNumSamples * 2);
|
||||
config_.speech_encoder = &mock_encoder_;
|
||||
EXPECT_CALL(mock_encoder_, num_channels()).WillRepeatedly(Return(1));
|
||||
// Let the AudioEncoderCng object use a MockVad instead of its internally
|
||||
// created Vad object.
|
||||
config_.vad = mock_vad_;
|
||||
config_.payload_type = kCngPayloadType;
|
||||
}
|
||||
|
||||
virtual void TearDown() OVERRIDE {
|
||||
EXPECT_CALL(*mock_vad_, Die()).Times(1);
|
||||
cng_.reset();
|
||||
// Don't expect the cng_ object to delete the AudioEncoder object. But it
|
||||
// will be deleted with the test fixture. This is why we explicitly delete
|
||||
// the cng_ object above, and set expectations on mock_encoder_ afterwards.
|
||||
EXPECT_CALL(mock_encoder_, Die()).Times(1);
|
||||
}
|
||||
|
||||
void CreateCng() {
|
||||
// The config_ parameters may be changed by the TEST_Fs up until CreateCng()
|
||||
// is called, thus we cannot use the values until now.
|
||||
num_audio_samples_10ms_ = 10 * config_.sample_rate_hz / 1000;
|
||||
ASSERT_LE(num_audio_samples_10ms_, kMaxNumSamples);
|
||||
EXPECT_CALL(mock_encoder_, sample_rate_hz())
|
||||
.WillRepeatedly(Return(config_.sample_rate_hz));
|
||||
// Max10MsFramesInAPacket() is just used to verify that the SID frame period
|
||||
// is not too small. The return value does not matter that much, as long as
|
||||
// it is smaller than 10.
|
||||
EXPECT_CALL(mock_encoder_, Max10MsFramesInAPacket()).WillOnce(Return(1));
|
||||
cng_.reset(new AudioEncoderCng(config_));
|
||||
}
|
||||
|
||||
void Encode() {
|
||||
ASSERT_TRUE(cng_) << "Must call CreateCng() first.";
|
||||
memset(&encoded_info_, 0, sizeof(encoded_info_));
|
||||
encoded_bytes_ = 0;
|
||||
ASSERT_TRUE(cng_->Encode(timestamp_, audio_, num_audio_samples_10ms_,
|
||||
kMaxEncodedBytes, encoded_, &encoded_bytes_,
|
||||
&encoded_info_));
|
||||
timestamp_ += num_audio_samples_10ms_;
|
||||
}
|
||||
|
||||
// Verifies that the cng_ object waits until it has collected
|
||||
// |blocks_per_frame| blocks of audio, and then dispatches all of them to
|
||||
// the underlying codec (speech or cng).
|
||||
void CheckBlockGrouping(int blocks_per_frame, bool active_speech) {
|
||||
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
|
||||
.WillRepeatedly(Return(blocks_per_frame));
|
||||
CreateCng();
|
||||
EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
|
||||
.WillRepeatedly(Return(active_speech ? Vad::kActive : Vad::kPassive));
|
||||
|
||||
// Don't expect any calls to the encoder yet.
|
||||
EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0);
|
||||
for (int i = 0; i < blocks_per_frame - 1; ++i) {
|
||||
Encode();
|
||||
EXPECT_EQ(0u, encoded_bytes_);
|
||||
}
|
||||
if (active_speech) {
|
||||
// Now expect |blocks_per_frame| calls to the encoder in sequence.
|
||||
// Let the speech codec mock return true and set the number of encoded
|
||||
// bytes to |kMockReturnEncodedBytes|.
|
||||
InSequence s;
|
||||
for (int j = 0; j < blocks_per_frame - 1; ++j) {
|
||||
EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
|
||||
.WillOnce(DoAll(SetArgPointee<4>(0), Return(true)));
|
||||
}
|
||||
EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
|
||||
.WillOnce(
|
||||
DoAll(SetArgPointee<4>(kMockReturnEncodedBytes), Return(true)));
|
||||
}
|
||||
Encode();
|
||||
if (active_speech) {
|
||||
EXPECT_EQ(kMockReturnEncodedBytes, encoded_bytes_);
|
||||
} else {
|
||||
EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients + 1),
|
||||
encoded_bytes_);
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that the audio is partitioned into larger blocks before calling
|
||||
// the VAD.
|
||||
void CheckVadInputSize(int input_frame_size_ms,
|
||||
int expected_first_block_size_ms,
|
||||
int expected_second_block_size_ms) {
|
||||
const int blocks_per_frame = input_frame_size_ms / 10;
|
||||
|
||||
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
|
||||
.WillRepeatedly(Return(blocks_per_frame));
|
||||
|
||||
// Expect nothing to happen before the last block is sent to cng_.
|
||||
EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)).Times(0);
|
||||
for (int i = 0; i < blocks_per_frame - 1; ++i) {
|
||||
Encode();
|
||||
}
|
||||
|
||||
// Let the VAD decision be passive, since an active decision may lead to
|
||||
// early termination of the decision loop.
|
||||
const int sample_rate_hz = config_.sample_rate_hz;
|
||||
InSequence s;
|
||||
EXPECT_CALL(
|
||||
*mock_vad_,
|
||||
VoiceActivity(_, expected_first_block_size_ms * sample_rate_hz / 1000,
|
||||
sample_rate_hz)).WillOnce(Return(Vad::kPassive));
|
||||
if (expected_second_block_size_ms > 0) {
|
||||
EXPECT_CALL(*mock_vad_,
|
||||
VoiceActivity(
|
||||
_, expected_second_block_size_ms * sample_rate_hz / 1000,
|
||||
sample_rate_hz)).WillOnce(Return(Vad::kPassive));
|
||||
}
|
||||
|
||||
// With this call to Encode(), |mock_vad_| should be called according to the
|
||||
// above expectations.
|
||||
Encode();
|
||||
}
|
||||
|
||||
// Tests a frame with both active and passive speech. Returns true if the
|
||||
// decision was active speech, false if it was passive.
|
||||
bool CheckMixedActivePassive(Vad::Activity first_type,
|
||||
Vad::Activity second_type) {
|
||||
// Set the speech encoder frame size to 60 ms, to ensure that the VAD will
|
||||
// be called twice.
|
||||
const int blocks_per_frame = 6;
|
||||
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
|
||||
.WillRepeatedly(Return(blocks_per_frame));
|
||||
InSequence s;
|
||||
EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
|
||||
.WillOnce(Return(first_type));
|
||||
if (first_type == Vad::kPassive) {
|
||||
// Expect a second call to the VAD only if the first frame was passive.
|
||||
EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
|
||||
.WillOnce(Return(second_type));
|
||||
}
|
||||
encoded_info_.payload_type = 0;
|
||||
for (int i = 0; i < blocks_per_frame; ++i) {
|
||||
Encode();
|
||||
}
|
||||
return encoded_info_.payload_type != kCngPayloadType;
|
||||
}
|
||||
|
||||
AudioEncoderCng::Config config_;
|
||||
scoped_ptr<AudioEncoderCng> cng_;
|
||||
MockAudioEncoder mock_encoder_;
|
||||
MockVad* mock_vad_; // Ownership is transferred to |cng_|.
|
||||
uint32_t timestamp_;
|
||||
int16_t audio_[kMaxNumSamples];
|
||||
size_t num_audio_samples_10ms_;
|
||||
uint8_t encoded_[kMaxEncodedBytes];
|
||||
size_t encoded_bytes_;
|
||||
AudioEncoder::EncodedInfo encoded_info_;
|
||||
};
|
||||
|
||||
TEST_F(AudioEncoderCngTest, CreateAndDestroy) {
|
||||
CreateCng();
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngTest, CheckFrameSizePropagation) {
|
||||
CreateCng();
|
||||
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(17));
|
||||
EXPECT_EQ(17, cng_->Num10MsFramesInNextPacket());
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngTest, EncodeCallsVad) {
|
||||
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
|
||||
.WillRepeatedly(Return(1));
|
||||
CreateCng();
|
||||
EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
|
||||
.WillOnce(Return(Vad::kPassive));
|
||||
Encode();
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngTest, EncodeCollects1BlockPassiveSpeech) {
|
||||
CheckBlockGrouping(1, false);
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksPassiveSpeech) {
|
||||
CheckBlockGrouping(2, false);
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksPassiveSpeech) {
|
||||
CheckBlockGrouping(3, false);
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngTest, EncodeCollects1BlockActiveSpeech) {
|
||||
CheckBlockGrouping(1, true);
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksActiveSpeech) {
|
||||
CheckBlockGrouping(2, true);
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksActiveSpeech) {
|
||||
CheckBlockGrouping(3, true);
|
||||
}
|
||||
|
||||
// Feeds 100 passive blocks through a 30 ms configuration. Verifies that output
// appears only on packet boundaries, that the speech encoder is never called,
// and that packets expected to carry a SID frame have the CNG payload type,
// size and timestamp.
TEST_F(AudioEncoderCngTest, EncodePassive) {
  const int kBlocksPerFrame = 3;
  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
      .WillRepeatedly(Return(kBlocksPerFrame));
  CreateCng();
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .WillRepeatedly(Return(Vad::kPassive));
  // Expect no calls at all to the speech encoder mock.
  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0);

  const int blocks_per_sid_interval = config_.sid_frame_interval_ms / 10;
  const size_t expected_sid_bytes =
      static_cast<size_t>(config_.num_cng_coefficients) + 1;
  uint32_t expected_timestamp = timestamp_;
  for (int i = 0; i < 100; ++i) {
    Encode();
    // The cng encoder is only invoked once every |kBlocksPerFrame| calls.
    const bool packet_complete = ((i + 1) % kBlocksPerFrame == 0);
    if (!packet_complete) {
      // Still buffering: expect no output.
      EXPECT_EQ(0u, encoded_bytes_);
      continue;
    }
    // Check whether a SID interval has elapsed; if so, a CNG encoding is due.
    if ((i % blocks_per_sid_interval) < kBlocksPerFrame) {
      EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
      EXPECT_EQ(expected_sid_bytes, encoded_bytes_);
      EXPECT_EQ(expected_timestamp, encoded_info_.encoded_timestamp);
    }
    expected_timestamp += kBlocksPerFrame * num_audio_samples_10ms_;
  }
}
|
||||
|
||||
// Verifies that the correct action is taken for frames with both active and
|
||||
// passive speech.
|
||||
TEST_F(AudioEncoderCngTest, MixedActivePassive) {
|
||||
CreateCng();
|
||||
|
||||
// All of the frame is active speech.
|
||||
EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
|
||||
.Times(6)
|
||||
.WillRepeatedly(Return(true));
|
||||
EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive));
|
||||
|
||||
// First half of the frame is active speech.
|
||||
EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
|
||||
.Times(6)
|
||||
.WillRepeatedly(Return(true));
|
||||
EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive));
|
||||
|
||||
// Second half of the frame is active speech.
|
||||
EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
|
||||
.Times(6)
|
||||
.WillRepeatedly(Return(true));
|
||||
EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive));
|
||||
|
||||
// All of the frame is passive speech. Expect no calls to |mock_encoder_|.
|
||||
EXPECT_FALSE(CheckMixedActivePassive(Vad::kPassive, Vad::kPassive));
|
||||
}
|
||||
|
||||
// These tests verify that the audio is partitioned into larger blocks before
|
||||
// calling the VAD.
|
||||
// The parameters for CheckVadInputSize are:
|
||||
// CheckVadInputSize(frame_size, expected_first_block_size,
|
||||
// expected_second_block_size);
|
||||
TEST_F(AudioEncoderCngTest, VadInputSize10Ms) {
|
||||
CreateCng();
|
||||
CheckVadInputSize(10, 10, 0);
|
||||
}
|
||||
TEST_F(AudioEncoderCngTest, VadInputSize20Ms) {
|
||||
CreateCng();
|
||||
CheckVadInputSize(20, 20, 0);
|
||||
}
|
||||
TEST_F(AudioEncoderCngTest, VadInputSize30Ms) {
|
||||
CreateCng();
|
||||
CheckVadInputSize(30, 30, 0);
|
||||
}
|
||||
TEST_F(AudioEncoderCngTest, VadInputSize40Ms) {
|
||||
CreateCng();
|
||||
CheckVadInputSize(40, 20, 20);
|
||||
}
|
||||
TEST_F(AudioEncoderCngTest, VadInputSize50Ms) {
|
||||
CreateCng();
|
||||
CheckVadInputSize(50, 30, 20);
|
||||
}
|
||||
TEST_F(AudioEncoderCngTest, VadInputSize60Ms) {
|
||||
CreateCng();
|
||||
CheckVadInputSize(60, 30, 30);
|
||||
}
|
||||
|
||||
// Verifies that the EncodedInfo struct pointer passed to
|
||||
// AudioEncoderCng::Encode is propagated to the Encode call to the underlying
|
||||
// speech encoder.
|
||||
TEST_F(AudioEncoderCngTest, VerifyEncoderInfoPropagation) {
|
||||
CreateCng();
|
||||
EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, &encoded_info_))
|
||||
.WillOnce(Return(true));
|
||||
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1));
|
||||
EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
|
||||
.WillOnce(Return(Vad::kActive));
|
||||
Encode();
|
||||
}
|
||||
|
||||
// Verifies that the correct payload type is set when CNG is encoded.
|
||||
TEST_F(AudioEncoderCngTest, VerifyCngPayloadType) {
|
||||
CreateCng();
|
||||
EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _)).Times(0);
|
||||
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1));
|
||||
EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
|
||||
.WillOnce(Return(Vad::kPassive));
|
||||
encoded_info_.payload_type = 0;
|
||||
Encode();
|
||||
EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
|
||||
}
|
||||
|
||||
// Verifies that a SID frame is encoded immediately as the signal changes from
|
||||
// active speech to passive.
|
||||
// Runs the sequence noise, noise, speech, noise with 10 ms packets and checks
// that a SID frame is produced for the first noise block and again for the
// noise block that directly follows speech.
TEST_F(AudioEncoderCngTest, VerifySidFrameAfterSpeech) {
  CreateCng();
  EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
      .WillRepeatedly(Return(1));
  const size_t expected_sid_bytes =
      static_cast<size_t>(config_.num_cng_coefficients) + 1;

  // Start with encoding noise.
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .Times(2)
      .WillRepeatedly(Return(Vad::kPassive));
  Encode();
  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
  EXPECT_EQ(expected_sid_bytes, encoded_bytes_);
  // Encode again, and make sure we got no frame at all (since the SID frame
  // period is 100 ms by default).
  Encode();
  EXPECT_EQ(0u, encoded_bytes_);

  // Now encode active speech.
  encoded_info_.payload_type = 0;
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .WillOnce(Return(Vad::kActive));
  EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _, _, _))
      .WillOnce(DoAll(SetArgPointee<4>(kMockReturnEncodedBytes), Return(true)));
  Encode();
  EXPECT_EQ(kMockReturnEncodedBytes, encoded_bytes_);

  // Go back to noise again, and verify that a SID frame is emitted.
  EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _))
      .WillOnce(Return(Vad::kPassive));
  Encode();
  EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type);
  EXPECT_EQ(expected_sid_bytes, encoded_bytes_);
}
|
||||
|
||||
#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
|
||||
|
||||
// This test fixture tests various error conditions that makes the
|
||||
// AudioEncoderCng die via CHECKs.
|
||||
class AudioEncoderCngDeathTest : public AudioEncoderCngTest {
|
||||
protected:
|
||||
AudioEncoderCngDeathTest() : AudioEncoderCngTest() {
|
||||
// Don't provide a Vad mock object, since it will leak when the test dies.
|
||||
config_.vad = NULL;
|
||||
EXPECT_CALL(*mock_vad_, Die()).Times(1);
|
||||
delete mock_vad_;
|
||||
mock_vad_ = NULL;
|
||||
}
|
||||
|
||||
// Override AudioEncoderCngTest::TearDown, since that one expects a call to
|
||||
// the destructor of |mock_vad_|. In this case, that object is already
|
||||
// deleted.
|
||||
virtual void TearDown() OVERRIDE {
|
||||
cng_.reset();
|
||||
// Don't expect the cng_ object to delete the AudioEncoder object. But it
|
||||
// will be deleted with the test fixture. This is why we explicitly delete
|
||||
// the cng_ object above, and set expectations on mock_encoder_ afterwards.
|
||||
EXPECT_CALL(mock_encoder_, Die()).Times(1);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(AudioEncoderCngDeathTest, WrongFrameSize) {
|
||||
CreateCng();
|
||||
num_audio_samples_10ms_ *= 2; // 20 ms frame.
|
||||
EXPECT_DEATH(Encode(), "");
|
||||
num_audio_samples_10ms_ = 0; // Zero samples.
|
||||
EXPECT_DEATH(Encode(), "");
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngDeathTest, WrongSampleRates) {
|
||||
config_.sample_rate_hz = 32000;
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
config_.sample_rate_hz = 48000;
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
config_.sample_rate_hz = 0;
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
config_.sample_rate_hz = -8000;
|
||||
// Don't use CreateCng() here, since the built-in sanity checks will prevent
|
||||
// the test from reaching the expected point-of-death.
|
||||
EXPECT_DEATH(cng_.reset(new AudioEncoderCng(config_)),
|
||||
"Invalid configuration");
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficients) {
|
||||
config_.num_cng_coefficients = -1;
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
config_.num_cng_coefficients = 0;
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
config_.num_cng_coefficients = 13;
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngDeathTest, NullSpeechEncoder) {
|
||||
config_.speech_encoder = NULL;
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngDeathTest, Stereo) {
|
||||
EXPECT_CALL(mock_encoder_, num_channels()).WillRepeatedly(Return(2));
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
config_.num_channels = 2;
|
||||
EXPECT_DEATH(CreateCng(), "Invalid configuration");
|
||||
}
|
||||
|
||||
TEST_F(AudioEncoderCngDeathTest, EncoderFrameSizeTooLarge) {
|
||||
CreateCng();
|
||||
EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket())
|
||||
.WillRepeatedly(Return(7));
|
||||
for (int i = 0; i < 6; ++i)
|
||||
Encode();
|
||||
EXPECT_DEATH(Encode(),
|
||||
"Frame size cannot be larger than 60 ms when using VAD/CNG.");
|
||||
}
|
||||
|
||||
#endif // GTEST_HAS_DEATH_TEST
|
||||
|
||||
} // namespace webrtc
|
@ -25,7 +25,9 @@
|
||||
],
|
||||
},
|
||||
'sources': [
|
||||
'include/audio_encoder_cng.h',
|
||||
'include/webrtc_cng.h',
|
||||
'audio_encoder_cng.cc',
|
||||
'webrtc_cng.c',
|
||||
'cng_helpfuns.c',
|
||||
'cng_helpfuns.h',
|
||||
|
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/common_audio/vad/include/vad.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/audio_encoder.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class Vad;
|
||||
|
||||
class AudioEncoderCng : public AudioEncoder {
|
||||
public:
|
||||
struct Config {
|
||||
Config();
|
||||
bool IsOk() const;
|
||||
|
||||
int sample_rate_hz;
|
||||
int num_channels;
|
||||
int payload_type;
|
||||
// Caller keeps ownership of the AudioEncoder object.
|
||||
AudioEncoder* speech_encoder;
|
||||
Vad::Aggressiveness vad_mode;
|
||||
int sid_frame_interval_ms;
|
||||
int num_cng_coefficients;
|
||||
// The Vad pointer is mainly for testing. If a NULL pointer is passed, the
|
||||
// AudioEncoderCng creates (and destroys) a Vad object internally. If an
|
||||
// object is passed, the AudioEncoderCng assumes ownership of the Vad
|
||||
// object.
|
||||
Vad* vad;
|
||||
};
|
||||
|
||||
explicit AudioEncoderCng(const Config& config);
|
||||
|
||||
virtual ~AudioEncoderCng();
|
||||
|
||||
virtual int sample_rate_hz() const OVERRIDE;
|
||||
virtual int num_channels() const OVERRIDE;
|
||||
virtual int Num10MsFramesInNextPacket() const OVERRIDE;
|
||||
virtual int Max10MsFramesInAPacket() const OVERRIDE;
|
||||
|
||||
protected:
|
||||
virtual bool EncodeInternal(uint32_t timestamp,
|
||||
const int16_t* audio,
|
||||
size_t max_encoded_bytes,
|
||||
uint8_t* encoded,
|
||||
size_t* encoded_bytes,
|
||||
EncodedInfo* info) OVERRIDE;
|
||||
|
||||
private:
|
||||
// Deleter for use with scoped_ptr. E.g., use as
|
||||
// scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_;
|
||||
struct CngInstDeleter {
|
||||
inline void operator()(CNG_enc_inst* ptr) const { WebRtcCng_FreeEnc(ptr); }
|
||||
};
|
||||
|
||||
bool EncodePassive(uint8_t* encoded, size_t* encoded_bytes);
|
||||
|
||||
bool EncodeActive(size_t max_encoded_bytes,
|
||||
uint8_t* encoded,
|
||||
size_t* encoded_bytes,
|
||||
EncodedInfo* info);
|
||||
|
||||
AudioEncoder* speech_encoder_;
|
||||
const int sample_rate_hz_;
|
||||
const int num_channels_;
|
||||
const int cng_payload_type_;
|
||||
const int num_cng_coefficients_;
|
||||
std::vector<int16_t> speech_buffer_;
|
||||
uint32_t first_timestamp_in_buffer_;
|
||||
int frames_in_buffer_;
|
||||
bool last_frame_active_;
|
||||
scoped_ptr<Vad> vad_;
|
||||
scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_
|
40
webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h
Normal file
40
webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_
|
||||
|
||||
#include "webrtc/modules/audio_coding/codecs/audio_encoder.h"
|
||||
|
||||
#include "testing/gmock/include/gmock/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockAudioEncoder : public AudioEncoder {
|
||||
public:
|
||||
virtual ~MockAudioEncoder() { Die(); }
|
||||
MOCK_METHOD0(Die, void());
|
||||
MOCK_CONST_METHOD0(sample_rate_hz, int());
|
||||
MOCK_CONST_METHOD0(num_channels, int());
|
||||
MOCK_CONST_METHOD0(Num10MsFramesInNextPacket, int());
|
||||
MOCK_CONST_METHOD0(Max10MsFramesInAPacket, int());
|
||||
// Note, we explicitly chose not to create a mock for the Encode method.
|
||||
MOCK_METHOD6(EncodeInternal,
|
||||
bool(uint32_t timestamp,
|
||||
const int16_t* audio,
|
||||
size_t max_encoded_bytes,
|
||||
uint8_t* encoded,
|
||||
size_t* encoded_bytes,
|
||||
EncodedInfo* info));
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_MOCK_MOCK_AUDIO_ENCODER_H_
|
@ -102,6 +102,7 @@
|
||||
'<(webrtc_root)/test/test.gyp:test_support_main',
|
||||
],
|
||||
'sources': [
|
||||
'audio_coding/codecs/cng/audio_encoder_cng_unittest.cc',
|
||||
'audio_coding/main/acm2/acm_opus_unittest.cc',
|
||||
'audio_coding/main/acm2/acm_receiver_unittest.cc',
|
||||
'audio_coding/main/acm2/acm_receiver_unittest_oldapi.cc',
|
||||
|
Reference in New Issue
Block a user