Reland "Prevent Opus DTX from generating intermittent noise during silence"

The original CL was reviewed at
https://codereview.webrtc.org/1415173005/

A silly mistake was made in the last patch set, and the CL was reverted. This CL fixes that mistake and relands the change.

BUG=

Review URL: https://codereview.webrtc.org/1422213003

Cr-Commit-Position: refs/heads/master@{#10574}
This commit is contained in:
minyue
2015-11-10 03:49:26 -08:00
committed by Commit bot
parent 626252fa66
commit 3cea256806
6 changed files with 377 additions and 62 deletions

View File

@ -15,7 +15,14 @@
// State kept for one Opus encoder instance.
struct WebRtcOpusEncInst {
// Handle to the underlying Opus encoder.
OpusEncoder* encoder;
// Number of audio channels; |zero_counts| holds one entry per channel.
int channels;
// Non-zero while the encoder is in DTX mode.
int in_dtx_mode;
// When Opus is in DTX mode, we use |zero_counts| to count consecutive zero
// samples so that long runs of zeros can be broken up, which prevents DTX
// from going wrong. There is one counter per channel. After each encoding,
// |zero_counts| contains the number of trailing zeros left over from the
// last frame.
// TODO(minyue): remove this when Opus gets an internal fix to DTX.
size_t* zero_counts;
};
struct WebRtcOpusDecInst {

View File

@ -11,6 +11,7 @@
#include "webrtc/modules/audio_coding/codecs/opus/include/opus_interface.h"
#include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
@ -29,48 +30,61 @@ enum {
/* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
kWebRtcOpusDefaultFrameSize = 960,
// Maximum number of consecutive zeros, beyond or equal to which DTX can fail.
kZeroBreakCount = 157,
#if defined(OPUS_FIXED_POINT)
kZeroBreakValue = 10,
#else
kZeroBreakValue = 1,
#endif
};
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
int32_t channels,
int32_t application) {
OpusEncInst* state;
if (inst != NULL) {
state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
if (state) {
int opus_app;
switch (application) {
case 0: {
opus_app = OPUS_APPLICATION_VOIP;
break;
}
case 1: {
opus_app = OPUS_APPLICATION_AUDIO;
break;
}
default: {
free(state);
return -1;
}
}
int opus_app;
if (!inst)
return -1;
int error;
state->encoder = opus_encoder_create(48000, channels, opus_app,
&error);
state->in_dtx_mode = 0;
if (error == OPUS_OK && state->encoder != NULL) {
*inst = state;
return 0;
}
free(state);
}
switch (application) {
case 0:
opus_app = OPUS_APPLICATION_VOIP;
break;
case 1:
opus_app = OPUS_APPLICATION_AUDIO;
break;
default:
return -1;
}
return -1;
OpusEncInst* state = calloc(1, sizeof(OpusEncInst));
assert(state);
// Allocate zero counters.
state->zero_counts = calloc(channels, sizeof(size_t));
assert(state->zero_counts);
int error;
state->encoder = opus_encoder_create(48000, channels, opus_app,
&error);
if (error != OPUS_OK || !state->encoder) {
WebRtcOpus_EncoderFree(state);
return -1;
}
state->in_dtx_mode = 0;
state->channels = channels;
*inst = state;
return 0;
}
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
if (inst) {
opus_encoder_destroy(inst->encoder);
free(inst->zero_counts);
free(inst);
return 0;
} else {
@ -84,13 +98,42 @@ int WebRtcOpus_Encode(OpusEncInst* inst,
size_t length_encoded_buffer,
uint8_t* encoded) {
int res;
size_t i;
int c;
int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs];
if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
return -1;
}
const int channels = inst->channels;
int use_buffer = 0;
// Break long consecutive zeros by forcing a "1" every |kZeroBreakCount|
// samples.
if (inst->in_dtx_mode) {
for (i = 0; i < samples; ++i) {
for (c = 0; c < channels; ++c) {
if (audio_in[i * channels + c] == 0) {
++inst->zero_counts[c];
if (inst->zero_counts[c] == kZeroBreakCount) {
if (!use_buffer) {
memcpy(buffer, audio_in, samples * channels * sizeof(int16_t));
use_buffer = 1;
}
buffer[i * channels + c] = kZeroBreakValue;
inst->zero_counts[c] = 0;
}
} else {
inst->zero_counts[c] = 0;
}
}
}
}
res = opus_encode(inst->encoder,
(const opus_int16*)audio_in,
use_buffer ? buffer : audio_in,
(int)samples,
encoded,
(opus_int32)length_encoded_buffer);

View File

@ -36,7 +36,7 @@ class OpusTest : public TestWithParam<::testing::tuple<int, int>> {
protected:
OpusTest();
void TestDtxEffect(bool dtx);
void TestDtxEffect(bool dtx, int block_length_ms);
// Prepare |speech_data_| for encoding, read from a hard-coded file.
// After preparation, |speech_data_.GetNextBlock()| returns a pointer to a
@ -53,6 +53,9 @@ class OpusTest : public TestWithParam<::testing::tuple<int, int>> {
void SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
opus_int32 expect, int32_t set);
void CheckAudioBounded(const int16_t* audio, size_t samples, int channels,
uint16_t bound) const;
WebRtcOpusEncInst* opus_encoder_;
WebRtcOpusDecInst* opus_decoder_;
@ -95,6 +98,16 @@ void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
EXPECT_EQ(expect, bandwidth);
}
// Verifies that every sample in the interleaved buffer |audio| lies within
// the closed range [-bound, bound]. |samples| is the number of samples per
// channel and |channels| the number of interleaved channels, so the buffer is
// read up to index |samples| * |channels| - 1.
// NOTE: the checks are fatal gtest assertions, so the scan stops at the first
// out-of-range sample; per gtest semantics this returns from this helper only.
void OpusTest::CheckAudioBounded(const int16_t* audio, size_t samples,
                                 int channels, uint16_t bound) const {
  size_t i = 0;
  while (i < samples) {
    int c = 0;
    while (c < channels) {
      // Lower limit first, then upper limit, for each sample in order.
      ASSERT_GE(audio[i * channels + c], -bound);
      ASSERT_LE(audio[i * channels + c], bound);
      ++c;
    }
    ++i;
  }
}
int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,
rtc::ArrayView<const int16_t> input_audio,
WebRtcOpusDecInst* decoder,
@ -116,8 +129,9 @@ int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,
// Test if encoder/decoder can enter DTX mode properly and do not enter DTX when
// they should not. This test is signal dependent.
void OpusTest::TestDtxEffect(bool dtx) {
PrepareSpeechData(channels_, 20, 2000);
void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
PrepareSpeechData(channels_, block_length_ms, 2000);
const size_t samples = kOpusRateKhz * block_length_ms;
// Create encoder memory.
EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_,
@ -130,17 +144,17 @@ void OpusTest::TestDtxEffect(bool dtx) {
channels_ == 1 ? 32000 : 64000));
// Set input audio as silence.
std::vector<int16_t> silence(kOpus20msFrameSamples * channels_, 0);
std::vector<int16_t> silence(samples * channels_, 0);
// Setting DTX.
EXPECT_EQ(0, dtx ? WebRtcOpus_EnableDtx(opus_encoder_) :
WebRtcOpus_DisableDtx(opus_encoder_));
int16_t audio_type;
int16_t* output_data_decode = new int16_t[kOpus20msFrameSamples * channels_];
int16_t* output_data_decode = new int16_t[samples * channels_];
for (int i = 0; i < 100; ++i) {
EXPECT_EQ(kOpus20msFrameSamples,
EXPECT_EQ(samples,
static_cast<size_t>(EncodeDecode(
opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
output_data_decode, &audio_type)));
@ -157,9 +171,10 @@ void OpusTest::TestDtxEffect(bool dtx) {
// We input some silent segments. In DTX mode, the encoder will stop sending.
// However, DTX may happen after a while.
for (int i = 0; i < 30; ++i) {
EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
EXPECT_EQ(samples,
static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_, output_data_decode,
&audio_type)));
if (!dtx) {
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -175,21 +190,47 @@ void OpusTest::TestDtxEffect(bool dtx) {
// When Opus is in DTX, it wakes up in a regular basis. It sends two packets,
// one with an arbitrary size and the other of 1-byte, then stops sending for
// 19 frames.
const int cycles = 5;
for (int j = 0; j < cycles; ++j) {
// DTX mode is maintained 19 frames.
for (int i = 0; i < 19; ++i) {
EXPECT_EQ(kOpus20msFrameSamples,
static_cast<size_t>(
EncodeDecode(opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
// a certain number of frames.
// |max_dtx_frames| is the maximum number of frames Opus can stay in DTX.
const int max_dtx_frames = 400 / block_length_ms + 1;
// We run |kRunTimeMs| milliseconds of pure silence.
const int kRunTimeMs = 2000;
// We check that, after a |kCheckTimeMs| milliseconds (given that the CNG in
// Opus needs time to adapt), the absolute values of DTX decoded signal are
// bounded by |kOutputValueBound|.
const int kCheckTimeMs = 1500;
#if defined(OPUS_FIXED_POINT)
const uint16_t kOutputValueBound = 20;
#else
const uint16_t kOutputValueBound = 2;
#endif
int time = 0;
while (time < kRunTimeMs) {
// DTX mode is maintained for maximum |max_dtx_frames| frames.
int i = 0;
for (; i < max_dtx_frames; ++i) {
time += block_length_ms;
EXPECT_EQ(samples,
static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_, output_data_decode,
&audio_type)));
if (dtx) {
if (encoded_bytes_ > 1)
break;
EXPECT_EQ(0U, encoded_bytes_) // Send 0 byte.
<< "Opus should have entered DTX mode.";
EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
EXPECT_EQ(2, audio_type); // Comfort noise.
if (time >= kCheckTimeMs) {
CheckAudioBounded(output_data_decode, samples, channels_,
kOutputValueBound);
}
} else {
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -198,25 +239,31 @@ void OpusTest::TestDtxEffect(bool dtx) {
}
}
// Quit DTX after 19 frames.
EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
if (dtx) {
// With DTX, Opus must stop transmission for some time.
EXPECT_GT(i, 1);
}
EXPECT_GT(encoded_bytes_, 1U);
// We expect a normal payload.
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
EXPECT_EQ(0, audio_type); // Speech.
// Enters DTX again immediately.
EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
time += block_length_ms;
EXPECT_EQ(samples,
static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_, output_data_decode,
&audio_type)));
if (dtx) {
EXPECT_EQ(1U, encoded_bytes_); // Send 1 byte.
EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
EXPECT_EQ(2, audio_type); // Comfort noise.
if (time >= kCheckTimeMs) {
CheckAudioBounded(output_data_decode, samples, channels_,
kOutputValueBound);
}
} else {
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -228,9 +275,10 @@ void OpusTest::TestDtxEffect(bool dtx) {
silence[0] = 10000;
if (dtx) {
// Verify that encoder/decoder can jump out from DTX mode.
EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_,
output_data_decode, &audio_type)));
EXPECT_EQ(samples,
static_cast<size_t>(EncodeDecode(
opus_encoder_, silence, opus_decoder_, output_data_decode,
&audio_type)));
EXPECT_GT(encoded_bytes_, 1U);
EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
@ -436,11 +484,15 @@ TEST_P(OpusTest, OpusEnableDisableDtx) {
}
TEST_P(OpusTest, OpusDtxOff) {
TestDtxEffect(false);
TestDtxEffect(false, 10);
TestDtxEffect(false, 20);
TestDtxEffect(false, 40);
}
TEST_P(OpusTest, OpusDtxOn) {
TestDtxEffect(true);
TestDtxEffect(true, 10);
TestDtxEffect(true, 20);
TestDtxEffect(true, 40);
}
TEST_P(OpusTest, OpusSetPacketLossRate) {