diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_inst.h b/webrtc/modules/audio_coding/codecs/opus/opus_inst.h index 373db392a6..8573b6d436 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_inst.h +++ b/webrtc/modules/audio_coding/codecs/opus/opus_inst.h @@ -15,7 +15,14 @@ struct WebRtcOpusEncInst { OpusEncoder* encoder; + int channels; int in_dtx_mode; + // When Opus is in DTX mode, we use |zero_counts| to count consecutive zeros + // to break long zero segment so as to prevent DTX from going wrong. We use + // one counter for each channel. After each encoding, |zero_counts| contain + // the remaining zeros from the last frame. + // TODO(minyue): remove this when Opus gets an internal fix to DTX. + size_t* zero_counts; }; struct WebRtcOpusDecInst { diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c index 1a632422c5..9eee89f132 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c +++ b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c @@ -11,6 +11,7 @@ #include "webrtc/modules/audio_coding/codecs/opus/include/opus_interface.h" #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h" +#include #include #include @@ -29,48 +30,61 @@ enum { /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */ kWebRtcOpusDefaultFrameSize = 960, + + // Maximum number of consecutive zeros, beyond or equal to which DTX can fail. + kZeroBreakCount = 157, + +#if defined(OPUS_FIXED_POINT) + kZeroBreakValue = 10, +#else + kZeroBreakValue = 1, +#endif }; int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels, int32_t application) { - OpusEncInst* state; - if (inst != NULL) { - state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst)); - if (state) { - int opus_app; - switch (application) { - case 0: { - opus_app = OPUS_APPLICATION_VOIP; - break; - } - case 1: { - opus_app = OPUS_APPLICATION_AUDIO; - break; - } - default: { - free(state); - return -1; - } - } + int opus_app; + if (!inst) + return -1; - int error; - state->encoder = opus_encoder_create(48000, channels, opus_app, - &error); - state->in_dtx_mode = 0; - if (error == OPUS_OK && state->encoder != NULL) { - *inst = state; - return 0; - } - free(state); - } + switch (application) { + case 0: + opus_app = OPUS_APPLICATION_VOIP; + break; + case 1: + opus_app = OPUS_APPLICATION_AUDIO; + break; + default: + return -1; } - return -1; + + OpusEncInst* state = calloc(1, sizeof(OpusEncInst)); + assert(state); + + // Allocate zero counters. + state->zero_counts = calloc(channels, sizeof(size_t)); + assert(state->zero_counts); + + int error; + state->encoder = opus_encoder_create(48000, channels, opus_app, + &error); + if (error != OPUS_OK || !state->encoder) { + WebRtcOpus_EncoderFree(state); + return -1; + } + + state->in_dtx_mode = 0; + state->channels = channels; + + *inst = state; + return 0; } int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) { if (inst) { opus_encoder_destroy(inst->encoder); + free(inst->zero_counts); free(inst); return 0; } else { @@ -84,13 +98,42 @@ int WebRtcOpus_Encode(OpusEncInst* inst, size_t length_encoded_buffer, uint8_t* encoded) { int res; + size_t i; + int c; + + int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs]; if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) { return -1; } + const int channels = inst->channels; + int use_buffer = 0; + + // Break long consecutive zeros by forcing a "1" every |kZeroBreakCount| + // samples. + if (inst->in_dtx_mode) { + for (i = 0; i < samples; ++i) { + for (c = 0; c < channels; ++c) { + if (audio_in[i * channels + c] == 0) { + ++inst->zero_counts[c]; + if (inst->zero_counts[c] == kZeroBreakCount) { + if (!use_buffer) { + memcpy(buffer, audio_in, samples * channels * sizeof(int16_t)); + use_buffer = 1; + } + buffer[i * channels + c] = kZeroBreakValue; + inst->zero_counts[c] = 0; + } + } else { + inst->zero_counts[c] = 0; + } + } + } + } + res = opus_encode(inst->encoder, - (const opus_int16*)audio_in, + use_buffer ? buffer : audio_in, (int)samples, encoded, (opus_int32)length_encoded_buffer); diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc index c059fc5d01..fc5d84190b 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc +++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -36,7 +36,7 @@ class OpusTest : public TestWithParam<::testing::tuple> { protected: OpusTest(); - void TestDtxEffect(bool dtx); + void TestDtxEffect(bool dtx, int block_length_ms); // Prepare |speech_data_| for encoding, read from a hard-coded file. // After preparation, |speech_data_.GetNextBlock()| returns a pointer to a @@ -53,6 +53,9 @@ class OpusTest : public TestWithParam<::testing::tuple> { void SetMaxPlaybackRate(WebRtcOpusEncInst* encoder, opus_int32 expect, int32_t set); + void CheckAudioBounded(const int16_t* audio, size_t samples, int channels, + uint16_t bound) const; + WebRtcOpusEncInst* opus_encoder_; WebRtcOpusDecInst* opus_decoder_; @@ -95,6 +98,16 @@ void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder, EXPECT_EQ(expect, bandwidth); } +void OpusTest::CheckAudioBounded(const int16_t* audio, size_t samples, + int channels, uint16_t bound) const { + for (size_t i = 0; i < samples; ++i) { + for (int c = 0; c < channels; ++c) { + ASSERT_GE(audio[i * channels + c], -bound); + ASSERT_LE(audio[i * channels + c], bound); + } + } +} + int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder, rtc::ArrayView input_audio, WebRtcOpusDecInst* decoder, @@ -116,8 +129,9 @@ int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder, // Test if encoder/decoder can enter DTX mode properly and do not enter DTX when // they should not. This test is signal dependent. -void OpusTest::TestDtxEffect(bool dtx) { - PrepareSpeechData(channels_, 20, 2000); +void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) { + PrepareSpeechData(channels_, block_length_ms, 2000); + const size_t samples = kOpusRateKhz * block_length_ms; // Create encoder memory. EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, @@ -130,17 +144,17 @@ void OpusTest::TestDtxEffect(bool dtx) { channels_ == 1 ? 32000 : 64000)); // Set input audio as silence. - std::vector silence(kOpus20msFrameSamples * channels_, 0); + std::vector silence(samples * channels_, 0); // Setting DTX. EXPECT_EQ(0, dtx ? WebRtcOpus_EnableDtx(opus_encoder_) : WebRtcOpus_DisableDtx(opus_encoder_)); int16_t audio_type; - int16_t* output_data_decode = new int16_t[kOpus20msFrameSamples * channels_]; + int16_t* output_data_decode = new int16_t[samples * channels_]; for (int i = 0; i < 100; ++i) { - EXPECT_EQ(kOpus20msFrameSamples, + EXPECT_EQ(samples, static_cast(EncodeDecode( opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_, output_data_decode, &audio_type))); @@ -157,9 +171,10 @@ void OpusTest::TestDtxEffect(bool dtx) { // We input some silent segments. In DTX mode, the encoder will stop sending. // However, DTX may happen after a while. for (int i = 0; i < 30; ++i) { - EXPECT_EQ(kOpus20msFrameSamples, static_cast(EncodeDecode( - opus_encoder_, silence, opus_decoder_, - output_data_decode, &audio_type))); + EXPECT_EQ(samples, + static_cast(EncodeDecode( + opus_encoder_, silence, opus_decoder_, output_data_decode, + &audio_type))); if (!dtx) { EXPECT_GT(encoded_bytes_, 1U); EXPECT_EQ(0, opus_encoder_->in_dtx_mode); @@ -175,21 +190,47 @@ void OpusTest::TestDtxEffect(bool dtx) { // When Opus is in DTX, it wakes up in a regular basis. It sends two packets, // one with an arbitrary size and the other of 1-byte, then stops sending for - // 19 frames. - const int cycles = 5; - for (int j = 0; j < cycles; ++j) { - // DTX mode is maintained 19 frames. - for (int i = 0; i < 19; ++i) { - EXPECT_EQ(kOpus20msFrameSamples, - static_cast( - EncodeDecode(opus_encoder_, silence, opus_decoder_, - output_data_decode, &audio_type))); + // a certain number of frames. + + // |max_dtx_frames| is the maximum number of frames Opus can stay in DTX. + const int max_dtx_frames = 400 / block_length_ms + 1; + + // We run |kRunTimeMs| milliseconds of pure silence. + const int kRunTimeMs = 2000; + + // We check that, after a |kCheckTimeMs| milliseconds (given that the CNG in + // Opus needs time to adapt), the absolute values of DTX decoded signal are + // bounded by |kOutputValueBound|. + const int kCheckTimeMs = 1500; + +#if defined(OPUS_FIXED_POINT) + const uint16_t kOutputValueBound = 20; +#else + const uint16_t kOutputValueBound = 2; +#endif + + int time = 0; + while (time < kRunTimeMs) { + // DTX mode is maintained for maximum |max_dtx_frames| frames. + int i = 0; + for (; i < max_dtx_frames; ++i) { + time += block_length_ms; + EXPECT_EQ(samples, + static_cast(EncodeDecode( + opus_encoder_, silence, opus_decoder_, output_data_decode, + &audio_type))); if (dtx) { + if (encoded_bytes_ > 1) + break; EXPECT_EQ(0U, encoded_bytes_) // Send 0 byte. << "Opus should have entered DTX mode."; EXPECT_EQ(1, opus_encoder_->in_dtx_mode); EXPECT_EQ(1, opus_decoder_->in_dtx_mode); EXPECT_EQ(2, audio_type); // Comfort noise. + if (time >= kCheckTimeMs) { + CheckAudioBounded(output_data_decode, samples, channels_, + kOutputValueBound); + } } else { EXPECT_GT(encoded_bytes_, 1U); EXPECT_EQ(0, opus_encoder_->in_dtx_mode); @@ -198,25 +239,31 @@ void OpusTest::TestDtxEffect(bool dtx) { } } - // Quit DTX after 19 frames. - EXPECT_EQ(kOpus20msFrameSamples, static_cast(EncodeDecode( - opus_encoder_, silence, opus_decoder_, - output_data_decode, &audio_type))); + if (dtx) { + // With DTX, Opus must stop transmission for some time. + EXPECT_GT(i, 1); + } - EXPECT_GT(encoded_bytes_, 1U); + // We expect a normal payload. EXPECT_EQ(0, opus_encoder_->in_dtx_mode); EXPECT_EQ(0, opus_decoder_->in_dtx_mode); EXPECT_EQ(0, audio_type); // Speech. // Enters DTX again immediately. - EXPECT_EQ(kOpus20msFrameSamples, static_cast(EncodeDecode( - opus_encoder_, silence, opus_decoder_, - output_data_decode, &audio_type))); + time += block_length_ms; + EXPECT_EQ(samples, + static_cast(EncodeDecode( + opus_encoder_, silence, opus_decoder_, output_data_decode, + &audio_type))); if (dtx) { EXPECT_EQ(1U, encoded_bytes_); // Send 1 byte. EXPECT_EQ(1, opus_encoder_->in_dtx_mode); EXPECT_EQ(1, opus_decoder_->in_dtx_mode); EXPECT_EQ(2, audio_type); // Comfort noise. + if (time >= kCheckTimeMs) { + CheckAudioBounded(output_data_decode, samples, channels_, + kOutputValueBound); + } } else { EXPECT_GT(encoded_bytes_, 1U); EXPECT_EQ(0, opus_encoder_->in_dtx_mode); @@ -228,9 +275,10 @@ void OpusTest::TestDtxEffect(bool dtx) { silence[0] = 10000; if (dtx) { // Verify that encoder/decoder can jump out from DTX mode. - EXPECT_EQ(kOpus20msFrameSamples, static_cast(EncodeDecode( - opus_encoder_, silence, opus_decoder_, - output_data_decode, &audio_type))); + EXPECT_EQ(samples, + static_cast(EncodeDecode( + opus_encoder_, silence, opus_decoder_, output_data_decode, + &audio_type))); EXPECT_GT(encoded_bytes_, 1U); EXPECT_EQ(0, opus_encoder_->in_dtx_mode); EXPECT_EQ(0, opus_decoder_->in_dtx_mode); @@ -436,11 +484,15 @@ TEST_P(OpusTest, OpusEnableDisableDtx) { } TEST_P(OpusTest, OpusDtxOff) { - TestDtxEffect(false); + TestDtxEffect(false, 10); + TestDtxEffect(false, 20); + TestDtxEffect(false, 40); } TEST_P(OpusTest, OpusDtxOn) { - TestDtxEffect(true); + TestDtxEffect(true, 10); + TestDtxEffect(true, 20); + TestDtxEffect(true, 40); } TEST_P(OpusTest, OpusSetPacketLossRate) { diff --git a/webrtc/modules/audio_coding/main/audio_coding_module.gypi b/webrtc/modules/audio_coding/main/audio_coding_module.gypi index 088a3197fd..09ac0a815e 100644 --- a/webrtc/modules/audio_coding/main/audio_coding_module.gypi +++ b/webrtc/modules/audio_coding/main/audio_coding_module.gypi @@ -91,6 +91,11 @@ '<(webrtc_root)', ], }, + 'conditions': [ + ['include_opus==1', { + 'export_dependent_settings': ['webrtc_opus'], + }], + ], 'sources': [ 'acm2/acm_common_defs.h', 'acm2/acm_receiver.cc', diff --git a/webrtc/voice_engine/test/auto_test/voe_output_test.cc b/webrtc/voice_engine/test/auto_test/voe_output_test.cc new file mode 100644 index 0000000000..54bfe80d0e --- /dev/null +++ b/webrtc/voice_engine/test/auto_test/voe_output_test.cc @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/base/timeutils.h" +#include "webrtc/system_wrappers/include/sleep.h" +#include "webrtc/test/channel_transport/include/channel_transport.h" +#include "webrtc/test/random.h" +#include "webrtc/test/testsupport/fileutils.h" +#include "webrtc/voice_engine/test/auto_test/voe_standard_test.h" + +namespace { + +const char kIp[] = "127.0.0.1"; +const int kPort = 1234; +const webrtc::CodecInst kCodecInst = {120, "opus", 48000, 960, 2, 64000}; + +} // namespace + +namespace voetest { + +using webrtc::test::Random; +using webrtc::test::VoiceChannelTransport; + +// This test allows a check on the output signal in an end-to-end call. +class OutputTest { + public: + OutputTest(int16_t lower_bound, int16_t upper_bound); + ~OutputTest(); + + void Start(); + + void EnableOutputCheck(); + void DisableOutputCheck(); + void SetOutputBound(int16_t lower_bound, int16_t upper_bound); + void Mute(); + void Unmute(); + void SetBitRate(int rate); + + private: + // This class checks all output values and count the number of samples that + // go out of a defined range. + class VoEOutputCheckMediaProcess : public VoEMediaProcess { + public: + VoEOutputCheckMediaProcess(int16_t lower_bound, int16_t upper_bound); + + void set_enabled(bool enabled) { enabled_ = enabled; } + void Process(int channel, + ProcessingTypes type, + int16_t audio10ms[], + size_t length, + int samplingFreq, + bool isStereo) override; + + private: + bool enabled_; + int16_t lower_bound_; + int16_t upper_bound_; + }; + + VoETestManager manager_; + VoEOutputCheckMediaProcess output_checker_; + + int channel_; +}; + +OutputTest::OutputTest(int16_t lower_bound, int16_t upper_bound) + : output_checker_(lower_bound, upper_bound) { + EXPECT_TRUE(manager_.Init()); + manager_.GetInterfaces(); + + VoEBase* base = manager_.BasePtr(); + VoECodec* codec = manager_.CodecPtr(); + VoENetwork* network = manager_.NetworkPtr(); + + EXPECT_EQ(0, base->Init()); + + channel_ = base->CreateChannel(); + + // |network| will take care of the life time of |transport|. + VoiceChannelTransport* transport = + new VoiceChannelTransport(network, channel_); + + EXPECT_EQ(0, transport->SetSendDestination(kIp, kPort)); + EXPECT_EQ(0, transport->SetLocalReceiver(kPort)); + + EXPECT_EQ(0, codec->SetSendCodec(channel_, kCodecInst)); + EXPECT_EQ(0, codec->SetOpusDtx(channel_, true)); + + EXPECT_EQ(0, manager_.VolumeControlPtr()->SetSpeakerVolume(255)); + + manager_.ExternalMediaPtr()->RegisterExternalMediaProcessing( + channel_, ProcessingTypes::kPlaybackPerChannel, output_checker_); +} + +OutputTest::~OutputTest() { + EXPECT_EQ(0, manager_.NetworkPtr()->DeRegisterExternalTransport(channel_)); + EXPECT_EQ(0, manager_.ReleaseInterfaces()); +} + +void OutputTest::Start() { + const std::string file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + const webrtc::FileFormats kInputFormat = webrtc::kFileFormatPcm32kHzFile; + + ASSERT_EQ(0, manager_.FilePtr()->StartPlayingFileAsMicrophone( + channel_, file_name.c_str(), true, false, kInputFormat, 1.0)); + + VoEBase* base = manager_.BasePtr(); + ASSERT_EQ(0, base->StartPlayout(channel_)); + ASSERT_EQ(0, base->StartSend(channel_)); +} + +void OutputTest::EnableOutputCheck() { + output_checker_.set_enabled(true); +} + +void OutputTest::DisableOutputCheck() { + output_checker_.set_enabled(false); +} + +void OutputTest::Mute() { + manager_.VolumeControlPtr()->SetInputMute(channel_, true); +} + +void OutputTest::Unmute() { + manager_.VolumeControlPtr()->SetInputMute(channel_, false); +} + +void OutputTest::SetBitRate(int rate) { + manager_.CodecPtr()->SetBitRate(channel_, rate); +} + +OutputTest::VoEOutputCheckMediaProcess::VoEOutputCheckMediaProcess( + int16_t lower_bound, int16_t upper_bound) + : enabled_(false), + lower_bound_(lower_bound), + upper_bound_(upper_bound) {} + +void OutputTest::VoEOutputCheckMediaProcess::Process(int channel, + ProcessingTypes type, + int16_t* audio10ms, + size_t length, + int samplingFreq, + bool isStereo) { + if (!enabled_) + return; + const int num_channels = isStereo ? 2 : 1; + for (size_t i = 0; i < length; ++i) { + for (int c = 0; c < num_channels; ++c) { + ASSERT_GE(audio10ms[i * num_channels + c], lower_bound_); + ASSERT_LE(audio10ms[i * num_channels + c], upper_bound_); + } + } +} + +// This test checks if the Opus does not produce high noise (noise pump) when +// DTX is enabled. The microphone is toggled on and off, and values of the +// output signal during muting should be bounded. +// We do not run this test on bots. Developers that want to see the result +// and/or listen to sound quality can run this test manually. +TEST(OutputTest, DISABLED_OpusDtxHasNoNoisePump) { + const int kRuntimeMs = 20000; + const uint32_t kUnmuteTimeMs = 1000; + const int kCheckAfterMute = 2000; + const uint32_t kCheckTimeMs = 2000; + const int kMinOpusRate = 6000; + const int kMaxOpusRate = 64000; + +#if defined(OPUS_FIXED_POINT) + const int16_t kDtxBoundForSilence = 20; +#else + const int16_t kDtxBoundForSilence = 2; +#endif + + OutputTest test(-kDtxBoundForSilence, kDtxBoundForSilence); + Random random(1234ull); + + uint32_t start_time = rtc::Time(); + test.Start(); + while (rtc::TimeSince(start_time) < kRuntimeMs) { + webrtc::SleepMs(random.Rand(kUnmuteTimeMs - kUnmuteTimeMs / 10, + kUnmuteTimeMs + kUnmuteTimeMs / 10)); + test.Mute(); + webrtc::SleepMs(kCheckAfterMute); + test.EnableOutputCheck(); + webrtc::SleepMs(random.Rand(kCheckTimeMs - kCheckTimeMs / 10, + kCheckTimeMs + kCheckTimeMs / 10)); + test.DisableOutputCheck(); + test.SetBitRate(random.Rand(kMinOpusRate, kMaxOpusRate)); + test.Unmute(); + } +} + +} // namespace voetest diff --git a/webrtc/voice_engine/voice_engine.gyp b/webrtc/voice_engine/voice_engine.gyp index 221b2aa681..265ad01992 100644 --- a/webrtc/voice_engine/voice_engine.gyp +++ b/webrtc/voice_engine/voice_engine.gyp @@ -28,6 +28,9 @@ '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers', '<(webrtc_root)/webrtc.gyp:rtc_event_log', ], + 'export_dependent_settings': [ + '<(webrtc_root)/modules/modules.gyp:audio_coding_module', + ], 'sources': [ 'include/voe_audio_processing.h', 'include/voe_base.h', @@ -154,6 +157,7 @@ '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers_default', '<(webrtc_root)/test/test.gyp:channel_transport', '<(webrtc_root)/test/test.gyp:test_support', + '<(webrtc_root)/test/webrtc_test_common.gyp:webrtc_test_common', '<(webrtc_root)/webrtc.gyp:rtc_event_log', ], 'sources': [ @@ -194,6 +198,7 @@ 'test/auto_test/voe_conference_test.cc', 'test/auto_test/voe_cpu_test.cc', 'test/auto_test/voe_cpu_test.h', + 'test/auto_test/voe_output_test.cc', 'test/auto_test/voe_standard_test.cc', 'test/auto_test/voe_standard_test.h', 'test/auto_test/voe_stress_test.cc',