Reland "Prevent Opus DTX from generating intermittent noise during silence"

The original CL (https://codereview.webrtc.org/1415173005/) was reverted because of a mistake introduced in its last patch set. This CL fixes that mistake and relands the change. BUG= Review URL: https://codereview.webrtc.org/1422213003 Cr-Commit-Position: refs/heads/master@{#10574}
2015-11-10 03:49:26 -08:00
parent 626252fa66
commit 3cea256806
6 changed files with 377 additions and 62 deletions
--- a/webrtc/modules/audio_coding/codecs/opus/opus_inst.h
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_inst.h
@ -15,7 +15,14 @@
 // State of one Opus encoder instance as used by the WebRtcOpus_* interface.
 struct WebRtcOpusEncInst {
  // Underlying Opus encoder handle.
  OpusEncoder* encoder;
  // Number of interleaved audio channels the encoder was created with.
  int channels;
  // Non-zero while the encoder is considered to be in DTX mode.
  int in_dtx_mode;
  // While Opus is in DTX mode, |zero_counts| tracks the current run of
  // consecutive zero-valued input samples so that long all-zero segments can
  // be broken up before they make DTX misbehave. There is one counter per
  // channel. After each encode call, each counter holds the number of
  // trailing zeros carried over from the frame just processed.
  // TODO(minyue): remove this when Opus gets an internal fix to DTX.
  size_t* zero_counts;
 };
 struct WebRtcOpusDecInst {
--- a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
@ -11,6 +11,7 @@
 #include "webrtc/modules/audio_coding/codecs/opus/include/opus_interface.h"
 #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
@ -29,48 +30,61 @@ enum {
  /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
  kWebRtcOpusDefaultFrameSize = 960,
  // Maximum number of consecutive zeros, beyond or equal to which DTX can fail.
  kZeroBreakCount = 157,
 #if defined(OPUS_FIXED_POINT)
  kZeroBreakValue = 10,
 #else
  kZeroBreakValue = 1,
 #endif
 };
 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
                                 int32_t channels,
                                 int32_t application) {
  OpusEncInst* state;
  if (inst != NULL) {
    state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
    if (state) {
  int opus_app;
  if (!inst)
    return -1;
  switch (application) {
-        case 0: {
+    case 0:
      opus_app = OPUS_APPLICATION_VOIP;
      break;
-        }
+    case 1:
        case 1: {
      opus_app = OPUS_APPLICATION_AUDIO;
      break;
-        }
+    default:
        default: {
          free(state);
      return -1;
  }
-      }
+
  OpusEncInst* state = calloc(1, sizeof(OpusEncInst));
  assert(state);
  // Allocate zero counters.
  state->zero_counts = calloc(channels, sizeof(size_t));
  assert(state->zero_counts);
  int error;
  state->encoder = opus_encoder_create(48000, channels, opus_app,
                                       &error);
  if (error != OPUS_OK || !state->encoder) {
    WebRtcOpus_EncoderFree(state);
    return -1;
  }
  state->in_dtx_mode = 0;
-      if (error == OPUS_OK && state->encoder != NULL) {
+  state->channels = channels;
  *inst = state;
  return 0;
 }
      free(state);
    }
  }
  return -1;
 }
 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  if (inst) {
    opus_encoder_destroy(inst->encoder);
    free(inst->zero_counts);
    free(inst);
    return 0;
  } else {
@ -84,13 +98,42 @@ int WebRtcOpus_Encode(OpusEncInst* inst,
                      size_t length_encoded_buffer,
                      uint8_t* encoded) {
  int res;
  size_t i;
  int c;
  int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs];
  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    return -1;
  }
  const int channels = inst->channels;
  int use_buffer = 0;
  // Break long consecutive zeros by forcing a "1" every |kZeroBreakCount|
  // samples.
  if (inst->in_dtx_mode) {
    for (i = 0; i < samples; ++i) {
      for (c = 0; c < channels; ++c) {
        if (audio_in[i * channels + c] == 0) {
          ++inst->zero_counts[c];
          if (inst->zero_counts[c] == kZeroBreakCount) {
            if (!use_buffer) {
              memcpy(buffer, audio_in, samples * channels * sizeof(int16_t));
              use_buffer = 1;
            }
            buffer[i * channels + c] = kZeroBreakValue;
            inst->zero_counts[c] = 0;
          }
        } else {
          inst->zero_counts[c] = 0;
        }
      }
    }
  }
  res = opus_encode(inst->encoder,
-                    (const opus_int16*)audio_in,
+                    use_buffer ? buffer : audio_in,
                    (int)samples,
                    encoded,
                    (opus_int32)length_encoded_buffer);
--- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
@ -36,7 +36,7 @@ class OpusTest : public TestWithParam<::testing::tuple<int, int>> {
 protected:
  OpusTest();
-  void TestDtxEffect(bool dtx);
+  void TestDtxEffect(bool dtx, int block_length_ms);
  // Prepare |speech_data_| for encoding, read from a hard-coded file.
  // After preparation, |speech_data_.GetNextBlock()| returns a pointer to a
@ -53,6 +53,9 @@ class OpusTest : public TestWithParam<::testing::tuple<int, int>> {
  void SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
                          opus_int32 expect, int32_t set);
  void CheckAudioBounded(const int16_t* audio, size_t samples, int channels,
                         uint16_t bound) const;
  WebRtcOpusEncInst* opus_encoder_;
  WebRtcOpusDecInst* opus_decoder_;
@ -95,6 +98,16 @@ void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
  EXPECT_EQ(expect, bandwidth);
 }
 // Asserts that every sample of the interleaved buffer |audio| (|samples|
 // frames of |channels| values each) lies within [-|bound|, |bound|]. The
 // first out-of-range sample raises a fatal gtest failure and ends the scan.
 void OpusTest::CheckAudioBounded(const int16_t* audio, size_t samples,
                                 int channels, uint16_t bound) const {
  size_t i = 0;
  while (i < samples) {
    int c = 0;
    while (c < channels) {
      // The asserted expressions are kept verbatim so that failure messages
      // read the same as before.
      ASSERT_GE(audio[i * channels + c], -bound);
      ASSERT_LE(audio[i * channels + c], bound);
      ++c;
    }
    ++i;
  }
 }
 int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,
                           rtc::ArrayView<const int16_t> input_audio,
                           WebRtcOpusDecInst* decoder,
@ -116,8 +129,9 @@ int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,
 // Tests that the encoder/decoder enter DTX mode when expected and stay out of
 // DTX when they should not enter it. This test is signal dependent.
-void OpusTest::TestDtxEffect(bool dtx) {
+void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
-  PrepareSpeechData(channels_, 20, 2000);
+  PrepareSpeechData(channels_, block_length_ms, 2000);
  const size_t samples = kOpusRateKhz * block_length_ms;
  // Create encoder memory.
  EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_,
@ -130,17 +144,17 @@ void OpusTest::TestDtxEffect(bool dtx) {
                                     channels_ == 1 ? 32000 : 64000));
  // Set input audio as silence.
-  std::vector<int16_t> silence(kOpus20msFrameSamples * channels_, 0);
+  std::vector<int16_t> silence(samples * channels_, 0);
  // Setting DTX.
  EXPECT_EQ(0, dtx ? WebRtcOpus_EnableDtx(opus_encoder_) :
      WebRtcOpus_DisableDtx(opus_encoder_));
  int16_t audio_type;
-  int16_t* output_data_decode = new int16_t[kOpus20msFrameSamples * channels_];
+  int16_t* output_data_decode = new int16_t[samples * channels_];
  for (int i = 0; i < 100; ++i) {
-    EXPECT_EQ(kOpus20msFrameSamples,
+    EXPECT_EQ(samples,
              static_cast<size_t>(EncodeDecode(
                  opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
                  output_data_decode, &audio_type)));
@ -157,9 +171,10 @@ void OpusTest::TestDtxEffect(bool dtx) {
  // We input some silent segments. In DTX mode, the encoder will stop sending.
  // However, DTX may happen after a while.
  for (int i = 0; i < 30; ++i) {
-    EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
+    EXPECT_EQ(samples,
-                                         opus_encoder_, silence, opus_decoder_,
+              static_cast<size_t>(EncodeDecode(
-                                         output_data_decode, &audio_type)));
+                  opus_encoder_, silence, opus_decoder_, output_data_decode,
                  &audio_type)));
    if (!dtx) {
      EXPECT_GT(encoded_bytes_, 1U);
      EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -175,21 +190,47 @@ void OpusTest::TestDtxEffect(bool dtx) {
  // When Opus is in DTX, it wakes up in a regular basis. It sends two packets,
  // one with an arbitrary size and the other of 1-byte, then stops sending for
-  // 19 frames.
+  // a certain number of frames.
-  const int cycles = 5;
+
-  for (int j = 0; j < cycles; ++j) {
+  // |max_dtx_frames| is the maximum number of frames Opus can stay in DTX.
-    // DTX mode is maintained 19 frames.
+  const int max_dtx_frames = 400 / block_length_ms + 1;
-    for (int i = 0; i < 19; ++i) {
+
-      EXPECT_EQ(kOpus20msFrameSamples,
+  // We run |kRunTimeMs| milliseconds of pure silence.
-                static_cast<size_t>(
+  const int kRunTimeMs = 2000;
-                    EncodeDecode(opus_encoder_, silence, opus_decoder_,
+
-                                 output_data_decode, &audio_type)));
+  // We check that, after a |kCheckTimeMs| milliseconds (given that the CNG in
  // Opus needs time to adapt), the absolute values of DTX decoded signal are
  // bounded by |kOutputValueBound|.
  const int kCheckTimeMs = 1500;
 #if defined(OPUS_FIXED_POINT)
  const uint16_t kOutputValueBound = 20;
 #else
  const uint16_t kOutputValueBound = 2;
 #endif
  int time = 0;
  while (time < kRunTimeMs) {
    // DTX mode is maintained for maximum |max_dtx_frames| frames.
    int i = 0;
    for (; i < max_dtx_frames; ++i) {
      time += block_length_ms;
      EXPECT_EQ(samples,
                static_cast<size_t>(EncodeDecode(
                    opus_encoder_, silence, opus_decoder_, output_data_decode,
                    &audio_type)));
      if (dtx) {
        if (encoded_bytes_ > 1)
          break;
        EXPECT_EQ(0U, encoded_bytes_)  // Send 0 byte.
            << "Opus should have entered DTX mode.";
        EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
        EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
        EXPECT_EQ(2, audio_type);  // Comfort noise.
        if (time >= kCheckTimeMs) {
          CheckAudioBounded(output_data_decode, samples, channels_,
                            kOutputValueBound);
        }
      } else {
        EXPECT_GT(encoded_bytes_, 1U);
        EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -198,25 +239,31 @@ void OpusTest::TestDtxEffect(bool dtx) {
      }
    }
-    // Quit DTX after 19 frames.
+    if (dtx) {
-    EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
+      // With DTX, Opus must stop transmission for some time.
-                                         opus_encoder_, silence, opus_decoder_,
+      EXPECT_GT(i, 1);
-                                         output_data_decode, &audio_type)));
+    }
-    EXPECT_GT(encoded_bytes_, 1U);
+    // We expect a normal payload.
    EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
    EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
    EXPECT_EQ(0, audio_type);  // Speech.
    // Enters DTX again immediately.
-    EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
+    time += block_length_ms;
-                                         opus_encoder_, silence, opus_decoder_,
+    EXPECT_EQ(samples,
-                                         output_data_decode, &audio_type)));
+              static_cast<size_t>(EncodeDecode(
                  opus_encoder_, silence, opus_decoder_, output_data_decode,
                  &audio_type)));
    if (dtx) {
      EXPECT_EQ(1U, encoded_bytes_);  // Send 1 byte.
      EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
      EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
      EXPECT_EQ(2, audio_type);  // Comfort noise.
      if (time >= kCheckTimeMs) {
        CheckAudioBounded(output_data_decode, samples, channels_,
                          kOutputValueBound);
      }
    } else {
      EXPECT_GT(encoded_bytes_, 1U);
      EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -228,9 +275,10 @@ void OpusTest::TestDtxEffect(bool dtx) {
  silence[0] = 10000;
  if (dtx) {
    // Verify that encoder/decoder can jump out from DTX mode.
-    EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
+    EXPECT_EQ(samples,
-                                         opus_encoder_, silence, opus_decoder_,
+              static_cast<size_t>(EncodeDecode(
-                                         output_data_decode, &audio_type)));
+                  opus_encoder_, silence, opus_decoder_, output_data_decode,
                  &audio_type)));
    EXPECT_GT(encoded_bytes_, 1U);
    EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
    EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
@ -436,11 +484,15 @@ TEST_P(OpusTest, OpusEnableDisableDtx) {
 }
 TEST_P(OpusTest, OpusDtxOff) {
-  TestDtxEffect(false);
+  TestDtxEffect(false, 10);
  TestDtxEffect(false, 20);
  TestDtxEffect(false, 40);
 }
 TEST_P(OpusTest, OpusDtxOn) {
-  TestDtxEffect(true);
+  TestDtxEffect(true, 10);
  TestDtxEffect(true, 20);
  TestDtxEffect(true, 40);
 }
 TEST_P(OpusTest, OpusSetPacketLossRate) {
--- a/webrtc/modules/audio_coding/main/audio_coding_module.gypi
+++ b/webrtc/modules/audio_coding/main/audio_coding_module.gypi
@ -91,6 +91,11 @@
          '<(webrtc_root)',
        ],
      },
      'conditions': [
        ['include_opus==1', {
          'export_dependent_settings': ['webrtc_opus'],
        }],
      ],
      'sources': [
        'acm2/acm_common_defs.h',
        'acm2/acm_receiver.cc',
--- a/webrtc/voice_engine/test/auto_test/voe_output_test.cc
+++ b/webrtc/voice_engine/test/auto_test/voe_output_test.cc
@ -0,0 +1,203 @@
 /*
 *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include "testing/gtest/include/gtest/gtest.h"
 #include "webrtc/base/scoped_ptr.h"
 #include "webrtc/base/timeutils.h"
 #include "webrtc/system_wrappers/include/sleep.h"
 #include "webrtc/test/channel_transport/include/channel_transport.h"
 #include "webrtc/test/random.h"
 #include "webrtc/test/testsupport/fileutils.h"
 #include "webrtc/voice_engine/test/auto_test/voe_standard_test.h"
 // File-local configuration for the end-to-end output test.
 namespace {
 // Loopback address used as the send destination of the test channel.
 const char kIp[] = "127.0.0.1";
 // Port used both as the send destination and as the local receive port.
 const int kPort = 1234;
 // Send codec configured on the test channel.
 // NOTE(review): presumably {payload type 120, "opus", 48 kHz, 960-sample
 // (20 ms) packets, 2 channels, 64 kbps} -- confirm against the field order of
 // webrtc::CodecInst.
 const webrtc::CodecInst kCodecInst = {120, "opus", 48000, 960, 2, 64000};
 }  // namespace
 namespace voetest {
 using webrtc::test::Random;
 using webrtc::test::VoiceChannelTransport;
 // Test fixture that sets up a single voice channel sending to, and receiving
 // from, the local loopback address, and lets a test inspect the played-out
 // signal while the call is running.
 class OutputTest {
 public:
  // Sets up the channel with Opus and DTX enabled and registers an output
  // checker that uses [|lower_bound|, |upper_bound|] as the allowed sample
  // range. The checker starts out disabled.
  OutputTest(int16_t lower_bound, int16_t upper_bound);
  ~OutputTest();
  // Starts feeding a file as microphone input and starts playout and send.
  void Start();
  // Turn range checking of the played-out samples on or off.
  void EnableOutputCheck();
  void DisableOutputCheck();
  // NOTE(review): no definition of SetOutputBound is visible in this file --
  // confirm whether it is defined elsewhere or should be removed.
  void SetOutputBound(int16_t lower_bound, int16_t upper_bound);
  // Mute / unmute the input of the test channel.
  void Mute();
  void Unmute();
  // Forwards |rate| to the codec interface's SetBitRate for the test channel.
  void SetBitRate(int rate);
 private:
  // Media-processing callback that, while enabled, asserts that every output
  // sample lies within [lower_bound_, upper_bound_].
  class VoEOutputCheckMediaProcess : public VoEMediaProcess {
   public:
    VoEOutputCheckMediaProcess(int16_t lower_bound, int16_t upper_bound);
    // Enables or disables the range check performed in Process().
    void set_enabled(bool enabled) { enabled_ = enabled; }
    // Checks the samples in |audio10ms| against the configured bounds;
    // |isStereo| selects whether one or two interleaved channels are read.
    void Process(int channel,
                 ProcessingTypes type,
                 int16_t audio10ms[],
                 size_t length,
                 int samplingFreq,
                 bool isStereo) override;
   private:
    bool enabled_;         // When false, Process() returns immediately.
    int16_t lower_bound_;  // Smallest accepted sample value.
    int16_t upper_bound_;  // Largest accepted sample value.
  };
  // Members are constructed in declaration order, so |manager_| exists before
  // the constructor body uses it.
  VoETestManager manager_;
  VoEOutputCheckMediaProcess output_checker_;
  // Channel id returned by VoEBase::CreateChannel().
  int channel_;
 };
 // Builds the complete call setup: initializes the voice engine through
 // |manager_|, creates one channel that sends to and receives from kIp:kPort,
 // selects kCodecInst with Opus DTX enabled, sets the speaker volume to 255 and
 // registers |output_checker_| for per-channel playback processing.
 OutputTest::OutputTest(int16_t lower_bound, int16_t upper_bound)
    : output_checker_(lower_bound, upper_bound) {
  EXPECT_TRUE(manager_.Init());
  manager_.GetInterfaces();
  VoEBase* base = manager_.BasePtr();
  VoECodec* codec = manager_.CodecPtr();
  VoENetwork* network = manager_.NetworkPtr();
  EXPECT_EQ(0, base->Init());
  // NOTE(review): the returned channel id is used without being checked.
  channel_ = base->CreateChannel();
  // |network| will take care of the life time of |transport|.
  // NOTE(review): nothing visible in this file deletes |transport|; confirm
  // that ownership really is transferred, otherwise this allocation leaks.
  VoiceChannelTransport* transport =
      new VoiceChannelTransport(network, channel_);
  EXPECT_EQ(0, transport->SetSendDestination(kIp, kPort));
  EXPECT_EQ(0, transport->SetLocalReceiver(kPort));
  EXPECT_EQ(0, codec->SetSendCodec(channel_, kCodecInst));
  EXPECT_EQ(0, codec->SetOpusDtx(channel_, true));
  EXPECT_EQ(0, manager_.VolumeControlPtr()->SetSpeakerVolume(255));
  // The result of the registration is not checked here.
  manager_.ExternalMediaPtr()->RegisterExternalMediaProcessing(
      channel_, ProcessingTypes::kPlaybackPerChannel, output_checker_);
 }
 // Tears the setup down: deregisters the external transport from the test
 // channel, then releases the voice engine interfaces held by |manager_|.
 // NOTE(review): |output_checker_| is not deregistered from external media
 // processing here -- confirm that ReleaseInterfaces() makes this unnecessary.
 OutputTest::~OutputTest() {
  EXPECT_EQ(0, manager_.NetworkPtr()->DeRegisterExternalTransport(channel_));
  EXPECT_EQ(0, manager_.ReleaseInterfaces());
 }
 // Starts the call: plays the 32 kHz PCM resource file as microphone input on
 // the test channel, then starts playout and sending. Any failing step raises
 // a fatal gtest failure and returns early.
 void OutputTest::Start() {
  const std::string file_name =
      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
  const webrtc::FileFormats kInputFormat = webrtc::kFileFormatPcm32kHzFile;
  // NOTE(review): the boolean arguments are presumably loop=true and
  // mixWithMicrophone=false, with a volume scaling of 1.0 -- confirm against
  // the VoEFile interface.
  ASSERT_EQ(0, manager_.FilePtr()->StartPlayingFileAsMicrophone(
      channel_, file_name.c_str(), true, false, kInputFormat, 1.0));
  VoEBase* base = manager_.BasePtr();
  ASSERT_EQ(0, base->StartPlayout(channel_));
  ASSERT_EQ(0, base->StartSend(channel_));
 }
 // Turns on range checking of the played-out samples.
 void OutputTest::EnableOutputCheck() {
  const bool kCheckEnabled = true;
  output_checker_.set_enabled(kCheckEnabled);
 }
 // Turns off range checking of the played-out samples.
 void OutputTest::DisableOutputCheck() {
  const bool kCheckEnabled = false;
  output_checker_.set_enabled(kCheckEnabled);
 }
 // Mutes the input of the test channel.
 void OutputTest::Mute() {
  const bool kMuted = true;
  manager_.VolumeControlPtr()->SetInputMute(channel_, kMuted);
 }
 // Unmutes the input of the test channel.
 void OutputTest::Unmute() {
  const bool kMuted = false;
  manager_.VolumeControlPtr()->SetInputMute(channel_, kMuted);
 }
 void OutputTest::SetBitRate(int rate) {
  manager_.CodecPtr()->SetBitRate(channel_, rate);
 }
 OutputTest::VoEOutputCheckMediaProcess::VoEOutputCheckMediaProcess(
    int16_t lower_bound, int16_t upper_bound)
    : enabled_(false),
      lower_bound_(lower_bound),
      upper_bound_(upper_bound) {}
 // While checking is enabled, asserts that each sample in |audio10ms| lies
 // within [lower_bound_, upper_bound_]. |length| frames are read, each with
 // one value (mono) or two interleaved values (|isStereo|). |channel|, |type|
 // and |samplingFreq| are not used.
 void OutputTest::VoEOutputCheckMediaProcess::Process(int channel,
                                                     ProcessingTypes type,
                                                     int16_t* audio10ms,
                                                     size_t length,
                                                     int samplingFreq,
                                                     bool isStereo) {
  if (enabled_) {
    int num_channels = 1;
    if (isStereo)
      num_channels = 2;
    for (size_t i = 0; i < length; ++i) {
      for (int c = 0; c < num_channels; ++c) {
        // A failed assertion returns from this function right away.
        ASSERT_GE(audio10ms[i * num_channels + c], lower_bound_);
        ASSERT_LE(audio10ms[i * num_channels + c], upper_bound_);
      }
    }
  }
 }
 // This test checks that Opus does not produce high-level noise (noise pump)
 // when DTX is enabled. The microphone is repeatedly muted and unmuted, and
 // the output signal values while muted should stay within bounds.
 // We do not run this test on bots. Developers who want to see the result
 // and/or listen to the sound quality can run this test manually.
 TEST(OutputTest, DISABLED_OpusDtxHasNoNoisePump) {
  // Largest absolute output value tolerated while the input is muted.
 #if defined(OPUS_FIXED_POINT)
  const int16_t kDtxBoundForSilence = 20;
 #else
  const int16_t kDtxBoundForSilence = 2;
 #endif

  // Total wall-clock duration of the test.
  const int kRuntimeMs = 20000;

  // Nominal unmuted time per cycle; the actual sleep is drawn from +/-10%.
  const uint32_t kUnmuteTimeMs = 1000;
  const uint32_t kUnmuteMinMs = kUnmuteTimeMs - kUnmuteTimeMs / 10;
  const uint32_t kUnmuteMaxMs = kUnmuteTimeMs + kUnmuteTimeMs / 10;

  // Settling time after muting before the output check is switched on.
  const int kCheckAfterMute = 2000;

  // Nominal checked time per cycle; the actual sleep is drawn from +/-10%.
  const uint32_t kCheckTimeMs = 2000;
  const uint32_t kCheckMinMs = kCheckTimeMs - kCheckTimeMs / 10;
  const uint32_t kCheckMaxMs = kCheckTimeMs + kCheckTimeMs / 10;

  // Range from which a new bit rate is drawn at the end of every cycle.
  const int kMinOpusRate = 6000;
  const int kMaxOpusRate = 64000;

  OutputTest test(-kDtxBoundForSilence, kDtxBoundForSilence);
  Random random(1234ull);
  uint32_t start_time = rtc::Time();
  test.Start();

  // One cycle: stay unmuted, mute, wait, check the output for a while, then
  // change the bit rate and unmute again.
  while (rtc::TimeSince(start_time) < kRuntimeMs) {
    webrtc::SleepMs(random.Rand(kUnmuteMinMs, kUnmuteMaxMs));

    test.Mute();
    webrtc::SleepMs(kCheckAfterMute);

    test.EnableOutputCheck();
    webrtc::SleepMs(random.Rand(kCheckMinMs, kCheckMaxMs));
    test.DisableOutputCheck();

    test.SetBitRate(random.Rand(kMinOpusRate, kMaxOpusRate));
    test.Unmute();
  }
 }
 }  // namespace voetest
--- a/webrtc/voice_engine/voice_engine.gyp
+++ b/webrtc/voice_engine/voice_engine.gyp
@ -28,6 +28,9 @@
        '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers',
        '<(webrtc_root)/webrtc.gyp:rtc_event_log',
      ],
      'export_dependent_settings': [
        '<(webrtc_root)/modules/modules.gyp:audio_coding_module',
      ],
      'sources': [
        'include/voe_audio_processing.h',
        'include/voe_base.h',
@ -154,6 +157,7 @@
            '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers_default',
            '<(webrtc_root)/test/test.gyp:channel_transport',
            '<(webrtc_root)/test/test.gyp:test_support',
            '<(webrtc_root)/test/webrtc_test_common.gyp:webrtc_test_common',
            '<(webrtc_root)/webrtc.gyp:rtc_event_log',
           ],
          'sources': [
@ -194,6 +198,7 @@
            'test/auto_test/voe_conference_test.cc',
            'test/auto_test/voe_cpu_test.cc',
            'test/auto_test/voe_cpu_test.h',
            'test/auto_test/voe_output_test.cc',
            'test/auto_test/voe_standard_test.cc',
            'test/auto_test/voe_standard_test.h',
            'test/auto_test/voe_stress_test.cc',