Revert of Prevent Opus DTX from generating intermittent noise during silence (patchset #10 id:250001 of https://codereview.webrtc.org/1415173005/ )

Reason for revert:
Breaks voe_auto_test on all three "large tests bots".
https://build.chromium.org/p/client.webrtc/builders/Win32%20Release%20%5Blarge%20tests%5D/builds/5630/steps/voe_auto_test/logs/stdio
https://build.chromium.org/p/client.webrtc/builders/Mac32%20Release%20%5Blarge%20tests%5D/builds/5599/steps/voe_auto_test/logs/stdio
https://build.chromium.org/p/client.webrtc/builders/Linux64%20Release%20%5Blarge%20tests%5D/builds/5645/steps/voe_auto_test/logs/stdio

Note that these bots are no longer part of the default trybot set, so they have to be run manually when working with code that affects their tests (they were removed as they queued up all the time in the CQ, and usually don't catch breakages).

Original issue's description:
> Prevent Opus DTX from generating intermittent noise during silence.
>
> Opus may have an internal error that causes this. Here we make a workaround by adding some small disturbance to the input signals to break a long sequence of zeros.
>
> BUG=webrtc:5127
>
> Committed: https://crrev.com/f475add57eada116bc960fe2935876ec8c077977
> Cr-Commit-Position: refs/heads/master@{#10565}

TBR=tina.legrand@webrtc.org,kwiberg@webrtc.org,solenberg@webrtc.org,minyue@webrtc.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=webrtc:5127

Review URL: https://codereview.webrtc.org/1428613004

Cr-Commit-Position: refs/heads/master@{#10567}
2015-11-09 13:27:04 -08:00
parent c1cd2bbd79
commit b4a753fdb5
6 changed files with 62 additions and 372 deletions
--- a/webrtc/modules/audio_coding/codecs/opus/opus_inst.h
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_inst.h
@ -15,14 +15,7 @@

 struct WebRtcOpusEncInst {
  OpusEncoder* encoder;
-  int channels;
  int in_dtx_mode;
-  // When Opus is in DTX mode, we use |zero_counts| to count consecutive zeros
-  // to break long zero segment so as to prevent DTX from going wrong. We use
-  // one counter for each channel. After each encoding, |zero_counts| contain
-  // the remaining zeros from the last frame.
-  // TODO(minyue): remove this when Opus gets an internal fix to DTX.
-  size_t* zero_counts;
 };

 struct WebRtcOpusDecInst {
--- a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c
@ -11,7 +11,6 @@
 #include "webrtc/modules/audio_coding/codecs/opus/include/opus_interface.h"
 #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"

-#include <assert.h>
 #include <stdlib.h>
 #include <string.h>

@ -30,61 +29,48 @@ enum {

  /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */
  kWebRtcOpusDefaultFrameSize = 960,
-
-  // Maximum number of consecutive zeros, beyond or equal to which DTX can fail.
-  kZeroBreakCount = 157,
-
-#if defined(OPUS_FIXED_POINT)
-  kZeroBreakValue = 10,
-#else
-  kZeroBreakValue = 1,
-#endif
 };

 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
                                 int32_t channels,
                                 int32_t application) {
-  int opus_app;
-  if (!inst)
-    return -1;
+  OpusEncInst* state;
+  if (inst != NULL) {
+    state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
+    if (state) {
+      int opus_app;
+      switch (application) {
+        case 0: {
+          opus_app = OPUS_APPLICATION_VOIP;
+          break;
+        }
+        case 1: {
+          opus_app = OPUS_APPLICATION_AUDIO;
+          break;
+        }
+        default: {
+          free(state);
+          return -1;
+        }
+      }

-  switch (application) {
-    case 0:
-      opus_app = OPUS_APPLICATION_VOIP;
-      break;
-    case 1:
-      opus_app = OPUS_APPLICATION_AUDIO;
-      break;
-    default:
-      return -1;
+      int error;
+      state->encoder = opus_encoder_create(48000, channels, opus_app,
+                                           &error);
+      state->in_dtx_mode = 0;
+      if (error == OPUS_OK && state->encoder != NULL) {
+        *inst = state;
+        return 0;
+      }
+      free(state);
+    }
  }
-
-  OpusEncInst* state = calloc(1, sizeof(OpusEncInst));
-  assert(state);
-
-  // Allocate zero counters.
-  state->zero_counts = calloc(channels, sizeof(size_t));
-  assert(state->zero_counts);
-
-  int error;
-  state->encoder = opus_encoder_create(48000, channels, opus_app,
-                                       &error);
-  if (error != OPUS_OK || !state->encoder) {
-    WebRtcOpus_EncoderFree(state);
-    return -1;
-  }
-
-  state->in_dtx_mode = 0;
-  state->channels = channels;
-
-  *inst = state;
-  return 0;
+  return -1;
 }

 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  if (inst) {
    opus_encoder_destroy(inst->encoder);
-    free(inst->zero_counts);
    free(inst);
    return 0;
  } else {
@ -98,42 +84,13 @@ int WebRtcOpus_Encode(OpusEncInst* inst,
                      size_t length_encoded_buffer,
                      uint8_t* encoded) {
  int res;
-  size_t i;
-  int c;
-
-  int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs];

  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    return -1;
  }

-  const int channels = inst->channels;
-  int use_buffer = 0;
-
-  // Break long consecutive zeros by forcing a "1" every |kZeroBreakCount|
-  // samples.
-  if (inst->in_dtx_mode) {
-    for (i = 0; i < samples; ++i) {
-      for (c = 0; c < channels; ++c) {
-        if (audio_in[i * channels + c] == 0) {
-          ++inst->zero_counts[c];
-          if (inst->zero_counts[c] == kZeroBreakCount) {
-            if (!use_buffer) {
-              memcpy(buffer, audio_in, samples * channels * sizeof(int16_t));
-              use_buffer = 1;
-            }
-            buffer[i * channels + c] = kZeroBreakValue;
-            inst->zero_counts[c] = 0;
-          }
-        } else {
-          inst->zero_counts[c] = 0;
-        }
-      }
-    }
-  }
-
  res = opus_encode(inst->encoder,
-                    use_buffer ? buffer : audio_in,
+                    (const opus_int16*)audio_in,
                    (int)samples,
                    encoded,
                    (opus_int32)length_encoded_buffer);
--- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
+++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc
@ -36,7 +36,7 @@ class OpusTest : public TestWithParam<::testing::tuple<int, int>> {
 protected:
  OpusTest();

-  void TestDtxEffect(bool dtx, int block_length_ms);
+  void TestDtxEffect(bool dtx);

  // Prepare |speech_data_| for encoding, read from a hard-coded file.
  // After preparation, |speech_data_.GetNextBlock()| returns a pointer to a
@ -53,9 +53,6 @@ class OpusTest : public TestWithParam<::testing::tuple<int, int>> {
  void SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
                          opus_int32 expect, int32_t set);

-  void CheckAudioBounded(const int16_t* audio, size_t samples, int channels,
-                         uint16_t bound) const;
-
  WebRtcOpusEncInst* opus_encoder_;
  WebRtcOpusDecInst* opus_decoder_;

@ -98,16 +95,6 @@ void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder,
  EXPECT_EQ(expect, bandwidth);
 }

-void OpusTest::CheckAudioBounded(const int16_t* audio, size_t samples,
-                                 int channels, uint16_t bound) const {
-  for (size_t i = 0; i < samples; ++i) {
-    for (int c = 0; c < channels; ++c) {
-      ASSERT_GE(audio[i * channels + c], -bound);
-      ASSERT_LE(audio[i * channels + c], bound);
-    }
-  }
-}
-
 int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,
                           rtc::ArrayView<const int16_t> input_audio,
                           WebRtcOpusDecInst* decoder,
@ -129,9 +116,8 @@ int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder,

 // Test if encoder/decoder can enter DTX mode properly and do not enter DTX when
 // they should not. This test is signal dependent.
-void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
-  PrepareSpeechData(channels_, block_length_ms, 2000);
-  const size_t samples = kOpusRateKhz * block_length_ms;
+void OpusTest::TestDtxEffect(bool dtx) {
+  PrepareSpeechData(channels_, 20, 2000);

  // Create encoder memory.
  EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_,
@ -144,17 +130,17 @@ void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
                                     channels_ == 1 ? 32000 : 64000));

  // Set input audio as silence.
-  std::vector<int16_t> silence(samples * channels_, 0);
+  std::vector<int16_t> silence(kOpus20msFrameSamples * channels_, 0);

  // Setting DTX.
  EXPECT_EQ(0, dtx ? WebRtcOpus_EnableDtx(opus_encoder_) :
      WebRtcOpus_DisableDtx(opus_encoder_));

  int16_t audio_type;
-  int16_t* output_data_decode = new int16_t[samples * channels_];
+  int16_t* output_data_decode = new int16_t[kOpus20msFrameSamples * channels_];

  for (int i = 0; i < 100; ++i) {
-    EXPECT_EQ(samples,
+    EXPECT_EQ(kOpus20msFrameSamples,
              static_cast<size_t>(EncodeDecode(
                  opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
                  output_data_decode, &audio_type)));
@ -171,10 +157,9 @@ void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
  // We input some silent segments. In DTX mode, the encoder will stop sending.
  // However, DTX may happen after a while.
  for (int i = 0; i < 30; ++i) {
-    EXPECT_EQ(samples,
-              static_cast<size_t>(EncodeDecode(
-                  opus_encoder_, silence, opus_decoder_, output_data_decode,
-                  &audio_type)));
+    EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
+                                         opus_encoder_, silence, opus_decoder_,
+                                         output_data_decode, &audio_type)));
    if (!dtx) {
      EXPECT_GT(encoded_bytes_, 1U);
      EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -190,47 +175,21 @@ void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {

  // When Opus is in DTX, it wakes up in a regular basis. It sends two packets,
  // one with an arbitrary size and the other of 1-byte, then stops sending for
-  // a certain number of frames.
-
-  // |max_dtx_frames| is the maximum number of frames Opus can stay in DTX.
-  const int max_dtx_frames = 400 / block_length_ms + 1;
-
-  // We run |kRunTimeMs| milliseconds of pure silence.
-  const int kRunTimeMs = 2000;
-
-  // We check that, after a |kCheckTimeMs| milliseconds (given that the CNG in
-  // Opus needs time to adapt), the absolute values of DTX decoded signal are
-  // bounded by |kOutputValueBound|.
-  const int kCheckTimeMs = 1500;
-
-#if defined(OPUS_FIXED_POINT)
-  const uint16_t kOutputValueBound = 20;
-#else
-  const uint16_t kOutputValueBound = 2;
-#endif
-
-  int time = 0;
-  while (time < kRunTimeMs) {
-    // DTX mode is maintained for maximum |max_dtx_frames| frames.
-    int i = 0;
-    for (; i < max_dtx_frames; ++i) {
-      time += block_length_ms;
-      EXPECT_EQ(samples,
-                static_cast<size_t>(EncodeDecode(
-                    opus_encoder_, silence, opus_decoder_, output_data_decode,
-                    &audio_type)));
+  // 19 frames.
+  const int cycles = 5;
+  for (int j = 0; j < cycles; ++j) {
+    // DTX mode is maintained 19 frames.
+    for (int i = 0; i < 19; ++i) {
+      EXPECT_EQ(kOpus20msFrameSamples,
+                static_cast<size_t>(
+                    EncodeDecode(opus_encoder_, silence, opus_decoder_,
+                                 output_data_decode, &audio_type)));
      if (dtx) {
-        if (encoded_bytes_ > 1)
-          break;
        EXPECT_EQ(0U, encoded_bytes_)  // Send 0 byte.
            << "Opus should have entered DTX mode.";
        EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
        EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
        EXPECT_EQ(2, audio_type);  // Comfort noise.
-        if (time >= kCheckTimeMs) {
-          CheckAudioBounded(output_data_decode, samples, channels_,
-                            kOutputValueBound);
-        }
      } else {
        EXPECT_GT(encoded_bytes_, 1U);
        EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -239,31 +198,25 @@ void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
      }
    }

-    if (dtx) {
-      // With DTX, Opus must stop transmission for some time.
-      EXPECT_GT(i, 1);
-    }
+    // Quit DTX after 19 frames.
+    EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
+                                         opus_encoder_, silence, opus_decoder_,
+                                         output_data_decode, &audio_type)));

-    // We expect a normal payload.
+    EXPECT_GT(encoded_bytes_, 1U);
    EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
    EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
    EXPECT_EQ(0, audio_type);  // Speech.

    // Enters DTX again immediately.
-    time += block_length_ms;
-    EXPECT_EQ(samples,
-              static_cast<size_t>(EncodeDecode(
-                  opus_encoder_, silence, opus_decoder_, output_data_decode,
-                  &audio_type)));
+    EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
+                                         opus_encoder_, silence, opus_decoder_,
+                                         output_data_decode, &audio_type)));
    if (dtx) {
      EXPECT_EQ(1U, encoded_bytes_);  // Send 1 byte.
      EXPECT_EQ(1, opus_encoder_->in_dtx_mode);
      EXPECT_EQ(1, opus_decoder_->in_dtx_mode);
      EXPECT_EQ(2, audio_type);  // Comfort noise.
-      if (time >= kCheckTimeMs) {
-        CheckAudioBounded(output_data_decode, samples, channels_,
-                          kOutputValueBound);
-      }
    } else {
      EXPECT_GT(encoded_bytes_, 1U);
      EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
@ -275,10 +228,9 @@ void OpusTest::TestDtxEffect(bool dtx, int block_length_ms) {
  silence[0] = 10000;
  if (dtx) {
    // Verify that encoder/decoder can jump out from DTX mode.
-    EXPECT_EQ(samples,
-              static_cast<size_t>(EncodeDecode(
-                  opus_encoder_, silence, opus_decoder_, output_data_decode,
-                  &audio_type)));
+    EXPECT_EQ(kOpus20msFrameSamples, static_cast<size_t>(EncodeDecode(
+                                         opus_encoder_, silence, opus_decoder_,
+                                         output_data_decode, &audio_type)));
    EXPECT_GT(encoded_bytes_, 1U);
    EXPECT_EQ(0, opus_encoder_->in_dtx_mode);
    EXPECT_EQ(0, opus_decoder_->in_dtx_mode);
@ -484,15 +436,11 @@ TEST_P(OpusTest, OpusEnableDisableDtx) {
 }

 TEST_P(OpusTest, OpusDtxOff) {
-  TestDtxEffect(false, 10);
-  TestDtxEffect(false, 20);
-  TestDtxEffect(false, 40);
+  TestDtxEffect(false);
 }

 TEST_P(OpusTest, OpusDtxOn) {
-  TestDtxEffect(true, 10);
-  TestDtxEffect(true, 20);
-  TestDtxEffect(true, 40);
+  TestDtxEffect(true);
 }

 TEST_P(OpusTest, OpusSetPacketLossRate) {