Correct the FFT windowing when computing the AEC NLP gain

This CL adds a nonwindowed spectrum of the linear filter error to use in the NLP computation. Bug: webrtc:8661 Change-Id: I45bc9bb3eb8eeac0c5d6adb414638eb12b635a27 Reviewed-on: https://webrtc-review.googlesource.com/38701 Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#21583}
2018-01-11 10:29:49 +01:00
parent e357a4dd4e
commit d20639f1f6
10 changed files with 66 additions and 41 deletions
--- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@ -376,7 +376,7 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
                     [&](float a, float b) { return a - b * kScale; });
      std::for_each(e.begin(), e.end(),
                    [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-      fft.ZeroPaddedFft(e, &E);
+      fft.ZeroPaddedFft(e, Aec3Fft::Window::kRectangular, &E);
      for (size_t k = 0; k < kBlockSize; ++k) {
        s[k] = kScale * s_scratch[k + kFftLengthBy2];
      }
--- a/modules/audio_processing/aec3/aec3_fft.cc
+++ b/modules/audio_processing/aec3/aec3_fft.cc
@ -16,13 +16,46 @@

 namespace webrtc {

+namespace {
+
+const float kHanning64[kFftLengthBy2] = {
+    0.f,         0.00248461f, 0.00991376f, 0.0222136f,  0.03926189f,
+    0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
+    0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
+    0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
+    0.70564355f, 0.75f,       0.79187184f, 0.83084292f, 0.86652594f,
+    0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
+    0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
+    0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
+    0.83084292f, 0.79187184f, 0.75f,       0.70564355f, 0.65924333f,
+    0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
+    0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
+    0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
+    0.0222136f,  0.00991376f, 0.00248461f, 0.f};
+
+}  // namespace
+
 // TODO(peah): Change x to be std::array once the rest of the code allows this.
-void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x, FftData* X) const {
+void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
+                            Window window,
+                            FftData* X) const {
  RTC_DCHECK(X);
  RTC_DCHECK_EQ(kFftLengthBy2, x.size());
  std::array<float, kFftLength> fft;
  std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f);
-  std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
+  switch (window) {
+    case Window::kRectangular:
+      std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
+      break;
+    case Window::kHanning:
+      std::transform(x.begin(), x.end(), std::begin(kHanning64),
+                     fft.begin() + kFftLengthBy2,
+                     [](float a, float b) { return a * b; });
+      break;
+    default:
+      RTC_NOTREACHED();
+  }
+
  Fft(&fft, X);
 }

--- a/modules/audio_processing/aec3/aec3_fft.h
+++ b/modules/audio_processing/aec3/aec3_fft.h
@ -25,6 +25,8 @@ namespace webrtc {
 // FftData type.
 class Aec3Fft {
 public:
+  enum class Window { kRectangular, kHanning };
+
  Aec3Fft() = default;
  // Computes the FFT. Note that both the input and output are modified.
  void Fft(std::array<float, kFftLength>* x, FftData* X) const {
@ -40,8 +42,11 @@ class Aec3Fft {
    ooura_fft_.InverseFft(x->data());
  }

-  // Pads the input with kFftLengthBy2 initial zeros before computing the Fft.
-  void ZeroPaddedFft(rtc::ArrayView<const float> x, FftData* X) const;
+  // Windows the input using the specified window, and then adds padding of
+  // kFftLengthBy2 initial zeros before computing the Fft.
+  void ZeroPaddedFft(rtc::ArrayView<const float> x,
+                     Window window,
+                     FftData* X) const;

  // Concatenates the kFftLengthBy2 values long x and x_old before computing the
  // Fft. After that, x is copied to x_old.
--- a/modules/audio_processing/aec3/aec3_fft_unittest.cc
+++ b/modules/audio_processing/aec3/aec3_fft_unittest.cc
@ -44,7 +44,8 @@ TEST(Aec3Fft, NullIfftOutput) {
 TEST(Aec3Fft, NullZeroPaddedFftOutput) {
  Aec3Fft fft;
  std::array<float, kFftLengthBy2> x;
-  EXPECT_DEATH(fft.ZeroPaddedFft(x, nullptr), "");
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, nullptr),
+               "");
 }

 // Verifies that the check for input length in ZeroPaddedFft works.
@ -52,7 +53,7 @@ TEST(Aec3Fft, ZeroPaddedFftWrongInputLength) {
  Aec3Fft fft;
  FftData X;
  std::array<float, kFftLengthBy2 - 1> x;
-  EXPECT_DEATH(fft.ZeroPaddedFft(x, &X), "");
+  EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X), "");
 }

 // Verifies that the check for non-null output in PaddedFft works.
@ -167,7 +168,7 @@ TEST(Aec3Fft, ZeroPaddedFft) {
      x_in[j] = v++;
      x_ref[j + kFftLengthBy2] = x_in[j] * 64.f;
    }
-    fft.ZeroPaddedFft(x_in, &X);
+    fft.ZeroPaddedFft(x_in, Aec3Fft::Window::kRectangular, &X);
    fft.Ifft(X, &x_out);
    for (size_t j = 0; j < x_out.size(); ++j) {
      EXPECT_NEAR(x_ref[j], x_out[j], 0.1f);
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@ -157,8 +157,8 @@ void EchoRemoverImpl::ProcessCapture(
  FftData comfort_noise;
  FftData high_band_comfort_noise;
  SubtractorOutput subtractor_output;
-  FftData& E_main = subtractor_output.E_main;
-  auto& E2_main = subtractor_output.E2_main;
+  FftData& E_main_nonwindowed = subtractor_output.E_main_nonwindowed;
+  auto& E2_main = subtractor_output.E2_main_nonwindowed;
  auto& E2_shadow = subtractor_output.E2_shadow;
  auto& e_main = subtractor_output.e_main;

@ -170,8 +170,9 @@ void EchoRemoverImpl::ProcessCapture(
                      &subtractor_output);

  // Compute spectra.
-  fft_.ZeroPaddedFft(y0, &Y);
-  LinearEchoPower(E_main, Y, &S2_linear);
+  // fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kHanning, &Y);
+  fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kRectangular, &Y);
+  LinearEchoPower(E_main_nonwindowed, Y, &S2_linear);
  Y.Spectrum(optimization_, Y2);

  // Update the AEC state information.
--- a/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc
+++ b/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc
@ -64,7 +64,7 @@ TEST(TransformDbMetricForReporting, DbFsScaling) {
  std::array<float, kFftLengthBy2Plus1> X2;
  Aec3Fft fft;
  x.fill(1000.f);
-  fft.ZeroPaddedFft(x, &X);
+  fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X);
  X.Spectrum(Aec3Optimization::kNone, X2);

  float offset = -10.f * log10(32768.f * 32768.f);
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@ -119,7 +119,7 @@ void RunFilterUpdateTest(int num_blocks_to_process,
                   [&](float a, float b) { return a - b * kScale; });
    std::for_each(e_main.begin(), e_main.end(),
                  [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-    fft.ZeroPaddedFft(e_main, &E_main);
+    fft.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular, &E_main);
    for (size_t k = 0; k < kBlockSize; ++k) {
      s[k] = kScale * s_scratch[k + kFftLengthBy2];
    }
@ -132,7 +132,7 @@ void RunFilterUpdateTest(int num_blocks_to_process,
                   [&](float a, float b) { return a - b * kScale; });
    std::for_each(e_shadow.begin(), e_shadow.end(),
                  [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-    fft.ZeroPaddedFft(e_shadow, &E_shadow);
+    fft.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular, &E_shadow);

    // Compute spectra for future use.
    E_main.Spectrum(Aec3Optimization::kNone, output.E2_main);
--- a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc
@ -94,7 +94,7 @@ void RunFilterUpdateTest(int num_blocks_to_process,
                   [&](float a, float b) { return a - b * kScale; });
    std::for_each(e_shadow.begin(), e_shadow.end(),
                  [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-    fft.ZeroPaddedFft(e_shadow, &E_shadow);
+    fft.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular, &E_shadow);

    std::array<float, kFftLengthBy2Plus1> render_power;
    render_delay_buffer->GetRenderBuffer()->SpectralSum(
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@ -22,26 +22,10 @@ namespace webrtc {

 namespace {

-const float kHanning64[64] = {
-    0.f,         0.00248461f, 0.00991376f, 0.0222136f,  0.03926189f,
-    0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
-    0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
-    0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
-    0.70564355f, 0.75f,       0.79187184f, 0.83084292f, 0.86652594f,
-    0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
-    0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
-    0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
-    0.83084292f, 0.79187184f, 0.75f,       0.70564355f, 0.65924333f,
-    0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
-    0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
-    0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
-    0.0222136f,  0.00991376f, 0.00248461f, 0.f};
-
 void PredictionError(const Aec3Fft& fft,
                     const FftData& S,
                     rtc::ArrayView<const float> y,
                     std::array<float, kBlockSize>* e,
-                     FftData* E,
                     std::array<float, kBlockSize>* s) {
  std::array<float, kFftLength> tmp;
  fft.Ifft(S, &tmp);
@ -57,13 +41,6 @@ void PredictionError(const Aec3Fft& fft,

  std::for_each(e->begin(), e->end(),
                [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
-
-  RTC_DCHECK_EQ(64, e->size());
-  RTC_DCHECK_LE(64, tmp.size());
-  std::transform(e->begin(), e->end(), std::begin(kHanning64), tmp.begin(),
-                 [](float a, float b) { return a * b; });
-
-  fft.ZeroPaddedFft(rtc::ArrayView<const float>(tmp.data(), 64), E);
 }

 }  // namespace
@ -119,6 +96,7 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
  RTC_DCHECK_EQ(kBlockSize, capture.size());
  rtc::ArrayView<const float> y = capture;
  FftData& E_main = output->E_main;
+  FftData& E_main_nonwindowed = output->E_main_nonwindowed;
  FftData E_shadow;
  std::array<float, kBlockSize>& e_main = output->e_main;
  std::array<float, kBlockSize>& e_shadow = output->e_shadow;
@ -128,11 +106,15 @@ void Subtractor::Process(const RenderBuffer& render_buffer,

  // Form the output of the main filter.
  main_filter_.Filter(render_buffer, &S);
-  PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main);
+  PredictionError(fft_, S, y, &e_main, &output->s_main);
+  fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kHanning, &E_main);
+  fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular,
+                     &E_main_nonwindowed);

  // Form the output of the shadow filter.
  shadow_filter_.Filter(render_buffer, &S);
-  PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
+  PredictionError(fft_, S, y, &e_shadow, nullptr);
+  fft_.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kHanning, &E_shadow);

  if (!converged_filter_) {
    const auto sum_of_squares = [](float a, float b) { return a + b * b; };
@ -149,6 +131,7 @@ void Subtractor::Process(const RenderBuffer& render_buffer,

  // Compute spectra for future use.
  E_main.Spectrum(optimization_, output->E2_main);
+  E_main_nonwindowed.Spectrum(optimization_, output->E2_main_nonwindowed);
  E_shadow.Spectrum(optimization_, output->E2_shadow);

  // Update the main filter.
--- a/modules/audio_processing/aec3/subtractor_output.h
+++ b/modules/audio_processing/aec3/subtractor_output.h
@ -24,7 +24,9 @@ struct SubtractorOutput {
  std::array<float, kBlockSize> e_main;
  std::array<float, kBlockSize> e_shadow;
  FftData E_main;
+  FftData E_main_nonwindowed;
  std::array<float, kFftLengthBy2Plus1> E2_main;
+  std::array<float, kFftLengthBy2Plus1> E2_main_nonwindowed;
  std::array<float, kFftLengthBy2Plus1> E2_shadow;

  void Reset() {