AEC3: Further utilize the shadow filter to boost adaptation

This CL makes the jump-starting of the shadow filter more extreme. It furthermore utilizes this to allow the AEC to rely further, and more quickly, on its linear filter estimates. The result is mainly increased transparency, but also fewer echo blips in some cases. Bug: webrtc:9612,chromium:873074 Change-Id: I90f7cfbff9acb9d0c36409593afbf476e7a830d3 Reviewed-on: https://webrtc-review.googlesource.com/93461 Reviewed-by: Sam Zackrisson <saza@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#24264}
2018-08-10 18:37:38 +02:00
parent f5f5373372
commit 2275439c4e
9 changed files with 192 additions and 55 deletions
--- a/modules/audio_processing/aec3/adaptive_fir_filter.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter.cc
@ -634,7 +634,6 @@ void AdaptiveFirFilter::ScaleFilter(float factor) {

 // Set the filter coefficients.
 void AdaptiveFirFilter::SetFilter(const std::vector<FftData>& H) {
-  RTC_DCHECK_EQ(H_.size(), H.size());
  const size_t num_partitions = std::min(H_.size(), H.size());
  for (size_t k = 0; k < num_partitions; ++k) {
    std::copy(H[k].re.begin(), H[k].re.end(), H_[k].re.begin());
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@ -53,6 +53,20 @@ bool EnableShortInitialState() {
  return !field_trial::IsEnabled("WebRTC-Aec3ShortInitialStateKillSwitch");
 }

+// Returns true unless the "WebRTC-Aec3NoAlignmentWaitKillSwitch" field trial
+// is enabled.
+bool EnableNoWaitForAlignment() {
+  const bool kill_switch_active =
+      field_trial::IsEnabled("WebRTC-Aec3NoAlignmentWaitKillSwitch");
+  return !kill_switch_active;
+}
+
+// Returns true unless the "WebRTC-Aec3ConvergenceTriggingLinearKillSwitch"
+// field trial is enabled.
+bool EnableConvergenceTriggeredLinearMode() {
+  const bool kill_switch_active = field_trial::IsEnabled(
+      "WebRTC-Aec3ConvergenceTriggingLinearKillSwitch");
+  return !kill_switch_active;
+}
+
+// Returns true unless the
+// "WebRTC-Aec3ErleUncertaintyUntilSufficientlyAdaptedKillSwitch" field trial
+// is enabled.
+bool EnableUncertaintyUntilSufficientAdapted() {
+  const bool kill_switch_active = field_trial::IsEnabled(
+      "WebRTC-Aec3ErleUncertaintyUntilSufficientlyAdaptedKillSwitch");
+  return !kill_switch_active;
+}
+
 float ComputeGainRampupIncrease(const EchoCanceller3Config& config) {
  const auto& c = config.echo_removal_control.gain_rampup;
  return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks);
@ -78,6 +92,10 @@ AecState::AecState(const EchoCanceller3Config& config)
          EnableLinearModeWithDivergedFilter()),
      early_filter_usage_activated_(EnableEarlyFilterUsage()),
      use_short_initial_state_(EnableShortInitialState()),
+      convergence_trigger_linear_mode_(EnableConvergenceTriggeredLinearMode()),
+      no_alignment_required_for_linear_mode_(EnableNoWaitForAlignment()),
+      use_uncertainty_until_sufficiently_adapted_(
+          EnableUncertaintyUntilSufficientAdapted()),
      erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
      max_render_(config_.filter.main.length_blocks, 0.f),
      gain_rampup_increase_(ComputeGainRampupIncrease(config_)),
@ -194,18 +212,15 @@ void AecState::Update(
  }

  // Detect and flag echo saturation.
-  // TODO(peah): Add the delay in this computation to ensure that the render and
-  // capture signals are properly aligned.
  if (config_.ep_strength.echo_can_saturate) {
    echo_saturation_ = DetectEchoSaturation(x, EchoPathGain());
  }

-  bool filter_has_had_time_to_converge;
  if (early_filter_usage_activated_) {
-    filter_has_had_time_to_converge =
+    filter_has_had_time_to_converge_ =
        blocks_with_proper_filter_adaptation_ >= 0.8f * kNumBlocksPerSecond;
  } else {
-    filter_has_had_time_to_converge =
+    filter_has_had_time_to_converge_ =
        blocks_with_proper_filter_adaptation_ >= 1.5f * kNumBlocksPerSecond;
  }

@ -286,10 +301,21 @@ void AecState::Update(
  transparent_mode_ = transparent_mode_ && allow_transparent_mode_;

  usable_linear_estimate_ = !echo_saturation_;
-  usable_linear_estimate_ =
-      usable_linear_estimate_ && filter_has_had_time_to_converge;

-  usable_linear_estimate_ = usable_linear_estimate_ && external_delay;
+  if (convergence_trigger_linear_mode_) {
+    usable_linear_estimate_ =
+        usable_linear_estimate_ &&
+        ((filter_has_had_time_to_converge_ && external_delay) ||
+         converged_filter_seen_);
+  } else {
+    usable_linear_estimate_ =
+        usable_linear_estimate_ && filter_has_had_time_to_converge_;
+  }
+
+  if (!no_alignment_required_for_linear_mode_) {
+    usable_linear_estimate_ = usable_linear_estimate_ && external_delay;
+  }
+
  if (!config_.echo_removal_control.linear_and_stable_echo_path) {
    usable_linear_estimate_ =
        usable_linear_estimate_ && recently_converged_filter;
@ -335,7 +361,7 @@ void AecState::Update(
  data_dumper_->DumpRaw("aec3_filter_should_have_converged",
                        filter_should_have_converged_);
  data_dumper_->DumpRaw("aec3_filter_has_had_time_to_converge",
-                        filter_has_had_time_to_converge);
+                        filter_has_had_time_to_converge_);
  data_dumper_->DumpRaw("aec3_recently_converged_filter",
                        recently_converged_filter);
  data_dumper_->DumpRaw("aec3_suppresion_gain_limiter_running",
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@ -77,6 +77,11 @@ class AecState {
    if (allow_linear_mode_with_diverged_filter_ && diverged_linear_filter_) {
      return 10.f;
    }
+
+    if (!filter_has_had_time_to_converge_ &&
+        use_uncertainty_until_sufficiently_adapted_) {
+      return 10.f;
+    }
    return absl::nullopt;
  }

@ -173,6 +178,9 @@ class AecState {
  const bool allow_linear_mode_with_diverged_filter_;
  const bool early_filter_usage_activated_;
  const bool use_short_initial_state_;
+  const bool convergence_trigger_linear_mode_;
+  const bool no_alignment_required_for_linear_mode_;
+  const bool use_uncertainty_until_sufficiently_adapted_;
  ErlEstimator erl_estimator_;
  ErleEstimator erle_estimator_;
  size_t capture_block_counter_ = 0;
--- a/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/modules/audio_processing/aec3/aec_state_unittest.cc
@ -32,6 +32,7 @@ TEST(AecState, NormalUsage) {
  EchoPathVariability echo_path_variability(
      false, EchoPathVariability::DelayAdjustment::kNone, false);
  SubtractorOutput output;
+  output.Reset();
  std::array<float, kBlockSize> y;
  Aec3Fft fft;
  output.s_main.fill(100.f);
@ -51,13 +52,6 @@ TEST(AecState, NormalUsage) {
  std::vector<float> impulse_response(
      GetTimeDomainLength(config.filter.main.length_blocks), 0.f);

-  // Verify that linear AEC usability is false when the filter is diverged.
-  output.UpdatePowers(y);
-  state.Update(delay_estimate, diverged_filter_frequency_response,
-               impulse_response, *render_delay_buffer->GetRenderBuffer(),
-               E2_main, Y2, output, y);
-  EXPECT_FALSE(state.UsableLinearEstimate());
-
  // Verify that linear AEC usability is true when the filter is converged
  std::fill(x[0].begin(), x[0].end(), 101.f);
  for (int k = 0; k < 3000; ++k) {
@ -191,6 +185,7 @@ TEST(AecState, ConvergedFilterDelay) {
  EchoPathVariability echo_path_variability(
      false, EchoPathVariability::DelayAdjustment::kNone, false);
  SubtractorOutput output;
+  output.Reset();
  std::array<float, kBlockSize> y;
  output.s_main.fill(100.f);
  x.fill(0.f);
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@ -43,6 +43,11 @@ bool UseShadowFilterOutput() {
      "WebRTC-Aec3UtilizeShadowFilterOutputKillSwitch");
 }

+// Returns true unless the "WebRTC-Aec3SmoothSignalTransitionsKillSwitch" field
+// trial is enabled.
+bool UseSmoothSignalTransitions() {
+  const bool kill_switch_active = field_trial::IsEnabled(
+      "WebRTC-Aec3SmoothSignalTransitionsKillSwitch");
+  return !kill_switch_active;
+}
+
 void LinearEchoPower(const FftData& E,
                     const FftData& Y,
                     std::array<float, kFftLengthBy2Plus1>* S2) {
@ -52,6 +57,26 @@ void LinearEchoPower(const FftData& E,
  }
 }

+// Cross-fades linearly from |from| to |to| over the first kTransitionSize
+// samples and copies the remaining samples of |to| to |out| unchanged. All
+// three views must have the same size, which must be at least
+// kTransitionSize.
+void SignalTransition(rtc::ArrayView<const float> from,
+                      rtc::ArrayView<const float> to,
+                      rtc::ArrayView<float> out) {
+  constexpr size_t kTransitionSize = 30;
+  constexpr float kOneByTransitionSize = 1.f / kTransitionSize;
+
+  RTC_DCHECK_EQ(from.size(), to.size());
+  RTC_DCHECK_EQ(from.size(), out.size());
+  RTC_DCHECK_LE(kTransitionSize, out.size());
+
+  for (size_t k = 0; k < kTransitionSize; ++k) {
+    // The weight of |to| ramps up from 0 while the weight of |from| ramps down
+    // from 1. Both inputs are read before out[k] is written, since callers
+    // pass |out| aliasing one of the inputs.
+    const float to_weight = k * kOneByTransitionSize;
+    const float from_weight = (kTransitionSize - k) * kOneByTransitionSize;
+    out[k] = to_weight * to[k] + from_weight * from[k];
+  }
+
+  std::copy(to.begin() + kTransitionSize, to.end(),
+            out.begin() + kTransitionSize);
+}
+
 // Computes a windowed (square root Hanning) padded FFT and updates the related
 // memory.
 void WindowedPaddedFft(const Aec3Fft& fft,
@ -93,32 +118,11 @@ class EchoRemoverImpl final : public EchoRemover {

 private:
  // Selects which of the shadow and main linear filter outputs that is most
-  // appropriate to pass to the suppressor.
-  const std::array<float, kBlockSize>& ChooseLinearFilterOutput(
-      const SubtractorOutput& subtractor_output) {
-    if (!use_shadow_filter_output_) {
-      return subtractor_output.e_main;
-    }
-
-    // As the output of the main adaptive filter generally should be better than
-    // the shadow filter output, add a margin and threshold for when choosing
-    // the shadow filter output.
-    if (subtractor_output.e2_shadow < 0.9f * subtractor_output.e2_main &&
-        subtractor_output.y2 > 30.f * 30.f * kBlockSize &&
-        (subtractor_output.s2_main > 60.f * 60.f * kBlockSize ||
-         subtractor_output.s2_shadow > 60.f * 60.f * kBlockSize)) {
-      return subtractor_output.e_shadow;
-    }
-
-    // If the main filter is diverged, choose the filter output that has the
-    // lowest power.
-    if (subtractor_output.e2_shadow < subtractor_output.e2_main &&
-        subtractor_output.y2 < subtractor_output.e2_main) {
-      return subtractor_output.e_shadow;
-    }
-
-    return subtractor_output.e_main;
-  }
+  // appropriate to pass to the suppressor and forms the linear filter output by
+  // smoothly transitioning between them.
+  void FormLinearFilterOutput(bool smooth_transition,
+                              const SubtractorOutput& subtractor_output,
+                              rtc::ArrayView<float> output);

  static int instance_count_;
  const EchoCanceller3Config config_;
@ -127,6 +131,7 @@ class EchoRemoverImpl final : public EchoRemover {
  const Aec3Optimization optimization_;
  const int sample_rate_hz_;
  const bool use_shadow_filter_output_;
+  const bool use_smooth_signal_transitions_;
  Subtractor subtractor_;
  SuppressionGain suppression_gain_;
  ComfortNoiseGenerator cng_;
@ -142,6 +147,8 @@ class EchoRemoverImpl final : public EchoRemover {
  std::array<float, kFftLengthBy2> y_old_;
  size_t block_counter_ = 0;
  int gain_change_hangover_ = 0;
+  bool main_filter_output_last_selected_ = true;
+  bool linear_filter_output_last_selected_ = true;

  RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverImpl);
 };
@ -157,6 +164,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
      optimization_(DetectOptimization()),
      sample_rate_hz_(sample_rate_hz),
      use_shadow_filter_output_(UseShadowFilterOutput()),
+      use_smooth_signal_transitions_(UseSmoothSignalTransitions()),
      subtractor_(config, data_dumper_.get(), optimization_),
      suppression_gain_(config_, optimization_, sample_rate_hz),
      cng_(optimization_),
@ -258,7 +266,8 @@ void EchoRemoverImpl::ProcessCapture(
  // If the delay is known, use the echo subtractor.
  subtractor_.Process(*render_buffer, y0, render_signal_analyzer_, aec_state_,
                      &subtractor_output);
-  const auto& e = ChooseLinearFilterOutput(subtractor_output);
+  std::array<float, kBlockSize> e;
+  FormLinearFilterOutput(use_smooth_signal_transitions_, subtractor_output, e);

  // Compute spectra.
  WindowedPaddedFft(fft_, y0, y_old_, &Y);
@ -287,8 +296,18 @@ void EchoRemoverImpl::ProcessCapture(
  data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0],
                        LowestBandRate(sample_rate_hz_), 1);
  if (aec_state_.UseLinearFilterOutput()) {
-    std::copy(e.begin(), e.end(), y0.begin());
+    if (!linear_filter_output_last_selected_ &&
+        use_smooth_signal_transitions_) {
+      SignalTransition(y0, e, y0);
+    } else {
+      std::copy(e.begin(), e.end(), y0.begin());
+    }
+  } else {
+    if (linear_filter_output_last_selected_ && use_smooth_signal_transitions_) {
+      SignalTransition(e, y0, y0);
+    }
  }
+  linear_filter_output_last_selected_ = aec_state_.UseLinearFilterOutput();
  const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;

  data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
@ -339,6 +358,52 @@ void EchoRemoverImpl::ProcessCapture(
                        aec_state_.SaturatedCapture() ? 1 : 0);
 }

+// Writes the linear filter output for the current block to |output|. The
+// output is taken from either the main or the shadow filter error signal. When
+// |smooth_transition| is set and the selection differs from the one made in
+// the previous call, the output is cross-faded from the previously selected
+// signal to the newly selected one.
+void EchoRemoverImpl::FormLinearFilterOutput(
+    bool smooth_transition,
+    const SubtractorOutput& subtractor_output,
+    rtc::ArrayView<float> output) {
+  RTC_DCHECK_EQ(subtractor_output.e_main.size(), output.size());
+  RTC_DCHECK_EQ(subtractor_output.e_shadow.size(), output.size());
+
+  bool select_shadow = false;
+  if (use_shadow_filter_output_) {
+    // The main adaptive filter output should generally be the better one, so
+    // the shadow output is only preferred with a margin and above thresholds.
+    const bool shadow_clearly_better =
+        subtractor_output.e2_shadow < 0.9f * subtractor_output.e2_main &&
+        subtractor_output.y2 > 30.f * 30.f * kBlockSize &&
+        (subtractor_output.s2_main > 60.f * 60.f * kBlockSize ||
+         subtractor_output.s2_shadow > 60.f * 60.f * kBlockSize);
+
+    // If the main filter is diverged, the output with the lowest power is
+    // chosen.
+    const bool main_diverged_and_worse =
+        subtractor_output.e2_shadow < subtractor_output.e2_main &&
+        subtractor_output.y2 < subtractor_output.e2_main;
+
+    select_shadow = shadow_clearly_better || main_diverged_and_worse;
+  }
+
+  const bool select_main = !select_shadow;
+  const auto& selected =
+      select_main ? subtractor_output.e_main : subtractor_output.e_shadow;
+  const auto& not_selected =
+      select_main ? subtractor_output.e_shadow : subtractor_output.e_main;
+
+  const bool selection_changed =
+      select_main != main_filter_output_last_selected_;
+  if (selection_changed && smooth_transition) {
+    // Fade from the signal used in the previous call to the new choice.
+    SignalTransition(not_selected, selected, output);
+  } else {
+    std::copy(selected.begin(), selected.end(), output.begin());
+  }
+
+  main_filter_output_last_selected_ = select_main;
+}
+
 }  // namespace

 EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config,
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@ -40,6 +40,16 @@ bool EnableShadowFilterJumpstart() {
  return !field_trial::IsEnabled("WebRTC-Aec3ShadowFilterJumpstartKillSwitch");
 }

+// Returns true unless the "WebRTC-Aec3ShadowFilterBoostedJumpstartKillSwitch"
+// field trial is enabled.
+bool EnableShadowFilterBoostedJumpstart() {
+  const bool kill_switch_active = field_trial::IsEnabled(
+      "WebRTC-Aec3ShadowFilterBoostedJumpstartKillSwitch");
+  return !kill_switch_active;
+}
+
+// Returns true unless the "WebRTC-Aec3EarlyShadowFilterJumpstartKillSwitch"
+// field trial is enabled.
+bool EnableEarlyShadowFilterJumpstart() {
+  const bool kill_switch_active = field_trial::IsEnabled(
+      "WebRTC-Aec3EarlyShadowFilterJumpstartKillSwitch");
+  return !kill_switch_active;
+}
+
 void PredictionError(const Aec3Fft& fft,
                     const FftData& S,
                     rtc::ArrayView<const float> y,
@ -100,6 +110,9 @@ Subtractor::Subtractor(const EchoCanceller3Config& config,
      enable_misadjustment_estimator_(EnableMisadjustmentEstimator()),
      enable_agc_gain_change_response_(EnableAgcGainChangeResponse()),
      enable_shadow_filter_jumpstart_(EnableShadowFilterJumpstart()),
+      enable_shadow_filter_boosted_jumpstart_(
+          EnableShadowFilterBoostedJumpstart()),
+      enable_early_shadow_filter_jumpstart_(EnableEarlyShadowFilterJumpstart()),
      main_filter_(config_.filter.main.length_blocks,
                   config_.filter.main_initial.length_blocks,
                   config.filter.config_change_duration_blocks,
@ -226,7 +239,11 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
  // Update the shadow filter.
  poor_shadow_filter_counter_ =
      output->e2_main < output->e2_shadow ? poor_shadow_filter_counter_ + 1 : 0;
-  if (poor_shadow_filter_counter_ < 10 || !enable_shadow_filter_jumpstart_) {
+  if (((poor_shadow_filter_counter_ < 5 &&
+        enable_early_shadow_filter_jumpstart_) ||
+       (poor_shadow_filter_counter_ < 10 &&
+        !enable_early_shadow_filter_jumpstart_)) ||
+      !enable_shadow_filter_jumpstart_) {
    if (shadow_filter_.SizePartitions() != main_filter_.SizePartitions()) {
      render_buffer.SpectralSum(shadow_filter_.SizePartitions(), &X2);
    }
@ -235,11 +252,20 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
                      aec_state.SaturatedCapture() || shadow_saturation, &G);
    shadow_filter_.Adapt(render_buffer, G);
  } else {
-    G.re.fill(0.f);
-    G.im.fill(0.f);
    poor_shadow_filter_counter_ = 0;
-    shadow_filter_.Adapt(render_buffer, G);
-    shadow_filter_.SetFilter(main_filter_.GetFilter());
+
+    if (enable_shadow_filter_boosted_jumpstart_) {
+      shadow_filter_.SetFilter(main_filter_.GetFilter());
+      G_shadow_.Compute(X2, render_signal_analyzer, E_main,
+                        shadow_filter_.SizePartitions(),
+                        aec_state.SaturatedCapture() || main_saturation, &G);
+      shadow_filter_.Adapt(render_buffer, G);
+    } else {
+      G.re.fill(0.f);
+      G.im.fill(0.f);
+      shadow_filter_.Adapt(render_buffer, G);
+      shadow_filter_.SetFilter(main_filter_.GetFilter());
+    }
  }

  data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.re);
--- a/modules/audio_processing/aec3/subtractor.h
+++ b/modules/audio_processing/aec3/subtractor.h
@ -108,6 +108,9 @@ class Subtractor {
  const bool enable_misadjustment_estimator_;
  const bool enable_agc_gain_change_response_;
  const bool enable_shadow_filter_jumpstart_;
+  const bool enable_shadow_filter_boosted_jumpstart_;
+  const bool enable_early_shadow_filter_jumpstart_;
+
  AdaptiveFirFilter main_filter_;
  AdaptiveFirFilter shadow_filter_;
  MainFilterUpdateGain G_main_;
--- a/modules/audio_processing/aec3/subtractor_output_analyzer.cc
+++ b/modules/audio_processing/aec3/subtractor_output_analyzer.cc
@ -13,7 +13,19 @@
 #include <array>
 #include <numeric>

+#include "system_wrappers/include/field_trial.h"
+
 namespace webrtc {
+namespace {
+
+// Returns true unless the "WebRTC-Aec3StrictDivergenceCheckKillSwitch" field
+// trial is enabled.
+bool EnableStrictDivergenceCheck() {
+  const bool kill_switch_active =
+      field_trial::IsEnabled("WebRTC-Aec3StrictDivergenceCheckKillSwitch");
+  return !kill_switch_active;
+}
+
+}  // namespace
+
+// Queries the strict-divergence-check kill switch once at construction and
+// stores the result in |strict_divergence_check_|.
+SubtractorOutputAnalyzer::SubtractorOutputAnalyzer()
+    : strict_divergence_check_(EnableStrictDivergenceCheck()) {}

 void SubtractorOutputAnalyzer::Update(
    const SubtractorOutput& subtractor_output) {
@ -25,13 +37,15 @@ void SubtractorOutputAnalyzer::Update(
  main_filter_converged_ = e2_main < 0.5f * y2 && y2 > kConvergenceThreshold;
  shadow_filter_converged_ =
      e2_shadow < 0.05 * y2 && y2 > kConvergenceThreshold;
-  main_filter_diverged_ = e2_main > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize;
+  float min_e2 =
+      strict_divergence_check_ ? std::min(e2_main, e2_shadow) : e2_main;
+  filter_diverged_ = min_e2 > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize;
 }

 void SubtractorOutputAnalyzer::HandleEchoPathChange() {
  shadow_filter_converged_ = false;
  main_filter_converged_ = false;
-  main_filter_diverged_ = false;
+  filter_diverged_ = false;
 }

 }  // namespace webrtc
--- a/modules/audio_processing/aec3/subtractor_output_analyzer.h
+++ b/modules/audio_processing/aec3/subtractor_output_analyzer.h
@ -19,7 +19,7 @@ namespace webrtc {
 // Class for analyzing the properties subtractor output
 class SubtractorOutputAnalyzer {
 public:
-  SubtractorOutputAnalyzer() = default;
+  SubtractorOutputAnalyzer();
  ~SubtractorOutputAnalyzer() = default;

  // Analyses the subtractor output.
@ -29,15 +29,16 @@ class SubtractorOutputAnalyzer {
    return main_filter_converged_ || shadow_filter_converged_;
  }

-  bool DivergedFilter() const { return main_filter_diverged_; }
+  bool DivergedFilter() const { return filter_diverged_; }

  // Handle echo path change.
  void HandleEchoPathChange();

 private:
+  const bool strict_divergence_check_;
  bool shadow_filter_converged_ = false;
  bool main_filter_converged_ = false;
-  bool main_filter_diverged_ = false;
+  bool filter_diverged_ = false;
 };

 }  // namespace webrtc