Transparency improvements for AEC3 during call start and after resets

This CL changes the AEC3 behavior to be more transparent when there is uncertainty about the amount of echo in the microphone signal.

Bug: webrtc:8398, chromium:774868
Change-Id: I88e681f8decd892f44397b753df371a1c4b90af0
Reviewed-on: https://webrtc-review.googlesource.com/10801
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20319}
2017-10-15 20:19:21 +02:00
parent ede9ca5a24
commit 1b4059e84f
15 changed files with 199 additions and 118 deletions
--- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@ -365,8 +365,9 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
      filter.Adapt(render_buffer, G);
      aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
      aec_state.Update(filter.FilterFrequencyResponse(),
-                       filter.FilterImpulseResponse(), rtc::Optional<size_t>(),
-                       render_buffer, E2_main, Y2, x[0], s, false);
+                       filter.FilterImpulseResponse(), true,
+                       rtc::Optional<size_t>(), render_buffer, E2_main, Y2,
+                       x[0], s, false);
    }
    // Verify that the filter is able to perform well.
    EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
--- a/modules/audio_processing/aec3/aec3_common.h
+++ b/modules/audio_processing/aec3/aec3_common.h
@ -39,7 +39,7 @@ constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
 constexpr size_t kFftLength = 2 * kFftLengthBy2;

 constexpr int kAdaptiveFilterLength = 12;
-constexpr int kResidualEchoPowerRenderWindowSize = 30;
+constexpr int kUnknownDelayRenderWindowSize = 30;
 constexpr int kAdaptiveFilterTimeDomainLength =
    kAdaptiveFilterLength * kFftLengthBy2;

@ -69,6 +69,8 @@ constexpr size_t kRenderTransferQueueSize = kMaxApiCallsJitterBlocks / 2;
 static_assert(2 * kRenderTransferQueueSize >= kMaxApiCallsJitterBlocks,
              "Requirement to ensure buffer overflow detection");

+constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
+
 // TODO(peah): Integrate this with how it is done inside audio_processing_impl.
 constexpr size_t NumBandsForRate(int sample_rate_hz) {
  return static_cast<size_t>(sample_rate_hz == 8000 ? 1
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@ -68,9 +68,6 @@ rtc::Optional<size_t> EstimateFilterDelay(
  return rtc::Optional<size_t>();
 }

-constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;
-constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond;
-
 }  // namespace

 int AecState::instance_count_ = 0;
@ -81,7 +78,6 @@ AecState::AecState(const AudioProcessing::Config::EchoCanceller3& config)
      erle_estimator_(config.param.erle.min,
                      config.param.erle.max_l,
                      config.param.erle.max_h),
-      echo_path_change_counter_(kEchoPathChangeCounterInitial),
      config_(config),
      reverb_decay_(config_.param.ep_strength.default_len) {}

@ -102,10 +98,10 @@ void AecState::HandleEchoPathChange(
      blocks_with_filter_adaptation_ = 0;
      render_received_ = false;
      force_zero_gain_ = true;
-      echo_path_change_counter_ = kEchoPathChangeCounterMax;
+      capture_block_counter_ = 0;
    }
    if (echo_path_variability.gain_change) {
-      echo_path_change_counter_ = kEchoPathChangeCounterInitial;
+      capture_block_counter_ = kNumBlocksPerSecond;
    }
  }
 }
@ -114,6 +110,7 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
                          adaptive_filter_frequency_response,
                      const std::array<float, kAdaptiveFilterTimeDomainLength>&
                          adaptive_filter_impulse_response,
+                      bool converged_filter,
                      const rtc::Optional<size_t>& external_delay_samples,
                      const RenderBuffer& render_buffer,
                      const std::array<float, kFftLengthBy2Plus1>& E2_main,
@ -121,31 +118,16 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
                      rtc::ArrayView<const float> x,
                      const std::array<float, kBlockSize>& s,
                      bool echo_leakage_detected) {
-  // Update the echo audibility evaluator.
-  echo_audibility_.Update(x, s);
-
  // Store input parameters.
  echo_leakage_detected_ = echo_leakage_detected;

  // Update counters.
-  const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
-
-  const bool active_render_block =
-      x_energy > (config_.param.render_levels.active_render_limit *
-                  config_.param.render_levels.active_render_limit) *
-                     kFftLengthBy2;
-  if (active_render_block) {
-    render_received_ = true;
-  }
-  blocks_with_filter_adaptation_ +=
-      (active_render_block && (!SaturatedCapture()) ? 1 : 0);
-  --echo_path_change_counter_;
+  ++capture_block_counter_;

  // Force zero echo suppression gain after an echo path change to allow at
  // least some render data to be collected in order to avoid an initial echo
  // burst.
-  constexpr size_t kZeroGainBlocksAfterChange = kNumBlocksPerSecond / 5;
-  force_zero_gain_ = (++force_zero_gain_counter_) < kZeroGainBlocksAfterChange;
+  force_zero_gain_ = (++force_zero_gain_counter_) < kNumBlocksPerSecond / 5;

  // Estimate delays.
  filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
@ -155,43 +137,60 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
          : rtc::Optional<size_t>();

  // Update the ERL and ERLE measures.
-  if (filter_delay_ && echo_path_change_counter_ <= 0) {
+  if (filter_delay_ && capture_block_counter_ >= 2 * kNumBlocksPerSecond) {
    const auto& X2 = render_buffer.Spectrum(*filter_delay_);
    erle_estimator_.Update(X2, Y2, E2_main);
    erl_estimator_.Update(X2, Y2);
  }

+  // Update the echo audibility evaluator.
+  echo_audibility_.Update(x, s, converged_filter);
+
  // Detect and flag echo saturation.
  // TODO(peah): Add the delay in this computation to ensure that the render and
  // capture signals are properly aligned.
  RTC_DCHECK_LT(0, x.size());
  const float max_sample = fabs(*std::max_element(
      x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
+
+  if (config_.param.ep_strength.echo_can_saturate) {
    const bool saturated_echo =
-      previous_max_sample_ * 100 > 1600 && SaturatedCapture();
-  previous_max_sample_ = max_sample;
+        (previous_max_sample_ > 200.f) && SaturatedCapture();

    // Counts the blocks since saturation.
    constexpr size_t kSaturationLeakageBlocks = 20;
    blocks_since_last_saturation_ =
        saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
+
    echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
+  } else {
+    echo_saturation_ = false;
+  }
+  previous_max_sample_ = max_sample;

  // Flag whether the linear filter estimate is usable.
-  constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
  usable_linear_estimate_ =
-      (!echo_saturation_) &&
-      (!render_received_ ||
-       blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&
-      filter_delay_ && echo_path_change_counter_ <= 0 && external_delay_;
+      (!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) &&
+      filter_delay_ && capture_block_counter_ >= 2 * kNumBlocksPerSecond &&
+      external_delay_;

  // After an amount of active render samples for which an echo should have been
  // detected in the capture signal if the ERL was not infinite, flag that a
-  // headset is used.
-  constexpr size_t kHeadSetDetectionBlocks = 5 * kNumBlocksPerSecond;
-  headset_detected_ = !external_delay_ && !filter_delay_ &&
+  // transparent mode should be entered.
+  const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+  const bool active_render_block =
+      x_energy > (config_.param.render_levels.active_render_limit *
+                  config_.param.render_levels.active_render_limit) *
+                     kFftLengthBy2;
+  if (active_render_block) {
+    render_received_ = true;
+  }
+  blocks_with_filter_adaptation_ +=
+      (active_render_block && (!SaturatedCapture()) ? 1 : 0);
+
+  transparent_mode_ = !converged_filter &&
                      (!render_received_ || blocks_with_filter_adaptation_ >=
-                                                kHeadSetDetectionBlocks);
+                                                5 * kNumBlocksPerSecond);

  // Update the room reverb estimate.
  UpdateReverb(adaptive_filter_impulse_response);
@ -276,7 +275,8 @@ void AecState::UpdateReverb(
 }

 void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
-                                      const std::array<float, kBlockSize>& s) {
+                                      const std::array<float, kBlockSize>& s,
+                                      bool converged_filter) {
  auto result_x = std::minmax_element(x.begin(), x.end());
  auto result_s = std::minmax_element(s.begin(), s.end());
  const float x_abs =
@ -284,11 +284,19 @@ void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
  const float s_abs =
      std::max(std::abs(*result_s.first), std::abs(*result_s.second));

-  if (x_abs < 5.f) {
+  if (converged_filter) {
+    if (x_abs < 20.f) {
      ++low_farend_counter_;
    } else {
      low_farend_counter_ = 0;
    }
+  } else {
+    if (x_abs < 100.f) {
+      ++low_farend_counter_;
+    } else {
+      low_farend_counter_ = 0;
+    }
+  }

  // The echo is deemed as not audible if the echo estimate is on the level of
  // the quantization noise in the FFTs and the nearend level is sufficiently
@ -296,7 +304,8 @@ void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
  // any residual echo that is below the quantization noise level. Furthermore,
  // cases where the render signal is very close to zero are also identified as
  // not producing audible echo.
-  inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f;
+  inaudible_echo_ = (max_nearend_ > 500 && s_abs < 30.f) ||
+                    (!converged_filter && x_abs < 500);
  inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20;
 }

--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@ -72,8 +72,8 @@ class AecState {
    capture_signal_saturation_ = capture_signal_saturation;
  }

-  // Returns whether a probable headset setup has been detected.
-  bool HeadsetDetected() const { return headset_detected_; }
+  // Returns whether the transparent mode is active.
+  bool TransparentMode() const { return transparent_mode_; }

  // Takes appropriate action at an echo path change.
  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
@ -92,10 +92,20 @@ class AecState {
    echo_audibility_.UpdateWithOutput(e);
  }

+  // Returns whether the linear filter should have been able to adapt properly.
+  bool SufficientFilterUpdates() const {
+    return blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
+  }
+
  // Returns whether the echo subtractor can be used to determine the residual
  // echo.
  bool LinearEchoEstimate() const {
-    return UsableLinearEstimate() && !HeadsetDetected();
+    return UsableLinearEstimate() && !TransparentMode();
+  }
+
+  // Returns whether the AEC is in an initial state.
+  bool InitialState() const {
+    return capture_block_counter_ < 3 * kNumBlocksPerSecond;
  }

  // Updates the aec state.
@ -103,6 +113,7 @@ class AecState {
                  adaptive_filter_frequency_response,
              const std::array<float, kAdaptiveFilterTimeDomainLength>&
                  adaptive_filter_impulse_response,
+              bool converged_filter,
              const rtc::Optional<size_t>& external_delay_samples,
              const RenderBuffer& render_buffer,
              const std::array<float, kFftLengthBy2Plus1>& E2_main,
@ -115,7 +126,8 @@ class AecState {
  class EchoAudibility {
   public:
    void Update(rtc::ArrayView<const float> x,
-                const std::array<float, kBlockSize>& s);
+                const std::array<float, kBlockSize>& s,
+                bool converged_filter);
    void UpdateWithOutput(rtc::ArrayView<const float> e);
    bool InaudibleEcho() const { return inaudible_echo_; }

@ -133,13 +145,13 @@ class AecState {
  std::unique_ptr<ApmDataDumper> data_dumper_;
  ErlEstimator erl_estimator_;
  ErleEstimator erle_estimator_;
-  int echo_path_change_counter_;
+  size_t capture_block_counter_ = 0;
  size_t blocks_with_filter_adaptation_ = 0;
  bool usable_linear_estimate_ = false;
  bool echo_leakage_detected_ = false;
  bool capture_signal_saturation_ = false;
  bool echo_saturation_ = false;
-  bool headset_detected_ = false;
+  bool transparent_mode_ = false;
  float previous_max_sample_ = 0.f;
  bool force_zero_gain_ = false;
  bool render_received_ = false;
--- a/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/modules/audio_processing/aec3/aec_state_unittest.cc
@ -43,7 +43,7 @@ TEST(AecState, NormalUsage) {

  // Verify that linear AEC usability is false when the filter is diverged and
  // there is no external delay reported.
-  state.Update(diverged_filter_frequency_response, impulse_response,
+  state.Update(diverged_filter_frequency_response, impulse_response, true,
               rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0], s,
               false);
  EXPECT_FALSE(state.UsableLinearEstimate());
@ -51,7 +51,7 @@ TEST(AecState, NormalUsage) {
  // Verify that linear AEC usability is true when the filter is converged
  std::fill(x[0].begin(), x[0].end(), 101.f);
  for (int k = 0; k < 3000; ++k) {
-    state.Update(converged_filter_frequency_response, impulse_response,
+    state.Update(converged_filter_frequency_response, impulse_response, true,
                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
                 false);
  }
@ -60,7 +60,7 @@ TEST(AecState, NormalUsage) {
  // Verify that linear AEC usability becomes false after an echo path change is
  // reported
  state.HandleEchoPathChange(EchoPathVariability(true, false));
-  state.Update(converged_filter_frequency_response, impulse_response,
+  state.Update(converged_filter_frequency_response, impulse_response, true,
               rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
               false);
  EXPECT_FALSE(state.UsableLinearEstimate());
@ -68,25 +68,25 @@ TEST(AecState, NormalUsage) {
  // Verify that the active render detection works as intended.
  std::fill(x[0].begin(), x[0].end(), 101.f);
  state.HandleEchoPathChange(EchoPathVariability(true, true));
-  state.Update(converged_filter_frequency_response, impulse_response,
+  state.Update(converged_filter_frequency_response, impulse_response, true,
               rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
               false);
  EXPECT_FALSE(state.ActiveRender());

  for (int k = 0; k < 1000; ++k) {
-    state.Update(converged_filter_frequency_response, impulse_response,
+    state.Update(converged_filter_frequency_response, impulse_response, true,
                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
                 false);
  }
  EXPECT_TRUE(state.ActiveRender());

  // Verify that echo leakage is properly reported.
-  state.Update(converged_filter_frequency_response, impulse_response,
+  state.Update(converged_filter_frequency_response, impulse_response, true,
               rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
               false);
  EXPECT_FALSE(state.EchoLeakageDetected());

-  state.Update(converged_filter_frequency_response, impulse_response,
+  state.Update(converged_filter_frequency_response, impulse_response, true,
               rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
               true);
  EXPECT_TRUE(state.EchoLeakageDetected());
@ -103,7 +103,7 @@ TEST(AecState, NormalUsage) {

  Y2.fill(10.f * 10000.f * 10000.f);
  for (size_t k = 0; k < 1000; ++k) {
-    state.Update(converged_filter_frequency_response, impulse_response,
+    state.Update(converged_filter_frequency_response, impulse_response, true,
                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
                 false);
  }
@ -120,7 +120,7 @@ TEST(AecState, NormalUsage) {
  E2_main.fill(1.f * 10000.f * 10000.f);
  Y2.fill(10.f * E2_main[0]);
  for (size_t k = 0; k < 1000; ++k) {
-    state.Update(converged_filter_frequency_response, impulse_response,
+    state.Update(converged_filter_frequency_response, impulse_response, true,
                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
                 false);
  }
@ -141,7 +141,7 @@ TEST(AecState, NormalUsage) {
  E2_main.fill(1.f * 10000.f * 10000.f);
  Y2.fill(5.f * E2_main[0]);
  for (size_t k = 0; k < 1000; ++k) {
-    state.Update(converged_filter_frequency_response, impulse_response,
+    state.Update(converged_filter_frequency_response, impulse_response, true,
                 rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
                 false);
  }
@ -184,8 +184,9 @@ TEST(AecState, NonSignificantDelay) {

  // Verify that a non-significant filter delay is identified correctly.
  state.HandleEchoPathChange(echo_path_variability);
-  state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
-               render_buffer, E2_main, Y2, x, s, false);
+  state.Update(frequency_response, impulse_response, true,
+               rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s,
+               false);
  EXPECT_FALSE(state.FilterDelay());
 }

@ -217,8 +218,9 @@ TEST(AecState, ConvergedFilterDelay) {
    frequency_response[k].fill(100.f);
    frequency_response[k][0] = 0.f;
    state.HandleEchoPathChange(echo_path_variability);
-    state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
-                 render_buffer, E2_main, Y2, x, s, false);
+    state.Update(frequency_response, impulse_response, true,
+                 rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s,
+                 false);
    EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
    if (k != (kFilterLength - 1)) {
      EXPECT_EQ(k, state.FilterDelay());
@ -251,7 +253,7 @@ TEST(AecState, ExternalDelay) {

  for (size_t k = 0; k < frequency_response.size() - 1; ++k) {
    state.HandleEchoPathChange(EchoPathVariability(false, false));
-    state.Update(frequency_response, impulse_response,
+    state.Update(frequency_response, impulse_response, true,
                 rtc::Optional<size_t>(k * kBlockSize + 5), render_buffer,
                 E2_main, Y2, x, s, false);
    EXPECT_TRUE(state.ExternalDelay());
@ -261,8 +263,9 @@ TEST(AecState, ExternalDelay) {
  // Verify that the externally reported delay is properly unset when it is no
  // longer present.
  state.HandleEchoPathChange(EchoPathVariability(false, false));
-  state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
-               render_buffer, E2_main, Y2, x, s, false);
+  state.Update(frequency_response, impulse_response, true,
+               rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s,
+               false);
  EXPECT_FALSE(state.ExternalDelay());
 }

--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@ -172,11 +172,12 @@ void EchoRemoverImpl::ProcessCapture(
  // Update the AEC state information.
  aec_state_.Update(subtractor_.FilterFrequencyResponse(),
                    subtractor_.FilterImpulseResponse(),
-                    echo_path_delay_samples, render_buffer, E2_main, Y2, x0,
-                    subtractor_output.s_main, echo_leakage_detected_);
+                    subtractor_.ConvergedFilter(), echo_path_delay_samples,
+                    render_buffer, E2_main, Y2, x0, subtractor_output.s_main,
+                    echo_leakage_detected_);

  // Choose the linear output.
-  output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);
+  output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0);
  data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
                        LowestBandRate(sample_rate_hz_), 1);
  data_dumper_->DumpRaw("aec3_output_linear", y0);
--- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@ -135,7 +135,7 @@ void RunFilterUpdateTest(int num_blocks_to_process,
    // Update the delay.
    aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
    aec_state.Update(main_filter.FilterFrequencyResponse(),
-                     main_filter.FilterImpulseResponse(),
+                     main_filter.FilterImpulseResponse(), true,
                     rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0],
                     s, false);
  }
--- a/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/modules/audio_processing/aec3/render_delay_buffer.cc
@ -106,7 +106,7 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(size_t num_bands)
      fft_buffer_(
          optimization_,
          num_bands,
-          std::max(kResidualEchoPowerRenderWindowSize, kAdaptiveFilterLength),
+          std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength),
          std::vector<size_t>(1, kAdaptiveFilterLength)),
      api_call_jitter_buffer_(num_bands),
      zero_block_(num_bands, std::vector<float>(kBlockSize, 0.f)) {
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@ -74,9 +74,6 @@ void RenderNoisePower(
  }
 }

-// Assume a minimum echo path gain of -33 dB for headsets.
-constexpr float kHeadsetEchoPathGain = 0.0005f;
-
 }  // namespace

 ResidualEchoEstimator::ResidualEchoEstimator(
@ -95,24 +92,29 @@ void ResidualEchoEstimator::Estimate(
    std::array<float, kFftLengthBy2Plus1>* R2) {
  RTC_DCHECK(R2);

-  const rtc::Optional<size_t> delay =
-      aec_state.ExternalDelay()
-          ? (aec_state.FilterDelay() ? aec_state.FilterDelay()
-                                     : aec_state.ExternalDelay())
-          : rtc::Optional<size_t>();
-
  // Estimate the power of the stationary noise in the render signal.
  RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);

  // Estimate the residual echo power.
-
  if (aec_state.LinearEchoEstimate()) {
    RTC_DCHECK(aec_state.FilterDelay());
    const int filter_delay = *aec_state.FilterDelay();
    LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
    AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
                  aec_state.ReverbDecay(), R2);
+
+    // If the echo is saturated, estimate the echo power as the maximum echo
+    // power with a leakage factor.
+    if (aec_state.SaturatedEcho()) {
+      R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
+    }
  } else {
+    const rtc::Optional<size_t> delay =
+        aec_state.ExternalDelay()
+            ? (aec_state.FilterDelay() ? aec_state.FilterDelay()
+                                       : aec_state.ExternalDelay())
+            : rtc::Optional<size_t>();
+
    // Estimate the echo generating signal power.
    std::array<float, kFftLengthBy2Plus1> X2;
    if (aec_state.ExternalDelay() && aec_state.FilterDelay()) {
@ -120,14 +122,17 @@ void ResidualEchoEstimator::Estimate(
      const int delay_use = static_cast<int>(*delay);

      // Computes the spectral power over the blocks surrounding the delay.
-      RTC_DCHECK_LT(delay_use, kResidualEchoPowerRenderWindowSize);
+      constexpr int kKnownDelayRenderWindowSize = 5;
+      static_assert(
+          kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
+          "Requirement to ensure that the render buffer is overrun");
      EchoGeneratingPower(
          render_buffer, std::max(0, delay_use - 1),
-          std::min(kResidualEchoPowerRenderWindowSize - 1, delay_use + 1), &X2);
+          std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
    } else {
      // Computes the spectral power over the latest blocks.
-      EchoGeneratingPower(render_buffer, 0,
-                          kResidualEchoPowerRenderWindowSize - 1, &X2);
+      EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
+                          &X2);
    }

    // Subtract the stationary noise power to avoid stationary noise causing
@ -136,23 +141,25 @@ void ResidualEchoEstimator::Estimate(
        X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
        [](float a, float b) { return std::max(0.f, a - 10.f * b); });

-    NonLinearEstimate(aec_state.HeadsetDetected(), X2, Y2, R2);
+    NonLinearEstimate(
+        aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(),
+        config_.param.ep_strength.bounded_erl, aec_state.TransparentMode(),
+        aec_state.InitialState(), X2, Y2, R2);
+
+    if (aec_state.ExternalDelay() && aec_state.FilterDelay() &&
+        aec_state.SaturatedEcho()) {
      AddEchoReverb(*R2, aec_state.SaturatedEcho(),
                    std::min(static_cast<size_t>(kAdaptiveFilterLength),
                             delay.value_or(kAdaptiveFilterLength)),
                    aec_state.ReverbDecay(), R2);
    }
-
-  // If the echo is deemed inaudible, set the residual echo to zero.
-  if (aec_state.InaudibleEcho() &&
-      (aec_state.ExternalDelay() || aec_state.HeadsetDetected())) {
-    R2->fill(0.f);
  }

-  // If the echo is saturated, estimate the echo power as the maximum echo power
-  // with a leakage factor.
-  if (aec_state.SaturatedEcho()) {
-    R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
+  // If the echo is deemed inaudible, set the residual echo to zero.
+  if (aec_state.InaudibleEcho()) {
+    R2->fill(0.f);
+    R2_old_.fill(0.f);
+    R2_hold_counter_.fill(0.f);
  }

  std::copy(R2->begin(), R2->end(), R2_old_.begin());
@ -183,17 +190,39 @@ void ResidualEchoEstimator::LinearEstimate(
 }

 void ResidualEchoEstimator::NonLinearEstimate(
-    bool headset_detected,
+    bool sufficient_filter_updates,
+    bool saturated_echo,
+    bool bounded_erl,
+    bool transparent_mode,
+    bool initial_state,
    const std::array<float, kFftLengthBy2Plus1>& X2,
    const std::array<float, kFftLengthBy2Plus1>& Y2,
    std::array<float, kFftLengthBy2Plus1>* R2) {
-  // Choose gains.
-  const float echo_path_gain_lf =
-      headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.lf;
-  const float echo_path_gain_mf =
-      headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.mf;
-  const float echo_path_gain_hf =
-      headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.hf;
+  float echo_path_gain_lf;
+  float echo_path_gain_mf;
+  float echo_path_gain_hf;
+
+  // Set echo path gains.
+  if (saturated_echo) {
+    // If the echo could be saturated, use a very conservative gain.
+    echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f;
+  } else if (sufficient_filter_updates && !bounded_erl) {
+    // If the filter should have been able to converge, and no assumption is
+    // possible on the ERL, use a low gain.
+    echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
+  } else if ((sufficient_filter_updates && bounded_erl) || transparent_mode) {
+    // If the filter should have been able to converge, and it is known that
+    // the ERL is bounded, use a very low gain.
+    echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.001f;
+  } else if (!initial_state) {
+    // If the AEC is no longer in an initial state, assume a weak echo path.
+    echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
+  } else {
+    // In the initial state, use conservative gains.
+    echo_path_gain_lf = config_.param.ep_strength.lf;
+    echo_path_gain_mf = config_.param.ep_strength.mf;
+    echo_path_gain_hf = config_.param.ep_strength.hf;
+  }

  // Compute preliminary residual echo.
  std::transform(
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@ -49,7 +49,11 @@ class ResidualEchoEstimator {

  // Estimates the residual echo power based on the estimate of the echo path
  // gain.
-  void NonLinearEstimate(bool headset_detected,
+  void NonLinearEstimate(bool sufficient_filter_updates,
+                         bool saturated_echo,
+                         bool bounded_erl,
+                         bool transparent_mode,
+                         bool initial_state,
                         const std::array<float, kFftLengthBy2Plus1>& X2,
                         const std::array<float, kFftLengthBy2Plus1>& Y2,
                         std::array<float, kFftLengthBy2Plus1>* R2);
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@ -83,8 +83,8 @@ TEST(ResidualEchoEstimator, BasicTest) {
    render_buffer.Insert(x);

    aec_state.HandleEchoPathChange(echo_path_variability);
-    aec_state.Update(H2, h, rtc::Optional<size_t>(2), render_buffer, E2_main,
-                     Y2, x[0], s, false);
+    aec_state.Update(H2, h, true, rtc::Optional<size_t>(2), render_buffer,
+                     E2_main, Y2, x[0], s, false);

    estimator.Estimate(aec_state, render_buffer, S2_linear, Y2, &R2);
  }
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@ -11,6 +11,7 @@
 #include "modules/audio_processing/aec3/subtractor.h"

 #include <algorithm>
+#include <numeric>

 #include "api/array_view.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
@ -63,6 +64,7 @@ void Subtractor::HandleEchoPathChange(
    shadow_filter_.HandleEchoPathChange();
    G_main_.HandleEchoPathChange();
    G_shadow_.HandleEchoPathChange();
+    converged_filter_ = false;
  }
 }

@ -89,6 +91,19 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
  shadow_filter_.Filter(render_buffer, &S);
  PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);

+  if (!converged_filter_) {
+    const auto sum_of_squares = [](float a, float b) { return a + b * b; };
+    const float e2_main =
+        std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
+    const float e2_shadow =
+        std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
+    const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
+
+    if (y2 > kBlockSize * 50.f * 50.f) {
+      converged_filter_ = (e2_main > 0.3 * y2 || e2_shadow > 0.1 * y2);
+    }
+  }
+
  // Compute spectra for future use.
  E_main.Spectrum(optimization_, &output->E2_main);
  E_shadow.Spectrum(optimization_, &output->E2_shadow);
--- a/modules/audio_processing/aec3/subtractor.h
+++ b/modules/audio_processing/aec3/subtractor.h
@ -57,6 +57,8 @@ class Subtractor {
    return main_filter_.FilterImpulseResponse();
  }

+  bool ConvergedFilter() const { return converged_filter_; }
+
 private:
  const Aec3Fft fft_;
  ApmDataDumper* data_dumper_;
@ -65,6 +67,7 @@ class Subtractor {
  AdaptiveFirFilter shadow_filter_;
  MainFilterUpdateGain G_main_;
  ShadowFilterUpdateGain G_shadow_;
+  bool converged_filter_ = false;

  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor);
 };
--- a/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/modules/audio_processing/aec3/subtractor_unittest.cc
@ -69,6 +69,7 @@ float RunSubtractorTest(int num_blocks_to_process,
    aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
    aec_state.Update(subtractor.FilterFrequencyResponse(),
                     subtractor.FilterImpulseResponse(),
+                     subtractor.ConvergedFilter(),
                     rtc::Optional<size_t>(delay_samples / kBlockSize),
                     render_buffer, E2_main, Y2, x[0], output.s_main, false);
  }
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@ -285,9 +285,11 @@ class AudioProcessing : public rtc::RefCountInterface {

        struct EpStrength {
          float lf = 10.f;
-          float mf = 100.f;
-          float hf = 200.f;
+          float mf = 10.f;
+          float hf = 10.f;
          float default_len = 0.f;
+          bool echo_can_saturate = true;
+          bool bounded_erl = false;
        } ep_strength;

        struct Mask {
@ -305,7 +307,6 @@ class AudioProcessing : public rtc::RefCountInterface {
        struct EchoAudibility {
          float low_render_limit = 4 * 64.f;
          float normal_render_limit = 64.f;
-          float active_render_limit = 100.f;
        } echo_audibility;

        struct RenderLevels {