AEC3: Avoid entering non-linear mode when the filter is slightly diverged

This CL changes the behavior when the main filter diverges. Instead of entering non-linear mode, the AEC continues to operate in linear mode but estimates the residual echo differently: while the filter is diverged, the residual echo power estimate (R2) is the linear power estimate (S2) scaled by a factor of 10, rather than S2 divided by the ERLE.

Bug: chromium:857018,webrtc:9462
Change-Id: I41212efe164ad319cf38a163cdf9d3ea151e0997
Reviewed-on: https://webrtc-review.googlesource.com/85981
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23772}
2018-06-28 14:21:16 +02:00
parent c75b35ab40
commit 6c618c7002
4 changed files with 37 additions and 7 deletions
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@ -38,6 +38,11 @@ bool EnableEnforcingDelayAfterRealignment() {
      "WebRTC-Aec3EnforceDelayAfterRealignmentKillSwitch");
 }

+bool EnableLinearModeWithDivergedFilter() {
+  return !field_trial::IsEnabled(
+      "WebRTC-Aec3LinearModeWithDivergedFilterKillSwitch");
+}
+
 float ComputeGainRampupIncrease(const EchoCanceller3Config& config) {
  const auto& c = config.echo_removal_control.gain_rampup;
  return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks);
@ -59,6 +64,8 @@ AecState::AecState(const EchoCanceller3Config& config)
          EnableStationaryRenderImprovements() &&
          config_.echo_audibility.use_stationary_properties),
      enforce_delay_after_realignment_(EnableEnforcingDelayAfterRealignment()),
+      allow_linear_mode_with_diverged_filter_(
+          EnableLinearModeWithDivergedFilter()),
      erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
      max_render_(config_.filter.main.length_blocks, 0.f),
      reverb_decay_(fabsf(config_.ep_strength.default_len)),
@ -77,6 +84,7 @@ void AecState::HandleEchoPathChange(
    filter_analyzer_.Reset();
    blocks_since_last_saturation_ = 0;
    usable_linear_estimate_ = false;
+    diverged_linear_filter_ = false;
    capture_signal_saturation_ = false;
    echo_saturation_ = false;
    std::fill(max_render_.begin(), max_render_.end(), 0.f);
@ -269,10 +277,13 @@ void AecState::Update(
  if (!config_.echo_removal_control.linear_and_stable_echo_path) {
    usable_linear_estimate_ =
        usable_linear_estimate_ && recently_converged_filter;
-    usable_linear_estimate_ = usable_linear_estimate_ && !diverged_filter;
+    if (!allow_linear_mode_with_diverged_filter_) {
+      usable_linear_estimate_ = usable_linear_estimate_ && !diverged_filter;
+    }
  }

  use_linear_filter_output_ = usable_linear_estimate_ && !TransparentMode();
+  diverged_linear_filter_ = diverged_filter;

  UpdateReverb(adaptive_filter_impulse_response);

--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@ -69,6 +69,14 @@ class AecState {
    return erle_estimator_.Erle();
  }

+  // Returns any uncertainty in the ERLE estimate.
+  absl::optional<float> ErleUncertainty() const {
+    if (allow_linear_mode_with_diverged_filter_ && diverged_linear_filter_) {
+      return 10.f;
+    }
+    return absl::nullopt;
+  }
+
  // Returns the time-domain ERLE.
  float ErleTimeDomain() const { return erle_estimator_.ErleTimeDomain(); }

@ -159,6 +167,7 @@ class AecState {
  const bool allow_transparent_mode_;
  const bool use_stationary_properties_;
  const bool enforce_delay_after_realignment_;
+  const bool allow_linear_mode_with_diverged_filter_;
  ErlEstimator erl_estimator_;
  ErleEstimator erle_estimator_;
  size_t capture_block_counter_ = 0;
@ -166,6 +175,7 @@ class AecState {
  size_t blocks_with_proper_filter_adaptation_ = 0;
  size_t blocks_with_active_render_ = 0;
  bool usable_linear_estimate_ = false;
+  bool diverged_linear_filter_ = false;
  bool capture_signal_saturation_ = false;
  bool echo_saturation_ = false;
  bool transparent_mode_ = false;
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@ -100,7 +100,8 @@ void ResidualEchoEstimator::Estimate(
  // Estimate the residual echo power.
  if (aec_state.UsableLinearEstimate()) {
    RTC_DCHECK(!aec_state.SaturatedEcho());
-    LinearEstimate(S2_linear, aec_state.Erle(), R2);
+    LinearEstimate(S2_linear, aec_state.Erle(), aec_state.ErleUncertainty(),
+                   R2);
    // Adds the estimated unmodelled echo power to the residual echo power
    // estimate.
    if (echo_reverb_) {
@ -203,13 +204,20 @@ void ResidualEchoEstimator::Reset() {
 void ResidualEchoEstimator::LinearEstimate(
    const std::array<float, kFftLengthBy2Plus1>& S2_linear,
    const std::array<float, kFftLengthBy2Plus1>& erle,
+    absl::optional<float> erle_uncertainty,
    std::array<float, kFftLengthBy2Plus1>* R2) {
  std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10.f);
-  std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
-                 [](float a, float b) {
-                   RTC_DCHECK_LT(0.f, a);
-                   return b / a;
-                 });
+  if (erle_uncertainty) {
+    for (size_t k = 0; k < R2->size(); ++k) {
+      (*R2)[k] = S2_linear[k] * *erle_uncertainty;
+    }
+  } else {
+    std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
+                   [](float a, float b) {
+                     RTC_DCHECK_LT(0.f, a);
+                     return b / a;
+                   });
+  }
 }

 void ResidualEchoEstimator::NonLinearEstimate(
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@ -56,6 +56,7 @@ class ResidualEchoEstimator {
  // (ERLE) and the linear power estimate.
  void LinearEstimate(const std::array<float, kFftLengthBy2Plus1>& S2_linear,
                      const std::array<float, kFftLengthBy2Plus1>& erle,
+                      absl::optional<float> erle_uncertainty,
                      std::array<float, kFftLengthBy2Plus1>* R2);

  // Estimates the residual echo power based on the estimate of the echo path