From 8097935df38e47e0b56a71ebb23c6e647177e843 Mon Sep 17 00:00:00 2001
From: Ilya Nikolaevskiy <ilnik@webrtc.org>
Date: Thu, 11 Mar 2021 11:40:46 +0000
Subject: [PATCH] Revert "Reduce complexity in the APM pipeline when the output
 is not used"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit aa6adffba325f4b698a1e94aeab020bfdc47adec.

Reason for revert: breaks webrtc-importer

Original change's description:
> Reduce complexity in the APM pipeline when the output is not used
>
> This CL selectively turns off parts of the audio processing when
> the output of APM is not used. The parts turned off are those that
> do not need to be trained continuously, but rather can be
> temporarily deactivated.
>
> The purpose of this CL is to allow CPU to be reduced when the
> client is muted.
>
> The CL will be followed by additional CLs, adding similar functionality
> in the echo canceller and the noise suppressor.
>
> Bug: b/177830919
> Change-Id: I72d24505197a53872562c0955f3e7b670c43df6b
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/209703
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#33431}

Bug: b/177830919
Change-Id: I937cd61dedcd43150933eb1b9d65aebe68401e91
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/211348
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33433}
---
 .../audio_processing/audio_processing_impl.cc | 197 +++++++++---------
 .../audio_processing/audio_processing_impl.h  |   8 +-
 2 files changed, 95 insertions(+), 110 deletions(-)

diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 93dc08075c..79a315113a 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -115,10 +115,6 @@ GainControl::Mode Agc1ConfigModeToInterfaceMode(
   RTC_CHECK_NOTREACHED();
 }
 
-bool MinimizeProcessingForUnusedOutput() {
-  return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch");
-}
-
 // Maximum lengths that frame of samples being passed from the render side to
 // the capture side can have (does not apply to AEC3).
 static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
@@ -271,9 +267,7 @@ AudioProcessingImpl::AudioProcessingImpl(
                      "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
                  !field_trial::IsEnabled(
                      "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
-                 EnforceSplitBandHpf(),
-                 MinimizeProcessingForUnusedOutput()),
-      capture_(),
+                 EnforceSplitBandHpf()),
       capture_nonlocked_() {
   RTC_LOG(LS_INFO) << "Injected APM submodules:"
                       "\nEcho control factory: "
@@ -673,9 +667,7 @@ void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
 
 void AudioProcessingImpl::HandleCaptureOutputUsedSetting(
     bool capture_output_used) {
-  capture_.capture_output_used =
-      capture_output_used || !constants_.minimize_processing_for_unused_output;
-
+  capture_.capture_output_used = capture_output_used;
   if (submodules_.agc_manager.get()) {
     submodules_.agc_manager->HandleCaptureOutputUsedChange(
         capture_.capture_output_used);
@@ -882,7 +874,11 @@ void AudioProcessingImpl::HandleCaptureRuntimeSettings() {
 void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
   // Fall back to a safe state for the case when a setting for capture output
   // usage setting has been missed.
-  HandleCaptureOutputUsedSetting(/*capture_output_used=*/true);
+  capture_.capture_output_used = true;
+  if (submodules_.echo_controller) {
+    submodules_.echo_controller->SetCaptureOutputUsage(
+        capture_.capture_output_used);
+  }
 }
 
 void AudioProcessingImpl::HandleRenderRuntimeSettings() {
@@ -1230,101 +1226,87 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
         capture_buffer, /*stream_has_echo*/ false));
   }
 
-  capture_.stats.output_rms_dbfs = absl::nullopt;
-  if (capture_.capture_output_used) {
-    if (submodule_states_.CaptureMultiBandProcessingPresent() &&
-        SampleRateSupportsMultiBand(
-            capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
-      capture_buffer->MergeFrequencyBands();
-    }
-
-    if (capture_.capture_fullband_audio) {
-      const auto& ec = submodules_.echo_controller;
-      bool ec_active = ec ? ec->ActiveProcessing() : false;
-      // Only update the fullband buffer if the multiband processing has changed
-      // the signal. Keep the original signal otherwise.
-      if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
-        capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
-      }
-      capture_buffer = capture_.capture_fullband_audio.get();
-    }
-
-    if (config_.residual_echo_detector.enabled) {
-      RTC_DCHECK(submodules_.echo_detector);
-      submodules_.echo_detector->AnalyzeCaptureAudio(
-          rtc::ArrayView<const float>(capture_buffer->channels()[0],
-                                      capture_buffer->num_frames()));
-    }
-
-    // TODO(aluebs): Investigate if the transient suppression placement should
-    // be before or after the AGC.
-    if (submodules_.transient_suppressor) {
-      float voice_probability =
-          submodules_.agc_manager.get()
-              ? submodules_.agc_manager->voice_probability()
-              : 1.f;
-
-      submodules_.transient_suppressor->Suppress(
-          capture_buffer->channels()[0], capture_buffer->num_frames(),
-          capture_buffer->num_channels(),
-          capture_buffer->split_bands_const(0)[kBand0To8kHz],
-          capture_buffer->num_frames_per_band(),
-          capture_.keyboard_info.keyboard_data,
-          capture_.keyboard_info.num_keyboard_frames, voice_probability,
-          capture_.key_pressed);
-    }
-
-    // Experimental APM sub-module that analyzes |capture_buffer|.
-    if (submodules_.capture_analyzer) {
-      submodules_.capture_analyzer->Analyze(capture_buffer);
-    }
-
-    if (submodules_.gain_controller2) {
-      submodules_.gain_controller2->NotifyAnalogLevel(
-          recommended_stream_analog_level_locked());
-      submodules_.gain_controller2->Process(capture_buffer);
-    }
-
-    if (submodules_.capture_post_processor) {
-      submodules_.capture_post_processor->Process(capture_buffer);
-    }
-
-    // The level estimator operates on the recombined data.
-    if (config_.level_estimation.enabled) {
-      submodules_.output_level_estimator->ProcessStream(*capture_buffer);
-      capture_.stats.output_rms_dbfs =
-          submodules_.output_level_estimator->RMS();
-    }
-
-    capture_output_rms_.Analyze(rtc::ArrayView<const float>(
-        capture_buffer->channels_const()[0],
-        capture_nonlocked_.capture_processing_format.num_frames()));
-    if (log_rms) {
-      RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
-      RTC_HISTOGRAM_COUNTS_LINEAR(
-          "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1,
-          RmsLevel::kMinLevelDb, 64);
-      RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
-                                  levels.peak, 1, RmsLevel::kMinLevelDb, 64);
-    }
-
-    if (submodules_.agc_manager) {
-      int level = recommended_stream_analog_level_locked();
-      data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
-                            &level);
-    }
-
-    // Compute echo-detector stats.
-    if (config_.residual_echo_detector.enabled) {
-      RTC_DCHECK(submodules_.echo_detector);
-      auto ed_metrics = submodules_.echo_detector->GetMetrics();
-      capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
-      capture_.stats.residual_echo_likelihood_recent_max =
-          ed_metrics.echo_likelihood_recent_max;
-    }
+  if (submodule_states_.CaptureMultiBandProcessingPresent() &&
+      SampleRateSupportsMultiBand(
+          capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
+    capture_buffer->MergeFrequencyBands();
   }
 
-  // Compute echo-controller stats.
+  if (capture_.capture_fullband_audio) {
+    const auto& ec = submodules_.echo_controller;
+    bool ec_active = ec ? ec->ActiveProcessing() : false;
+    // Only update the fullband buffer if the multiband processing has changed
+    // the signal. Keep the original signal otherwise.
+    if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
+      capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
+    }
+    capture_buffer = capture_.capture_fullband_audio.get();
+  }
+
+  if (config_.residual_echo_detector.enabled) {
+    RTC_DCHECK(submodules_.echo_detector);
+    submodules_.echo_detector->AnalyzeCaptureAudio(rtc::ArrayView<const float>(
+        capture_buffer->channels()[0], capture_buffer->num_frames()));
+  }
+
+  // TODO(aluebs): Investigate if the transient suppression placement should be
+  // before or after the AGC.
+  if (submodules_.transient_suppressor) {
+    float voice_probability = submodules_.agc_manager.get()
+                                  ? submodules_.agc_manager->voice_probability()
+                                  : 1.f;
+
+    submodules_.transient_suppressor->Suppress(
+        capture_buffer->channels()[0], capture_buffer->num_frames(),
+        capture_buffer->num_channels(),
+        capture_buffer->split_bands_const(0)[kBand0To8kHz],
+        capture_buffer->num_frames_per_band(),
+        capture_.keyboard_info.keyboard_data,
+        capture_.keyboard_info.num_keyboard_frames, voice_probability,
+        capture_.key_pressed);
+  }
+
+  // Experimental APM sub-module that analyzes |capture_buffer|.
+  if (submodules_.capture_analyzer) {
+    submodules_.capture_analyzer->Analyze(capture_buffer);
+  }
+
+  if (submodules_.gain_controller2) {
+    submodules_.gain_controller2->NotifyAnalogLevel(
+        recommended_stream_analog_level_locked());
+    submodules_.gain_controller2->Process(capture_buffer);
+  }
+
+  if (submodules_.capture_post_processor) {
+    submodules_.capture_post_processor->Process(capture_buffer);
+  }
+
+  // The level estimator operates on the recombined data.
+  if (config_.level_estimation.enabled) {
+    submodules_.output_level_estimator->ProcessStream(*capture_buffer);
+    capture_.stats.output_rms_dbfs = submodules_.output_level_estimator->RMS();
+  } else {
+    capture_.stats.output_rms_dbfs = absl::nullopt;
+  }
+
+  capture_output_rms_.Analyze(rtc::ArrayView<const float>(
+      capture_buffer->channels_const()[0],
+      capture_nonlocked_.capture_processing_format.num_frames()));
+  if (log_rms) {
+    RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelAverageRms",
+                                levels.average, 1, RmsLevel::kMinLevelDb, 64);
+    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
+                                levels.peak, 1, RmsLevel::kMinLevelDb, 64);
+  }
+
+  if (submodules_.agc_manager) {
+    int level = recommended_stream_analog_level_locked();
+    data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
+                          &level);
+  }
+
+  // Compute echo-related stats.
   if (submodules_.echo_controller) {
     auto ec_metrics = submodules_.echo_controller->GetMetrics();
     capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
@@ -1332,6 +1314,13 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
         ec_metrics.echo_return_loss_enhancement;
     capture_.stats.delay_ms = ec_metrics.delay_ms;
   }
+  if (config_.residual_echo_detector.enabled) {
+    RTC_DCHECK(submodules_.echo_detector);
+    auto ed_metrics = submodules_.echo_detector->GetMetrics();
+    capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
+    capture_.stats.residual_echo_likelihood_recent_max =
+        ed_metrics.echo_likelihood_recent_max;
+  }
 
   // Pass stats for reporting.
   stats_reporter_.UpdateStatistics(capture_.stats);
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index c4bbf11646..8306ac7502 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -419,17 +419,13 @@ class AudioProcessingImpl : public AudioProcessing {
   const struct ApmConstants {
     ApmConstants(bool multi_channel_render_support,
                  bool multi_channel_capture_support,
-                 bool enforce_split_band_hpf,
-                 bool minimize_processing_for_unused_output)
+                 bool enforce_split_band_hpf)
         : multi_channel_render_support(multi_channel_render_support),
           multi_channel_capture_support(multi_channel_capture_support),
-          enforce_split_band_hpf(enforce_split_band_hpf),
-          minimize_processing_for_unused_output(
-              minimize_processing_for_unused_output) {}
+          enforce_split_band_hpf(enforce_split_band_hpf) {}
     bool multi_channel_render_support;
     bool multi_channel_capture_support;
     bool enforce_split_band_hpf;
-    bool minimize_processing_for_unused_output;
   } constants_;
 
   struct ApmCaptureState {