Revert "Reduce complexity in the APM pipeline when the output is not used"

This reverts commit aa6adffba325f4b698a1e94aeab020bfdc47adec.

Reason for revert: breaks webrtc-importer

Original change's description:
> Reduce complexity in the APM pipeline when the output is not used
>
> This CL selectively turns off parts of the audio processing when
> the output of APM is not used. The parts turned off are such that
> don't need to continuously need to be trained, but rather can be
> temporarily deactivated.
>
> The purpose of this CL is to allow CPU to be reduced when the
> client is muted.
>
> The CL will be follow by additional CLs, adding similar functionality
> in the echo canceller and the noiser suppressor
>
> Bug: b/177830919
> Change-Id: I72d24505197a53872562c0955f3e7b670c43df6b
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/209703
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#33431}

Bug: b/177830919
Change-Id: I937cd61dedcd43150933eb1b9d65aebe68401e91
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/211348
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33433}
2021-03-11 11:40:46 +00:00
parent be140b4187
commit 8097935df3
2 changed files with 95 additions and 110 deletions
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@ -115,10 +115,6 @@ GainControl::Mode Agc1ConfigModeToInterfaceMode(
  RTC_CHECK_NOTREACHED();
 }
 bool MinimizeProcessingForUnusedOutput() {
  return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch");
 }
 // Maximum lengths that frame of samples being passed from the render side to
 // the capture side can have (does not apply to AEC3).
 static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
@ -271,9 +267,7 @@ AudioProcessingImpl::AudioProcessingImpl(
                     "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
                 !field_trial::IsEnabled(
                     "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
-                 EnforceSplitBandHpf(),
+                 EnforceSplitBandHpf()),
                 MinimizeProcessingForUnusedOutput()),
      capture_(),
      capture_nonlocked_() {
  RTC_LOG(LS_INFO) << "Injected APM submodules:"
                      "\nEcho control factory: "
@ -673,9 +667,7 @@ void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
 void AudioProcessingImpl::HandleCaptureOutputUsedSetting(
    bool capture_output_used) {
-  capture_.capture_output_used =
+  capture_.capture_output_used = capture_output_used;
      capture_output_used || !constants_.minimize_processing_for_unused_output;
  if (submodules_.agc_manager.get()) {
    submodules_.agc_manager->HandleCaptureOutputUsedChange(
        capture_.capture_output_used);
@ -882,7 +874,11 @@ void AudioProcessingImpl::HandleCaptureRuntimeSettings() {
 void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
  // Fall back to a safe state for the case when a setting for capture output
  // usage setting has been missed.
-  HandleCaptureOutputUsedSetting(/*capture_output_used=*/true);
+  capture_.capture_output_used = true;
  if (submodules_.echo_controller) {
    submodules_.echo_controller->SetCaptureOutputUsage(
        capture_.capture_output_used);
  }
 }
 void AudioProcessingImpl::HandleRenderRuntimeSettings() {
@ -1230,101 +1226,87 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
        capture_buffer, /*stream_has_echo*/ false));
  }
-  capture_.stats.output_rms_dbfs = absl::nullopt;
+  if (submodule_states_.CaptureMultiBandProcessingPresent() &&
-  if (capture_.capture_output_used) {
+      SampleRateSupportsMultiBand(
-    if (submodule_states_.CaptureMultiBandProcessingPresent() &&
+          capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
-        SampleRateSupportsMultiBand(
+    capture_buffer->MergeFrequencyBands();
            capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
      capture_buffer->MergeFrequencyBands();
    }
    if (capture_.capture_fullband_audio) {
      const auto& ec = submodules_.echo_controller;
      bool ec_active = ec ? ec->ActiveProcessing() : false;
      // Only update the fullband buffer if the multiband processing has changed
      // the signal. Keep the original signal otherwise.
      if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
        capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
      }
      capture_buffer = capture_.capture_fullband_audio.get();
    }
    if (config_.residual_echo_detector.enabled) {
      RTC_DCHECK(submodules_.echo_detector);
      submodules_.echo_detector->AnalyzeCaptureAudio(
          rtc::ArrayView<const float>(capture_buffer->channels()[0],
                                      capture_buffer->num_frames()));
    }
    // TODO(aluebs): Investigate if the transient suppression placement should
    // be before or after the AGC.
    if (submodules_.transient_suppressor) {
      float voice_probability =
          submodules_.agc_manager.get()
              ? submodules_.agc_manager->voice_probability()
              : 1.f;
      submodules_.transient_suppressor->Suppress(
          capture_buffer->channels()[0], capture_buffer->num_frames(),
          capture_buffer->num_channels(),
          capture_buffer->split_bands_const(0)[kBand0To8kHz],
          capture_buffer->num_frames_per_band(),
          capture_.keyboard_info.keyboard_data,
          capture_.keyboard_info.num_keyboard_frames, voice_probability,
          capture_.key_pressed);
    }
    // Experimental APM sub-module that analyzes |capture_buffer|.
    if (submodules_.capture_analyzer) {
      submodules_.capture_analyzer->Analyze(capture_buffer);
    }
    if (submodules_.gain_controller2) {
      submodules_.gain_controller2->NotifyAnalogLevel(
          recommended_stream_analog_level_locked());
      submodules_.gain_controller2->Process(capture_buffer);
    }
    if (submodules_.capture_post_processor) {
      submodules_.capture_post_processor->Process(capture_buffer);
    }
    // The level estimator operates on the recombined data.
    if (config_.level_estimation.enabled) {
      submodules_.output_level_estimator->ProcessStream(*capture_buffer);
      capture_.stats.output_rms_dbfs =
          submodules_.output_level_estimator->RMS();
    }
    capture_output_rms_.Analyze(rtc::ArrayView<const float>(
        capture_buffer->channels_const()[0],
        capture_nonlocked_.capture_processing_format.num_frames()));
    if (log_rms) {
      RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
      RTC_HISTOGRAM_COUNTS_LINEAR(
          "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1,
          RmsLevel::kMinLevelDb, 64);
      RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
                                  levels.peak, 1, RmsLevel::kMinLevelDb, 64);
    }
    if (submodules_.agc_manager) {
      int level = recommended_stream_analog_level_locked();
      data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
                            &level);
    }
    // Compute echo-detector stats.
    if (config_.residual_echo_detector.enabled) {
      RTC_DCHECK(submodules_.echo_detector);
      auto ed_metrics = submodules_.echo_detector->GetMetrics();
      capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
      capture_.stats.residual_echo_likelihood_recent_max =
          ed_metrics.echo_likelihood_recent_max;
    }
  }
-  // Compute echo-controller stats.
+  if (capture_.capture_fullband_audio) {
    const auto& ec = submodules_.echo_controller;
    bool ec_active = ec ? ec->ActiveProcessing() : false;
    // Only update the fullband buffer if the multiband processing has changed
    // the signal. Keep the original signal otherwise.
    if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
      capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
    }
    capture_buffer = capture_.capture_fullband_audio.get();
  }
  if (config_.residual_echo_detector.enabled) {
    RTC_DCHECK(submodules_.echo_detector);
    submodules_.echo_detector->AnalyzeCaptureAudio(rtc::ArrayView<const float>(
        capture_buffer->channels()[0], capture_buffer->num_frames()));
  }
  // TODO(aluebs): Investigate if the transient suppression placement should be
  // before or after the AGC.
  if (submodules_.transient_suppressor) {
    float voice_probability = submodules_.agc_manager.get()
                                  ? submodules_.agc_manager->voice_probability()
                                  : 1.f;
    submodules_.transient_suppressor->Suppress(
        capture_buffer->channels()[0], capture_buffer->num_frames(),
        capture_buffer->num_channels(),
        capture_buffer->split_bands_const(0)[kBand0To8kHz],
        capture_buffer->num_frames_per_band(),
        capture_.keyboard_info.keyboard_data,
        capture_.keyboard_info.num_keyboard_frames, voice_probability,
        capture_.key_pressed);
  }
  // Experimental APM sub-module that analyzes |capture_buffer|.
  if (submodules_.capture_analyzer) {
    submodules_.capture_analyzer->Analyze(capture_buffer);
  }
  if (submodules_.gain_controller2) {
    submodules_.gain_controller2->NotifyAnalogLevel(
        recommended_stream_analog_level_locked());
    submodules_.gain_controller2->Process(capture_buffer);
  }
  if (submodules_.capture_post_processor) {
    submodules_.capture_post_processor->Process(capture_buffer);
  }
  // The level estimator operates on the recombined data.
  if (config_.level_estimation.enabled) {
    submodules_.output_level_estimator->ProcessStream(*capture_buffer);
    capture_.stats.output_rms_dbfs = submodules_.output_level_estimator->RMS();
  } else {
    capture_.stats.output_rms_dbfs = absl::nullopt;
  }
  capture_output_rms_.Analyze(rtc::ArrayView<const float>(
      capture_buffer->channels_const()[0],
      capture_nonlocked_.capture_processing_format.num_frames()));
  if (log_rms) {
    RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelAverageRms",
                                levels.average, 1, RmsLevel::kMinLevelDb, 64);
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
                                levels.peak, 1, RmsLevel::kMinLevelDb, 64);
  }
  if (submodules_.agc_manager) {
    int level = recommended_stream_analog_level_locked();
    data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
                          &level);
  }
  // Compute echo-related stats.
  if (submodules_.echo_controller) {
    auto ec_metrics = submodules_.echo_controller->GetMetrics();
    capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
@ -1332,6 +1314,13 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
        ec_metrics.echo_return_loss_enhancement;
    capture_.stats.delay_ms = ec_metrics.delay_ms;
  }
  if (config_.residual_echo_detector.enabled) {
    RTC_DCHECK(submodules_.echo_detector);
    auto ed_metrics = submodules_.echo_detector->GetMetrics();
    capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
    capture_.stats.residual_echo_likelihood_recent_max =
        ed_metrics.echo_likelihood_recent_max;
  }
  // Pass stats for reporting.
  stats_reporter_.UpdateStatistics(capture_.stats);
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@ -419,17 +419,13 @@ class AudioProcessingImpl : public AudioProcessing {
  const struct ApmConstants {
    ApmConstants(bool multi_channel_render_support,
                 bool multi_channel_capture_support,
-                 bool enforce_split_band_hpf,
+                 bool enforce_split_band_hpf)
                 bool minimize_processing_for_unused_output)
        : multi_channel_render_support(multi_channel_render_support),
          multi_channel_capture_support(multi_channel_capture_support),
-          enforce_split_band_hpf(enforce_split_band_hpf),
+          enforce_split_band_hpf(enforce_split_band_hpf) {}
          minimize_processing_for_unused_output(
              minimize_processing_for_unused_output) {}
    bool multi_channel_render_support;
    bool multi_channel_capture_support;
    bool enforce_split_band_hpf;
    bool minimize_processing_for_unused_output;
  } constants_;
  struct ApmCaptureState {