From 8097935df38e47e0b56a71ebb23c6e647177e843 Mon Sep 17 00:00:00 2001 From: Ilya Nikolaevskiy Date: Thu, 11 Mar 2021 11:40:46 +0000 Subject: [PATCH] Revert "Reduce complexity in the APM pipeline when the output is not used" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit aa6adffba325f4b698a1e94aeab020bfdc47adec. Reason for revert: breaks webrtc-importer Original change's description: > Reduce complexity in the APM pipeline when the output is not used > > This CL selectively turns off parts of the audio processing when > the output of APM is not used. The parts turned off are such that > don't need to continuously need to be trained, but rather can be > temporarily deactivated. > > The purpose of this CL is to allow CPU to be reduced when the > client is muted. > > The CL will be follow by additional CLs, adding similar functionality > in the echo canceller and the noiser suppressor > > Bug: b/177830919 > Change-Id: I72d24505197a53872562c0955f3e7b670c43df6b > Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/209703 > Commit-Queue: Per Åhgren > Reviewed-by: Sam Zackrisson > Cr-Commit-Position: refs/heads/master@{#33431} Bug: b/177830919 Change-Id: I937cd61dedcd43150933eb1b9d65aebe68401e91 No-Presubmit: true No-Tree-Checks: true No-Try: true Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/211348 Reviewed-by: Ilya Nikolaevskiy Commit-Queue: Ilya Nikolaevskiy Cr-Commit-Position: refs/heads/master@{#33433} --- .../audio_processing/audio_processing_impl.cc | 197 +++++++++--------- .../audio_processing/audio_processing_impl.h | 8 +- 2 files changed, 95 insertions(+), 110 deletions(-) diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 93dc08075c..79a315113a 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -115,10 +115,6 @@ GainControl::Mode
Agc1ConfigModeToInterfaceMode( RTC_CHECK_NOTREACHED(); } -bool MinimizeProcessingForUnusedOutput() { - return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch"); -} - // Maximum lengths that frame of samples being passed from the render side to // the capture side can have (does not apply to AEC3). static const size_t kMaxAllowedValuesOfSamplesPerBand = 160; @@ -271,9 +267,7 @@ AudioProcessingImpl::AudioProcessingImpl( "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"), !field_trial::IsEnabled( "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"), - EnforceSplitBandHpf(), - MinimizeProcessingForUnusedOutput()), - capture_(), + EnforceSplitBandHpf()), capture_nonlocked_() { RTC_LOG(LS_INFO) << "Injected APM submodules:" "\nEcho control factory: " @@ -673,9 +667,7 @@ void AudioProcessingImpl::set_output_will_be_muted(bool muted) { void AudioProcessingImpl::HandleCaptureOutputUsedSetting( bool capture_output_used) { - capture_.capture_output_used = - capture_output_used || !constants_.minimize_processing_for_unused_output; - + capture_.capture_output_used = capture_output_used; if (submodules_.agc_manager.get()) { submodules_.agc_manager->HandleCaptureOutputUsedChange( capture_.capture_output_used); @@ -882,7 +874,11 @@ void AudioProcessingImpl::HandleCaptureRuntimeSettings() { void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() { // Fall back to a safe state for the case when a setting for capture output // usage setting has been missed. 
- HandleCaptureOutputUsedSetting(/*capture_output_used=*/true); + capture_.capture_output_used = true; + if (submodules_.echo_controller) { + submodules_.echo_controller->SetCaptureOutputUsage( + capture_.capture_output_used); + } } void AudioProcessingImpl::HandleRenderRuntimeSettings() { @@ -1230,101 +1226,87 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer, /*stream_has_echo*/ false)); } - capture_.stats.output_rms_dbfs = absl::nullopt; - if (capture_.capture_output_used) { - if (submodule_states_.CaptureMultiBandProcessingPresent() && - SampleRateSupportsMultiBand( - capture_nonlocked_.capture_processing_format.sample_rate_hz())) { - capture_buffer->MergeFrequencyBands(); - } - - if (capture_.capture_fullband_audio) { - const auto& ec = submodules_.echo_controller; - bool ec_active = ec ? ec->ActiveProcessing() : false; - // Only update the fullband buffer if the multiband processing has changed - // the signal. Keep the original signal otherwise. - if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) { - capture_buffer->CopyTo(capture_.capture_fullband_audio.get()); - } - capture_buffer = capture_.capture_fullband_audio.get(); - } - - if (config_.residual_echo_detector.enabled) { - RTC_DCHECK(submodules_.echo_detector); - submodules_.echo_detector->AnalyzeCaptureAudio( - rtc::ArrayView(capture_buffer->channels()[0], - capture_buffer->num_frames())); - } - - // TODO(aluebs): Investigate if the transient suppression placement should - // be before or after the AGC. - if (submodules_.transient_suppressor) { - float voice_probability = - submodules_.agc_manager.get() - ? 
submodules_.agc_manager->voice_probability() - : 1.f; - - submodules_.transient_suppressor->Suppress( - capture_buffer->channels()[0], capture_buffer->num_frames(), - capture_buffer->num_channels(), - capture_buffer->split_bands_const(0)[kBand0To8kHz], - capture_buffer->num_frames_per_band(), - capture_.keyboard_info.keyboard_data, - capture_.keyboard_info.num_keyboard_frames, voice_probability, - capture_.key_pressed); - } - - // Experimental APM sub-module that analyzes |capture_buffer|. - if (submodules_.capture_analyzer) { - submodules_.capture_analyzer->Analyze(capture_buffer); - } - - if (submodules_.gain_controller2) { - submodules_.gain_controller2->NotifyAnalogLevel( - recommended_stream_analog_level_locked()); - submodules_.gain_controller2->Process(capture_buffer); - } - - if (submodules_.capture_post_processor) { - submodules_.capture_post_processor->Process(capture_buffer); - } - - // The level estimator operates on the recombined data. - if (config_.level_estimation.enabled) { - submodules_.output_level_estimator->ProcessStream(*capture_buffer); - capture_.stats.output_rms_dbfs = - submodules_.output_level_estimator->RMS(); - } - - capture_output_rms_.Analyze(rtc::ArrayView( - capture_buffer->channels_const()[0], - capture_nonlocked_.capture_processing_format.num_frames())); - if (log_rms) { - RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak(); - RTC_HISTOGRAM_COUNTS_LINEAR( - "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1, - RmsLevel::kMinLevelDb, 64); - RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms", - levels.peak, 1, RmsLevel::kMinLevelDb, 64); - } - - if (submodules_.agc_manager) { - int level = recommended_stream_analog_level_locked(); - data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1, - &level); - } - - // Compute echo-detector stats. 
- if (config_.residual_echo_detector.enabled) { - RTC_DCHECK(submodules_.echo_detector); - auto ed_metrics = submodules_.echo_detector->GetMetrics(); - capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood; - capture_.stats.residual_echo_likelihood_recent_max = - ed_metrics.echo_likelihood_recent_max; - } + if (submodule_states_.CaptureMultiBandProcessingPresent() && + SampleRateSupportsMultiBand( + capture_nonlocked_.capture_processing_format.sample_rate_hz())) { + capture_buffer->MergeFrequencyBands(); } - // Compute echo-controller stats. + if (capture_.capture_fullband_audio) { + const auto& ec = submodules_.echo_controller; + bool ec_active = ec ? ec->ActiveProcessing() : false; + // Only update the fullband buffer if the multiband processing has changed + // the signal. Keep the original signal otherwise. + if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) { + capture_buffer->CopyTo(capture_.capture_fullband_audio.get()); + } + capture_buffer = capture_.capture_fullband_audio.get(); + } + + if (config_.residual_echo_detector.enabled) { + RTC_DCHECK(submodules_.echo_detector); + submodules_.echo_detector->AnalyzeCaptureAudio(rtc::ArrayView( + capture_buffer->channels()[0], capture_buffer->num_frames())); + } + + // TODO(aluebs): Investigate if the transient suppression placement should be + // before or after the AGC. + if (submodules_.transient_suppressor) { + float voice_probability = submodules_.agc_manager.get() + ? submodules_.agc_manager->voice_probability() + : 1.f; + + submodules_.transient_suppressor->Suppress( + capture_buffer->channels()[0], capture_buffer->num_frames(), + capture_buffer->num_channels(), + capture_buffer->split_bands_const(0)[kBand0To8kHz], + capture_buffer->num_frames_per_band(), + capture_.keyboard_info.keyboard_data, + capture_.keyboard_info.num_keyboard_frames, voice_probability, + capture_.key_pressed); + } + + // Experimental APM sub-module that analyzes |capture_buffer|. 
+ if (submodules_.capture_analyzer) { + submodules_.capture_analyzer->Analyze(capture_buffer); + } + + if (submodules_.gain_controller2) { + submodules_.gain_controller2->NotifyAnalogLevel( + recommended_stream_analog_level_locked()); + submodules_.gain_controller2->Process(capture_buffer); + } + + if (submodules_.capture_post_processor) { + submodules_.capture_post_processor->Process(capture_buffer); + } + + // The level estimator operates on the recombined data. + if (config_.level_estimation.enabled) { + submodules_.output_level_estimator->ProcessStream(*capture_buffer); + capture_.stats.output_rms_dbfs = submodules_.output_level_estimator->RMS(); + } else { + capture_.stats.output_rms_dbfs = absl::nullopt; + } + + capture_output_rms_.Analyze(rtc::ArrayView( + capture_buffer->channels_const()[0], + capture_nonlocked_.capture_processing_format.num_frames())); + if (log_rms) { + RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak(); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelAverageRms", + levels.average, 1, RmsLevel::kMinLevelDb, 64); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms", + levels.peak, 1, RmsLevel::kMinLevelDb, 64); + } + + if (submodules_.agc_manager) { + int level = recommended_stream_analog_level_locked(); + data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1, + &level); + } + + // Compute echo-related stats. 
if (submodules_.echo_controller) { auto ec_metrics = submodules_.echo_controller->GetMetrics(); capture_.stats.echo_return_loss = ec_metrics.echo_return_loss; @@ -1332,6 +1314,13 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { ec_metrics.echo_return_loss_enhancement; capture_.stats.delay_ms = ec_metrics.delay_ms; } + if (config_.residual_echo_detector.enabled) { + RTC_DCHECK(submodules_.echo_detector); + auto ed_metrics = submodules_.echo_detector->GetMetrics(); + capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood; + capture_.stats.residual_echo_likelihood_recent_max = + ed_metrics.echo_likelihood_recent_max; + } // Pass stats for reporting. stats_reporter_.UpdateStatistics(capture_.stats); diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index c4bbf11646..8306ac7502 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -419,17 +419,13 @@ class AudioProcessingImpl : public AudioProcessing { const struct ApmConstants { ApmConstants(bool multi_channel_render_support, bool multi_channel_capture_support, - bool enforce_split_band_hpf, - bool minimize_processing_for_unused_output) + bool enforce_split_band_hpf) : multi_channel_render_support(multi_channel_render_support), multi_channel_capture_support(multi_channel_capture_support), - enforce_split_band_hpf(enforce_split_band_hpf), - minimize_processing_for_unused_output( - minimize_processing_for_unused_output) {} + enforce_split_band_hpf(enforce_split_band_hpf) {} bool multi_channel_render_support; bool multi_channel_capture_support; bool enforce_split_band_hpf; - bool minimize_processing_for_unused_output; } constants_; struct ApmCaptureState {