From fde4aa9909f12f8d1a331e03a442ac96ceb6f88b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Mon, 27 Aug 2018 14:19:35 +0200 Subject: [PATCH] AEC3: Adaptive handling of echo path with strong high-frequency gain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL adds adaptive handling of platforms where the echo path has a strong gain above 10 kHz. A configurable offset is adaptively applied depending on the amount of echo and mode of the echo suppressor. Bug: webrtc:9663 Change-Id: I27dde6dc23b04a76a3be8c49d7fc9e226b9137e6 Reviewed-on: https://webrtc-review.googlesource.com/95947 Commit-Queue: Per Åhgren Reviewed-by: Gustaf Ullberg Cr-Commit-Position: refs/heads/master@{#24448} --- api/audio/echo_canceller3_config.h | 5 + modules/audio_processing/aec3/echo_remover.cc | 4 +- .../audio_processing/aec3/suppression_gain.cc | 139 ++++++++++-------- .../audio_processing/aec3/suppression_gain.h | 12 +- .../aec3/suppression_gain_unittest.cc | 13 +- .../test/audio_processing_simulator.cc | 8 + 6 files changed, 114 insertions(+), 67 deletions(-) diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 9a1510b138..67fe755c33 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -183,6 +183,11 @@ struct EchoCanceller3Config { int trigger_threshold = 15; } dominant_nearend_detection; + struct HighBandsSuppression { + float enr_threshold = 1.f; + float max_gain_during_echo = 1.f; + } high_bands_suppression; + float floor_first_increase = 0.00001f; bool enforce_transparent = false; bool enforce_empty_higher_bands = false; diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 5a91e8d608..b916880736 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -310,7 +310,9 @@ void EchoRemoverImpl::ProcessCapture( 
cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise); // Compute and apply the suppression gain. - suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), E, Y, + const auto& echo_spectrum = + aec_state_.UsableLinearEstimate() ? S2_linear : R2; + suppression_gain_.GetGain(E2, echo_spectrum, R2, cng_.NoiseSpectrum(), E, Y, render_signal_analyzer_, aec_state_, x, &high_bands_gain, &G); diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc index 2d88ab2863..c389a6a13e 100644 --- a/modules/audio_processing/aec3/suppression_gain.cc +++ b/modules/audio_processing/aec3/suppression_gain.cc @@ -47,60 +47,6 @@ void AdjustForExternalFilters(std::array* gain) { (*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1]; } -// Computes the gain to apply for the bands beyond the first band. -float UpperBandsGain( - const absl::optional& narrow_peak_band, - bool saturated_echo, - const std::vector>& render, - const std::array& low_band_gain) { - RTC_DCHECK_LT(0, render.size()); - if (render.size() == 1) { - return 1.f; - } - - if (narrow_peak_band && - (*narrow_peak_band > static_cast(kFftLengthBy2Plus1 - 10))) { - return 0.001f; - } - - constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2; - const float gain_below_8_khz = *std::min_element( - low_band_gain.begin() + kLowBandGainLimit, low_band_gain.end()); - - // Always attenuate the upper bands when there is saturated echo. - if (saturated_echo) { - return std::min(0.001f, gain_below_8_khz); - } - - // Compute the upper and lower band energies. 
- const auto sum_of_squares = [](float a, float b) { return a + b * b; }; - const float low_band_energy = - std::accumulate(render[0].begin(), render[0].end(), 0.f, sum_of_squares); - float high_band_energy = 0.f; - for (size_t k = 1; k < render.size(); ++k) { - const float energy = std::accumulate(render[k].begin(), render[k].end(), - 0.f, sum_of_squares); - high_band_energy = std::max(high_band_energy, energy); - } - - // If there is more power in the lower frequencies than the upper frequencies, - // or if the power in upper frequencies is low, do not bound the gain in the - // upper bands. - float anti_howling_gain; - constexpr float kThreshold = kBlockSize * 10.f * 10.f / 4.f; - if (high_band_energy < std::max(low_band_energy, kThreshold)) { - anti_howling_gain = 1.f; - } else { - // In all other cases, bound the gain for upper frequencies. - RTC_DCHECK_LE(low_band_energy, high_band_energy); - RTC_DCHECK_NE(0.f, high_band_energy); - anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy); - } - - // Choose the gain as the minimum of the lower and upper gains. - return std::min(gain_below_8_khz, anti_howling_gain); -} - // Scales the echo according to assessed audibility at the other end. 
void WeightEchoForAudibility(const EchoCanceller3Config& config, rtc::ArrayView echo, @@ -218,6 +164,75 @@ void AdjustNonConvergedFrequencies( int SuppressionGain::instance_count_ = 0; +float SuppressionGain::UpperBandsGain( + const std::array& echo_spectrum, + const std::array& comfort_noise_spectrum, + const absl::optional& narrow_peak_band, + bool saturated_echo, + const std::vector>& render, + const std::array& low_band_gain) const { + RTC_DCHECK_LT(0, render.size()); + if (render.size() == 1) { + return 1.f; + } + + if (narrow_peak_band && + (*narrow_peak_band > static_cast(kFftLengthBy2Plus1 - 10))) { + return 0.001f; + } + + constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2; + const float gain_below_8_khz = *std::min_element( + low_band_gain.begin() + kLowBandGainLimit, low_band_gain.end()); + + // Always attenuate the upper bands when there is saturated echo. + if (saturated_echo) { + return std::min(0.001f, gain_below_8_khz); + } + + // Compute the upper and lower band energies. + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + const float low_band_energy = + std::accumulate(render[0].begin(), render[0].end(), 0.f, sum_of_squares); + float high_band_energy = 0.f; + for (size_t k = 1; k < render.size(); ++k) { + const float energy = std::accumulate(render[k].begin(), render[k].end(), + 0.f, sum_of_squares); + high_band_energy = std::max(high_band_energy, energy); + } + + // If there is more power in the lower frequencies than the upper frequencies, + // or if the power in upper frequencies is low, do not bound the gain in the + // upper bands. + float anti_howling_gain; + constexpr float kThreshold = kBlockSize * 10.f * 10.f / 4.f; + if (high_band_energy < std::max(low_band_energy, kThreshold)) { + anti_howling_gain = 1.f; + } else { + // In all other cases, bound the gain for upper frequencies. 
+ RTC_DCHECK_LE(low_band_energy, high_band_energy); + RTC_DCHECK_NE(0.f, high_band_energy); + anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy); + } + + // Bound the upper gain during significant echo activity. + auto low_frequency_energy = [](rtc::ArrayView spectrum) { + RTC_DCHECK_LE(16, spectrum.size()); + return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f); + }; + const float echo_sum = low_frequency_energy(echo_spectrum); + const float noise_sum = low_frequency_energy(comfort_noise_spectrum); + const auto& cfg = config_.suppressor.high_bands_suppression; + float gain_bound = 1.f; + if (echo_sum > cfg.enr_threshold * noise_sum && + !dominant_nearend_detector_.IsNearendState()) { + gain_bound = cfg.max_gain_during_echo; + } + + // Choose the gain as the minimum of the lower and upper gains. + return std::min(std::min(gain_below_8_khz, anti_howling_gain), gain_bound); +} + // Computes the gain to reduce the echo to a non audible level. void SuppressionGain::GainToNoAudibleEcho( const std::array& nearend, @@ -357,6 +372,7 @@ SuppressionGain::~SuppressionGain() = default; void SuppressionGain::GetGain( const std::array& nearend_spectrum, const std::array& echo_spectrum, + const std::array& residual_echo_spectrum, const std::array& comfort_noise_spectrum, const FftData& linear_aec_fft, const FftData& capture_fft, @@ -379,15 +395,15 @@ void SuppressionGain::GetGain( moving_average_.Average(nearend_spectrum, nearend_average); // Update the state selection. - dominant_nearend_detector_.Update(nearend_spectrum, echo_spectrum, + dominant_nearend_detector_.Update(nearend_spectrum, residual_echo_spectrum, comfort_noise_spectrum); // Compute gain for the lower band. 
bool low_noise_render = low_render_detector_.Detect(render); const absl::optional narrow_peak_band = render_signal_analyzer.NarrowPeakBand(); - LowerBandGain(low_noise_render, aec_state, nearend_average, echo_spectrum, - comfort_noise_spectrum, low_band_gain); + LowerBandGain(low_noise_render, aec_state, nearend_average, + residual_echo_spectrum, comfort_noise_spectrum, low_band_gain); // Limit the gain of the lower bands during start up and after resets. const float gain_upper_bound = aec_state.SuppressionGainLimit(); @@ -398,8 +414,9 @@ void SuppressionGain::GetGain( } // Compute the gain for the upper bands. - *high_bands_gain = UpperBandsGain(narrow_peak_band, aec_state.SaturatedEcho(), - render, *low_band_gain); + *high_bands_gain = + UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band, + aec_state.SaturatedEcho(), render, *low_band_gain); if (cfg.enforce_empty_higher_bands) { *high_bands_gain = 0.f; } @@ -442,14 +459,14 @@ SuppressionGain::DominantNearendDetector::DominantNearendDetector( void SuppressionGain::DominantNearendDetector::Update( rtc::ArrayView nearend_spectrum, - rtc::ArrayView echo_spectrum, + rtc::ArrayView residual_echo_spectrum, rtc::ArrayView comfort_noise_spectrum) { auto low_frequency_energy = [](rtc::ArrayView spectrum) { RTC_DCHECK_LE(16, spectrum.size()); return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f); }; const float ne_sum = low_frequency_energy(nearend_spectrum); - const float echo_sum = low_frequency_energy(echo_spectrum); + const float echo_sum = low_frequency_energy(residual_echo_spectrum); const float noise_sum = low_frequency_energy(comfort_noise_spectrum); // Detect strong active nearend if the nearend is sufficiently stronger than diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h index 3753711533..b8519302bd 100644 --- a/modules/audio_processing/aec3/suppression_gain.h +++ 
b/modules/audio_processing/aec3/suppression_gain.h @@ -32,6 +32,7 @@ class SuppressionGain { void GetGain( const std::array& nearend_spectrum, const std::array& echo_spectrum, + const std::array& residual_echo_spectrum, const std::array& comfort_noise_spectrum, const FftData& linear_aec_fft, const FftData& capture_fft, @@ -45,6 +46,15 @@ class SuppressionGain { void SetInitialState(bool state); private: + // Computes the gain to apply for the bands beyond the first band. + float UpperBandsGain( + const std::array& echo_spectrum, + const std::array& comfort_noise_spectrum, + const absl::optional& narrow_peak_band, + bool saturated_echo, + const std::vector>& render, + const std::array& low_band_gain) const; + void GainToNoAudibleEcho( const std::array& nearend, const std::array& echo, @@ -80,7 +90,7 @@ class SuppressionGain { // Updates the state selection based on latest spectral estimates. void Update(rtc::ArrayView nearend_spectrum, - rtc::ArrayView echo_spectrum, + rtc::ArrayView residual_echo_spectrum, rtc::ArrayView comfort_noise_spectrum); private: diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc index 4961e598ac..ef31371fc8 100644 --- a/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -28,11 +28,13 @@ namespace aec3 { TEST(SuppressionGain, NullOutputGains) { std::array E2; std::array R2; + std::array S2; std::array N2; FftData E; FftData Y; E2.fill(0.f); R2.fill(0.f); + S2.fill(0.1f); N2.fill(0.f); E.re.fill(0.f); E.im.fill(0.f); @@ -43,7 +45,7 @@ TEST(SuppressionGain, NullOutputGains) { AecState aec_state(EchoCanceller3Config{}); EXPECT_DEATH( SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000) - .GetGain(E2, R2, N2, E, Y, + .GetGain(E2, S2, R2, N2, E, Y, RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state, std::vector>( 3, std::vector(kBlockSize, 0.f)), @@ -60,6 +62,7 @@ 
TEST(SuppressionGain, BasicGainComputation) { RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); float high_bands_gain; std::array E2; + std::array S2; std::array Y2; std::array R2; std::array N2; @@ -81,6 +84,7 @@ TEST(SuppressionGain, BasicGainComputation) { E2.fill(10.f); Y2.fill(10.f); R2.fill(0.1f); + S2.fill(0.1f); N2.fill(100.f); output.Reset(); y.fill(0.f); @@ -102,7 +106,7 @@ TEST(SuppressionGain, BasicGainComputation) { subtractor.FilterImpulseResponse(), *render_delay_buffer->GetRenderBuffer(), E2, Y2, output, y); - suppression_gain.GetGain(E2, R2, N2, E, Y, analyzer, aec_state, x, + suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), @@ -112,6 +116,7 @@ TEST(SuppressionGain, BasicGainComputation) { E2.fill(100.f); Y2.fill(100.f); R2.fill(0.1f); + S2.fill(0.1f); N2.fill(0.f); E.re.fill(sqrtf(E2[0])); Y.re.fill(sqrtf(Y2[0])); @@ -121,7 +126,7 @@ TEST(SuppressionGain, BasicGainComputation) { subtractor.FilterImpulseResponse(), *render_delay_buffer->GetRenderBuffer(), E2, Y2, output, y); - suppression_gain.GetGain(E2, R2, N2, E, Y, analyzer, aec_state, x, + suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), @@ -133,7 +138,7 @@ TEST(SuppressionGain, BasicGainComputation) { E.re.fill(sqrtf(E2[0])); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, E, Y, analyzer, aec_state, x, + suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index 9fea4080cd..856469d1d9 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -367,6 +367,14 @@ class Aec3ParametersParser { 
&cfg.suppressor.dominant_nearend_detection.trigger_threshold); } + if (rtc::GetValueFromJsonObject(section, "high_bands_suppression", + &subsection)) { + ReadParam(subsection, "enr_threshold", + &cfg.suppressor.high_bands_suppression.enr_threshold); + ReadParam(subsection, "max_gain_during_echo", + &cfg.suppressor.high_bands_suppression.max_gain_during_echo); + } + ReadParam(section, "floor_first_increase", &cfg.suppressor.floor_first_increase); ReadParam(section, "enforce_transparent",