AEC3: Adaptive handling of echo path with strong high-frequency gain

This CL adds adaptive handling of platforms where the echo path has
a strong gain above 10 kHz. A configurable offset is adaptively applied
depending on the amount of echo and mode of the echo suppressor.

Bug: webrtc:9663
Change-Id: I27dde6dc23b04a76a3be8c49d7fc9e226b9137e6
Reviewed-on: https://webrtc-review.googlesource.com/95947
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24448}
This commit is contained in:
Per Åhgren
2018-08-27 14:19:35 +02:00
committed by Commit Bot
parent 01a89904c0
commit fde4aa9909
6 changed files with 114 additions and 67 deletions

View File

@ -183,6 +183,11 @@ struct EchoCanceller3Config {
int trigger_threshold = 15;
} dominant_nearend_detection;
// Tuning for the extra bound that SuppressionGain::UpperBandsGain puts on the
// gain of the bands above the first band while echo is active.
struct HighBandsSuppression {
// Multiplier on the summed comfort-noise spectrum; the bound is considered
// only when the summed echo spectrum exceeds this scaled noise sum.
float enr_threshold = 1.f;
// Upper limit for the high-bands gain when the condition above holds and the
// dominant-nearend detector is not in its nearend state. The default of 1.f
// leaves the gain unbounded by this mechanism.
float max_gain_during_echo = 1.f;
} high_bands_suppression;
float floor_first_increase = 0.00001f;
bool enforce_transparent = false;
bool enforce_empty_higher_bands = false;

View File

@ -310,7 +310,9 @@ void EchoRemoverImpl::ProcessCapture(
cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
// Compute and apply the suppression gain.
suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), E, Y,
const auto& echo_spectrum =
aec_state_.UsableLinearEstimate() ? S2_linear : R2;
suppression_gain_.GetGain(E2, echo_spectrum, R2, cng_.NoiseSpectrum(), E, Y,
render_signal_analyzer_, aec_state_, x,
&high_bands_gain, &G);

View File

@ -47,60 +47,6 @@ void AdjustForExternalFilters(std::array<float, kFftLengthBy2Plus1>* gain) {
(*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1];
}
// Returns the single gain factor that is applied to every band above the
// first band. The result is 1.f when there is only one band, 0.001f when a
// narrow render peak sits in the last bins of the spectrum, and otherwise the
// smallest of the relevant low-band gains and an anti-howling bound.
float UpperBandsGain(
    const absl::optional<int>& narrow_peak_band,
    bool saturated_echo,
    const std::vector<std::vector<float>>& render,
    const std::array<float, kFftLengthBy2Plus1>& low_band_gain) {
  RTC_DCHECK_LT(0, render.size());

  // Nothing to attenuate when the render signal consists of one band only.
  if (render.size() == 1) {
    return 1.f;
  }

  // A narrow peak located in the last bins of the spectrum triggers a fixed,
  // strong attenuation.
  const bool peak_near_spectrum_end =
      narrow_peak_band &&
      (*narrow_peak_band > static_cast<int>(kFftLengthBy2Plus1 - 10));
  if (peak_near_spectrum_end) {
    return 0.001f;
  }

  // Smallest low-band gain from bin kLowBandGainLimit and upwards.
  constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2;
  const auto first_considered_gain =
      low_band_gain.begin() + kLowBandGainLimit;
  const float min_low_band_gain =
      *std::min_element(first_considered_gain, low_band_gain.end());

  // Saturated echo always results in attenuated upper bands.
  if (saturated_echo) {
    return std::min(0.001f, min_low_band_gain);
  }

  // Energy (sum of squared samples) of one band of the render signal.
  const auto band_energy = [](const std::vector<float>& band) {
    float energy = 0.f;
    for (const float sample : band) {
      energy = energy + sample * sample;
    }
    return energy;
  };

  // Energy of the first band, and the largest energy among the other bands.
  const float low_band_energy = band_energy(render[0]);
  float high_band_energy = 0.f;
  for (auto band = render.begin() + 1; band != render.end(); ++band) {
    high_band_energy = std::max(high_band_energy, band_energy(*band));
  }

  // The gain is left unbounded (1.f) while the upper bands carry less energy
  // than both the first band and a fixed threshold. In every other case the
  // bound shrinks with the ratio between the low and high band energies.
  constexpr float kThreshold = kBlockSize * 10.f * 10.f / 4.f;
  float anti_howling_gain = 1.f;
  if (!(high_band_energy < std::max(low_band_energy, kThreshold))) {
    RTC_DCHECK_LE(low_band_energy, high_band_energy);
    RTC_DCHECK_NE(0.f, high_band_energy);
    anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy);
  }

  // The most restrictive of the two candidates wins.
  return std::min(min_low_band_gain, anti_howling_gain);
}
// Scales the echo according to assessed audibility at the other end.
void WeightEchoForAudibility(const EchoCanceller3Config& config,
rtc::ArrayView<const float> echo,
@ -218,6 +164,75 @@ void AdjustNonConvergedFrequencies(
int SuppressionGain::instance_count_ = 0;
// Returns the single gain factor that is applied to every band above the
// first band. The result is 1.f when there is only one band, 0.001f when a
// narrow render peak sits in the last bins of the spectrum, and otherwise the
// smallest of: the relevant low-band gains, an anti-howling bound, and a
// configurable bound that is active during significant echo.
float SuppressionGain::UpperBandsGain(
    const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
    const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
    const absl::optional<int>& narrow_peak_band,
    bool saturated_echo,
    const std::vector<std::vector<float>>& render,
    const std::array<float, kFftLengthBy2Plus1>& low_band_gain) const {
  RTC_DCHECK_LT(0, render.size());

  // Nothing to attenuate when the render signal consists of one band only.
  if (render.size() == 1) {
    return 1.f;
  }

  // A narrow peak located in the last bins of the spectrum triggers a fixed,
  // strong attenuation.
  const bool peak_near_spectrum_end =
      narrow_peak_band &&
      (*narrow_peak_band > static_cast<int>(kFftLengthBy2Plus1 - 10));
  if (peak_near_spectrum_end) {
    return 0.001f;
  }

  // Smallest low-band gain from bin kLowBandGainLimit and upwards.
  constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2;
  const auto first_considered_gain =
      low_band_gain.begin() + kLowBandGainLimit;
  const float min_low_band_gain =
      *std::min_element(first_considered_gain, low_band_gain.end());

  // Saturated echo always results in attenuated upper bands.
  if (saturated_echo) {
    return std::min(0.001f, min_low_band_gain);
  }

  // Energy (sum of squared samples) of one band of the render signal.
  const auto band_energy = [](const std::vector<float>& band) {
    float energy = 0.f;
    for (const float sample : band) {
      energy = energy + sample * sample;
    }
    return energy;
  };

  // Energy of the first band, and the largest energy among the other bands.
  const float low_band_energy = band_energy(render[0]);
  float high_band_energy = 0.f;
  for (auto band = render.begin() + 1; band != render.end(); ++band) {
    high_band_energy = std::max(high_band_energy, band_energy(*band));
  }

  // The gain is left unbounded (1.f) while the upper bands carry less energy
  // than both the first band and a fixed threshold. In every other case the
  // bound shrinks with the ratio between the low and high band energies.
  constexpr float kThreshold = kBlockSize * 10.f * 10.f / 4.f;
  float anti_howling_gain = 1.f;
  if (!(high_band_energy < std::max(low_band_energy, kThreshold))) {
    RTC_DCHECK_LE(low_band_energy, high_band_energy);
    RTC_DCHECK_NE(0.f, high_band_energy);
    anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy);
  }

  // Sum of spectrum bins 1..15, used as the low-frequency energy measure.
  auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
    RTC_DCHECK_LE(16, spectrum.size());
    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
  };
  const float echo_sum = low_frequency_energy(echo_spectrum);
  const float noise_sum = low_frequency_energy(comfort_noise_spectrum);

  // Apply the configured bound only when the echo exceeds the scaled noise
  // level and the suppressor is not in its nearend state.
  const auto& cfg = config_.suppressor.high_bands_suppression;
  const float gain_bound =
      (echo_sum > cfg.enr_threshold * noise_sum &&
       !dominant_nearend_detector_.IsNearendState())
          ? cfg.max_gain_during_echo
          : 1.f;

  // The most restrictive of the three candidates wins.
  const float spectral_bound = std::min(min_low_band_gain, anti_howling_gain);
  return std::min(spectral_bound, gain_bound);
}
// Computes the gain to reduce the echo to a non audible level.
void SuppressionGain::GainToNoAudibleEcho(
const std::array<float, kFftLengthBy2Plus1>& nearend,
@ -357,6 +372,7 @@ SuppressionGain::~SuppressionGain() = default;
void SuppressionGain::GetGain(
const std::array<float, kFftLengthBy2Plus1>& nearend_spectrum,
const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
const std::array<float, kFftLengthBy2Plus1>& residual_echo_spectrum,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
const FftData& linear_aec_fft,
const FftData& capture_fft,
@ -379,15 +395,15 @@ void SuppressionGain::GetGain(
moving_average_.Average(nearend_spectrum, nearend_average);
// Update the state selection.
dominant_nearend_detector_.Update(nearend_spectrum, echo_spectrum,
dominant_nearend_detector_.Update(nearend_spectrum, residual_echo_spectrum,
comfort_noise_spectrum);
// Compute gain for the lower band.
bool low_noise_render = low_render_detector_.Detect(render);
const absl::optional<int> narrow_peak_band =
render_signal_analyzer.NarrowPeakBand();
LowerBandGain(low_noise_render, aec_state, nearend_average, echo_spectrum,
comfort_noise_spectrum, low_band_gain);
LowerBandGain(low_noise_render, aec_state, nearend_average,
residual_echo_spectrum, comfort_noise_spectrum, low_band_gain);
// Limit the gain of the lower bands during start up and after resets.
const float gain_upper_bound = aec_state.SuppressionGainLimit();
@ -398,8 +414,9 @@ void SuppressionGain::GetGain(
}
// Compute the gain for the upper bands.
*high_bands_gain = UpperBandsGain(narrow_peak_band, aec_state.SaturatedEcho(),
render, *low_band_gain);
*high_bands_gain =
UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band,
aec_state.SaturatedEcho(), render, *low_band_gain);
if (cfg.enforce_empty_higher_bands) {
*high_bands_gain = 0.f;
}
@ -442,14 +459,14 @@ SuppressionGain::DominantNearendDetector::DominantNearendDetector(
void SuppressionGain::DominantNearendDetector::Update(
rtc::ArrayView<const float> nearend_spectrum,
rtc::ArrayView<const float> echo_spectrum,
rtc::ArrayView<const float> residual_echo_spectrum,
rtc::ArrayView<const float> comfort_noise_spectrum) {
auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
RTC_DCHECK_LE(16, spectrum.size());
return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
};
const float ne_sum = low_frequency_energy(nearend_spectrum);
const float echo_sum = low_frequency_energy(echo_spectrum);
const float echo_sum = low_frequency_energy(residual_echo_spectrum);
const float noise_sum = low_frequency_energy(comfort_noise_spectrum);
// Detect strong active nearend if the nearend is sufficiently stronger than

View File

@ -32,6 +32,7 @@ class SuppressionGain {
void GetGain(
const std::array<float, kFftLengthBy2Plus1>& nearend_spectrum,
const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
const std::array<float, kFftLengthBy2Plus1>& residual_echo_spectrum,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
const FftData& linear_aec_fft,
const FftData& capture_fft,
@ -45,6 +46,15 @@ class SuppressionGain {
void SetInitialState(bool state);
private:
// Computes the gain to apply for the bands beyond the first band.
float UpperBandsGain(
const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
const absl::optional<int>& narrow_peak_band,
bool saturated_echo,
const std::vector<std::vector<float>>& render,
const std::array<float, kFftLengthBy2Plus1>& low_band_gain) const;
void GainToNoAudibleEcho(
const std::array<float, kFftLengthBy2Plus1>& nearend,
const std::array<float, kFftLengthBy2Plus1>& echo,
@ -80,7 +90,7 @@ class SuppressionGain {
// Updates the state selection based on latest spectral estimates.
void Update(rtc::ArrayView<const float> nearend_spectrum,
rtc::ArrayView<const float> echo_spectrum,
rtc::ArrayView<const float> residual_echo_spectrum,
rtc::ArrayView<const float> comfort_noise_spectrum);
private:

View File

@ -28,11 +28,13 @@ namespace aec3 {
TEST(SuppressionGain, NullOutputGains) {
std::array<float, kFftLengthBy2Plus1> E2;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> S2;
std::array<float, kFftLengthBy2Plus1> N2;
FftData E;
FftData Y;
E2.fill(0.f);
R2.fill(0.f);
S2.fill(0.1f);
N2.fill(0.f);
E.re.fill(0.f);
E.im.fill(0.f);
@ -43,7 +45,7 @@ TEST(SuppressionGain, NullOutputGains) {
AecState aec_state(EchoCanceller3Config{});
EXPECT_DEATH(
SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000)
.GetGain(E2, R2, N2, E, Y,
.GetGain(E2, S2, R2, N2, E, Y,
RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state,
std::vector<std::vector<float>>(
3, std::vector<float>(kBlockSize, 0.f)),
@ -60,6 +62,7 @@ TEST(SuppressionGain, BasicGainComputation) {
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
float high_bands_gain;
std::array<float, kFftLengthBy2Plus1> E2;
std::array<float, kFftLengthBy2Plus1> S2;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> N2;
@ -81,6 +84,7 @@ TEST(SuppressionGain, BasicGainComputation) {
E2.fill(10.f);
Y2.fill(10.f);
R2.fill(0.1f);
S2.fill(0.1f);
N2.fill(100.f);
output.Reset();
y.fill(0.f);
@ -102,7 +106,7 @@ TEST(SuppressionGain, BasicGainComputation) {
subtractor.FilterImpulseResponse(),
*render_delay_buffer->GetRenderBuffer(), E2, Y2, output,
y);
suppression_gain.GetGain(E2, R2, N2, E, Y, analyzer, aec_state, x,
suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
&high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
@ -112,6 +116,7 @@ TEST(SuppressionGain, BasicGainComputation) {
E2.fill(100.f);
Y2.fill(100.f);
R2.fill(0.1f);
S2.fill(0.1f);
N2.fill(0.f);
E.re.fill(sqrtf(E2[0]));
Y.re.fill(sqrtf(Y2[0]));
@ -121,7 +126,7 @@ TEST(SuppressionGain, BasicGainComputation) {
subtractor.FilterImpulseResponse(),
*render_delay_buffer->GetRenderBuffer(), E2, Y2, output,
y);
suppression_gain.GetGain(E2, R2, N2, E, Y, analyzer, aec_state, x,
suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
&high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),
@ -133,7 +138,7 @@ TEST(SuppressionGain, BasicGainComputation) {
E.re.fill(sqrtf(E2[0]));
for (int k = 0; k < 10; ++k) {
suppression_gain.GetGain(E2, R2, N2, E, Y, analyzer, aec_state, x,
suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
&high_bands_gain, &g);
}
std::for_each(g.begin(), g.end(),

View File

@ -367,6 +367,14 @@ class Aec3ParametersParser {
&cfg.suppressor.dominant_nearend_detection.trigger_threshold);
}
if (rtc::GetValueFromJsonObject(section, "high_bands_suppression",
&subsection)) {
ReadParam(subsection, "enr_threshold",
&cfg.suppressor.high_bands_suppression.enr_threshold);
ReadParam(subsection, "max_gain_during_echo",
&cfg.suppressor.high_bands_suppression.max_gain_during_echo);
}
ReadParam(section, "floor_first_increase",
&cfg.suppressor.floor_first_increase);
ReadParam(section, "enforce_transparent",