Correct and soften the AEC3 handling of saturated mic signals

This CL changes the handling of saturated microphone signals in AEC3.

The changes include:
- Make the detection of saturated echoes depend on the echo path gain
  estimate.
- Remove redundant code related to echo saturation.
- Correct the computation of residual echoes when the echo is saturated.
- Soften the echo removal during echo saturation.

Bug: webrtc:9119
Change-Id: I5cb11cd449de552ab670beeb24ed8112f8beb734
Reviewed-on: https://webrtc-review.googlesource.com/67220
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22809}
This commit is contained in:
Per Åhgren
2018-04-10 16:33:55 +02:00
committed by Commit Bot
parent 342695d068
commit 31122d6c5f
7 changed files with 42 additions and 63 deletions

View File

@ -62,9 +62,9 @@ struct EchoCanceller3Config {
} erle; } erle;
struct EpStrength { struct EpStrength {
float lf = 2.f; float lf = 10.f;
float mf = 2.f; float mf = 10.f;
float hf = 2.f; float hf = 10.f;
float default_len = 0.f; float default_len = 0.f;
bool echo_can_saturate = true; bool echo_can_saturate = true;
bool bounded_erl = false; bool bounded_erl = false;

View File

@ -59,7 +59,6 @@ void AecState::HandleEchoPathChange(
usable_linear_estimate_ = false; usable_linear_estimate_ = false;
capture_signal_saturation_ = false; capture_signal_saturation_ = false;
echo_saturation_ = false; echo_saturation_ = false;
previous_max_sample_ = 0.f;
std::fill(max_render_.begin(), max_render_.end(), 0.f); std::fill(max_render_.begin(), max_render_.end(), 0.f);
blocks_with_proper_filter_adaptation_ = 0; blocks_with_proper_filter_adaptation_ = 0;
blocks_since_reset_ = 0; blocks_since_reset_ = 0;
@ -144,7 +143,7 @@ void AecState::Update(
// TODO(peah): Add the delay in this computation to ensure that the render and // TODO(peah): Add the delay in this computation to ensure that the render and
// capture signals are properly aligned. // capture signals are properly aligned.
if (config_.ep_strength.echo_can_saturate) { if (config_.ep_strength.echo_can_saturate) {
echo_saturation_ = DetectEchoSaturation(x); echo_saturation_ = DetectEchoSaturation(x, EchoPathGain());
} }
bool filter_has_had_time_to_converge = bool filter_has_had_time_to_converge =
@ -458,19 +457,22 @@ bool AecState::DetectActiveRender(rtc::ArrayView<const float> x) const {
kFftLengthBy2; kFftLengthBy2;
} }
bool AecState::DetectEchoSaturation(rtc::ArrayView<const float> x) { bool AecState::DetectEchoSaturation(rtc::ArrayView<const float> x,
float echo_path_gain) {
RTC_DCHECK_LT(0, x.size()); RTC_DCHECK_LT(0, x.size());
const float max_sample = fabs(*std::max_element( const float max_sample = fabs(*std::max_element(
x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
previous_max_sample_ = max_sample;
// Set flag for potential presence of saturated echo // Set flag for potential presence of saturated echo
blocks_since_last_saturation_ = const float kMargin = 10.f;
previous_max_sample_ > 200.f && SaturatedCapture() float peak_echo_amplitude = max_sample * echo_path_gain * kMargin;
? 0 if (SaturatedCapture() && peak_echo_amplitude > 32000) {
: blocks_since_last_saturation_ + 1; blocks_since_last_saturation_ = 0;
} else {
++blocks_since_last_saturation_;
}
return blocks_since_last_saturation_ < 20; return blocks_since_last_saturation_ < 5;
} }
} // namespace webrtc } // namespace webrtc

View File

@ -81,9 +81,6 @@ class AecState {
// Returns whether the echo signal is saturated. // Returns whether the echo signal is saturated.
bool SaturatedEcho() const { return echo_saturation_; } bool SaturatedEcho() const { return echo_saturation_; }
// Returns whether the echo path can saturate.
bool SaturatingEchoPath() const { return saturating_echo_path_; }
// Updates the capture signal saturation. // Updates the capture signal saturation.
void UpdateCaptureSaturation(bool capture_signal_saturation) { void UpdateCaptureSaturation(bool capture_signal_saturation) {
capture_signal_saturation_ = capture_signal_saturation; capture_signal_saturation_ = capture_signal_saturation;
@ -127,7 +124,8 @@ class AecState {
void UpdateReverb(const std::vector<float>& impulse_response); void UpdateReverb(const std::vector<float>& impulse_response);
bool DetectActiveRender(rtc::ArrayView<const float> x) const; bool DetectActiveRender(rtc::ArrayView<const float> x) const;
void UpdateSuppressorGainLimit(bool render_activity); void UpdateSuppressorGainLimit(bool render_activity);
bool DetectEchoSaturation(rtc::ArrayView<const float> x); bool DetectEchoSaturation(rtc::ArrayView<const float> x,
float echo_path_gain);
static int instance_count_; static int instance_count_;
std::unique_ptr<ApmDataDumper> data_dumper_; std::unique_ptr<ApmDataDumper> data_dumper_;
@ -141,7 +139,6 @@ class AecState {
bool capture_signal_saturation_ = false; bool capture_signal_saturation_ = false;
bool echo_saturation_ = false; bool echo_saturation_ = false;
bool transparent_mode_ = false; bool transparent_mode_ = false;
float previous_max_sample_ = 0.f;
bool render_received_ = false; bool render_received_ = false;
int filter_delay_blocks_ = 0; int filter_delay_blocks_ = 0;
size_t blocks_since_last_saturation_ = 1000; size_t blocks_since_last_saturation_ = 1000;
@ -158,7 +155,6 @@ class AecState {
const EchoCanceller3Config config_; const EchoCanceller3Config config_;
std::vector<float> max_render_; std::vector<float> max_render_;
float reverb_decay_ = fabsf(config_.ep_strength.default_len); float reverb_decay_ = fabsf(config_.ep_strength.default_len);
bool saturating_echo_path_ = false;
bool filter_has_had_time_to_converge_ = false; bool filter_has_had_time_to_converge_ = false;
bool initial_state_ = true; bool initial_state_ = true;
const float gain_rampup_increase_; const float gain_rampup_increase_;

View File

@ -38,16 +38,10 @@ void ResidualEchoEstimator::Estimate(
// Estimate the residual echo power. // Estimate the residual echo power.
if (aec_state.UsableLinearEstimate()) { if (aec_state.UsableLinearEstimate()) {
LinearEstimate(S2_linear, aec_state.Erle(), aec_state.FilterDelayBlocks(), RTC_DCHECK(!aec_state.SaturatedEcho());
R2); LinearEstimate(S2_linear, aec_state.Erle(), R2);
AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), AddEchoReverb(S2_linear, aec_state.FilterDelayBlocks(),
aec_state.FilterDelayBlocks(), aec_state.ReverbDecay(), R2); aec_state.ReverbDecay(), R2);
// If the echo is saturated, estimate the echo power as the maximum echo
// power with a leakage factor.
if (aec_state.SaturatedEcho()) {
R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
}
} else { } else {
// Estimate the echo generating signal power. // Estimate the echo generating signal power.
std::array<float, kFftLengthBy2Plus1> X2; std::array<float, kFftLengthBy2Plus1> X2;
@ -69,15 +63,16 @@ void ResidualEchoEstimator::Estimate(
0.f, a - config_.echo_model.stationary_gate_slope * b); 0.f, a - config_.echo_model.stationary_gate_slope * b);
}); });
NonLinearEstimate(aec_state.SaturatedEcho(), aec_state.EchoPathGain(), X2, NonLinearEstimate(aec_state.EchoPathGain(), X2, Y2, R2);
Y2, R2);
// If the echo is saturated, estimate the echo power as the maximum echo
// power with a leakage factor.
if (aec_state.SaturatedEcho()) { if (aec_state.SaturatedEcho()) {
// TODO(peah): Modify to make sense theoretically. R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
AddEchoReverb(*R2, aec_state.SaturatedEcho(),
config_.filter.main.length_blocks, aec_state.ReverbDecay(),
R2);
} }
AddEchoReverb(*R2, config_.filter.main.length_blocks,
aec_state.ReverbDecay(), R2);
} }
// If the echo is deemed inaudible, set the residual echo to zero. // If the echo is deemed inaudible, set the residual echo to zero.
@ -104,7 +99,6 @@ void ResidualEchoEstimator::Reset() {
void ResidualEchoEstimator::LinearEstimate( void ResidualEchoEstimator::LinearEstimate(
const std::array<float, kFftLengthBy2Plus1>& S2_linear, const std::array<float, kFftLengthBy2Plus1>& S2_linear,
const std::array<float, kFftLengthBy2Plus1>& erle, const std::array<float, kFftLengthBy2Plus1>& erle,
size_t delay,
std::array<float, kFftLengthBy2Plus1>* R2) { std::array<float, kFftLengthBy2Plus1>* R2) {
std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10.f); std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10.f);
std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(), std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
@ -115,17 +109,15 @@ void ResidualEchoEstimator::LinearEstimate(
} }
void ResidualEchoEstimator::NonLinearEstimate( void ResidualEchoEstimator::NonLinearEstimate(
bool saturated_echo,
float echo_path_gain, float echo_path_gain,
const std::array<float, kFftLengthBy2Plus1>& X2, const std::array<float, kFftLengthBy2Plus1>& X2,
const std::array<float, kFftLengthBy2Plus1>& Y2, const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2) { std::array<float, kFftLengthBy2Plus1>* R2) {
float echo_path_gain_use = saturated_echo ? 10000.f : echo_path_gain;
// Compute preliminary residual echo. // Compute preliminary residual echo.
std::transform( std::transform(X2.begin(), X2.end(), R2->begin(), [echo_path_gain](float a) {
X2.begin(), X2.end(), R2->begin(), return a * echo_path_gain * echo_path_gain;
[echo_path_gain_use](float a) { return a * echo_path_gain_use; }); });
for (size_t k = 0; k < R2->size(); ++k) { for (size_t k = 0; k < R2->size(); ++k) {
// Update hold counter. // Update hold counter.
@ -144,7 +136,6 @@ void ResidualEchoEstimator::NonLinearEstimate(
void ResidualEchoEstimator::AddEchoReverb( void ResidualEchoEstimator::AddEchoReverb(
const std::array<float, kFftLengthBy2Plus1>& S2, const std::array<float, kFftLengthBy2Plus1>& S2,
bool saturated_echo,
size_t delay, size_t delay,
float reverb_decay_factor, float reverb_decay_factor,
std::array<float, kFftLengthBy2Plus1>* R2) { std::array<float, kFftLengthBy2Plus1>* R2) {
@ -171,12 +162,7 @@ void ResidualEchoEstimator::AddEchoReverb(
}); });
// Update the buffer of old echo powers. // Update the buffer of old echo powers.
if (saturated_echo) {
S2_old_[S2_old_index_].fill((*std::max_element(S2.begin(), S2.end())) *
100.f);
} else {
std::copy(S2.begin(), S2.end(), S2_old_[S2_old_index_].begin()); std::copy(S2.begin(), S2.end(), S2_old_[S2_old_index_].begin());
}
// Add the power of the echo reverb to the residual echo power. // Add the power of the echo reverb to the residual echo power.
std::transform(R2->begin(), R2->end(), R2_reverb_.begin(), R2->begin(), std::transform(R2->begin(), R2->end(), R2_reverb_.begin(), R2->begin(),

View File

@ -43,13 +43,11 @@ class ResidualEchoEstimator {
// (ERLE) and the linear power estimate. // (ERLE) and the linear power estimate.
void LinearEstimate(const std::array<float, kFftLengthBy2Plus1>& S2_linear, void LinearEstimate(const std::array<float, kFftLengthBy2Plus1>& S2_linear,
const std::array<float, kFftLengthBy2Plus1>& erle, const std::array<float, kFftLengthBy2Plus1>& erle,
size_t delay,
std::array<float, kFftLengthBy2Plus1>* R2); std::array<float, kFftLengthBy2Plus1>* R2);
// Estimates the residual echo power based on the estimate of the echo path // Estimates the residual echo power based on the estimate of the echo path
// gain. // gain.
void NonLinearEstimate(bool saturated_echo, void NonLinearEstimate(float echo_path_gain,
float echo_path_gain,
const std::array<float, kFftLengthBy2Plus1>& X2, const std::array<float, kFftLengthBy2Plus1>& X2,
const std::array<float, kFftLengthBy2Plus1>& Y2, const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2); std::array<float, kFftLengthBy2Plus1>* R2);
@ -57,7 +55,6 @@ class ResidualEchoEstimator {
// Adds the estimated unmodelled echo power to the residual echo power // Adds the estimated unmodelled echo power to the residual echo power
// estimate. // estimate.
void AddEchoReverb(const std::array<float, kFftLengthBy2Plus1>& S2, void AddEchoReverb(const std::array<float, kFftLengthBy2Plus1>& S2,
bool saturated_echo,
size_t delay, size_t delay,
float reverb_decay_factor, float reverb_decay_factor,
std::array<float, kFftLengthBy2Plus1>* R2); std::array<float, kFftLengthBy2Plus1>* R2);

View File

@ -1,3 +1,4 @@
/* /*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
* *
@ -117,7 +118,6 @@ void GainToNoAudibleEcho(
const EchoCanceller3Config& config, const EchoCanceller3Config& config,
bool low_noise_render, bool low_noise_render,
bool saturated_echo, bool saturated_echo,
bool saturating_echo_path,
bool linear_echo_estimate, bool linear_echo_estimate,
const std::array<float, kFftLengthBy2Plus1>& nearend, const std::array<float, kFftLengthBy2Plus1>& nearend,
const std::array<float, kFftLengthBy2Plus1>& echo, const std::array<float, kFftLengthBy2Plus1>& echo,
@ -224,12 +224,8 @@ void SuppressionGain::LowerBandGain(
const std::array<float, kFftLengthBy2Plus1>& comfort_noise, const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
std::array<float, kFftLengthBy2Plus1>* gain) { std::array<float, kFftLengthBy2Plus1>* gain) {
const bool saturated_echo = aec_state.SaturatedEcho(); const bool saturated_echo = aec_state.SaturatedEcho();
const bool saturating_echo_path = aec_state.SaturatingEchoPath();
const bool linear_echo_estimate = aec_state.UsableLinearEstimate(); const bool linear_echo_estimate = aec_state.UsableLinearEstimate();
// Count the number of blocks since saturation.
no_saturation_counter_ = saturated_echo ? 0 : no_saturation_counter_ + 1;
// Precompute 1/echo (note that when the echo is zero, the precomputed value // Precompute 1/echo (note that when the echo is zero, the precomputed value
// is never used). // is never used).
std::array<float, kFftLengthBy2Plus1> one_by_echo; std::array<float, kFftLengthBy2Plus1> one_by_echo;
@ -242,7 +238,7 @@ void SuppressionGain::LowerBandGain(
const float min_echo_power = const float min_echo_power =
low_noise_render ? config_.echo_audibility.low_render_limit low_noise_render ? config_.echo_audibility.low_render_limit
: config_.echo_audibility.normal_render_limit; : config_.echo_audibility.normal_render_limit;
if (no_saturation_counter_ > 10) { if (!saturated_echo) {
for (size_t k = 0; k < nearend.size(); ++k) { for (size_t k = 0; k < nearend.size(); ++k) {
const float denom = std::min(nearend[k], echo[k]); const float denom = std::min(nearend[k], echo[k]);
min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f; min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f;
@ -268,8 +264,8 @@ void SuppressionGain::LowerBandGain(
std::array<float, kFftLengthBy2Plus1> masker; std::array<float, kFftLengthBy2Plus1> masker;
MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker); MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker);
GainToNoAudibleEcho(config_, low_noise_render, saturated_echo, GainToNoAudibleEcho(config_, low_noise_render, saturated_echo,
saturating_echo_path, linear_echo_estimate, nearend, linear_echo_estimate, nearend, echo, masker, min_gain,
echo, masker, min_gain, max_gain, one_by_echo, gain); max_gain, one_by_echo, gain);
AdjustForExternalFilters(gain); AdjustForExternalFilters(gain);
if (narrow_peak_band) { if (narrow_peak_band) {
NarrowBandAttenuation(*narrow_peak_band, nearend, echo, gain); NarrowBandAttenuation(*narrow_peak_band, nearend, echo, gain);
@ -280,7 +276,8 @@ void SuppressionGain::LowerBandGain(
AdjustNonConvergedFrequencies(gain); AdjustNonConvergedFrequencies(gain);
// Update the allowed maximum gain increase. // Update the allowed maximum gain increase.
UpdateGainIncrease(low_noise_render, linear_echo_estimate, echo, *gain); UpdateGainIncrease(low_noise_render, linear_echo_estimate, saturated_echo,
echo, *gain);
// Adjust gain dynamics. // Adjust gain dynamics.
const float gain_bound = const float gain_bound =
@ -353,6 +350,7 @@ void SuppressionGain::SetInitialState(bool state) {
void SuppressionGain::UpdateGainIncrease( void SuppressionGain::UpdateGainIncrease(
bool low_noise_render, bool low_noise_render,
bool linear_echo_estimate, bool linear_echo_estimate,
bool saturated_echo,
const std::array<float, kFftLengthBy2Plus1>& echo, const std::array<float, kFftLengthBy2Plus1>& echo,
const std::array<float, kFftLengthBy2Plus1>& new_gain) { const std::array<float, kFftLengthBy2Plus1>& new_gain) {
float max_inc; float max_inc;
@ -379,7 +377,7 @@ void SuppressionGain::UpdateGainIncrease(
rate_dec = p.nonlinear.rate_dec; rate_dec = p.nonlinear.rate_dec;
min_inc = p.nonlinear.min_inc; min_inc = p.nonlinear.min_inc;
min_dec = p.nonlinear.min_dec; min_dec = p.nonlinear.min_dec;
} else if (initial_state_ && no_saturation_counter_ > 10) { } else if (initial_state_ && !saturated_echo) {
if (initial_state_change_counter_ > 0) { if (initial_state_change_counter_ > 0) {
float change_factor = float change_factor =
initial_state_change_counter_ * one_by_state_change_duration_blocks_; initial_state_change_counter_ * one_by_state_change_duration_blocks_;
@ -409,7 +407,7 @@ void SuppressionGain::UpdateGainIncrease(
rate_dec = p.low_noise.rate_dec; rate_dec = p.low_noise.rate_dec;
min_inc = p.low_noise.min_inc; min_inc = p.low_noise.min_inc;
min_dec = p.low_noise.min_dec; min_dec = p.low_noise.min_dec;
} else if (no_saturation_counter_ > 10) { } else if (!saturated_echo) {
max_inc = p.normal.max_inc; max_inc = p.normal.max_inc;
max_dec = p.normal.max_dec; max_dec = p.normal.max_dec;
rate_inc = p.normal.rate_inc; rate_inc = p.normal.rate_inc;

View File

@ -51,6 +51,7 @@ class SuppressionGain {
void UpdateGainIncrease( void UpdateGainIncrease(
bool low_noise_render, bool low_noise_render,
bool linear_echo_estimate, bool linear_echo_estimate,
bool saturated_echo,
const std::array<float, kFftLengthBy2Plus1>& echo, const std::array<float, kFftLengthBy2Plus1>& echo,
const std::array<float, kFftLengthBy2Plus1>& new_gain); const std::array<float, kFftLengthBy2Plus1>& new_gain);
@ -72,7 +73,6 @@ class SuppressionGain {
std::array<float, kFftLengthBy2Plus1> last_echo_; std::array<float, kFftLengthBy2Plus1> last_echo_;
LowNoiseRenderDetector low_render_detector_; LowNoiseRenderDetector low_render_detector_;
size_t no_saturation_counter_ = 0;
bool initial_state_ = true; bool initial_state_ = true;
int initial_state_change_counter_ = 0; int initial_state_change_counter_ = 0;
RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionGain); RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionGain);