Balancing the transparency in AEC3 between saturating and low echo paths

This CL balances the NLP tradeoff in AEC3 to properly handle the cases
when the echo path is so strong that it saturates the echo and when it
is so weak that the echo is very low compared to nearend.

Bug: webrtc:8411, webrtc:8412, chromium:775653
Change-Id: I5aff74dfadd51cac1ce71b1cb935d68a5be6918d
Reviewed-on: https://webrtc-review.googlesource.com/14120
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20418}
This commit is contained in:
Per Åhgren
2017-10-25 02:59:45 +02:00
committed by Commit Bot
parent d9f99c1e7a
commit 7ddd46386a
11 changed files with 276 additions and 133 deletions

View File

@ -56,23 +56,29 @@ AecState::AecState(const EchoCanceller3Config& config)
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h), erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
config_(config), config_(config),
reverb_decay_(config_.ep_strength.default_len) {} reverb_decay_(config_.ep_strength.default_len) {
max_render_.fill(0.f);
}
AecState::~AecState() = default; AecState::~AecState() = default;
void AecState::HandleEchoPathChange( void AecState::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) { const EchoPathVariability& echo_path_variability) {
if (echo_path_variability.AudioPathChanged()) { if (echo_path_variability.AudioPathChanged()) {
blocks_since_last_saturation_ = 0; blocks_since_last_saturation_ = kUnknownDelayRenderWindowSize + 1;
usable_linear_estimate_ = false; usable_linear_estimate_ = false;
echo_leakage_detected_ = false; echo_leakage_detected_ = false;
capture_signal_saturation_ = false; capture_signal_saturation_ = false;
echo_saturation_ = false; echo_saturation_ = false;
previous_max_sample_ = 0.f; max_render_.fill(0.f);
if (echo_path_variability.delay_change) { if (echo_path_variability.delay_change) {
force_zero_gain_counter_ = 0; force_zero_gain_counter_ = 0;
blocks_with_filter_adaptation_ = 0; blocks_with_filter_adaptation_ = 0;
blocks_with_strong_render_ = 0;
initial_state_ = true;
linear_echo_estimate_ = false;
sufficient_filter_updates_ = false;
render_received_ = false; render_received_ = false;
force_zero_gain_ = true; force_zero_gain_ = true;
capture_block_counter_ = 0; capture_block_counter_ = 0;
@ -124,50 +130,134 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
// Update the echo audibility evaluator. // Update the echo audibility evaluator.
echo_audibility_.Update(x, s, converged_filter); echo_audibility_.Update(x, s, converged_filter);
// Detect and flag echo saturation.
// TODO(peah): Add the delay in this computation to ensure that the render and
// capture signals are properly aligned.
RTC_DCHECK_LT(0, x.size());
const float max_sample = fabs(*std::max_element(
x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
if (config_.ep_strength.echo_can_saturate) { if (config_.ep_strength.echo_can_saturate) {
const bool saturated_echo = // Detect and flag echo saturation.
(previous_max_sample_ > 200.f) && SaturatedCapture(); RTC_DCHECK_LT(0, x.size());
// Store the render values in a circular buffer.
max_render_index_ = (max_render_index_ + 1) % max_render_.size();
auto x_max_result = std::minmax_element(x.begin(), x.end());
max_render_[max_render_index_] =
std::max(fabs(*x_max_result.first), fabs(*x_max_result.second));
// Counts the blocks since saturation. bool saturated_echo = false;
constexpr size_t kSaturationLeakageBlocks = 20; // Check for whether a saturated frame potentially could consist of
// saturated echo.
if (SaturatedCapture()) {
if (converged_filter) {
RTC_DCHECK(filter_delay_);
const size_t index =
(max_render_index_ + max_render_.size() - *filter_delay_) %
max_render_.size();
saturated_echo = max_render_[index] > 200.f;
} else {
saturated_echo =
*std::max_element(max_render_.begin(), max_render_.end()) > 200.f;
}
}
// Set flag for potential presence of saturated echo
blocks_since_last_saturation_ = blocks_since_last_saturation_ =
saturated_echo ? 0 : blocks_since_last_saturation_ + 1; saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
if (converged_filter) {
echo_saturation_ =
blocks_since_last_saturation_ < kAdaptiveFilterLength + 1;
} else {
echo_saturation_ =
blocks_since_last_saturation_ < kUnknownDelayRenderWindowSize + 1;
}
echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; // Set flag for whether the echo path is generally strong enough to saturate
// the echo.
if (converged_filter) {
// Base detection on predicted echo sample.
auto s_max_result = std::minmax_element(s.begin(), s.end());
const float s_max_abs =
std::max(fabs(*s_max_result.first), fabs(*s_max_result.second));
const bool saturated_echo_sample =
s_max_abs >= 10000.f && SaturatedCapture();
saturating_echo_path_counter_ = saturated_echo_sample
? 10 * kNumBlocksPerSecond
: saturating_echo_path_counter_ - 1;
} else {
// Base detection on the detected potential echo.
saturating_echo_path_counter_ = saturated_echo
? 10 * kNumBlocksPerSecond
: saturating_echo_path_counter_ - 1;
}
saturating_echo_path_counter_ = std::max(0, saturating_echo_path_counter_);
saturating_echo_path_ = saturating_echo_path_counter_ > 0;
} else { } else {
echo_saturation_ = false; echo_saturation_ = false;
saturating_echo_path_ = false;
saturating_echo_path_counter_ = 0;
} }
previous_max_sample_ = max_sample;
// Flag whether the linear filter estimate is usable. // Compute render energies.
usable_linear_estimate_ =
(!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) &&
capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
// After an amount of active render samples for which an echo should have been
// detected in the capture signal if the ERL was not infinite, flag that a
// transparent mode should be entered.
const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
const bool active_render_block = const bool active_render_block =
x_energy > (config_.render_levels.active_render_limit * x_energy > (config_.render_levels.active_render_limit *
config_.render_levels.active_render_limit) * config_.render_levels.active_render_limit) *
kFftLengthBy2; kFftLengthBy2;
const bool strong_render_block = x_energy > 1000 * 1000 * kFftLengthBy2;
if (active_render_block) { if (active_render_block) {
render_received_ = true; render_received_ = true;
} }
// Update counters.
blocks_with_filter_adaptation_ += blocks_with_filter_adaptation_ +=
(active_render_block && (!SaturatedCapture()) ? 1 : 0); (active_render_block && (!SaturatedCapture()) ? 1 : 0);
transparent_mode_ = !converged_filter && blocks_with_strong_render_ +=
(!render_received_ || blocks_with_filter_adaptation_ >= (strong_render_block && (!SaturatedCapture()) ? 1 : 0);
5 * kNumBlocksPerSecond);
// After an amount of active render samples for which an echo should have been
// detected in the capture signal if the ERL was not infinite, flag that a
// transparent mode should be entered.
if (SaturatingEchoPath()) {
transparent_mode_ = !converged_filter &&
(!render_received_ || blocks_with_strong_render_ >=
15 * kNumBlocksPerSecond);
} else {
transparent_mode_ = !converged_filter &&
(!render_received_ ||
blocks_with_strong_render_ >= 5 * kNumBlocksPerSecond);
}
// Update flag for whether the adaptation is in the initial state.
if (SaturatingEchoPath()) {
initial_state_ = capture_block_counter_ < 6 * kNumBlocksPerSecond;
} else {
initial_state_ = capture_block_counter_ < 3 * kNumBlocksPerSecond;
}
// Detect whether the linear filter is usable.
if (SaturatingEchoPath()) {
usable_linear_estimate_ =
(!echo_saturation_) &&
(converged_filter && SufficientFilterUpdates()) &&
capture_block_counter_ >= 5 * kNumBlocksPerSecond && external_delay_;
} else {
usable_linear_estimate_ =
(!echo_saturation_) &&
(converged_filter || SufficientFilterUpdates()) &&
capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
}
// Flag whether the linear echo estimate should be used.
linear_echo_estimate_ = usable_linear_estimate_ && !TransparentMode();
// Flag whether a sufficient number of filter updates has been done for the
// filter to perform well.
if (SaturatingEchoPath()) {
sufficient_filter_updates_ =
blocks_with_filter_adaptation_ >= 2 * kEchoPathChangeConvergenceBlocks;
} else {
sufficient_filter_updates_ =
blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
}
// Update the room reverb estimate. // Update the room reverb estimate.
UpdateReverb(adaptive_filter_impulse_response); UpdateReverb(adaptive_filter_impulse_response);

View File

@ -67,6 +67,9 @@ class AecState {
// Returns whether the echo signal is saturated. // Returns whether the echo signal is saturated.
bool SaturatedEcho() const { return echo_saturation_; } bool SaturatedEcho() const { return echo_saturation_; }
// Returns whether the echo path can saturate.
bool SaturatingEchoPath() const { return saturating_echo_path_; }
// Updates the capture signal saturation. // Updates the capture signal saturation.
void UpdateCaptureSaturation(bool capture_signal_saturation) { void UpdateCaptureSaturation(bool capture_signal_saturation) {
capture_signal_saturation_ = capture_signal_saturation; capture_signal_saturation_ = capture_signal_saturation;
@ -93,20 +96,14 @@ class AecState {
} }
// Returns whether the linear filter should have been able to adapt properly. // Returns whether the linear filter should have been able to adapt properly.
bool SufficientFilterUpdates() const { bool SufficientFilterUpdates() const { return sufficient_filter_updates_; }
return blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
}
// Returns whether the echo subtractor can be used to determine the residual // Returns whether the echo subtractor can be used to determine the residual
// echo. // echo.
bool LinearEchoEstimate() const { bool LinearEchoEstimate() const { return linear_echo_estimate_; }
return UsableLinearEstimate() && !TransparentMode();
}
// Returns whether the AEC is in an initial state. // Returns whether the AEC is in an initial state.
bool InitialState() const { bool InitialState() const { return initial_state_; }
return capture_block_counter_ < 3 * kNumBlocksPerSecond;
}
// Updates the aec state. // Updates the aec state.
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
@ -147,12 +144,14 @@ class AecState {
ErleEstimator erle_estimator_; ErleEstimator erle_estimator_;
size_t capture_block_counter_ = 0; size_t capture_block_counter_ = 0;
size_t blocks_with_filter_adaptation_ = 0; size_t blocks_with_filter_adaptation_ = 0;
size_t blocks_with_strong_render_ = 0;
bool usable_linear_estimate_ = false; bool usable_linear_estimate_ = false;
bool echo_leakage_detected_ = false; bool echo_leakage_detected_ = false;
bool capture_signal_saturation_ = false; bool capture_signal_saturation_ = false;
bool echo_saturation_ = false; bool echo_saturation_ = false;
bool transparent_mode_ = false; bool transparent_mode_ = false;
float previous_max_sample_ = 0.f; std::array<float, kAdaptiveFilterLength> max_render_;
size_t max_render_index_ = 0;
bool force_zero_gain_ = false; bool force_zero_gain_ = false;
bool render_received_ = false; bool render_received_ = false;
size_t force_zero_gain_counter_ = 0; size_t force_zero_gain_counter_ = 0;
@ -165,6 +164,11 @@ class AecState {
EchoAudibility echo_audibility_; EchoAudibility echo_audibility_;
const EchoCanceller3Config config_; const EchoCanceller3Config config_;
float reverb_decay_; float reverb_decay_;
bool saturating_echo_path_ = false;
int saturating_echo_path_counter_ = 0;
bool initial_state_ = true;
bool linear_echo_estimate_ = false;
bool sufficient_filter_updates_ = false;
RTC_DISALLOW_COPY_AND_ASSIGN(AecState); RTC_DISALLOW_COPY_AND_ASSIGN(AecState);
}; };

View File

@ -189,10 +189,9 @@ void EchoRemoverImpl::ProcessCapture(
cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise); cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
// Choose and apply echo suppression gain. // Choose and apply echo suppression gain.
suppression_gain_.GetGain( suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),
E2, R2, cng_.NoiseSpectrum(), render_signal_analyzer_, render_signal_analyzer_, aec_state_, x,
aec_state_.SaturatedEcho(), x, aec_state_.ForcedZeroGain(), &high_bands_gain, &G);
aec_state_.LinearEchoEstimate(), &high_bands_gain, &G);
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
high_bands_gain, y); high_bands_gain, y);

View File

@ -368,7 +368,7 @@ void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
[](float a, float b) -> bool { return a * a < b * b; })); [](float a, float b) -> bool { return a * a < b * b; }));
// Update the lag estimates for the matched filter. // Update the lag estimates for the matched filter.
const float kMatchingFilterThreshold = 0.2f; const float kMatchingFilterThreshold = 0.1f;
lag_estimates_[n] = LagEstimate( lag_estimates_[n] = LagEstimate(
error_sum_anchor - error_sum, error_sum_anchor - error_sum,
(lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) && (lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) &&

View File

@ -108,54 +108,29 @@ void ResidualEchoEstimator::Estimate(
R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f); R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
} }
} else { } else {
const rtc::Optional<size_t> delay =
aec_state.ExternalDelay()
? (aec_state.FilterDelay() ? aec_state.FilterDelay()
: aec_state.ExternalDelay())
: rtc::Optional<size_t>();
// Estimate the echo generating signal power. // Estimate the echo generating signal power.
std::array<float, kFftLengthBy2Plus1> X2; std::array<float, kFftLengthBy2Plus1> X2;
if (aec_state.ExternalDelay() && aec_state.FilterDelay()) { EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
RTC_DCHECK(delay); &X2);
const int delay_use = static_cast<int>(*delay);
// Computes the spectral power over the blocks surrounding the delay.
constexpr int kKnownDelayRenderWindowSize = 5;
static_assert(
kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
"Requirement to ensure that the render buffer is overrun");
EchoGeneratingPower(
render_buffer, std::max(0, delay_use - 1),
std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
} else {
// Computes the spectral power over the latest blocks.
EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
&X2);
}
// Subtract the stationary noise power to avoid stationary noise causing // Subtract the stationary noise power to avoid stationary noise causing
// excessive echo suppression. // excessive echo suppression.
std::transform( if (!(aec_state.SaturatedEcho() || aec_state.SaturatingEchoPath())) {
X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), std::transform(
[](float a, float b) { return std::max(0.f, a - 10.f * b); }); X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
[](float a, float b) { return std::max(0.f, a - 10.f * b); });
}
NonLinearEstimate( NonLinearEstimate(
aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(), aec_state.SufficientFilterUpdates(),
aec_state.SaturatedEcho() && aec_state.SaturatingEchoPath(),
config_.ep_strength.bounded_erl, aec_state.TransparentMode(), config_.ep_strength.bounded_erl, aec_state.TransparentMode(),
aec_state.InitialState(), X2, Y2, R2); aec_state.InitialState(), X2, Y2, R2);
if (aec_state.ExternalDelay() && aec_state.FilterDelay() &&
aec_state.SaturatedEcho()) {
AddEchoReverb(*R2, aec_state.SaturatedEcho(),
std::min(static_cast<size_t>(kAdaptiveFilterLength),
delay.value_or(kAdaptiveFilterLength)),
aec_state.ReverbDecay(), R2);
}
} }
// If the echo is deemed inaudible, set the residual echo to zero. // If the echo is deemed inaudible, set the residual echo to zero.
if (aec_state.InaudibleEcho()) { if (aec_state.InaudibleEcho() &&
(!(aec_state.SaturatedEcho() || aec_state.SaturatingEchoPath()))) {
R2->fill(0.f); R2->fill(0.f);
R2_old_.fill(0.f); R2_old_.fill(0.f);
R2_hold_counter_.fill(0.f); R2_hold_counter_.fill(0.f);
@ -204,7 +179,7 @@ void ResidualEchoEstimator::NonLinearEstimate(
// Set echo path gains. // Set echo path gains.
if (saturated_echo) { if (saturated_echo) {
// If the echo could be saturated, use a very conservative gain. // If the echo could be saturated, use a very conservative gain.
echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f; echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 1000.f;
} else if (sufficient_filter_updates && !bounded_erl) { } else if (sufficient_filter_updates && !bounded_erl) {
// If the filter should have been able to converge, and no assumption is // If the filter should have been able to converge, and no assumption is
// possible on the ERL, use a low gain. // possible on the ERL, use a low gain.

View File

@ -59,12 +59,14 @@ Subtractor::~Subtractor() = default;
void Subtractor::HandleEchoPathChange( void Subtractor::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) { const EchoPathVariability& echo_path_variability) {
use_shadow_filter_frequency_response_ = false;
if (echo_path_variability.delay_change) { if (echo_path_variability.delay_change) {
main_filter_.HandleEchoPathChange(); main_filter_.HandleEchoPathChange();
shadow_filter_.HandleEchoPathChange(); shadow_filter_.HandleEchoPathChange();
G_main_.HandleEchoPathChange(); G_main_.HandleEchoPathChange();
G_shadow_.HandleEchoPathChange(); G_shadow_.HandleEchoPathChange();
converged_filter_ = false; converged_filter_ = false;
converged_filter_counter_ = 0;
} }
} }
@ -91,16 +93,29 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
shadow_filter_.Filter(render_buffer, &S); shadow_filter_.Filter(render_buffer, &S);
PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr); PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
if (!converged_filter_) { // Determine which frequency response should be used.
const auto sum_of_squares = [](float a, float b) { return a + b * b; }; const auto sum_of_squares = [](float a, float b) { return a + b * b; };
const float e2_main = const float e2_main =
std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares); std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
const float e2_shadow = const float e2_shadow =
std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares); std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
if (y2 > kBlockSize * 50.f * 50.f) { if (e2_main < e2_shadow && e2_main < 0.1 * y2) {
converged_filter_ = (e2_main > 0.3 * y2 || e2_shadow > 0.1 * y2); use_shadow_filter_frequency_response_ = false;
} else if (e2_shadow < e2_main && e2_shadow < 0.01 * y2) {
use_shadow_filter_frequency_response_ = true;
}
// Flag whether the filter has at some point converged.
// TODO(peah): Consider using a timeout for this.
if (!converged_filter_) {
if (y2 > kBlockSize * 100.f * 100.f) {
if (e2_main < 0.3 * y2) {
converged_filter_ = (++converged_filter_counter_) > 10;
} else {
converged_filter_counter_ = 0;
}
} }
} }

View File

@ -48,6 +48,9 @@ class Subtractor {
// Returns the block-wise frequency response for the main adaptive filter. // Returns the block-wise frequency response for the main adaptive filter.
const std::vector<std::array<float, kFftLengthBy2Plus1>>& const std::vector<std::array<float, kFftLengthBy2Plus1>>&
FilterFrequencyResponse() const { FilterFrequencyResponse() const {
if (use_shadow_filter_frequency_response_) {
return shadow_filter_.FilterFrequencyResponse();
}
return main_filter_.FilterFrequencyResponse(); return main_filter_.FilterFrequencyResponse();
} }
@ -68,7 +71,8 @@ class Subtractor {
MainFilterUpdateGain G_main_; MainFilterUpdateGain G_main_;
ShadowFilterUpdateGain G_shadow_; ShadowFilterUpdateGain G_shadow_;
bool converged_filter_ = false; bool converged_filter_ = false;
size_t converged_filter_counter_ = 0;
bool use_shadow_filter_frequency_response_ = false;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor); RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor);
}; };

View File

@ -126,7 +126,14 @@ void UpdateMaxGainIncrease(
float min_decreasing; float min_decreasing;
auto& param = config.gain_updates; auto& param = config.gain_updates;
if (!linear_echo_estimate) { if (no_saturation_counter <= 10) {
max_increasing = param.saturation.max_inc;
max_decreasing = param.saturation.max_dec;
rate_increasing = param.saturation.rate_inc;
rate_decreasing = param.saturation.rate_dec;
min_increasing = param.saturation.min_inc;
min_decreasing = param.saturation.min_dec;
} else if (!linear_echo_estimate) {
max_increasing = param.nonlinear.max_inc; max_increasing = param.nonlinear.max_inc;
max_decreasing = param.nonlinear.max_dec; max_decreasing = param.nonlinear.max_dec;
rate_increasing = param.nonlinear.rate_inc; rate_increasing = param.nonlinear.rate_inc;
@ -140,20 +147,13 @@ void UpdateMaxGainIncrease(
rate_decreasing = param.low_noise.rate_dec; rate_decreasing = param.low_noise.rate_dec;
min_increasing = param.low_noise.min_inc; min_increasing = param.low_noise.min_inc;
min_decreasing = param.low_noise.min_dec; min_decreasing = param.low_noise.min_dec;
} else if (no_saturation_counter > 10) { } else {
max_increasing = param.normal.max_inc; max_increasing = param.normal.max_inc;
max_decreasing = param.normal.max_dec; max_decreasing = param.normal.max_dec;
rate_increasing = param.normal.rate_inc; rate_increasing = param.normal.rate_inc;
rate_decreasing = param.normal.rate_dec; rate_decreasing = param.normal.rate_dec;
min_increasing = param.normal.min_inc; min_increasing = param.normal.min_inc;
min_decreasing = param.normal.min_dec; min_decreasing = param.normal.min_dec;
} else {
max_increasing = param.saturation.max_inc;
max_decreasing = param.saturation.max_dec;
rate_increasing = param.saturation.rate_inc;
rate_decreasing = param.saturation.rate_dec;
min_increasing = param.saturation.min_inc;
min_decreasing = param.saturation.min_dec;
} }
for (size_t k = 0; k < new_gain.size(); ++k) { for (size_t k = 0; k < new_gain.size(); ++k) {
@ -176,6 +176,7 @@ void GainToNoAudibleEcho(
const EchoCanceller3Config& config, const EchoCanceller3Config& config,
bool low_noise_render, bool low_noise_render,
bool saturated_echo, bool saturated_echo,
bool saturating_echo_path,
bool linear_echo_estimate, bool linear_echo_estimate,
const std::array<float, kFftLengthBy2Plus1>& nearend, const std::array<float, kFftLengthBy2Plus1>& nearend,
const std::array<float, kFftLengthBy2Plus1>& echo, const std::array<float, kFftLengthBy2Plus1>& echo,
@ -185,21 +186,29 @@ void GainToNoAudibleEcho(
const std::array<float, kFftLengthBy2Plus1>& one_by_echo, const std::array<float, kFftLengthBy2Plus1>& one_by_echo,
std::array<float, kFftLengthBy2Plus1>* gain) { std::array<float, kFftLengthBy2Plus1>* gain) {
float nearend_masking_margin = 0.f; float nearend_masking_margin = 0.f;
if (linear_echo_estimate) { if (saturated_echo) {
nearend_masking_margin = nearend_masking_margin = config.gain_mask.m2;
low_noise_render
? config.gain_mask.m9
: (saturated_echo ? config.gain_mask.m2 : config.gain_mask.m3);
} else { } else {
nearend_masking_margin = config.gain_mask.m7; if (linear_echo_estimate) {
nearend_masking_margin =
low_noise_render ? config.gain_mask.m9 : config.gain_mask.m3;
} else {
nearend_masking_margin = config.gain_mask.m7;
}
} }
RTC_DCHECK_LE(0.f, nearend_masking_margin); RTC_DCHECK_LE(0.f, nearend_masking_margin);
RTC_DCHECK_GT(1.f, nearend_masking_margin); RTC_DCHECK_GT(1.f, nearend_masking_margin);
const float one_by_one_minus_nearend_masking_margin = const float one_by_one_minus_nearend_masking_margin =
1.f / (1.0f - nearend_masking_margin); 1.f / (1.0f - nearend_masking_margin);
const float masker_margin = float masker_margin;
linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8; if (saturated_echo || saturating_echo_path) {
masker_margin = 0.0001f;
} else {
masker_margin =
linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8;
}
for (size_t k = 0; k < gain->size(); ++k) { for (size_t k = 0; k < gain->size(); ++k) {
const float unity_gain_masker = std::max(nearend[k], masker[k]); const float unity_gain_masker = std::max(nearend[k], masker[k]);
@ -276,6 +285,7 @@ void SuppressionGain::LowerBandGain(
bool low_noise_render, bool low_noise_render,
const rtc::Optional<int>& narrow_peak_band, const rtc::Optional<int>& narrow_peak_band,
bool saturated_echo, bool saturated_echo,
bool saturating_echo_path,
bool linear_echo_estimate, bool linear_echo_estimate,
const std::array<float, kFftLengthBy2Plus1>& nearend, const std::array<float, kFftLengthBy2Plus1>& nearend,
const std::array<float, kFftLengthBy2Plus1>& echo, const std::array<float, kFftLengthBy2Plus1>& echo,
@ -296,7 +306,7 @@ void SuppressionGain::LowerBandGain(
const float min_echo_power = const float min_echo_power =
low_noise_render ? config_.echo_audibility.low_render_limit low_noise_render ? config_.echo_audibility.low_render_limit
: config_.echo_audibility.normal_render_limit; : config_.echo_audibility.normal_render_limit;
if (no_saturation_counter_ > 10) { if (!saturating_echo_path) {
for (size_t k = 0; k < nearend.size(); ++k) { for (size_t k = 0; k < nearend.size(); ++k) {
const float denom = std::min(nearend[k], echo[k]); const float denom = std::min(nearend[k], echo[k]);
min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f; min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f;
@ -309,10 +319,12 @@ void SuppressionGain::LowerBandGain(
// Compute the maximum gain by limiting the gain increase from the previous // Compute the maximum gain by limiting the gain increase from the previous
// gain. // gain.
std::array<float, kFftLengthBy2Plus1> max_gain; std::array<float, kFftLengthBy2Plus1> max_gain;
const float first_increase = saturated_echo || saturating_echo_path
? 0.00001f
: config_.gain_updates.floor_first_increase;
for (size_t k = 0; k < gain->size(); ++k) { for (size_t k = 0; k < gain->size(); ++k) {
max_gain[k] = std::min(std::max(last_gain_[k] * gain_increase_[k], max_gain[k] = std::min(
config_.gain_updates.floor_first_increase), std::max(last_gain_[k] * gain_increase_[k], first_increase), 1.f);
1.f);
} }
// Iteratively compute the gain required to attenuate the echo to a non // Iteratively compute the gain required to attenuate the echo to a non
@ -321,9 +333,9 @@ void SuppressionGain::LowerBandGain(
for (int k = 0; k < 2; ++k) { for (int k = 0; k < 2; ++k) {
std::array<float, kFftLengthBy2Plus1> masker; std::array<float, kFftLengthBy2Plus1> masker;
MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker); MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker);
GainToNoAudibleEcho(config_, low_noise_render, saturated_echo, GainToNoAudibleEcho(config_, low_noise_render, no_saturation_counter_ > 10,
linear_echo_estimate, nearend, echo, masker, min_gain, saturating_echo_path, linear_echo_estimate, nearend,
max_gain, one_by_echo, gain); echo, masker, min_gain, max_gain, one_by_echo, gain);
AdjustForExternalFilters(gain); AdjustForExternalFilters(gain);
if (narrow_peak_band) { if (narrow_peak_band) {
NarrowBandAttenuation(*narrow_peak_band, gain); NarrowBandAttenuation(*narrow_peak_band, gain);
@ -366,15 +378,18 @@ void SuppressionGain::GetGain(
const std::array<float, kFftLengthBy2Plus1>& echo, const std::array<float, kFftLengthBy2Plus1>& echo,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise, const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
const RenderSignalAnalyzer& render_signal_analyzer, const RenderSignalAnalyzer& render_signal_analyzer,
bool saturated_echo, const AecState& aec_state,
const std::vector<std::vector<float>>& render, const std::vector<std::vector<float>>& render,
bool force_zero_gain,
bool linear_echo_estimate,
float* high_bands_gain, float* high_bands_gain,
std::array<float, kFftLengthBy2Plus1>* low_band_gain) { std::array<float, kFftLengthBy2Plus1>* low_band_gain) {
RTC_DCHECK(high_bands_gain); RTC_DCHECK(high_bands_gain);
RTC_DCHECK(low_band_gain); RTC_DCHECK(low_band_gain);
const bool saturated_echo = aec_state.SaturatedEcho();
const bool saturating_echo_path = aec_state.SaturatingEchoPath();
const bool force_zero_gain = aec_state.ForcedZeroGain();
const bool linear_echo_estimate = aec_state.LinearEchoEstimate();
if (force_zero_gain) { if (force_zero_gain) {
last_gain_.fill(0.f); last_gain_.fill(0.f);
std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin()); std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin());
@ -390,8 +405,8 @@ void SuppressionGain::GetGain(
const rtc::Optional<int> narrow_peak_band = const rtc::Optional<int> narrow_peak_band =
render_signal_analyzer.NarrowPeakBand(); render_signal_analyzer.NarrowPeakBand();
LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo, LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo,
linear_echo_estimate, nearend, echo, comfort_noise, saturating_echo_path, linear_echo_estimate, nearend, echo,
low_band_gain); comfort_noise, low_band_gain);
// Compute the gain for the upper bands. // Compute the gain for the upper bands.
*high_bands_gain = *high_bands_gain =

View File

@ -15,6 +15,7 @@
#include <vector> #include <vector>
#include "modules/audio_processing/aec3/aec3_common.h" #include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec_state.h"
#include "modules/audio_processing/aec3/render_signal_analyzer.h" #include "modules/audio_processing/aec3/render_signal_analyzer.h"
#include "modules/audio_processing/include/audio_processing.h" #include "modules/audio_processing/include/audio_processing.h"
#include "rtc_base/constructormagic.h" #include "rtc_base/constructormagic.h"
@ -29,10 +30,8 @@ class SuppressionGain {
const std::array<float, kFftLengthBy2Plus1>& echo, const std::array<float, kFftLengthBy2Plus1>& echo,
const std::array<float, kFftLengthBy2Plus1>& comfort_noise, const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
const RenderSignalAnalyzer& render_signal_analyzer, const RenderSignalAnalyzer& render_signal_analyzer,
bool saturated_echo, const AecState& aec_state,
const std::vector<std::vector<float>>& render, const std::vector<std::vector<float>>& render,
bool force_zero_gain,
bool linear_echo_estimate,
float* high_bands_gain, float* high_bands_gain,
std::array<float, kFftLengthBy2Plus1>* low_band_gain); std::array<float, kFftLengthBy2Plus1>* low_band_gain);
@ -40,6 +39,7 @@ class SuppressionGain {
void LowerBandGain(bool stationary_with_low_power, void LowerBandGain(bool stationary_with_low_power,
const rtc::Optional<int>& narrow_peak_band, const rtc::Optional<int>& narrow_peak_band,
bool saturated_echo, bool saturated_echo,
bool saturating_echo_path,
bool linear_echo_estimate, bool linear_echo_estimate,
const std::array<float, kFftLengthBy2Plus1>& nearend, const std::array<float, kFftLengthBy2Plus1>& nearend,
const std::array<float, kFftLengthBy2Plus1>& echo, const std::array<float, kFftLengthBy2Plus1>& echo,

View File

@ -10,6 +10,10 @@
#include "modules/audio_processing/aec3/suppression_gain.h" #include "modules/audio_processing/aec3/suppression_gain.h"
#include "modules/audio_processing/aec3/aec_state.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/subtractor.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h" #include "rtc_base/checks.h"
#include "system_wrappers/include/cpu_features_wrapper.h" #include "system_wrappers/include/cpu_features_wrapper.h"
#include "test/gtest.h" #include "test/gtest.h"
@ -29,11 +33,12 @@ TEST(SuppressionGain, NullOutputGains) {
R2.fill(0.f); R2.fill(0.f);
N2.fill(0.f); N2.fill(0.f);
float high_bands_gain; float high_bands_gain;
AecState aec_state(EchoCanceller3Config{});
EXPECT_DEATH(SuppressionGain(EchoCanceller3Config{}, DetectOptimization()) EXPECT_DEATH(SuppressionGain(EchoCanceller3Config{}, DetectOptimization())
.GetGain(E2, R2, N2, RenderSignalAnalyzer(), false, .GetGain(E2, R2, N2, RenderSignalAnalyzer(), aec_state,
std::vector<std::vector<float>>( std::vector<std::vector<float>>(
3, std::vector<float>(kBlockSize, 0.f)), 3, std::vector<float>(kBlockSize, 0.f)),
false, true, &high_bands_gain, nullptr), &high_bands_gain, nullptr),
""); "");
} }
@ -46,17 +51,53 @@ TEST(SuppressionGain, BasicGainComputation) {
RenderSignalAnalyzer analyzer; RenderSignalAnalyzer analyzer;
float high_bands_gain; float high_bands_gain;
std::array<float, kFftLengthBy2Plus1> E2; std::array<float, kFftLengthBy2Plus1> E2;
std::array<float, kFftLengthBy2Plus1> Y2;
std::array<float, kFftLengthBy2Plus1> R2; std::array<float, kFftLengthBy2Plus1> R2;
std::array<float, kFftLengthBy2Plus1> N2; std::array<float, kFftLengthBy2Plus1> N2;
std::array<float, kFftLengthBy2Plus1> g; std::array<float, kFftLengthBy2Plus1> g;
std::array<float, kBlockSize> s;
std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f)); std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f));
AecState aec_state(EchoCanceller3Config{});
ApmDataDumper data_dumper(42);
Subtractor subtractor(&data_dumper, DetectOptimization());
RenderBuffer render_buffer(
DetectOptimization(), 1,
std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength),
std::vector<size_t>(1, kAdaptiveFilterLength));
// Verify the functionality for forcing a zero gain.
E2.fill(1000000000.f);
R2.fill(10000000000000.f);
N2.fill(0.f);
s.fill(10.f);
aec_state.Update(subtractor.FilterFrequencyResponse(),
subtractor.FilterImpulseResponse(),
subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
render_buffer, E2, Y2, x[0], s, false);
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain,
&g);
std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
EXPECT_FLOAT_EQ(0.f, high_bands_gain);
// Ensure that a strong noise is detected to mask any echoes. // Ensure that a strong noise is detected to mask any echoes.
E2.fill(10.f); E2.fill(10.f);
Y2.fill(10.f);
R2.fill(0.1f); R2.fill(0.1f);
N2.fill(100.f); N2.fill(100.f);
for (int k = 0; k < 10; ++k) { // Ensure that the gain is no longer forced to zero.
suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true, for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
aec_state.Update(subtractor.FilterFrequencyResponse(),
subtractor.FilterImpulseResponse(),
subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
render_buffer, E2, Y2, x[0], s, false);
}
for (int k = 0; k < 100; ++k) {
aec_state.Update(subtractor.FilterFrequencyResponse(),
subtractor.FilterImpulseResponse(),
subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
render_buffer, E2, Y2, x[0], s, false);
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
&high_bands_gain, &g); &high_bands_gain, &g);
} }
std::for_each(g.begin(), g.end(), std::for_each(g.begin(), g.end(),
@ -64,10 +105,15 @@ TEST(SuppressionGain, BasicGainComputation) {
// Ensure that a strong nearend is detected to mask any echoes. // Ensure that a strong nearend is detected to mask any echoes.
E2.fill(100.f); E2.fill(100.f);
Y2.fill(100.f);
R2.fill(0.1f); R2.fill(0.1f);
N2.fill(0.f); N2.fill(0.f);
for (int k = 0; k < 10; ++k) { for (int k = 0; k < 100; ++k) {
suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true, aec_state.Update(subtractor.FilterFrequencyResponse(),
subtractor.FilterImpulseResponse(),
subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
render_buffer, E2, Y2, x[0], s, false);
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
&high_bands_gain, &g); &high_bands_gain, &g);
} }
std::for_each(g.begin(), g.end(), std::for_each(g.begin(), g.end(),
@ -78,17 +124,12 @@ TEST(SuppressionGain, BasicGainComputation) {
R2.fill(10000000000000.f); R2.fill(10000000000000.f);
N2.fill(0.f); N2.fill(0.f);
for (int k = 0; k < 10; ++k) { for (int k = 0; k < 10; ++k) {
suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true, suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
&high_bands_gain, &g); &high_bands_gain, &g);
} }
std::for_each(g.begin(), g.end(), std::for_each(g.begin(), g.end(),
[](float a) { EXPECT_NEAR(0.f, a, 0.001); }); [](float a) { EXPECT_NEAR(0.f, a, 0.001); });
// Verify the functionality for forcing a zero gain.
suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, true, true,
&high_bands_gain, &g);
std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
EXPECT_FLOAT_EQ(0.f, high_bands_gain);
} }
} // namespace aec3 } // namespace aec3

View File

@ -1188,7 +1188,7 @@ struct EchoCanceller3Config {
GainChanges low_noise = {3.f, 3.f, 1.5f, 1.5f, 1.5f, 1.5f}; GainChanges low_noise = {3.f, 3.f, 1.5f, 1.5f, 1.5f, 1.5f};
GainChanges normal = {2.f, 2.f, 1.5f, 1.5f, 1.2f, 1.2f}; GainChanges normal = {2.f, 2.f, 1.5f, 1.5f, 1.2f, 1.2f};
GainChanges saturation = {1.2f, 1.2f, 1.5f, 1.5f, 1.f, 1.f}; GainChanges saturation = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f};
GainChanges nonlinear = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f}; GainChanges nonlinear = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f};
float floor_first_increase = 0.0001f; float floor_first_increase = 0.0001f;