Balancing the transparency in AEC3 between saturating and low echo paths
This CL balances the NLP tradeoff in AEC3 to properly handle both the case where the echo path is so strong that it saturates the echo and the case where it is so weak that the echo is very low compared to the nearend.

Bug: webrtc:8411, webrtc:8412, chromium:775653
Change-Id: I5aff74dfadd51cac1ce71b1cb935d68a5be6918d
Reviewed-on: https://webrtc-review.googlesource.com/14120
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20418}
This commit is contained in:
@ -56,23 +56,29 @@ AecState::AecState(const EchoCanceller3Config& config)
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
|
||||
config_(config),
|
||||
reverb_decay_(config_.ep_strength.default_len) {}
|
||||
reverb_decay_(config_.ep_strength.default_len) {
|
||||
max_render_.fill(0.f);
|
||||
}
|
||||
|
||||
AecState::~AecState() = default;
|
||||
|
||||
void AecState::HandleEchoPathChange(
|
||||
const EchoPathVariability& echo_path_variability) {
|
||||
if (echo_path_variability.AudioPathChanged()) {
|
||||
blocks_since_last_saturation_ = 0;
|
||||
blocks_since_last_saturation_ = kUnknownDelayRenderWindowSize + 1;
|
||||
usable_linear_estimate_ = false;
|
||||
echo_leakage_detected_ = false;
|
||||
capture_signal_saturation_ = false;
|
||||
echo_saturation_ = false;
|
||||
previous_max_sample_ = 0.f;
|
||||
max_render_.fill(0.f);
|
||||
|
||||
if (echo_path_variability.delay_change) {
|
||||
force_zero_gain_counter_ = 0;
|
||||
blocks_with_filter_adaptation_ = 0;
|
||||
blocks_with_strong_render_ = 0;
|
||||
initial_state_ = true;
|
||||
linear_echo_estimate_ = false;
|
||||
sufficient_filter_updates_ = false;
|
||||
render_received_ = false;
|
||||
force_zero_gain_ = true;
|
||||
capture_block_counter_ = 0;
|
||||
@ -124,50 +130,134 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
// Update the echo audibility evaluator.
|
||||
echo_audibility_.Update(x, s, converged_filter);
|
||||
|
||||
// Detect and flag echo saturation.
|
||||
// TODO(peah): Add the delay in this computation to ensure that the render and
|
||||
// capture signals are properly aligned.
|
||||
RTC_DCHECK_LT(0, x.size());
|
||||
const float max_sample = fabs(*std::max_element(
|
||||
x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
|
||||
|
||||
if (config_.ep_strength.echo_can_saturate) {
|
||||
const bool saturated_echo =
|
||||
(previous_max_sample_ > 200.f) && SaturatedCapture();
|
||||
// Detect and flag echo saturation.
|
||||
RTC_DCHECK_LT(0, x.size());
|
||||
// Store the render values in a circular buffer.
|
||||
max_render_index_ = (max_render_index_ + 1) % max_render_.size();
|
||||
auto x_max_result = std::minmax_element(x.begin(), x.end());
|
||||
max_render_[max_render_index_] =
|
||||
std::max(fabs(*x_max_result.first), fabs(*x_max_result.second));
|
||||
|
||||
// Counts the blocks since saturation.
|
||||
constexpr size_t kSaturationLeakageBlocks = 20;
|
||||
bool saturated_echo = false;
|
||||
// Check for whether a saturated frame potentially could consist of
|
||||
// saturated echo.
|
||||
if (SaturatedCapture()) {
|
||||
if (converged_filter) {
|
||||
RTC_DCHECK(filter_delay_);
|
||||
const size_t index =
|
||||
(max_render_index_ + max_render_.size() - *filter_delay_) %
|
||||
max_render_.size();
|
||||
saturated_echo = max_render_[index] > 200.f;
|
||||
} else {
|
||||
saturated_echo =
|
||||
*std::max_element(max_render_.begin(), max_render_.end()) > 200.f;
|
||||
}
|
||||
}
|
||||
|
||||
// Set flag for potential presence of saturated echo
|
||||
blocks_since_last_saturation_ =
|
||||
saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
|
||||
if (converged_filter) {
|
||||
echo_saturation_ =
|
||||
blocks_since_last_saturation_ < kAdaptiveFilterLength + 1;
|
||||
} else {
|
||||
echo_saturation_ =
|
||||
blocks_since_last_saturation_ < kUnknownDelayRenderWindowSize + 1;
|
||||
}
|
||||
|
||||
echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
|
||||
// Set flag for whether the echo path is generally strong enough to saturate
|
||||
// the echo.
|
||||
if (converged_filter) {
|
||||
// Base detection on predicted echo sample.
|
||||
auto s_max_result = std::minmax_element(s.begin(), s.end());
|
||||
const float s_max_abs =
|
||||
std::max(fabs(*s_max_result.first), fabs(*s_max_result.second));
|
||||
|
||||
const bool saturated_echo_sample =
|
||||
s_max_abs >= 10000.f && SaturatedCapture();
|
||||
saturating_echo_path_counter_ = saturated_echo_sample
|
||||
? 10 * kNumBlocksPerSecond
|
||||
: saturating_echo_path_counter_ - 1;
|
||||
} else {
|
||||
// Base detection on the detected potential echo.
|
||||
saturating_echo_path_counter_ = saturated_echo
|
||||
? 10 * kNumBlocksPerSecond
|
||||
: saturating_echo_path_counter_ - 1;
|
||||
}
|
||||
saturating_echo_path_counter_ = std::max(0, saturating_echo_path_counter_);
|
||||
saturating_echo_path_ = saturating_echo_path_counter_ > 0;
|
||||
} else {
|
||||
echo_saturation_ = false;
|
||||
saturating_echo_path_ = false;
|
||||
saturating_echo_path_counter_ = 0;
|
||||
}
|
||||
previous_max_sample_ = max_sample;
|
||||
|
||||
// Flag whether the linear filter estimate is usable.
|
||||
usable_linear_estimate_ =
|
||||
(!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) &&
|
||||
capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
|
||||
|
||||
// After an amount of active render samples for which an echo should have been
|
||||
// detected in the capture signal if the ERL was not infinite, flag that a
|
||||
// transparent mode should be entered.
|
||||
// Compute render energies.
|
||||
const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
|
||||
const bool active_render_block =
|
||||
x_energy > (config_.render_levels.active_render_limit *
|
||||
config_.render_levels.active_render_limit) *
|
||||
kFftLengthBy2;
|
||||
const bool strong_render_block = x_energy > 1000 * 1000 * kFftLengthBy2;
|
||||
|
||||
if (active_render_block) {
|
||||
render_received_ = true;
|
||||
}
|
||||
|
||||
// Update counters.
|
||||
blocks_with_filter_adaptation_ +=
|
||||
(active_render_block && (!SaturatedCapture()) ? 1 : 0);
|
||||
|
||||
transparent_mode_ = !converged_filter &&
|
||||
(!render_received_ || blocks_with_filter_adaptation_ >=
|
||||
5 * kNumBlocksPerSecond);
|
||||
blocks_with_strong_render_ +=
|
||||
(strong_render_block && (!SaturatedCapture()) ? 1 : 0);
|
||||
|
||||
// After an amount of active render samples for which an echo should have been
|
||||
// detected in the capture signal if the ERL was not infinite, flag that a
|
||||
// transparent mode should be entered.
|
||||
if (SaturatingEchoPath()) {
|
||||
transparent_mode_ = !converged_filter &&
|
||||
(!render_received_ || blocks_with_strong_render_ >=
|
||||
15 * kNumBlocksPerSecond);
|
||||
} else {
|
||||
transparent_mode_ = !converged_filter &&
|
||||
(!render_received_ ||
|
||||
blocks_with_strong_render_ >= 5 * kNumBlocksPerSecond);
|
||||
}
|
||||
|
||||
// Update flag for whether the adaptation is in the initial state.
|
||||
if (SaturatingEchoPath()) {
|
||||
initial_state_ = capture_block_counter_ < 6 * kNumBlocksPerSecond;
|
||||
} else {
|
||||
initial_state_ = capture_block_counter_ < 3 * kNumBlocksPerSecond;
|
||||
}
|
||||
|
||||
// Detect whether the linear filter is usable.
|
||||
if (SaturatingEchoPath()) {
|
||||
usable_linear_estimate_ =
|
||||
(!echo_saturation_) &&
|
||||
(converged_filter && SufficientFilterUpdates()) &&
|
||||
capture_block_counter_ >= 5 * kNumBlocksPerSecond && external_delay_;
|
||||
} else {
|
||||
usable_linear_estimate_ =
|
||||
(!echo_saturation_) &&
|
||||
(converged_filter || SufficientFilterUpdates()) &&
|
||||
capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
|
||||
}
|
||||
|
||||
// Flag whether the linear echo estimate should be used.
|
||||
linear_echo_estimate_ = usable_linear_estimate_ && !TransparentMode();
|
||||
|
||||
// Flag whether a sufficient number of filter updates has been done for the
|
||||
// filter to perform well.
|
||||
if (SaturatingEchoPath()) {
|
||||
sufficient_filter_updates_ =
|
||||
blocks_with_filter_adaptation_ >= 2 * kEchoPathChangeConvergenceBlocks;
|
||||
} else {
|
||||
sufficient_filter_updates_ =
|
||||
blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
|
||||
}
|
||||
|
||||
// Update the room reverb estimate.
|
||||
UpdateReverb(adaptive_filter_impulse_response);
|
||||
|
@ -67,6 +67,9 @@ class AecState {
|
||||
// Returns whether the echo signal is saturated.
|
||||
bool SaturatedEcho() const { return echo_saturation_; }
|
||||
|
||||
// Returns whether the echo path can saturate.
|
||||
bool SaturatingEchoPath() const { return saturating_echo_path_; }
|
||||
|
||||
// Updates the capture signal saturation.
|
||||
void UpdateCaptureSaturation(bool capture_signal_saturation) {
|
||||
capture_signal_saturation_ = capture_signal_saturation;
|
||||
@ -93,20 +96,14 @@ class AecState {
|
||||
}
|
||||
|
||||
// Returns whether the linear filter should have been able to adapt properly.
|
||||
bool SufficientFilterUpdates() const {
|
||||
return blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
|
||||
}
|
||||
bool SufficientFilterUpdates() const { return sufficient_filter_updates_; }
|
||||
|
||||
// Returns whether the echo subtractor can be used to determine the residual
|
||||
// echo.
|
||||
bool LinearEchoEstimate() const {
|
||||
return UsableLinearEstimate() && !TransparentMode();
|
||||
}
|
||||
bool LinearEchoEstimate() const { return linear_echo_estimate_; }
|
||||
|
||||
// Returns whether the AEC is in an initial state.
|
||||
bool InitialState() const {
|
||||
return capture_block_counter_ < 3 * kNumBlocksPerSecond;
|
||||
}
|
||||
bool InitialState() const { return initial_state_; }
|
||||
|
||||
// Updates the aec state.
|
||||
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
@ -147,12 +144,14 @@ class AecState {
|
||||
ErleEstimator erle_estimator_;
|
||||
size_t capture_block_counter_ = 0;
|
||||
size_t blocks_with_filter_adaptation_ = 0;
|
||||
size_t blocks_with_strong_render_ = 0;
|
||||
bool usable_linear_estimate_ = false;
|
||||
bool echo_leakage_detected_ = false;
|
||||
bool capture_signal_saturation_ = false;
|
||||
bool echo_saturation_ = false;
|
||||
bool transparent_mode_ = false;
|
||||
float previous_max_sample_ = 0.f;
|
||||
std::array<float, kAdaptiveFilterLength> max_render_;
|
||||
size_t max_render_index_ = 0;
|
||||
bool force_zero_gain_ = false;
|
||||
bool render_received_ = false;
|
||||
size_t force_zero_gain_counter_ = 0;
|
||||
@ -165,6 +164,11 @@ class AecState {
|
||||
EchoAudibility echo_audibility_;
|
||||
const EchoCanceller3Config config_;
|
||||
float reverb_decay_;
|
||||
bool saturating_echo_path_ = false;
|
||||
int saturating_echo_path_counter_ = 0;
|
||||
bool initial_state_ = true;
|
||||
bool linear_echo_estimate_ = false;
|
||||
bool sufficient_filter_updates_ = false;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(AecState);
|
||||
};
|
||||
|
@ -189,10 +189,9 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
|
||||
|
||||
// Choose and apply the echo suppression gain.
|
||||
suppression_gain_.GetGain(
|
||||
E2, R2, cng_.NoiseSpectrum(), render_signal_analyzer_,
|
||||
aec_state_.SaturatedEcho(), x, aec_state_.ForcedZeroGain(),
|
||||
aec_state_.LinearEchoEstimate(), &high_bands_gain, &G);
|
||||
suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),
|
||||
render_signal_analyzer_, aec_state_, x,
|
||||
&high_bands_gain, &G);
|
||||
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
|
||||
high_bands_gain, y);
|
||||
|
||||
|
@ -368,7 +368,7 @@ void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
|
||||
[](float a, float b) -> bool { return a * a < b * b; }));
|
||||
|
||||
// Update the lag estimates for the matched filter.
|
||||
const float kMatchingFilterThreshold = 0.2f;
|
||||
const float kMatchingFilterThreshold = 0.1f;
|
||||
lag_estimates_[n] = LagEstimate(
|
||||
error_sum_anchor - error_sum,
|
||||
(lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) &&
|
||||
|
@ -108,54 +108,29 @@ void ResidualEchoEstimator::Estimate(
|
||||
R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
|
||||
}
|
||||
} else {
|
||||
const rtc::Optional<size_t> delay =
|
||||
aec_state.ExternalDelay()
|
||||
? (aec_state.FilterDelay() ? aec_state.FilterDelay()
|
||||
: aec_state.ExternalDelay())
|
||||
: rtc::Optional<size_t>();
|
||||
|
||||
// Estimate the echo generating signal power.
|
||||
std::array<float, kFftLengthBy2Plus1> X2;
|
||||
if (aec_state.ExternalDelay() && aec_state.FilterDelay()) {
|
||||
RTC_DCHECK(delay);
|
||||
const int delay_use = static_cast<int>(*delay);
|
||||
|
||||
// Computes the spectral power over the blocks surrounding the delay.
|
||||
constexpr int kKnownDelayRenderWindowSize = 5;
|
||||
static_assert(
|
||||
kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
|
||||
"Requirement to ensure that the render buffer is overrun");
|
||||
EchoGeneratingPower(
|
||||
render_buffer, std::max(0, delay_use - 1),
|
||||
std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
|
||||
} else {
|
||||
// Computes the spectral power over the latest blocks.
|
||||
EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
|
||||
&X2);
|
||||
}
|
||||
EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
|
||||
&X2);
|
||||
|
||||
// Subtract the stationary noise power to avoid stationary noise causing
|
||||
// excessive echo suppression.
|
||||
std::transform(
|
||||
X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
|
||||
[](float a, float b) { return std::max(0.f, a - 10.f * b); });
|
||||
if (!(aec_state.SaturatedEcho() || aec_state.SaturatingEchoPath())) {
|
||||
std::transform(
|
||||
X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
|
||||
[](float a, float b) { return std::max(0.f, a - 10.f * b); });
|
||||
}
|
||||
|
||||
NonLinearEstimate(
|
||||
aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(),
|
||||
aec_state.SufficientFilterUpdates(),
|
||||
aec_state.SaturatedEcho() && aec_state.SaturatingEchoPath(),
|
||||
config_.ep_strength.bounded_erl, aec_state.TransparentMode(),
|
||||
aec_state.InitialState(), X2, Y2, R2);
|
||||
|
||||
if (aec_state.ExternalDelay() && aec_state.FilterDelay() &&
|
||||
aec_state.SaturatedEcho()) {
|
||||
AddEchoReverb(*R2, aec_state.SaturatedEcho(),
|
||||
std::min(static_cast<size_t>(kAdaptiveFilterLength),
|
||||
delay.value_or(kAdaptiveFilterLength)),
|
||||
aec_state.ReverbDecay(), R2);
|
||||
}
|
||||
}
|
||||
|
||||
// If the echo is deemed inaudible, set the residual echo to zero.
|
||||
if (aec_state.InaudibleEcho()) {
|
||||
if (aec_state.InaudibleEcho() &&
|
||||
(!(aec_state.SaturatedEcho() || aec_state.SaturatingEchoPath()))) {
|
||||
R2->fill(0.f);
|
||||
R2_old_.fill(0.f);
|
||||
R2_hold_counter_.fill(0.f);
|
||||
@ -204,7 +179,7 @@ void ResidualEchoEstimator::NonLinearEstimate(
|
||||
// Set echo path gains.
|
||||
if (saturated_echo) {
|
||||
// If the echo could be saturated, use a very conservative gain.
|
||||
echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f;
|
||||
echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 1000.f;
|
||||
} else if (sufficient_filter_updates && !bounded_erl) {
|
||||
// If the filter should have been able to converge, and no assumption is
|
||||
// possible on the ERL, use a low gain.
|
||||
|
@ -59,12 +59,14 @@ Subtractor::~Subtractor() = default;
|
||||
|
||||
void Subtractor::HandleEchoPathChange(
|
||||
const EchoPathVariability& echo_path_variability) {
|
||||
use_shadow_filter_frequency_response_ = false;
|
||||
if (echo_path_variability.delay_change) {
|
||||
main_filter_.HandleEchoPathChange();
|
||||
shadow_filter_.HandleEchoPathChange();
|
||||
G_main_.HandleEchoPathChange();
|
||||
G_shadow_.HandleEchoPathChange();
|
||||
converged_filter_ = false;
|
||||
converged_filter_counter_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -91,16 +93,29 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
|
||||
shadow_filter_.Filter(render_buffer, &S);
|
||||
PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
|
||||
|
||||
if (!converged_filter_) {
|
||||
const auto sum_of_squares = [](float a, float b) { return a + b * b; };
|
||||
const float e2_main =
|
||||
std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
|
||||
const float e2_shadow =
|
||||
std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
|
||||
const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
|
||||
// Determine which frequency response should be used.
|
||||
const auto sum_of_squares = [](float a, float b) { return a + b * b; };
|
||||
const float e2_main =
|
||||
std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
|
||||
const float e2_shadow =
|
||||
std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
|
||||
const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
|
||||
|
||||
if (y2 > kBlockSize * 50.f * 50.f) {
|
||||
converged_filter_ = (e2_main > 0.3 * y2 || e2_shadow > 0.1 * y2);
|
||||
if (e2_main < e2_shadow && e2_main < 0.1 * y2) {
|
||||
use_shadow_filter_frequency_response_ = false;
|
||||
} else if (e2_shadow < e2_main && e2_shadow < 0.01 * y2) {
|
||||
use_shadow_filter_frequency_response_ = true;
|
||||
}
|
||||
|
||||
// Flag whether the filter has at some point converged.
|
||||
// TODO(peah): Consider using a timeout for this.
|
||||
if (!converged_filter_) {
|
||||
if (y2 > kBlockSize * 100.f * 100.f) {
|
||||
if (e2_main < 0.3 * y2) {
|
||||
converged_filter_ = (++converged_filter_counter_) > 10;
|
||||
} else {
|
||||
converged_filter_counter_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -48,6 +48,9 @@ class Subtractor {
|
||||
// Returns the block-wise frequency response for the main adaptive filter.
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
FilterFrequencyResponse() const {
|
||||
if (use_shadow_filter_frequency_response_) {
|
||||
return shadow_filter_.FilterFrequencyResponse();
|
||||
}
|
||||
return main_filter_.FilterFrequencyResponse();
|
||||
}
|
||||
|
||||
@ -68,7 +71,8 @@ class Subtractor {
|
||||
MainFilterUpdateGain G_main_;
|
||||
ShadowFilterUpdateGain G_shadow_;
|
||||
bool converged_filter_ = false;
|
||||
|
||||
size_t converged_filter_counter_ = 0;
|
||||
bool use_shadow_filter_frequency_response_ = false;
|
||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor);
|
||||
};
|
||||
|
||||
|
@ -126,7 +126,14 @@ void UpdateMaxGainIncrease(
|
||||
float min_decreasing;
|
||||
|
||||
auto& param = config.gain_updates;
|
||||
if (!linear_echo_estimate) {
|
||||
if (no_saturation_counter <= 10) {
|
||||
max_increasing = param.saturation.max_inc;
|
||||
max_decreasing = param.saturation.max_dec;
|
||||
rate_increasing = param.saturation.rate_inc;
|
||||
rate_decreasing = param.saturation.rate_dec;
|
||||
min_increasing = param.saturation.min_inc;
|
||||
min_decreasing = param.saturation.min_dec;
|
||||
} else if (!linear_echo_estimate) {
|
||||
max_increasing = param.nonlinear.max_inc;
|
||||
max_decreasing = param.nonlinear.max_dec;
|
||||
rate_increasing = param.nonlinear.rate_inc;
|
||||
@ -140,20 +147,13 @@ void UpdateMaxGainIncrease(
|
||||
rate_decreasing = param.low_noise.rate_dec;
|
||||
min_increasing = param.low_noise.min_inc;
|
||||
min_decreasing = param.low_noise.min_dec;
|
||||
} else if (no_saturation_counter > 10) {
|
||||
} else {
|
||||
max_increasing = param.normal.max_inc;
|
||||
max_decreasing = param.normal.max_dec;
|
||||
rate_increasing = param.normal.rate_inc;
|
||||
rate_decreasing = param.normal.rate_dec;
|
||||
min_increasing = param.normal.min_inc;
|
||||
min_decreasing = param.normal.min_dec;
|
||||
} else {
|
||||
max_increasing = param.saturation.max_inc;
|
||||
max_decreasing = param.saturation.max_dec;
|
||||
rate_increasing = param.saturation.rate_inc;
|
||||
rate_decreasing = param.saturation.rate_dec;
|
||||
min_increasing = param.saturation.min_inc;
|
||||
min_decreasing = param.saturation.min_dec;
|
||||
}
|
||||
|
||||
for (size_t k = 0; k < new_gain.size(); ++k) {
|
||||
@ -176,6 +176,7 @@ void GainToNoAudibleEcho(
|
||||
const EchoCanceller3Config& config,
|
||||
bool low_noise_render,
|
||||
bool saturated_echo,
|
||||
bool saturating_echo_path,
|
||||
bool linear_echo_estimate,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend,
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo,
|
||||
@ -185,21 +186,29 @@ void GainToNoAudibleEcho(
|
||||
const std::array<float, kFftLengthBy2Plus1>& one_by_echo,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain) {
|
||||
float nearend_masking_margin = 0.f;
|
||||
if (linear_echo_estimate) {
|
||||
nearend_masking_margin =
|
||||
low_noise_render
|
||||
? config.gain_mask.m9
|
||||
: (saturated_echo ? config.gain_mask.m2 : config.gain_mask.m3);
|
||||
if (saturated_echo) {
|
||||
nearend_masking_margin = config.gain_mask.m2;
|
||||
} else {
|
||||
nearend_masking_margin = config.gain_mask.m7;
|
||||
if (linear_echo_estimate) {
|
||||
nearend_masking_margin =
|
||||
low_noise_render ? config.gain_mask.m9 : config.gain_mask.m3;
|
||||
} else {
|
||||
nearend_masking_margin = config.gain_mask.m7;
|
||||
}
|
||||
}
|
||||
|
||||
RTC_DCHECK_LE(0.f, nearend_masking_margin);
|
||||
RTC_DCHECK_GT(1.f, nearend_masking_margin);
|
||||
const float one_by_one_minus_nearend_masking_margin =
|
||||
1.f / (1.0f - nearend_masking_margin);
|
||||
|
||||
const float masker_margin =
|
||||
linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8;
|
||||
float masker_margin;
|
||||
if (saturated_echo || saturating_echo_path) {
|
||||
masker_margin = 0.0001f;
|
||||
} else {
|
||||
masker_margin =
|
||||
linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8;
|
||||
}
|
||||
|
||||
for (size_t k = 0; k < gain->size(); ++k) {
|
||||
const float unity_gain_masker = std::max(nearend[k], masker[k]);
|
||||
@ -276,6 +285,7 @@ void SuppressionGain::LowerBandGain(
|
||||
bool low_noise_render,
|
||||
const rtc::Optional<int>& narrow_peak_band,
|
||||
bool saturated_echo,
|
||||
bool saturating_echo_path,
|
||||
bool linear_echo_estimate,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend,
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo,
|
||||
@ -296,7 +306,7 @@ void SuppressionGain::LowerBandGain(
|
||||
const float min_echo_power =
|
||||
low_noise_render ? config_.echo_audibility.low_render_limit
|
||||
: config_.echo_audibility.normal_render_limit;
|
||||
if (no_saturation_counter_ > 10) {
|
||||
if (!saturating_echo_path) {
|
||||
for (size_t k = 0; k < nearend.size(); ++k) {
|
||||
const float denom = std::min(nearend[k], echo[k]);
|
||||
min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f;
|
||||
@ -309,10 +319,12 @@ void SuppressionGain::LowerBandGain(
|
||||
// Compute the maximum gain by limiting the gain increase from the previous
|
||||
// gain.
|
||||
std::array<float, kFftLengthBy2Plus1> max_gain;
|
||||
const float first_increase = saturated_echo || saturating_echo_path
|
||||
? 0.00001f
|
||||
: config_.gain_updates.floor_first_increase;
|
||||
for (size_t k = 0; k < gain->size(); ++k) {
|
||||
max_gain[k] = std::min(std::max(last_gain_[k] * gain_increase_[k],
|
||||
config_.gain_updates.floor_first_increase),
|
||||
1.f);
|
||||
max_gain[k] = std::min(
|
||||
std::max(last_gain_[k] * gain_increase_[k], first_increase), 1.f);
|
||||
}
|
||||
|
||||
// Iteratively compute the gain required to attenuate the echo to a non
|
||||
@ -321,9 +333,9 @@ void SuppressionGain::LowerBandGain(
|
||||
for (int k = 0; k < 2; ++k) {
|
||||
std::array<float, kFftLengthBy2Plus1> masker;
|
||||
MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker);
|
||||
GainToNoAudibleEcho(config_, low_noise_render, saturated_echo,
|
||||
linear_echo_estimate, nearend, echo, masker, min_gain,
|
||||
max_gain, one_by_echo, gain);
|
||||
GainToNoAudibleEcho(config_, low_noise_render, no_saturation_counter_ > 10,
|
||||
saturating_echo_path, linear_echo_estimate, nearend,
|
||||
echo, masker, min_gain, max_gain, one_by_echo, gain);
|
||||
AdjustForExternalFilters(gain);
|
||||
if (narrow_peak_band) {
|
||||
NarrowBandAttenuation(*narrow_peak_band, gain);
|
||||
@ -366,15 +378,18 @@ void SuppressionGain::GetGain(
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo,
|
||||
const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
|
||||
const RenderSignalAnalyzer& render_signal_analyzer,
|
||||
bool saturated_echo,
|
||||
const AecState& aec_state,
|
||||
const std::vector<std::vector<float>>& render,
|
||||
bool force_zero_gain,
|
||||
bool linear_echo_estimate,
|
||||
float* high_bands_gain,
|
||||
std::array<float, kFftLengthBy2Plus1>* low_band_gain) {
|
||||
RTC_DCHECK(high_bands_gain);
|
||||
RTC_DCHECK(low_band_gain);
|
||||
|
||||
const bool saturated_echo = aec_state.SaturatedEcho();
|
||||
const bool saturating_echo_path = aec_state.SaturatingEchoPath();
|
||||
const bool force_zero_gain = aec_state.ForcedZeroGain();
|
||||
const bool linear_echo_estimate = aec_state.LinearEchoEstimate();
|
||||
|
||||
if (force_zero_gain) {
|
||||
last_gain_.fill(0.f);
|
||||
std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin());
|
||||
@ -390,8 +405,8 @@ void SuppressionGain::GetGain(
|
||||
const rtc::Optional<int> narrow_peak_band =
|
||||
render_signal_analyzer.NarrowPeakBand();
|
||||
LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo,
|
||||
linear_echo_estimate, nearend, echo, comfort_noise,
|
||||
low_band_gain);
|
||||
saturating_echo_path, linear_echo_estimate, nearend, echo,
|
||||
comfort_noise, low_band_gain);
|
||||
|
||||
// Compute the gain for the upper bands.
|
||||
*high_bands_gain =
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
#include "rtc_base/constructormagic.h"
|
||||
@ -29,10 +30,8 @@ class SuppressionGain {
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo,
|
||||
const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
|
||||
const RenderSignalAnalyzer& render_signal_analyzer,
|
||||
bool saturated_echo,
|
||||
const AecState& aec_state,
|
||||
const std::vector<std::vector<float>>& render,
|
||||
bool force_zero_gain,
|
||||
bool linear_echo_estimate,
|
||||
float* high_bands_gain,
|
||||
std::array<float, kFftLengthBy2Plus1>* low_band_gain);
|
||||
|
||||
@ -40,6 +39,7 @@ class SuppressionGain {
|
||||
void LowerBandGain(bool stationary_with_low_power,
|
||||
const rtc::Optional<int>& narrow_peak_band,
|
||||
bool saturated_echo,
|
||||
bool saturating_echo_path,
|
||||
bool linear_echo_estimate,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend,
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo,
|
||||
|
@ -10,6 +10,10 @@
|
||||
|
||||
#include "modules/audio_processing/aec3/suppression_gain.h"
|
||||
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/subtractor.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/cpu_features_wrapper.h"
|
||||
#include "test/gtest.h"
|
||||
@ -29,11 +33,12 @@ TEST(SuppressionGain, NullOutputGains) {
|
||||
R2.fill(0.f);
|
||||
N2.fill(0.f);
|
||||
float high_bands_gain;
|
||||
AecState aec_state(EchoCanceller3Config{});
|
||||
EXPECT_DEATH(SuppressionGain(EchoCanceller3Config{}, DetectOptimization())
|
||||
.GetGain(E2, R2, N2, RenderSignalAnalyzer(), false,
|
||||
.GetGain(E2, R2, N2, RenderSignalAnalyzer(), aec_state,
|
||||
std::vector<std::vector<float>>(
|
||||
3, std::vector<float>(kBlockSize, 0.f)),
|
||||
false, true, &high_bands_gain, nullptr),
|
||||
&high_bands_gain, nullptr),
|
||||
"");
|
||||
}
|
||||
|
||||
@ -46,17 +51,53 @@ TEST(SuppressionGain, BasicGainComputation) {
|
||||
RenderSignalAnalyzer analyzer;
|
||||
float high_bands_gain;
|
||||
std::array<float, kFftLengthBy2Plus1> E2;
|
||||
std::array<float, kFftLengthBy2Plus1> Y2;
|
||||
std::array<float, kFftLengthBy2Plus1> R2;
|
||||
std::array<float, kFftLengthBy2Plus1> N2;
|
||||
std::array<float, kFftLengthBy2Plus1> g;
|
||||
std::array<float, kBlockSize> s;
|
||||
std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f));
|
||||
AecState aec_state(EchoCanceller3Config{});
|
||||
ApmDataDumper data_dumper(42);
|
||||
Subtractor subtractor(&data_dumper, DetectOptimization());
|
||||
RenderBuffer render_buffer(
|
||||
DetectOptimization(), 1,
|
||||
std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength),
|
||||
std::vector<size_t>(1, kAdaptiveFilterLength));
|
||||
|
||||
// Verify the functionality for forcing a zero gain.
|
||||
E2.fill(1000000000.f);
|
||||
R2.fill(10000000000000.f);
|
||||
N2.fill(0.f);
|
||||
s.fill(10.f);
|
||||
aec_state.Update(subtractor.FilterFrequencyResponse(),
|
||||
subtractor.FilterImpulseResponse(),
|
||||
subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
|
||||
render_buffer, E2, Y2, x[0], s, false);
|
||||
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain,
|
||||
&g);
|
||||
std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
|
||||
EXPECT_FLOAT_EQ(0.f, high_bands_gain);
|
||||
|
||||
// Ensure that a strong noise is detected to mask any echoes.
|
||||
E2.fill(10.f);
|
||||
Y2.fill(10.f);
|
||||
R2.fill(0.1f);
|
||||
N2.fill(100.f);
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
|
||||
// Ensure that the gain is no longer forced to zero.
|
||||
for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
|
||||
aec_state.Update(subtractor.FilterFrequencyResponse(),
|
||||
subtractor.FilterImpulseResponse(),
|
||||
subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
|
||||
render_buffer, E2, Y2, x[0], s, false);
|
||||
}
|
||||
|
||||
for (int k = 0; k < 100; ++k) {
|
||||
aec_state.Update(subtractor.FilterFrequencyResponse(),
|
||||
subtractor.FilterImpulseResponse(),
|
||||
subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
|
||||
render_buffer, E2, Y2, x[0], s, false);
|
||||
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
|
||||
&high_bands_gain, &g);
|
||||
}
|
||||
std::for_each(g.begin(), g.end(),
|
||||
@ -64,10 +105,15 @@ TEST(SuppressionGain, BasicGainComputation) {
|
||||
|
||||
// Ensure that a strong nearend is detected to mask any echoes.
|
||||
E2.fill(100.f);
|
||||
Y2.fill(100.f);
|
||||
R2.fill(0.1f);
|
||||
N2.fill(0.f);
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
|
||||
for (int k = 0; k < 100; ++k) {
|
||||
aec_state.Update(subtractor.FilterFrequencyResponse(),
|
||||
subtractor.FilterImpulseResponse(),
|
||||
subtractor.ConvergedFilter(), rtc::Optional<size_t>(10),
|
||||
render_buffer, E2, Y2, x[0], s, false);
|
||||
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
|
||||
&high_bands_gain, &g);
|
||||
}
|
||||
std::for_each(g.begin(), g.end(),
|
||||
@ -78,17 +124,12 @@ TEST(SuppressionGain, BasicGainComputation) {
|
||||
R2.fill(10000000000000.f);
|
||||
N2.fill(0.f);
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
|
||||
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x,
|
||||
&high_bands_gain, &g);
|
||||
}
|
||||
std::for_each(g.begin(), g.end(),
|
||||
[](float a) { EXPECT_NEAR(0.f, a, 0.001); });
|
||||
|
||||
// Verify the functionality for forcing a zero gain.
|
||||
suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, true, true,
|
||||
&high_bands_gain, &g);
|
||||
std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
|
||||
EXPECT_FLOAT_EQ(0.f, high_bands_gain);
|
||||
}
|
||||
|
||||
} // namespace aec3
|
||||
|
@ -1188,7 +1188,7 @@ struct EchoCanceller3Config {
|
||||
|
||||
GainChanges low_noise = {3.f, 3.f, 1.5f, 1.5f, 1.5f, 1.5f};
|
||||
GainChanges normal = {2.f, 2.f, 1.5f, 1.5f, 1.2f, 1.2f};
|
||||
GainChanges saturation = {1.2f, 1.2f, 1.5f, 1.5f, 1.f, 1.f};
|
||||
GainChanges saturation = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f};
|
||||
GainChanges nonlinear = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f};
|
||||
|
||||
float floor_first_increase = 0.0001f;
|
||||
|
Reference in New Issue
Block a user