Safe behavior of the initial echo removal in AEC3

This CL adds functionality to allow removal of any echo occurring
before the render and capture signals have been properly aligned.
The functionality is added in such a manner that the transparency
to nearend is maintained as much as possible.


Bug: webrtc:8883
Change-Id: I813cbbc4c48822e7dffcd9ab6233be4c222089de
Reviewed-on: https://webrtc-review.googlesource.com/49941
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22107}
This commit is contained in:
Per Åhgren
2018-02-20 22:18:27 +01:00
committed by Commit Bot
parent e4bf600cad
commit b6b00dc180
10 changed files with 117 additions and 36 deletions

View File

@ -38,6 +38,7 @@ rtc_source_set("audio_mixer_api") {
# Source set for the EchoCanceller3 configuration struct: the header plus the
# .cc file that holds its out-of-line definitions. Visible to every target.
rtc_source_set("aec3_config") {
visibility = [ "*" ]
sources = [
"echo_canceller3_config.cc",
"echo_canceller3_config.h",
]
}

View File

@ -0,0 +1,16 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio/echo_canceller3_config.h"
namespace webrtc {
// Out-of-line definition of the constructor declared in
// echo_canceller3_config.h. It is defaulted, so every member keeps the value
// given by its in-class initializer.
// NOTE(review): presumably defined here rather than in the header to avoid
// emitting the member initialization at every construction site - confirm.
EchoCanceller3Config::EchoCanceller3Config() = default;
} // namespace webrtc

View File

@ -17,6 +17,8 @@ namespace webrtc {
// Configuration struct for EchoCanceller3
struct EchoCanceller3Config {
EchoCanceller3Config();
struct Delay {
size_t default_delay = 5;
size_t down_sampling_factor = 4;
@ -105,6 +107,14 @@ struct EchoCanceller3Config {
float floor_first_increase = 0.00001f;
} gain_updates;
// Settings for how the upper limit on the echo suppression gain is released
// once render activity has first been observed (used by
// AecState::UpdateSuppressorGainLimit).
struct EchoRemovalControl {
// Ramp-up schedule. The *_blocks values are compared against a counter that
// is decremented by one on each update until it reaches zero.
struct GainRampup {
// Gain limit applied on the update where the limit first leaves zero.
float first_non_zero_gain = 0.001f;
// Counter value at which the limit switches from zero to
// first_non_zero_gain. While the counter is below this value the limit is
// increased multiplicatively on each update (capped at 1), and it is set to
// 1 once the counter reaches zero.
int non_zero_gain_blocks = 187;
// Value the counter is loaded with when render activity is first seen.
// While the counter is above non_zero_gain_blocks the limit is held at zero.
int full_gain_blocks = 312;
} gain_rampup;
} echo_removal_control;
};
} // namespace webrtc

View File

@ -47,6 +47,11 @@ int EstimateFilterDelay(
std::max_element(delays.begin(), delays.end()));
}
// Returns the per-block multiplicative factor g that satisfies
// first_non_zero_gain * g^non_zero_gain_blocks == 1, i.e. the step that takes
// the gain limit from its first non-zero value up to unity over the configured
// number of blocks.
float ComputeGainRampupIncrease(const EchoCanceller3Config& config) {
  const auto& rampup = config.echo_removal_control.gain_rampup;
  // Total factor the limit has to grow by during the ramp.
  const float total_increase = 1.f / rampup.first_non_zero_gain;
  // Spread that growth evenly (geometrically) over the ramp-up blocks.
  const float one_by_num_blocks = 1.f / rampup.non_zero_gain_blocks;
  return powf(total_increase, one_by_num_blocks);
}
} // namespace
int AecState::instance_count_ = 0;
@ -57,7 +62,8 @@ AecState::AecState(const EchoCanceller3Config& config)
erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h),
config_(config),
max_render_(config_.filter.main.length_blocks, 0.f),
reverb_decay_(config_.ep_strength.default_len) {}
reverb_decay_(config_.ep_strength.default_len),
gain_rampup_increase_(ComputeGainRampupIncrease(config_)) {}
AecState::~AecState() = default;
@ -71,12 +77,10 @@ void AecState::HandleEchoPathChange(
echo_saturation_ = false;
previous_max_sample_ = 0.f;
std::fill(max_render_.begin(), max_render_.end(), 0.f);
force_zero_gain_counter_ = 0;
blocks_with_proper_filter_adaptation_ = 0;
capture_block_counter_ = 0;
filter_has_had_time_to_converge_ = false;
render_received_ = false;
force_zero_gain_ = true;
blocks_with_active_render_ = 0;
initial_state_ = true;
};
@ -92,8 +96,8 @@ void AecState::HandleEchoPathChange(
full_reset();
} else if (echo_path_variability.delay_change !=
EchoPathVariability::DelayAdjustment::kBufferFlush) {
active_render_seen_ = false;
full_reset();
} else if (echo_path_variability.delay_change !=
EchoPathVariability::DelayAdjustment::kDelayReset) {
full_reset();
@ -129,11 +133,9 @@ void AecState::Update(
blocks_with_proper_filter_adaptation_ +=
active_render_block && !SaturatedCapture() ? 1 : 0;
// Force zero echo suppression gain after an echo path change to allow at
// least some render data to be collected in order to avoid an initial echo
// burst.
force_zero_gain_ = ++force_zero_gain_counter_ < kNumBlocksPerSecond / 5;
// Update the limit on the echo suppression after an echo path change to avoid
// an initial echo burst.
UpdateSuppressorGainLimit(render_buffer.GetRenderActivity());
// Update the ERL and ERLE measures.
if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) {
@ -264,6 +266,37 @@ bool AecState::DetectActiveRender(rtc::ArrayView<const float> x) const {
kFftLengthBy2;
}
// Updates the upper limit on the suppressor gain.
//
// The first time render activity is reported, a countdown is started from
// gain_rampup.full_gain_blocks; on every other call the countdown is advanced
// by one (never below zero). The limit is then chosen from the countdown:
//   * countdown finished (<= 0):            limit is 1 (no restriction).
//   * countdown above non_zero_gain_blocks:  limit is 0.
//   * countdown equal to non_zero_gain_blocks: limit is first_non_zero_gain.
//   * otherwise: the current limit is multiplied by gain_rampup_increase_ and
//     capped at 1.
void AecState::UpdateSuppressorGainLimit(bool render_activity) {
  const auto& rampup_conf = config_.echo_removal_control.gain_rampup;

  // Start the countdown on the first observed render activity, otherwise let
  // an ongoing countdown progress.
  const bool first_render_activity = render_activity && !active_render_seen_;
  if (first_render_activity) {
    active_render_seen_ = true;
    realignment_counter_ = rampup_conf.full_gain_blocks;
  } else if (realignment_counter_ > 0) {
    --realignment_counter_;
  }

  if (realignment_counter_ <= 0) {
    // No realignment in progress: do not restrict the gain.
    suppressor_gain_limit_ = 1.f;
  } else if (realignment_counter_ > rampup_conf.non_zero_gain_blocks) {
    // Early part of the countdown: force the gain to zero.
    suppressor_gain_limit_ = 0.f;
  } else if (realignment_counter_ == rampup_conf.non_zero_gain_blocks) {
    // Start of the ramp: seed the limit with its first non-zero value.
    suppressor_gain_limit_ = rampup_conf.first_non_zero_gain;
  } else {
    // Inside the ramp: grow the limit geometrically towards 1.
    RTC_DCHECK_LT(0.f, suppressor_gain_limit_);
    const float increased_limit =
        suppressor_gain_limit_ * gain_rampup_increase_;
    suppressor_gain_limit_ = std::min(1.f, increased_limit);
    RTC_DCHECK_GE(1.f, suppressor_gain_limit_);
  }
}
bool AecState::DetectEchoSaturation(rtc::ArrayView<const float> x) {
RTC_DCHECK_LT(0, x.size());
const float max_sample = fabs(*std::max_element(

View File

@ -87,8 +87,8 @@ class AecState {
// Returns the decay factor for the echo reverberation.
float ReverbDecay() const { return reverb_decay_; }
// Returns whether the echo suppression gain should be forced to zero.
bool ForcedZeroGain() const { return force_zero_gain_; }
// Returns the upper limit for the echo suppression gain.
float SuppressionGainLimit() const { return suppressor_gain_limit_; }
// Returns whether the echo in the capture signal is audible.
bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); }
@ -135,6 +135,7 @@ class AecState {
void UpdateReverb(const std::vector<float>& impulse_response);
bool DetectActiveRender(rtc::ArrayView<const float> x) const;
void UpdateSuppressorGainLimit(bool render_activity);
bool DetectEchoSaturation(rtc::ArrayView<const float> x);
static int instance_count_;
@ -150,9 +151,10 @@ class AecState {
bool echo_saturation_ = false;
bool transparent_mode_ = false;
float previous_max_sample_ = 0.f;
bool force_zero_gain_ = false;
bool render_received_ = false;
size_t force_zero_gain_counter_ = 0;
int realignment_counter_ = 0;
float suppressor_gain_limit_ = 1.f;
bool active_render_seen_ = false;
int filter_delay_ = 0;
size_t blocks_since_last_saturation_ = 1000;
float reverb_decay_to_test_ = 0.9f;
@ -165,6 +167,7 @@ class AecState {
bool saturating_echo_path_ = false;
bool filter_has_had_time_to_converge_ = false;
bool initial_state_ = true;
const float gain_rampup_increase_;
RTC_DISALLOW_COPY_AND_ASSIGN(AecState);
};

View File

@ -61,10 +61,17 @@ class RenderBuffer {
void SpectralSum(size_t num_spectra,
std::array<float, kFftLengthBy2Plus1>* X2) const;
// Gets the recent activity seen in the render signal.
bool GetRenderActivity() const { return render_activity_; }
// Specifies the recent activity seen in the render signal.
void SetRenderActivity(bool activity) { render_activity_ = activity; }
private:
const MatrixBuffer* const block_buffer_;
const VectorBuffer* const spectrum_buffer_;
const FftBuffer* const fft_buffer_;
bool render_activity_ = false;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderBuffer);
};

View File

@ -12,6 +12,7 @@
#include <string.h>
#include <algorithm>
#include <numeric>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec3_fft.h"
@ -72,12 +73,15 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer {
int max_observed_jitter_ = 1;
size_t capture_call_counter_ = 0;
size_t render_call_counter_ = 0;
bool render_activity_ = false;
size_t render_activity_counter_ = 0;
int LowRateBufferOffset() const { return DelayEstimatorOffset(config_) >> 1; }
int MaxExternalDelayToInternalDelay(size_t delay) const;
void ApplyDelay(int delay);
void InsertBlock(const std::vector<std::vector<float>>& block,
int previous_write);
bool DetectActiveRender(rtc::ArrayView<const float> x) const;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderDelayBufferImpl);
};
@ -230,6 +234,12 @@ RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
? BufferingEvent::kRenderOverrun
: BufferingEvent::kNone;
// Detect and update render activity.
if (!render_activity_) {
render_activity_counter_ += DetectActiveRender(block[0]) ? 1 : 0;
render_activity_ = render_activity_counter_ >= 20;
}
// Insert the new render block into the specified position.
InsertBlock(block, previous_write);
@ -283,6 +293,12 @@ RenderDelayBufferImpl::PrepareCaptureProcessing() {
Reset();
}
echo_remover_buffer_.SetRenderActivity(render_activity_);
if (render_activity_) {
render_activity_counter_ = 0;
render_activity_ = false;
}
return event;
}
@ -353,6 +369,14 @@ void RenderDelayBufferImpl::InsertBlock(
f.buffer[f.write].Spectrum(optimization_, s.buffer[s.write]);
}
// Returns true when the energy of |x| (the sum of its squared samples) is
// strictly greater than active_render_limit^2 * kFftLengthBy2.
bool RenderDelayBufferImpl::DetectActiveRender(
    rtc::ArrayView<const float> x) const {
  const float limit = config_.render_levels.active_render_limit;
  // Squared per-sample limit scaled by kFftLengthBy2 to form an energy
  // threshold.
  const float energy_threshold = (limit * limit) * kFftLengthBy2;
  const float signal_energy =
      std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
  return signal_energy > energy_threshold;
}
} // namespace
int RenderDelayBuffer::RenderDelayBuffer::DelayEstimatorOffset(

View File

@ -387,17 +387,9 @@ void SuppressionGain::GetGain(
const bool saturated_echo = aec_state.SaturatedEcho();
const bool saturating_echo_path = aec_state.SaturatingEchoPath();
const bool force_zero_gain = aec_state.ForcedZeroGain();
const float gain_upper_bound = aec_state.SuppressionGainLimit();
const bool linear_echo_estimate = aec_state.UsableLinearEstimate();
const bool initial_state = aec_state.InitialState();
if (force_zero_gain) {
last_gain_.fill(0.f);
std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin());
low_band_gain->fill(0.f);
gain_increase_.fill(1.f);
*high_bands_gain = 0.f;
return;
}
bool low_noise_render = low_render_detector_.Detect(render);
@ -408,6 +400,12 @@ void SuppressionGain::GetGain(
saturating_echo_path, initial_state, linear_echo_estimate,
nearend, echo, comfort_noise, low_band_gain);
if (gain_upper_bound < 1.f) {
for (size_t k = 0; k < low_band_gain->size(); ++k) {
(*low_band_gain)[k] = std::min((*low_band_gain)[k], gain_upper_bound);
}
}
// Compute the gain for the upper bands.
*high_bands_gain =
UpperBandsGain(narrow_peak_band, saturated_echo, render, *low_band_gain);

View File

@ -64,25 +64,13 @@ TEST(SuppressionGain, BasicGainComputation) {
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(config, 3));
// Verify the functionality for forcing a zero gain.
E2.fill(1000000000.f);
R2.fill(10000000000000.f);
N2.fill(0.f);
s.fill(10.f);
aec_state.Update(subtractor.FilterFrequencyResponse(),
subtractor.FilterImpulseResponse(),
subtractor.ConvergedFilter(),
*render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false);
suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain,
&g);
std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
EXPECT_FLOAT_EQ(0.f, high_bands_gain);
// Ensure that a strong noise is detected to mask any echoes.
E2.fill(10.f);
Y2.fill(10.f);
R2.fill(0.1f);
N2.fill(100.f);
s.fill(10.f);
// Ensure that the gain is no longer forced to zero.
for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
aec_state.Update(subtractor.FilterFrequencyResponse(),

View File

@ -32,4 +32,5 @@ Beamforming::Beamforming(bool enabled,
target_direction(target_direction) {}
Beamforming::~Beamforming() {}
} // namespace webrtc