AEC3: Add state-specific suppressor behaviors

This CL allows selecting an echo suppressor behavior that is specific to
whether the nearend or the echo is dominant.

The changes in this CL are bitexact.

Bug: webrtc:9660
Change-Id: Ie32e65efe47e692de6d6a22a7ad3b469d745fd6b
Reviewed-on: https://webrtc-review.googlesource.com/95725
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24434}
This commit is contained in:
Per Åhgren
2018-08-24 22:48:49 +02:00
committed by Commit Bot
parent 90ab76dd19
commit 524e878121
5 changed files with 225 additions and 54 deletions

View File

@ -25,4 +25,30 @@ EchoCanceller3Config::EchoModel::EchoModel() = default;
// Copy constructor for EchoModel. It is explicitly defaulted at this
// out-of-class definition, so the compiler generates a member-wise copy.
EchoCanceller3Config::EchoModel::EchoModel(
const EchoCanceller3Config::EchoModel& e) = default;
// Default constructor for Suppressor, explicitly defaulted out of line. The
// members take the default member initializers written in the struct
// definition.
EchoCanceller3Config::Suppressor::Suppressor() = default;
// Copy constructor for Suppressor. It is explicitly defaulted at this
// out-of-class definition, so the compiler generates a member-wise copy.
EchoCanceller3Config::Suppressor::Suppressor(
const EchoCanceller3Config::Suppressor& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
float enr_transparent,
float enr_suppress,
float emr_transparent)
: enr_transparent(enr_transparent),
enr_suppress(enr_suppress),
emr_transparent(emr_transparent) {}
// Copy constructor for MaskingThresholds, explicitly defaulted out of line so
// the compiler generates a member-wise copy of the three thresholds.
//
// The class is named through its plain qualified path
// (EchoCanceller3Config::Suppressor::MaskingThresholds). Repeating
// `Suppressor::` would only resolve through the injected-class-name and names
// the very same class, so the extra qualifier is left out.
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
    const EchoCanceller3Config::Suppressor::MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::Tuning::Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf)
: mask_lf(mask_lf),
mask_hf(mask_hf),
max_inc_factor(max_inc_factor),
max_dec_factor_lf(max_dec_factor_lf) {}
// Copy constructor for Tuning. It is explicitly defaulted at this
// out-of-class definition, so the compiler generates a member-wise copy.
EchoCanceller3Config::Suppressor::Tuning::Tuning(
const EchoCanceller3Config::Suppressor::Tuning& e) = default;
} // namespace webrtc

View File

@ -112,12 +112,6 @@ struct EchoCanceller3Config {
float poor_excitation_render_limit_ds8 = 20.f;
} render_levels;
// Limits on how fast the suppression gain may change between updates.
struct GainUpdates {
// Factor applied to the previous gain to obtain the upper bound for the new
// gain.
float max_inc_factor = 2.0f;
// Factor applied to the previous gain to obtain a lower bound for the new
// gain in the lower-band gain computation.
float max_dec_factor_lf = 0.25f;
// Smallest value the upper gain bound may take before it is capped at 1, so
// that a gain of zero can still start to increase.
float floor_first_increase = 0.00001f;
} gain_updates;
struct EchoRemovalControl {
struct GainRampup {
float initial_gain = 0.0f;
@ -146,15 +140,50 @@ struct EchoCanceller3Config {
} echo_model;
// Configuration of the echo suppressor.
struct Suppressor {
Suppressor();
Suppressor(const Suppressor& e);
// Passed to the moving average that is applied to the nearend spectrum.
// NOTE(review): presumably the number of blocks averaged - confirm against
// aec3::MovingAverage.
size_t nearend_average_blocks = 4;
// Thresholds on the echo-to-nearend ratio (enr) and the echo-to-masker ratio
// (emr) that control when, and how strongly, the suppressor attenuates.
struct MaskingThresholds {
MaskingThresholds(float enr_transparent,
float enr_suppress,
float emr_transparent);
MaskingThresholds(const MaskingThresholds& e);
// Attenuation is only considered when the enr exceeds this value.
float enr_transparent;
// Upper end of the enr range over which the gain is ramped down; expected
// to be larger than enr_transparent.
float enr_suppress;
// Attenuation is only considered when the emr exceeds this value.
float emr_transparent;
};
// Masking thresholds for the low-frequency (lf) and high-frequency (hf)
// bands.
MaskingThresholds mask_lf = {.2f, .3f, .3f};
MaskingThresholds mask_hf = {.07f, .1f, .3f};
// A complete set of suppressor parameters for one suppressor state.
struct Tuning {
Tuning(MaskingThresholds mask_lf,
MaskingThresholds mask_hf,
float max_inc_factor,
float max_dec_factor_lf);
Tuning(const Tuning& e);
// Masking thresholds for the low-frequency and high-frequency bands.
MaskingThresholds mask_lf;
MaskingThresholds mask_hf;
// Factor applied to the previous gain to bound the new gain from above.
float max_inc_factor;
// Factor applied to the previous gain to bound the new lower-band gain
// from below.
float max_dec_factor_lf;
};
// Parameters used when the nearend is not flagged as dominant.
Tuning normal_tuning = Tuning(MaskingThresholds(.2f, .3f, .3f),
MaskingThresholds(.07f, .1f, .3f),
2.0f,
0.25f);
// Parameters used while the nearend is flagged as dominant. The defaults are
// identical to normal_tuning.
Tuning nearend_tuning = Tuning(MaskingThresholds(.2f, .3f, .3f),
MaskingThresholds(.07f, .1f, .3f),
2.0f,
0.25f);
// Parameters of the detector that decides whether the nearend is dominant.
struct DominantNearendDetection {
// The low-frequency nearend energy must exceed this factor times the
// low-frequency echo energy for an update to count as strong nearend.
float enr_threshold = 10.f;
// The low-frequency nearend energy must also exceed this factor times the
// low-frequency comfort-noise energy.
float snr_threshold = 10.f;
// Value the hold counter is reloaded with once the nearend state is
// triggered; the counter is decremented on every detector update.
int hold_duration = 25;
// Value the trigger counter has to reach before the nearend state is
// flagged.
int trigger_threshold = 15;
} dominant_nearend_detection;
// Smallest value the upper gain bound may take before it is capped at 1.
float floor_first_increase = 0.00001f;
// NOTE(review): the uses of the two flags below are not visible in this
// change; confirm their semantics at the point of use.
bool enforce_transparent = false;
bool enforce_empty_higher_bands = false;
} suppressor;

View File

@ -1,4 +1,3 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
@ -227,13 +226,16 @@ void SuppressionGain::GainToNoAudibleEcho(
const std::array<float, kFftLengthBy2Plus1>& min_gain,
const std::array<float, kFftLengthBy2Plus1>& max_gain,
std::array<float, kFftLengthBy2Plus1>* gain) const {
const auto& p = dominant_nearend_detector_.IsNearendState() ? nearend_params_
: normal_params_;
for (size_t k = 0; k < gain->size(); ++k) {
float enr = echo[k] / (nearend[k] + 1.f); // Echo-to-nearend ratio.
float emr = echo[k] / (masker[k] + 1.f); // Echo-to-masker (noise) ratio.
float g = 1.0f;
if (enr > enr_transparent_[k] && emr > emr_transparent_[k]) {
g = (enr_suppress_[k] - enr) / (enr_suppress_[k] - enr_transparent_[k]);
g = std::max(g, emr_transparent_[k] / emr);
if (enr > p.enr_transparent_[k] && emr > p.emr_transparent_[k]) {
g = (p.enr_suppress_[k] - enr) /
(p.enr_suppress_[k] - p.enr_transparent_[k]);
g = std::max(g, p.emr_transparent_[k] / emr);
}
(*gain)[k] = std::max(std::min(g, max_gain[k]), min_gain[k]);
}
@ -249,6 +251,9 @@ void SuppressionGain::LowerBandGain(
std::array<float, kFftLengthBy2Plus1>* gain) {
const bool saturated_echo = aec_state.SaturatedEcho();
const bool linear_echo_estimate = aec_state.UsableLinearEstimate();
const auto& params = dominant_nearend_detector_.IsNearendState()
? nearend_params_
: normal_params_;
// Weight echo power in terms of audibility. // Precompute 1/weighted echo
// (note that when the echo is zero, the precomputed value is never used).
@ -273,8 +278,7 @@ void SuppressionGain::LowerBandGain(
// quickly after strong nearend.
if (last_nearend_[k] > last_echo_[k]) {
min_gain[k] =
std::max(min_gain[k],
last_gain_[k] * config_.gain_updates.max_dec_factor_lf);
std::max(min_gain[k], last_gain_[k] * params.max_dec_factor_lf);
min_gain[k] = std::min(min_gain[k], 1.f);
}
}
@ -286,10 +290,9 @@ void SuppressionGain::LowerBandGain(
// gain.
std::array<float, kFftLengthBy2Plus1> max_gain;
for (size_t k = 0; k < gain->size(); ++k) {
max_gain[k] =
std::min(std::max(last_gain_[k] * config_.gain_updates.max_inc_factor,
config_.gain_updates.floor_first_increase),
1.f);
max_gain[k] = std::min(std::max(last_gain_[k] * params.max_inc_factor,
config_.suppressor.floor_first_increase),
1.f);
}
// Iteratively compute the gain required to attenuate the echo to a non
@ -337,34 +340,16 @@ SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
static_cast<int>(config_.filter.config_change_duration_blocks)),
enable_new_suppression_(EnableNewSuppression()),
moving_average_(kFftLengthBy2Plus1,
config.suppressor.nearend_average_blocks) {
config.suppressor.nearend_average_blocks),
nearend_params_(config_.suppressor.nearend_tuning),
normal_params_(config_.suppressor.normal_tuning),
dominant_nearend_detector_(
config_.suppressor.dominant_nearend_detection) {
RTC_DCHECK_LT(0, state_change_duration_blocks_);
one_by_state_change_duration_blocks_ = 1.f / state_change_duration_blocks_;
last_gain_.fill(1.f);
last_nearend_.fill(0.f);
last_echo_.fill(0.f);
// Compute per-band masking thresholds.
constexpr size_t kLastLfBand = 5;
constexpr size_t kFirstHfBand = 8;
RTC_DCHECK_LT(kLastLfBand, kFirstHfBand);
auto& lf = config.suppressor.mask_lf;
auto& hf = config.suppressor.mask_hf;
RTC_DCHECK_LT(lf.enr_transparent, lf.enr_suppress);
RTC_DCHECK_LT(hf.enr_transparent, hf.enr_suppress);
for (size_t k = 0; k < kFftLengthBy2Plus1; k++) {
float a;
if (k <= kLastLfBand) {
a = 0.f;
} else if (k < kFirstHfBand) {
a = (k - kLastLfBand) / static_cast<float>(kFirstHfBand - kLastLfBand);
} else {
a = 1.f;
}
enr_transparent_[k] = (1 - a) * lf.enr_transparent + a * hf.enr_transparent;
enr_suppress_[k] = (1 - a) * lf.enr_suppress + a * hf.enr_suppress;
emr_transparent_[k] = (1 - a) * lf.emr_transparent + a * hf.emr_transparent;
}
}
// Destructor, explicitly defaulted out of line; members are destroyed by their
// own destructors.
SuppressionGain::~SuppressionGain() = default;
@ -393,6 +378,10 @@ void SuppressionGain::GetGain(
std::array<float, kFftLengthBy2Plus1> nearend_average;
moving_average_.Average(nearend_spectrum, nearend_average);
// Update the state selection.
dominant_nearend_detector_.Update(nearend_spectrum, echo_spectrum,
comfort_noise_spectrum);
// Compute gain for the lower band.
bool low_noise_render = low_render_detector_.Detect(render);
const absl::optional<int> narrow_peak_band =
@ -444,4 +433,69 @@ bool SuppressionGain::LowNoiseRenderDetector::Detect(
return low_noise_render;
}
SuppressionGain::DominantNearendDetector::DominantNearendDetector(
const EchoCanceller3Config::Suppressor::DominantNearendDetection config)
: enr_threshold_(config.enr_threshold),
snr_threshold_(config.snr_threshold),
hold_duration_(config.hold_duration),
trigger_threshold_(config.trigger_threshold) {}
// Re-evaluates whether the suppressor should be in the nearend state.
//
// The decision is based on the energy in spectrum bins 1..15 of the nearend,
// echo and comfort-noise spectra. Each spectrum must hold at least 16 values.
void SuppressionGain::DominantNearendDetector::Update(
    rtc::ArrayView<const float> nearend_spectrum,
    rtc::ArrayView<const float> echo_spectrum,
    rtc::ArrayView<const float> comfort_noise_spectrum) {
  // Adds up bins 1 through 15 in ascending order (bin 0 is left out).
  const auto low_band_sum = [](rtc::ArrayView<const float> spectrum) {
    RTC_DCHECK_LE(16, spectrum.size());
    float total = 0.f;
    for (auto it = spectrum.begin() + 1; it != spectrum.begin() + 16; ++it) {
      total += *it;
    }
    return total;
  };

  const float nearend_energy = low_band_sum(nearend_spectrum);
  const float echo_energy = low_band_sum(echo_spectrum);
  const float noise_energy = low_band_sum(comfort_noise_spectrum);

  // The nearend counts as strong and active when it is sufficiently stronger
  // than both the echo and the nearend noise.
  const bool strong_nearend = nearend_energy > enr_threshold_ * echo_energy &&
                              nearend_energy > snr_threshold_ * noise_energy;

  if (!strong_nearend) {
    // Gradually forget earlier strong nearend observations.
    trigger_counter_ = std::max(0, trigger_counter_ - 1);
  } else if (++trigger_counter_ >= trigger_threshold_) {
    // Enough strong nearend activity has been seen: (re)start the hold period
    // and keep the trigger counter saturated at its threshold.
    hold_counter_ = hold_duration_;
    trigger_counter_ = trigger_threshold_;
  }

  // The nearend state persists for as long as the hold counter is running.
  hold_counter_ = std::max(0, hold_counter_ - 1);
  nearend_state_ = hold_counter_ > 0;
}
// Expands a Tuning into per-band gain parameters.
//
// The gain-change factors are copied as they are. The masking thresholds are
// expanded to one value per band: bands up to and including kLastLfBand use
// the low-frequency thresholds, bands from kFirstHfBand upwards use the
// high-frequency thresholds, and the bands in between are linearly
// interpolated.
SuppressionGain::GainParameters::GainParameters(
    const EchoCanceller3Config::Suppressor::Tuning& tuning)
    : max_inc_factor(tuning.max_inc_factor),
      max_dec_factor_lf(tuning.max_dec_factor_lf) {
  constexpr size_t kLastLfBand = 5;
  constexpr size_t kFirstHfBand = 8;
  RTC_DCHECK_LT(kLastLfBand, kFirstHfBand);

  const auto& low = tuning.mask_lf;
  const auto& high = tuning.mask_hf;
  RTC_DCHECK_LT(low.enr_transparent, low.enr_suppress);
  RTC_DCHECK_LT(high.enr_transparent, high.enr_suppress);

  // Weight of the high-frequency thresholds for a given band, in [0, 1].
  const auto hf_weight = [&](size_t band) -> float {
    if (band <= kLastLfBand) {
      return 0.f;
    }
    if (band >= kFirstHfBand) {
      return 1.f;
    }
    return (band - kLastLfBand) /
           static_cast<float>(kFirstHfBand - kLastLfBand);
  };

  for (size_t band = 0; band < kFftLengthBy2Plus1; ++band) {
    const float a = hf_weight(band);
    enr_transparent_[band] =
        (1 - a) * low.enr_transparent + a * high.enr_transparent;
    enr_suppress_[band] = (1 - a) * low.enr_suppress + a * high.enr_suppress;
    emr_transparent_[band] =
        (1 - a) * low.emr_transparent + a * high.emr_transparent;
  }
}
} // namespace webrtc

View File

@ -68,6 +68,42 @@ class SuppressionGain {
float average_power_ = 32768.f * 32768.f;
};
// Class for selecting whether the suppressor is in the nearend or echo state.
// The nearend state is entered after a sustained period in which the nearend
// is clearly stronger than both the echo and the noise, and it is then held
// for a configured number of updates.
class DominantNearendDetector {
public:
// Copies the detection thresholds and durations from `config`.
explicit DominantNearendDetector(
const EchoCanceller3Config::Suppressor::DominantNearendDetection
config);
// Returns whether the current state is the nearend state.
bool IsNearendState() const { return nearend_state_; }
// Updates the state selection based on latest spectral estimates.
void Update(rtc::ArrayView<const float> nearend_spectrum,
rtc::ArrayView<const float> echo_spectrum,
rtc::ArrayView<const float> comfort_noise_spectrum);
private:
// Factor by which the nearend energy must exceed the echo energy.
const float enr_threshold_;
// Factor by which the nearend energy must exceed the noise energy.
const float snr_threshold_;
// Value the hold counter is reloaded with when the state is triggered.
const int hold_duration_;
// Trigger counter value required to flag the nearend state.
const int trigger_threshold_;
// Result of the latest Update(); true while the hold counter is positive.
bool nearend_state_ = false;
// Incremented on updates with strong nearend and decremented (not below
// zero) otherwise.
int trigger_counter_ = 0;
// Remaining updates for which the nearend state is kept.
int hold_counter_ = 0;
};
// Suppressor parameters for one suppressor state, derived from a Tuning and
// expanded to one masking threshold per frequency band.
struct GainParameters {
// Copies the gain-change factors from `tuning` and computes the per-band
// masking thresholds from its low- and high-frequency threshold sets.
explicit GainParameters(
const EchoCanceller3Config::Suppressor::Tuning& tuning);
// Factor applied to the previous gain to bound the new gain from above.
const float max_inc_factor;
// Factor applied to the previous gain to bound the new lower-band gain from
// below.
const float max_dec_factor_lf;
// Per-band thresholds, filled in by the constructor.
std::array<float, kFftLengthBy2Plus1> enr_transparent_;
std::array<float, kFftLengthBy2Plus1> enr_suppress_;
std::array<float, kFftLengthBy2Plus1> emr_transparent_;
};
static int instance_count_;
std::unique_ptr<ApmDataDumper> data_dumper_;
const Aec3Optimization optimization_;
@ -77,14 +113,14 @@ class SuppressionGain {
std::array<float, kFftLengthBy2Plus1> last_gain_;
std::array<float, kFftLengthBy2Plus1> last_nearend_;
std::array<float, kFftLengthBy2Plus1> last_echo_;
std::array<float, kFftLengthBy2Plus1> enr_transparent_;
std::array<float, kFftLengthBy2Plus1> enr_suppress_;
std::array<float, kFftLengthBy2Plus1> emr_transparent_;
LowNoiseRenderDetector low_render_detector_;
bool initial_state_ = true;
int initial_state_change_counter_ = 0;
const bool enable_new_suppression_;
aec3::MovingAverage moving_average_;
const GainParameters nearend_params_;
const GainParameters normal_params_;
DominantNearendDetector dominant_nearend_detector_;
RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionGain);
};

View File

@ -290,14 +290,6 @@ class Aec3ParametersParser {
&cfg.echo_audibility.use_stationary_properties);
}
if (rtc::GetValueFromJsonObject(root, "gain_updates", &section)) {
ReadParam(section, "max_inc_factor", &cfg.gain_updates.max_inc_factor);
ReadParam(section, "max_dec_factor_lf",
&cfg.gain_updates.max_dec_factor_lf);
ReadParam(section, "floor_first_increase",
&cfg.gain_updates.floor_first_increase);
}
if (rtc::GetValueFromJsonObject(root, "echo_removal_control", &section)) {
Json::Value subsection;
if (rtc::GetValueFromJsonObject(section, "gain_rampup", &subsection)) {
@ -338,11 +330,45 @@ class Aec3ParametersParser {
&cfg.echo_model.nonlinear_release);
}
Json::Value subsection;
if (rtc::GetValueFromJsonObject(root, "suppressor", &section)) {
ReadParam(section, "nearend_average_blocks",
&cfg.suppressor.nearend_average_blocks);
ReadParam(section, "mask_lf", &cfg.suppressor.mask_lf);
ReadParam(section, "mask_hf", &cfg.suppressor.mask_hf);
if (rtc::GetValueFromJsonObject(section, "normal_tuning", &subsection)) {
ReadParam(subsection, "mask_lf", &cfg.suppressor.normal_tuning.mask_lf);
ReadParam(subsection, "mask_hf", &cfg.suppressor.normal_tuning.mask_hf);
ReadParam(subsection, "max_inc_factor",
&cfg.suppressor.normal_tuning.max_inc_factor);
ReadParam(subsection, "max_dec_factor_lf",
&cfg.suppressor.normal_tuning.max_dec_factor_lf);
}
if (rtc::GetValueFromJsonObject(section, "nearend_tuning", &subsection)) {
ReadParam(subsection, "mask_lf",
&cfg.suppressor.nearend_tuning.mask_lf);
ReadParam(subsection, "mask_hf",
&cfg.suppressor.nearend_tuning.mask_hf);
ReadParam(subsection, "max_inc_factor",
&cfg.suppressor.nearend_tuning.max_inc_factor);
ReadParam(subsection, "max_dec_factor_lf",
&cfg.suppressor.nearend_tuning.max_dec_factor_lf);
}
if (rtc::GetValueFromJsonObject(section, "dominant_nearend_detection",
&subsection)) {
ReadParam(subsection, "enr_threshold",
&cfg.suppressor.dominant_nearend_detection.enr_threshold);
ReadParam(subsection, "snr_threshold",
&cfg.suppressor.dominant_nearend_detection.snr_threshold);
ReadParam(subsection, "hold_duration",
&cfg.suppressor.dominant_nearend_detection.hold_duration);
ReadParam(subsection, "trigger_threshold",
&cfg.suppressor.dominant_nearend_detection.trigger_threshold);
}
ReadParam(section, "floor_first_increase",
&cfg.suppressor.floor_first_increase);
ReadParam(section, "enforce_transparent",
&cfg.suppressor.enforce_transparent);
ReadParam(section, "enforce_empty_higher_bands",