From e9a7e90625fea3ad7d12d3a2aafb5b529db0940b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20de=20Vicente=20Pe=C3=B1a?= Date: Thu, 27 Sep 2018 11:49:39 +0200 Subject: [PATCH] AEC3: ERLE: Allowing increases of the ERLE estimate for low render signals. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Especially for devices with high echo path gain, even a low render signal can allow the linear filter of the AEC3 to converge. However, the conditions that were used for updating the ERLE prevented that estimate from being updated. In this commit, we allow adapting the ERLE estimator even with a low render signal, but the update of the ERLE is constrained so that decreases are not allowed. Bug: webrtc:9776 Change-Id: Ic4331efcc47a0b05f394cdea9a88f336292de5a1 Reviewed-on: https://webrtc-review.googlesource.com/101641 Commit-Queue: Jesus de Vicente Pena Reviewed-by: Per Åhgren Cr-Commit-Position: refs/heads/master@{#24859} --- modules/audio_processing/aec3/BUILD.gn | 4 + modules/audio_processing/aec3/aec_state.h | 6 +- .../aec3/aec_state_unittest.cc | 10 +- modules/audio_processing/aec3/echo_remover.cc | 2 +- .../aec3/echo_remover_metrics.cc | 2 +- .../audio_processing/aec3/erle_estimator.cc | 250 +----------------- .../audio_processing/aec3/erle_estimator.h | 104 ++------ .../aec3/erle_estimator_unittest.cc | 23 +- .../aec3/fullband_erle_estimator.cc | 168 ++++++++++++ .../aec3/fullband_erle_estimator.h | 93 +++++++ .../aec3/subband_erle_estimator.cc | 187 +++++++++++++ .../aec3/subband_erle_estimator.h | 97 +++++++ 12 files changed, 600 insertions(+), 346 deletions(-) create mode 100644 modules/audio_processing/aec3/fullband_erle_estimator.cc create mode 100644 modules/audio_processing/aec3/fullband_erle_estimator.h create mode 100644 modules/audio_processing/aec3/subband_erle_estimator.cc create mode 100644 modules/audio_processing/aec3/subband_erle_estimator.h diff --git a/modules/audio_processing/aec3/BUILD.gn 
b/modules/audio_processing/aec3/BUILD.gn index 75ee3723fe..02b3acf4a2 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn @@ -60,6 +60,8 @@ rtc_static_library("aec3") { "filter_analyzer.h", "frame_blocker.cc", "frame_blocker.h", + "fullband_erle_estimator.cc", + "fullband_erle_estimator.h", "main_filter_update_gain.cc", "main_filter_update_gain.h", "matched_filter.cc", @@ -98,6 +100,8 @@ rtc_static_library("aec3") { "skew_estimator.h", "stationarity_estimator.cc", "stationarity_estimator.h", + "subband_erle_estimator.cc", + "subband_erle_estimator.h", "subtractor.cc", "subtractor.h", "subtractor_output.cc", diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 45ed09fa3a..c5ec6cc3e5 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -82,10 +82,8 @@ class AecState { return absl::nullopt; } - // Returns the time-domain ERLE in log2 units. - float ErleTimeDomainLog2() const { - return erle_estimator_.ErleTimeDomainLog2(); - } + // Returns the fullband ERLE estimate in log2 units. + float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); } // Returns the ERL. const std::array& Erl() const { diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc index 50b97f4437..bf32a0b607 100644 --- a/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/modules/audio_processing/aec3/aec_state_unittest.cc @@ -135,14 +135,16 @@ TEST(AecState, NormalUsage) { } ASSERT_TRUE(state.UsableLinearEstimate()); { + // Note that the render spectrum is built so it does not have energy in the + // odd bands but just in the even bands. const auto& erle = state.Erle(); EXPECT_EQ(erle[0], erle[1]); constexpr size_t kLowFrequencyLimit = 32; - for (size_t k = 1; k < kLowFrequencyLimit; ++k) { - EXPECT_NEAR(k % 2 == 0 ? 
4.f : 1.f, erle[k], 0.1); + for (size_t k = 2; k < kLowFrequencyLimit; k = k + 2) { + EXPECT_NEAR(4.f, erle[k], 0.1); } - for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { - EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; k = k + 2) { + EXPECT_NEAR(1.5f, erle[k], 0.1); } EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); } diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index f75eb55d60..acdaf3104b 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -191,7 +191,7 @@ void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const { // Echo return loss (ERL) is inverted to go from gain to attenuation. metrics->echo_return_loss = -10.0 * log10(aec_state_.ErlTimeDomain()); metrics->echo_return_loss_enhancement = - Log2TodB(aec_state_.ErleTimeDomainLog2()); + Log2TodB(aec_state_.FullBandErleLog2()); } void EchoRemoverImpl::ProcessCapture( diff --git a/modules/audio_processing/aec3/echo_remover_metrics.cc b/modules/audio_processing/aec3/echo_remover_metrics.cc index 8592a93b65..a04026b4f5 100644 --- a/modules/audio_processing/aec3/echo_remover_metrics.cc +++ b/modules/audio_processing/aec3/echo_remover_metrics.cc @@ -67,7 +67,7 @@ void EchoRemoverMetrics::Update( aec3::UpdateDbMetric(aec_state.Erl(), &erl_); erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain()); aec3::UpdateDbMetric(aec_state.Erle(), &erle_); - erle_time_domain_.UpdateInstant(aec_state.ErleTimeDomainLog2()); + erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2()); aec3::UpdateDbMetric(comfort_noise_spectrum, &comfort_noise_); aec3::UpdateDbMetric(suppressor_gain, &suppressor_gain_); active_render_count_ += (aec_state.ActiveRender() ? 
1 : 0); diff --git a/modules/audio_processing/aec3/erle_estimator.cc b/modules/audio_processing/aec3/erle_estimator.cc index b02245c859..40f69e19c2 100644 --- a/modules/audio_processing/aec3/erle_estimator.cc +++ b/modules/audio_processing/aec3/erle_estimator.cc @@ -10,163 +10,25 @@ #include "modules/audio_processing/aec3/erle_estimator.h" -#include -#include - -#include "absl/types/optional.h" +#include "api/array_view.h" #include "modules/audio_processing/aec3/aec3_common.h" #include "modules/audio_processing/logging/apm_data_dumper.h" -#include "rtc_base/numerics/safe_minmax.h" namespace webrtc { -namespace { -constexpr int kPointsToAccumulate = 6; -constexpr float kEpsilon = 1e-3f; -} // namespace - ErleEstimator::ErleEstimator(float min_erle, float max_erle_lf, float max_erle_hf) - : min_erle_(min_erle), - min_erle_log2_(FastApproxLog2f(min_erle_ + kEpsilon)), - max_erle_lf_(max_erle_lf), - max_erle_lf_log2(FastApproxLog2f(max_erle_lf_ + kEpsilon)), - max_erle_hf_(max_erle_hf), - erle_freq_inst_(kPointsToAccumulate), - erle_time_inst_(kPointsToAccumulate) { + : fullband_erle_estimator_(min_erle, max_erle_lf), + subband_erle_estimator_(min_erle, max_erle_lf, max_erle_hf) { Reset(); } ErleEstimator::~ErleEstimator() = default; void ErleEstimator::Reset() { - erle_time_inst_.Reset(); - erle_.fill(min_erle_); - erle_onsets_.fill(min_erle_); - hold_counters_.fill(0); - coming_onset_.fill(true); - erle_time_domain_log2_ = min_erle_log2_; - hold_counter_time_domain_ = 0; -} - -ErleEstimator::ErleTimeInstantaneous::ErleTimeInstantaneous( - int points_to_accumulate) - : points_to_accumulate_(points_to_accumulate) { - Reset(); -} -ErleEstimator::ErleTimeInstantaneous::~ErleTimeInstantaneous() = default; - -bool ErleEstimator::ErleTimeInstantaneous::Update(const float Y2_sum, - const float E2_sum) { - bool ret = false; - E2_acum_ += E2_sum; - Y2_acum_ += Y2_sum; - num_points_++; - if (num_points_ == points_to_accumulate_) { - if (E2_acum_ > 0.f) { - ret = true; - 
erle_log2_ = FastApproxLog2f(Y2_acum_ / E2_acum_ + kEpsilon); - } - num_points_ = 0; - E2_acum_ = 0.f; - Y2_acum_ = 0.f; - } - - if (ret) { - UpdateMaxMin(); - UpdateQualityEstimate(); - } - return ret; -} - -void ErleEstimator::ErleTimeInstantaneous::Reset() { - ResetAccumulators(); - max_erle_log2_ = -10.f; // -30 dB. - min_erle_log2_ = 33.f; // 100 dB. - inst_quality_estimate_ = 0.f; -} - -void ErleEstimator::ErleTimeInstantaneous::ResetAccumulators() { - erle_log2_ = absl::nullopt; - inst_quality_estimate_ = 0.f; - num_points_ = 0; - E2_acum_ = 0.f; - Y2_acum_ = 0.f; -} - -void ErleEstimator::ErleTimeInstantaneous::Dump( - const std::unique_ptr& data_dumper) { - data_dumper->DumpRaw("aec3_erle_time_inst_log2", - erle_log2_ ? *erle_log2_ : -10.f); - data_dumper->DumpRaw( - "aec3_erle_time_quality", - GetInstQualityEstimate() ? GetInstQualityEstimate().value() : 0.f); - data_dumper->DumpRaw("aec3_erle_time_max_log2", max_erle_log2_); - data_dumper->DumpRaw("aec3_erle_time_min_log2", min_erle_log2_); -} - -void ErleEstimator::ErleTimeInstantaneous::UpdateMaxMin() { - RTC_DCHECK(erle_log2_); - if (erle_log2_.value() > max_erle_log2_) { - max_erle_log2_ = erle_log2_.value(); - } else { - max_erle_log2_ -= 0.0004; // Forget factor, approx 1dB every 3 sec. - } - - if (erle_log2_.value() < min_erle_log2_) { - min_erle_log2_ = erle_log2_.value(); - } else { - min_erle_log2_ += 0.0004; // Forget factor, approx 1dB every 3 sec. 
- } -} - -void ErleEstimator::ErleTimeInstantaneous::UpdateQualityEstimate() { - const float alpha = 0.07f; - float quality_estimate = 0.f; - RTC_DCHECK(erle_log2_); - if (max_erle_log2_ > min_erle_log2_) { - quality_estimate = (erle_log2_.value() - min_erle_log2_) / - (max_erle_log2_ - min_erle_log2_); - } - if (quality_estimate > inst_quality_estimate_) { - inst_quality_estimate_ = quality_estimate; - } else { - inst_quality_estimate_ += - alpha * (quality_estimate - inst_quality_estimate_); - } -} - -ErleEstimator::ErleFreqInstantaneous::ErleFreqInstantaneous( - int points_to_accumulate) - : points_to_accumulate_(points_to_accumulate) { - Reset(); -} - -ErleEstimator::ErleFreqInstantaneous::~ErleFreqInstantaneous() = default; - -absl::optional -ErleEstimator::ErleFreqInstantaneous::Update(float Y2, float E2, size_t band) { - absl::optional ret = absl::nullopt; - RTC_DCHECK_LT(band, kFftLengthBy2Plus1); - Y2_acum_[band] += Y2; - E2_acum_[band] += E2; - if (++num_points_[band] == points_to_accumulate_) { - if (E2_acum_[band]) { - ret = Y2_acum_[band] / E2_acum_[band]; - } - num_points_[band] = 0; - Y2_acum_[band] = 0.f; - E2_acum_[band] = 0.f; - } - - return ret; -} - -void ErleEstimator::ErleFreqInstantaneous::Reset() { - Y2_acum_.fill(0.f); - E2_acum_.fill(0.f); - num_points_.fill(0); + fullband_erle_estimator_.Reset(); + subband_erle_estimator_.Reset(); } void ErleEstimator::Update(rtc::ArrayView render_spectrum, @@ -181,104 +43,14 @@ void ErleEstimator::Update(rtc::ArrayView render_spectrum, const auto& Y2 = capture_spectrum; const auto& E2 = subtractor_spectrum; - // Corresponds of WGN of power -46 dBFS. - constexpr float kX2Min = 44015068.0f; - - constexpr int kErleHold = 100; - constexpr int kBlocksForOnsetDetection = kErleHold + 150; - - auto erle_band_update = [](float erle_band, float new_erle, float alpha_inc, - float alpha_dec, float min_erle, float max_erle) { - float alpha = new_erle > erle_band ? 
alpha_inc : alpha_dec; - float erle_band_out = erle_band; - erle_band_out = erle_band + alpha * (new_erle - erle_band); - erle_band_out = rtc::SafeClamp(erle_band_out, min_erle, max_erle); - return erle_band_out; - }; - - // Update the estimates in a clamped minimum statistics manner. - auto erle_update = [&](size_t start, size_t stop, float max_erle, - bool onset_detection) { - for (size_t k = start; k < stop; ++k) { - if (X2[k] > kX2Min) { - absl::optional new_erle = - erle_freq_inst_.Update(Y2[k], E2[k], k); - if (new_erle) { - if (onset_detection) { - if (coming_onset_[k]) { - coming_onset_[k] = false; - erle_onsets_[k] = - erle_band_update(erle_onsets_[k], new_erle.value(), 0.15f, - 0.3f, min_erle_, max_erle); - } - hold_counters_[k] = kBlocksForOnsetDetection; - } - erle_[k] = erle_band_update(erle_[k], new_erle.value(), 0.05f, 0.1f, - min_erle_, max_erle); - } - } - } - }; - - if (converged_filter) { - // Note that the use of the converged_filter flag already imposed - // a minimum of the erle that can be estimated as that flag would - // be false if the filter is performing poorly. - constexpr size_t kFftLengthBy4 = kFftLengthBy2 / 2; - erle_update(1, kFftLengthBy4, max_erle_lf_, onset_detection); - erle_update(kFftLengthBy4, kFftLengthBy2, max_erle_hf_, onset_detection); - } - - if (onset_detection) { - for (size_t k = 1; k < kFftLengthBy2; ++k) { - hold_counters_[k]--; - if (hold_counters_[k] <= (kBlocksForOnsetDetection - kErleHold)) { - if (erle_[k] > erle_onsets_[k]) { - erle_[k] = std::max(erle_onsets_[k], 0.97f * erle_[k]); - RTC_DCHECK_LE(min_erle_, erle_[k]); - } - if (hold_counters_[k] <= 0) { - coming_onset_[k] = true; - hold_counters_[k] = 0; - } - } - } - } - - erle_[0] = erle_[1]; - erle_[kFftLengthBy2] = erle_[kFftLengthBy2 - 1]; - - if (converged_filter) { - // Compute ERLE over all frequency bins. 
- const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f); - if (X2_sum > kX2Min * X2.size()) { - const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f); - const float E2_sum = std::accumulate(E2.begin(), E2.end(), 0.0f); - if (erle_time_inst_.Update(Y2_sum, E2_sum)) { - hold_counter_time_domain_ = kErleHold; - erle_time_domain_log2_ += - 0.1f * ((erle_time_inst_.GetInstErle_log2().value()) - - erle_time_domain_log2_); - erle_time_domain_log2_ = rtc::SafeClamp( - erle_time_domain_log2_, min_erle_log2_, max_erle_lf_log2); - } - } - } - --hold_counter_time_domain_; - if (hold_counter_time_domain_ <= 0) { - erle_time_domain_log2_ = - std::max(min_erle_log2_, erle_time_domain_log2_ - 0.044f); - } - if (hold_counter_time_domain_ == 0) { - erle_time_inst_.ResetAccumulators(); - } + subband_erle_estimator_.Update(X2, Y2, E2, converged_filter, onset_detection); + fullband_erle_estimator_.Update(X2, Y2, E2, converged_filter); } -void ErleEstimator::Dump(const std::unique_ptr& data_dumper) { - data_dumper->DumpRaw("aec3_erle", Erle()); - data_dumper->DumpRaw("aec3_erle_onset", ErleOnsets()); - data_dumper->DumpRaw("aec3_erle_time_domain_log2", ErleTimeDomainLog2()); - erle_time_inst_.Dump(data_dumper); +void ErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + fullband_erle_estimator_.Dump(data_dumper); + subband_erle_estimator_.Dump(data_dumper); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h index 8160dbecba..490a4ec772 100644 --- a/modules/audio_processing/aec3/erle_estimator.h +++ b/modules/audio_processing/aec3/erle_estimator.h @@ -12,112 +12,62 @@ #define MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_ #include +#include #include "absl/types/optional.h" #include "api/array_view.h" #include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/fullband_erle_estimator.h" +#include 
"modules/audio_processing/aec3/subband_erle_estimator.h" #include "modules/audio_processing/logging/apm_data_dumper.h" -#include "rtc_base/constructormagic.h" namespace webrtc { -// Estimates the echo return loss enhancement based on the signal spectra. +// Estimates the echo return loss enhancement. One estimate is done per subband +// and another one is done using the aggregated energy over all the subbands. class ErleEstimator { public: ErleEstimator(float min_erle, float max_erle_lf, float max_erle_hf); ~ErleEstimator(); - // Reset the ERLE estimator. + // Resets the fullband ERLE estimator and the subbands ERLE estimators. void Reset(); - // Updates the ERLE estimate. + // Updates the ERLE estimates. void Update(rtc::ArrayView render_spectrum, rtc::ArrayView capture_spectrum, rtc::ArrayView subtractor_spectrum, bool converged_filter, bool onset_detection); - // Returns the most recent ERLE estimate. - const std::array& Erle() const { return erle_; } - // Returns the ERLE that is estimated during onsets. Use for logging/testing. + // Returns the most recent subband ERLE estimates. + const std::array& Erle() const { + return subband_erle_estimator_.Erle(); + } + // Returns the subband ERLEs that are estimated during onsets. Used + // for logging/testing. const std::array& ErleOnsets() const { - return erle_onsets_; + return subband_erle_estimator_.ErleOnsets(); } - float ErleTimeDomainLog2() const { return erle_time_domain_log2_; } + // Returns the fullband ERLE estimate. + float FullbandErleLog2() const { + return fullband_erle_estimator_.FullbandErleLog2(); + } + + // Returns an estimation of the current linear filter quality based on the + // current and past fullband ERLE estimates. The returned value is a float + // between 0 and 1 where 1 indicates that, at this current time instant, the + // linear filter is reaching its maximum subtraction performance. 
absl::optional GetInstLinearQualityEstimate() const { - return erle_time_inst_.GetInstQualityEstimate(); + return fullband_erle_estimator_.GetInstLinearQualityEstimate(); } - void Dump(const std::unique_ptr& data_dumper); - - class ErleTimeInstantaneous { - public: - ErleTimeInstantaneous(int points_to_accumulate); - ~ErleTimeInstantaneous(); - // Update the estimator with a new point, returns true - // if the instantaneous erle was updated due to having enough - // points for performing the estimate. - bool Update(const float Y2_sum, const float E2_sum); - // Reset all the members of the class. - void Reset(); - // Reset the members realated with an instantaneous estimate. - void ResetAccumulators(); - // Returns the instantaneous ERLE in log2 units. - absl::optional GetInstErle_log2() const { return erle_log2_; } - // Get an indication between 0 and 1 of the performance of the linear filter - // for the current time instant. - absl::optional GetInstQualityEstimate() const { - return erle_log2_ ? absl::optional(inst_quality_estimate_) - : absl::nullopt; - } - void Dump(const std::unique_ptr& data_dumper); - - private: - void UpdateMaxMin(); - void UpdateQualityEstimate(); - absl::optional erle_log2_; - float inst_quality_estimate_; - float max_erle_log2_; - float min_erle_log2_; - float Y2_acum_; - float E2_acum_; - int num_points_; - const int points_to_accumulate_; - }; - - class ErleFreqInstantaneous { - public: - ErleFreqInstantaneous(int points_to_accumulate); - ~ErleFreqInstantaneous(); - // Updates the ERLE for a band with a new block. Returns absl::nullopt - // if not enough points were accuulated for doing the estimation. - absl::optional Update(float Y2, float E2, size_t band); - // Reset all the member of the class. 
- void Reset(); - - private: - std::array Y2_acum_; - std::array E2_acum_; - std::array num_points_; - const int points_to_accumulate_; - }; + void Dump(const std::unique_ptr& data_dumper) const; private: - std::array erle_; - std::array erle_onsets_; - std::array coming_onset_; - std::array hold_counters_; - int hold_counter_time_domain_; - float erle_time_domain_log2_; - const float min_erle_; - const float min_erle_log2_; - const float max_erle_lf_; - const float max_erle_lf_log2; - const float max_erle_hf_; - ErleFreqInstantaneous erle_freq_inst_; - ErleTimeInstantaneous erle_time_inst_; - RTC_DISALLOW_COPY_AND_ASSIGN(ErleEstimator); + FullBandErleEstimator fullband_erle_estimator_; + SubbandErleEstimator subband_erle_estimator_; }; } // namespace webrtc diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc index 1687568529..e9500f2eec 100644 --- a/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -76,7 +76,7 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) { for (size_t k = 0; k < 200; ++k) { estimator.Update(X2, Y2, E2, true, true); } - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.ErleTimeDomainLog2()), + VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), kMaxErleLf, kMaxErleHf); FormNearendFrame(&X2, &E2, &Y2); @@ -85,7 +85,7 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) { for (size_t k = 0; k < 50; ++k) { estimator.Update(X2, Y2, E2, true, true); } - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.ErleTimeDomainLog2()), + VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), kMaxErleLf, kMaxErleHf); } @@ -116,24 +116,7 @@ TEST(ErleEstimator, VerifyErleTrackingOnOnsets) { estimator.Update(X2, Y2, E2, true, true); } // Verifies that during ne activity, Erle converges to the Erle for onsets. 
- VerifyErle(estimator.Erle(), std::pow(2.f, estimator.ErleTimeDomainLog2()), - kMinErle, kMinErle); -} - -TEST(ErleEstimator, VerifyNoErleUpdateDuringLowActivity) { - std::array X2; - std::array E2; - std::array Y2; - ErleEstimator estimator(kMinErle, kMaxErleLf, kMaxErleHf); - - // Verifies that the ERLE estimate is is not updated for low-level render - // signals. - X2.fill(1000.f * 1000.f); - Y2.fill(10 * E2[0]); - for (size_t k = 0; k < 200; ++k) { - estimator.Update(X2, Y2, E2, true, true); - } - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.ErleTimeDomainLog2()), + VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), kMinErle, kMinErle); } diff --git a/modules/audio_processing/aec3/fullband_erle_estimator.cc b/modules/audio_processing/aec3/fullband_erle_estimator.cc new file mode 100644 index 0000000000..db9be7c104 --- /dev/null +++ b/modules/audio_processing/aec3/fullband_erle_estimator.cc @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/fullband_erle_estimator.h" + +#include +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { +constexpr float kEpsilon = 1e-3f; +constexpr float kX2BandEnergyThreshold = 44015068.0f; +constexpr int kErleHold = 100; +constexpr int kPointsToAccumulate = 6; +} // namespace + +FullBandErleEstimator::FullBandErleEstimator(float min_erle, float max_erle_lf) + : min_erle_log2_(FastApproxLog2f(min_erle + kEpsilon)), + max_erle_lf_log2(FastApproxLog2f(max_erle_lf + kEpsilon)) { + Reset(); +} + +FullBandErleEstimator::~FullBandErleEstimator() = default; + +void FullBandErleEstimator::Reset() { + instantaneous_erle_.Reset(); + erle_time_domain_log2_ = min_erle_log2_; + hold_counter_time_domain_ = 0; +} + +void FullBandErleEstimator::Update(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + bool converged_filter) { + if (converged_filter) { + // Computes the fullband ERLE. 
+ const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f); + if (X2_sum > kX2BandEnergyThreshold * X2.size()) { + const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f); + const float E2_sum = std::accumulate(E2.begin(), E2.end(), 0.0f); + if (instantaneous_erle_.Update(Y2_sum, E2_sum)) { + hold_counter_time_domain_ = kErleHold; + erle_time_domain_log2_ += + 0.1f * ((instantaneous_erle_.GetInstErleLog2().value()) - + erle_time_domain_log2_); + erle_time_domain_log2_ = rtc::SafeClamp( + erle_time_domain_log2_, min_erle_log2_, max_erle_lf_log2); + } + } + } + --hold_counter_time_domain_; + if (hold_counter_time_domain_ <= 0) { + erle_time_domain_log2_ = + std::max(min_erle_log2_, erle_time_domain_log2_ - 0.044f); + } + if (hold_counter_time_domain_ == 0) { + instantaneous_erle_.ResetAccumulators(); + } +} + +void FullBandErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_fullband_erle_log2", FullbandErleLog2()); + instantaneous_erle_.Dump(data_dumper); +} + +FullBandErleEstimator::ErleInstantaneous::ErleInstantaneous() { + Reset(); +} + +FullBandErleEstimator::ErleInstantaneous::~ErleInstantaneous() = default; + +bool FullBandErleEstimator::ErleInstantaneous::Update(const float Y2_sum, + const float E2_sum) { + bool update_estimates = false; + E2_acum_ += E2_sum; + Y2_acum_ += Y2_sum; + num_points_++; + if (num_points_ == kPointsToAccumulate) { + if (E2_acum_ > 0.f) { + update_estimates = true; + erle_log2_ = FastApproxLog2f(Y2_acum_ / E2_acum_ + kEpsilon); + } + num_points_ = 0; + E2_acum_ = 0.f; + Y2_acum_ = 0.f; + } + + if (update_estimates) { + UpdateMaxMin(); + UpdateQualityEstimate(); + } + return update_estimates; +} + +void FullBandErleEstimator::ErleInstantaneous::Reset() { + ResetAccumulators(); + max_erle_log2_ = -10.f; // -30 dB. + min_erle_log2_ = 33.f; // 100 dB. 
+ inst_quality_estimate_ = 0.f; +} + +void FullBandErleEstimator::ErleInstantaneous::ResetAccumulators() { + erle_log2_ = absl::nullopt; + inst_quality_estimate_ = 0.f; + num_points_ = 0; + E2_acum_ = 0.f; + Y2_acum_ = 0.f; +} + +void FullBandErleEstimator::ErleInstantaneous::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_fullband_erle_inst_log2", + erle_log2_ ? *erle_log2_ : -10.f); + data_dumper->DumpRaw( + "aec3_erle_instantaneous_quality", + GetQualityEstimate() ? GetQualityEstimate().value() : 0.f); + data_dumper->DumpRaw("aec3_fullband_erle_max_log2", max_erle_log2_); + data_dumper->DumpRaw("aec3_fullband_erle_min_log2", min_erle_log2_); +} + +void FullBandErleEstimator::ErleInstantaneous::UpdateMaxMin() { + RTC_DCHECK(erle_log2_); + if (erle_log2_.value() > max_erle_log2_) { + max_erle_log2_ = erle_log2_.value(); + } else { + max_erle_log2_ -= 0.0004; // Forget factor, approx 1dB every 3 sec. + } + + if (erle_log2_.value() < min_erle_log2_) { + min_erle_log2_ = erle_log2_.value(); + } else { + min_erle_log2_ += 0.0004; // Forget factor, approx 1dB every 3 sec. + } +} + +void FullBandErleEstimator::ErleInstantaneous::UpdateQualityEstimate() { + const float alpha = 0.07f; + float quality_estimate = 0.f; + RTC_DCHECK(erle_log2_); + if (max_erle_log2_ > min_erle_log2_) { + quality_estimate = (erle_log2_.value() - min_erle_log2_) / + (max_erle_log2_ - min_erle_log2_); + } + if (quality_estimate > inst_quality_estimate_) { + inst_quality_estimate_ = quality_estimate; + } else { + inst_quality_estimate_ += + alpha * (quality_estimate - inst_quality_estimate_); + } +} + +} // namespace webrtc diff --git a/modules/audio_processing/aec3/fullband_erle_estimator.h b/modules/audio_processing/aec3/fullband_erle_estimator.h new file mode 100644 index 0000000000..175db55e11 --- /dev/null +++ b/modules/audio_processing/aec3/fullband_erle_estimator.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_ + +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// Estimates the echo return loss enhancement using the energy of all the +// frequency bands. +class FullBandErleEstimator { + public: + FullBandErleEstimator(float min_erle, float max_erle_lf); + ~FullBandErleEstimator(); + // Resets the ERLE estimator. + void Reset(); + + // Updates the ERLE estimator. + void Update(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + bool converged_filter); + + // Returns the fullband ERLE estimate in log2 units. + float FullbandErleLog2() const { return erle_time_domain_log2_; } + + // Returns an estimation of the current linear filter quality. It returns a + // float number between 0 and 1 mapping 1 to the highest possible quality. + absl::optional GetInstLinearQualityEstimate() const { + return instantaneous_erle_.GetQualityEstimate(); + } + + void Dump(const std::unique_ptr& data_dumper) const; + + private: + class ErleInstantaneous { + public: + ErleInstantaneous(); + ~ErleInstantaneous(); + + // Updates the estimator with a new point, returns true + // if the instantaneous ERLE was updated due to having enough + // points for performing the estimate. + bool Update(const float Y2_sum, const float E2_sum); + // Resets the instantaneous ERLE estimator to its initial state. + void Reset(); + // Resets the members related to an instantaneous estimate. 
+ void ResetAccumulators(); + // Returns the instantaneous ERLE in log2 units. + absl::optional GetInstErleLog2() const { return erle_log2_; } + // Gets an indication between 0 and 1 of the performance of the linear + // filter for the current time instant. + absl::optional GetQualityEstimate() const { + return erle_log2_ ? absl::optional(inst_quality_estimate_) + : absl::nullopt; + } + void Dump(const std::unique_ptr& data_dumper) const; + + private: + void UpdateMaxMin(); + void UpdateQualityEstimate(); + absl::optional erle_log2_; + float inst_quality_estimate_; + float max_erle_log2_; + float min_erle_log2_; + float Y2_acum_; + float E2_acum_; + int num_points_; + }; + + int hold_counter_time_domain_; + float erle_time_domain_log2_; + const float min_erle_log2_; + const float max_erle_lf_log2; + ErleInstantaneous instantaneous_erle_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_ diff --git a/modules/audio_processing/aec3/subband_erle_estimator.cc b/modules/audio_processing/aec3/subband_erle_estimator.cc new file mode 100644 index 0000000000..d8cb7a7631 --- /dev/null +++ b/modules/audio_processing/aec3/subband_erle_estimator.cc @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/subband_erle_estimator.h" + +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/numerics/safe_minmax.h" +#include "system_wrappers/include/field_trial.h" + +namespace webrtc { + +namespace { +constexpr int kPointsToAccumulate = 6; +constexpr float kX2BandEnergyThreshold = 44015068.0f; +constexpr int kErleHold = 100; +constexpr int kBlocksForOnsetDetection = kErleHold + 150; + +bool EnableAdaptErleOnLowRender() { + return !field_trial::IsEnabled("WebRTC-Aec3AdaptErleOnLowRenderKillSwitch"); +} + +} // namespace + +SubbandErleEstimator::SubbandErleEstimator(float min_erle, + float max_erle_lf, + float max_erle_hf) + : min_erle_(min_erle), + max_erle_lf_(max_erle_lf), + max_erle_hf_(max_erle_hf), + adapt_on_low_render_(EnableAdaptErleOnLowRender()) { + Reset(); +} + +SubbandErleEstimator::~SubbandErleEstimator() = default; + +void SubbandErleEstimator::Reset() { + erle_.fill(min_erle_); + erle_onsets_.fill(min_erle_); + hold_counters_.fill(0); + coming_onset_.fill(true); +} + +void SubbandErleEstimator::Update(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + bool converged_filter, + bool onset_detection) { + if (converged_filter) { + // Note that the use of the converged_filter flag already imposed + // a minimum of the erle that can be estimated as that flag would + // be false if the filter is performing poorly. 
+ constexpr size_t kFftLengthBy4 = kFftLengthBy2 / 2; + UpdateBands(X2, Y2, E2, 1, kFftLengthBy4, max_erle_lf_, onset_detection); + UpdateBands(X2, Y2, E2, kFftLengthBy4, kFftLengthBy2, max_erle_hf_, + onset_detection); + } + + if (onset_detection) { + DecreaseErlePerBandForLowRenderSignals(); + } + + erle_[0] = erle_[1]; + erle_[kFftLengthBy2] = erle_[kFftLengthBy2 - 1]; +} + +void SubbandErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + data_dumper->DumpRaw("aec3_erle", Erle()); + data_dumper->DumpRaw("aec3_erle_onset", ErleOnsets()); +} + +void SubbandErleEstimator::UpdateBands(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + size_t start, + size_t stop, + float max_erle, + bool onset_detection) { + auto erle_band_update = [](float erle_band, float new_erle, + bool low_render_energy, float alpha_inc, + float alpha_dec, float min_erle, float max_erle) { + if (new_erle < erle_band && low_render_energy) { + // Decreases are not allowed if low render energy signals were used for + // the erle computation. + return erle_band; + } + float alpha = new_erle > erle_band ? 
alpha_inc : alpha_dec; + float erle_band_out = erle_band; + erle_band_out = erle_band + alpha * (new_erle - erle_band); + erle_band_out = rtc::SafeClamp(erle_band_out, min_erle, max_erle); + return erle_band_out; + }; + + for (size_t k = start; k < stop; ++k) { + if (adapt_on_low_render_ || X2[k] > kX2BandEnergyThreshold) { + bool low_render_energy = false; + absl::optional new_erle = instantaneous_erle_.Update( + X2[k], Y2[k], E2[k], k, &low_render_energy); + if (new_erle) { + RTC_DCHECK(adapt_on_low_render_ || !low_render_energy); + if (onset_detection && !low_render_energy) { + if (coming_onset_[k]) { + coming_onset_[k] = false; + erle_onsets_[k] = erle_band_update( + erle_onsets_[k], new_erle.value(), low_render_energy, 0.15f, + 0.3f, min_erle_, max_erle); + } + hold_counters_[k] = kBlocksForOnsetDetection; + } + + erle_[k] = + erle_band_update(erle_[k], new_erle.value(), low_render_energy, + 0.05f, 0.1f, min_erle_, max_erle); + } + } + } +} + +void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { + for (size_t k = 1; k < kFftLengthBy2; ++k) { + hold_counters_[k]--; + if (hold_counters_[k] <= (kBlocksForOnsetDetection - kErleHold)) { + if (erle_[k] > erle_onsets_[k]) { + erle_[k] = std::max(erle_onsets_[k], 0.97f * erle_[k]); + RTC_DCHECK_LE(min_erle_, erle_[k]); + } + if (hold_counters_[k] <= 0) { + coming_onset_[k] = true; + hold_counters_[k] = 0; + } + } + } +} + +SubbandErleEstimator::ErleInstantaneous::ErleInstantaneous() { + Reset(); +} + +SubbandErleEstimator::ErleInstantaneous::~ErleInstantaneous() = default; + +absl::optional SubbandErleEstimator::ErleInstantaneous::Update( + float X2, + float Y2, + float E2, + size_t band, + bool* low_render_energy) { + absl::optional erle_instantaneous = absl::nullopt; + RTC_DCHECK_LT(band, kFftLengthBy2Plus1); + Y2_acum_[band] += Y2; + E2_acum_[band] += E2; + low_render_energy_[band] = + low_render_energy_[band] || X2 < kX2BandEnergyThreshold; + if (++num_points_[band] == kPointsToAccumulate) { + if 
(E2_acum_[band]) { + erle_instantaneous = Y2_acum_[band] / E2_acum_[band]; + } + *low_render_energy = low_render_energy_[band]; + num_points_[band] = 0; + Y2_acum_[band] = 0.f; + E2_acum_[band] = 0.f; + low_render_energy_[band] = false; + } + + return erle_instantaneous; +} + +void SubbandErleEstimator::ErleInstantaneous::Reset() { + Y2_acum_.fill(0.f); + E2_acum_.fill(0.f); + low_render_energy_.fill(false); + num_points_.fill(0); +} + +} // namespace webrtc diff --git a/modules/audio_processing/aec3/subband_erle_estimator.h b/modules/audio_processing/aec3/subband_erle_estimator.h new file mode 100644 index 0000000000..aa5e5ccb24 --- /dev/null +++ b/modules/audio_processing/aec3/subband_erle_estimator.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// Estimates the echo return loss enhancement for each frequency subband. +class SubbandErleEstimator { + public: + SubbandErleEstimator(float min_erle, float max_erle_lf, float max_erle_hf); + ~SubbandErleEstimator(); + + // Resets the ERLE estimator. + void Reset(); + + // Updates the ERLE estimate. + void Update(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + bool converged_filter, + bool onset_detection); + + // Returns the ERLE estimate. 
+ const std::array& Erle() const { return erle_; } + + // Returns the ERLE estimate at onsets. + const std::array& ErleOnsets() const { + return erle_onsets_; + } + + void Dump(const std::unique_ptr& data_dumper) const; + + private: + void UpdateBands(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + size_t start, + size_t stop, + float max_erle, + bool onset_detection); + void DecreaseErlePerBandForLowRenderSignals(); + + class ErleInstantaneous { + public: + ErleInstantaneous(); + ~ErleInstantaneous(); + // Updates the ERLE for a band with a new block. Returns absl::nullopt + // if not enough points were accumulated for doing the estimation, + // otherwise, it returns the ERLE. When the ERLE is returned, the + // low_render_energy flag contains information on whether the estimation was + // done using low level render signals. + absl::optional Update(float X2, + float Y2, + float E2, + size_t band, + bool* low_render_energy); + // Resets the ERLE estimator to its initial state. + void Reset(); + + private: + std::array Y2_acum_; + std::array E2_acum_; + std::array low_render_energy_; + std::array num_points_; + }; + + ErleInstantaneous instantaneous_erle_; + std::array erle_; + std::array erle_onsets_; + std::array coming_onset_; + std::array hold_counters_; + const float min_erle_; + const float max_erle_lf_; + const float max_erle_hf_; + const bool adapt_on_low_render_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SUBBAND_ERLE_ESTIMATOR_H_