AEC3: Audibility improvements
This CL was created from work initiated at https://webrtc-review.googlesource.com/c/src/+/61160 The purpose of this work is to improve the performance of the echo canceler (AEC3) when the farend signal contains stationary noise: - A stationarity estimator of the farend signal has been added for detecting the portions of the farend signal that are pure noise. - When the echo canceler deals with a portion of the signal that contains basically only noise, the echo suppressor is able to back off and avoid the fading of the nearend speech. Change-Id: Id4b87fc59f4765bf1fca36d1cab39a49aabe104a Bug: webrtc:9193,chromium:836790 Reviewed-on: https://webrtc-review.googlesource.com/64141 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Jesus de Vicente Pena <devicentepena@webrtc.org> Cr-Commit-Position: refs/heads/master@{#23024}
This commit is contained in:

committed by
Commit Bot

parent
74e5f804e4
commit
d5cb477576
@ -96,6 +96,7 @@ struct EchoCanceller3Config {
|
||||
float audibility_threshold_lf = 10;
|
||||
float audibility_threshold_mf = 10;
|
||||
float audibility_threshold_hf = 10;
|
||||
bool use_stationary_properties = false;
|
||||
} echo_audibility;
|
||||
|
||||
struct RenderLevels {
|
||||
|
@ -39,6 +39,8 @@ rtc_static_library("aec3") {
|
||||
"delay_estimate.h",
|
||||
"downsampled_render_buffer.cc",
|
||||
"downsampled_render_buffer.h",
|
||||
"echo_audibility.cc",
|
||||
"echo_audibility.h",
|
||||
"echo_canceller3.cc",
|
||||
"echo_canceller3.h",
|
||||
"echo_path_delay_estimator.cc",
|
||||
@ -84,6 +86,8 @@ rtc_static_library("aec3") {
|
||||
"shadow_filter_update_gain.h",
|
||||
"skew_estimator.cc",
|
||||
"skew_estimator.h",
|
||||
"stationarity_estimator.cc",
|
||||
"stationarity_estimator.h",
|
||||
"subtractor.cc",
|
||||
"subtractor.h",
|
||||
"subtractor_output.h",
|
||||
|
@ -132,6 +132,12 @@ void AecState::Update(
|
||||
suppression_gain_limiter_.Update(render_buffer.GetRenderActivity(),
|
||||
transparent_mode_);
|
||||
|
||||
if (UseStationaryProperties() && external_delay_seen_) {
|
||||
// Update the echo audibility evaluator.
|
||||
echo_audibility_.Update(render_buffer, FilterDelayBlocks(),
|
||||
capture_block_counter_);
|
||||
}
|
||||
|
||||
// Update the ERL and ERLE measures.
|
||||
if (converged_filter && blocks_since_reset_ >= 2 * kNumBlocksPerSecond) {
|
||||
const auto& X2 = render_buffer.Spectrum(filter_delay_blocks_);
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "api/optional.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/echo_audibility.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/erl_estimator.h"
|
||||
#include "modules/audio_processing/aec3/erle_estimator.h"
|
||||
@ -53,6 +54,18 @@ class AecState {
|
||||
// Returns whether the render signal is currently active.
|
||||
bool ActiveRender() const { return blocks_with_active_render_ > 200; }
|
||||
|
||||
// Returns the appropriate scaling of the residual echo to match the
|
||||
// audibility.
|
||||
void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const {
|
||||
echo_audibility_.GetResidualEchoScaling(residual_scaling);
|
||||
}
|
||||
|
||||
// Returns whether the stationary properties of the signals are used in the
|
||||
// aec.
|
||||
bool UseStationaryProperties() const {
|
||||
return config_.echo_audibility.use_stationary_properties;
|
||||
}
|
||||
|
||||
// Returns the ERLE.
|
||||
const std::array<float, kFftLengthBy2Plus1>& Erle() const {
|
||||
return erle_estimator_.Erle();
|
||||
@ -172,6 +185,7 @@ class AecState {
|
||||
size_t converged_filter_count_ = 0;
|
||||
bool finite_erl_ = false;
|
||||
size_t active_blocks_since_converged_filter_ = 0;
|
||||
EchoAudibility echo_audibility_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(AecState);
|
||||
};
|
||||
|
50
modules/audio_processing/aec3/echo_audibility.cc
Normal file
50
modules/audio_processing/aec3/echo_audibility.cc
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/echo_audibility.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/stationarity_estimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
EchoAudibility::EchoAudibility() {
|
||||
Reset();
|
||||
}
|
||||
|
||||
void EchoAudibility::Reset() {
|
||||
render_stationarity_.Reset();
|
||||
}
|
||||
|
||||
EchoAudibility::~EchoAudibility() = default;
|
||||
|
||||
void EchoAudibility::Update(const RenderBuffer& render_buffer,
|
||||
size_t delay_blocks,
|
||||
size_t capture_block_counter) {
|
||||
RTC_DCHECK_GT(capture_block_counter, delay_blocks);
|
||||
|
||||
size_t num_lookahead = std::min(StationarityEstimator::GetMaxNumLookAhead(),
|
||||
render_buffer.Headroom() - delay_blocks + 1);
|
||||
int render_block_number = capture_block_counter - delay_blocks;
|
||||
|
||||
for (size_t k = 0; k < (num_lookahead + 1); ++k) {
|
||||
// Delay changes can potentially make that not all the farend blocks
|
||||
// are seen. That effect is assumed to have a minimum effect in the
|
||||
// estimation.
|
||||
render_stationarity_.Update(render_buffer.Spectrum(delay_blocks - k),
|
||||
render_block_number + k);
|
||||
}
|
||||
render_stationarity_.UpdateStationarityFlags(render_block_number,
|
||||
num_lookahead);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
62
modules/audio_processing/aec3/echo_audibility.h
Normal file
62
modules/audio_processing/aec3/echo_audibility.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/stationarity_estimator.h"
|
||||
#include "rtc_base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
class EchoAudibility {
|
||||
public:
|
||||
EchoAudibility();
|
||||
~EchoAudibility();
|
||||
|
||||
// Feed new render data to the echo audibility estimator.
|
||||
void Update(const RenderBuffer& render_buffer,
|
||||
size_t delay_blocks,
|
||||
size_t capture_block_counter_);
|
||||
|
||||
// Get the residual echo scaling.
|
||||
void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const {
|
||||
for (size_t band = 0; band < residual_scaling.size(); ++band) {
|
||||
if (render_stationarity_.IsBandStationary(band)) {
|
||||
residual_scaling[band] = 0.f;
|
||||
} else {
|
||||
residual_scaling[band] = 1.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Reset the EchoAudibility class.
|
||||
void Reset();
|
||||
|
||||
// Compute the residual scaling per frequency for the current frame.
|
||||
void ComputeResidualScaling();
|
||||
StationarityEstimator render_stationarity_;
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(EchoAudibility);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
@ -67,6 +67,21 @@ class RenderBuffer {
|
||||
// Specifies the recent activity seen in the render signal.
|
||||
void SetRenderActivity(bool activity) { render_activity_ = activity; }
|
||||
|
||||
// Returns the headroom between the write and the read positions in the
|
||||
// buffer;
|
||||
int Headroom() const {
|
||||
// The write and read indices are decreased over time.
|
||||
int headroom =
|
||||
fft_buffer_->write < fft_buffer_->read
|
||||
? fft_buffer_->read - fft_buffer_->write
|
||||
: fft_buffer_->size - fft_buffer_->write + fft_buffer_->read;
|
||||
|
||||
RTC_DCHECK_LE(0, headroom);
|
||||
RTC_DCHECK_GE(fft_buffer_->size, headroom);
|
||||
|
||||
return headroom;
|
||||
}
|
||||
|
||||
private:
|
||||
const MatrixBuffer* const block_buffer_;
|
||||
const VectorBuffer* const spectrum_buffer_;
|
||||
|
@ -55,6 +55,8 @@ void ResidualEchoEstimator::Estimate(
|
||||
static_cast<int>(config_.echo_model.render_post_window_size);
|
||||
EchoGeneratingPower(render_buffer, window_start, window_end, &X2);
|
||||
|
||||
// TODO(devicentepena): look if this is competing/completing
|
||||
// with the stationarity estimator
|
||||
// Subtract the stationary noise power to avoid stationary noise causing
|
||||
// excessive echo suppression.
|
||||
std::transform(X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
|
||||
@ -75,6 +77,18 @@ void ResidualEchoEstimator::Estimate(
|
||||
aec_state.ReverbDecay(), R2);
|
||||
}
|
||||
|
||||
if (aec_state.UseStationaryProperties()) {
|
||||
// Scale the echo according to echo audibility.
|
||||
std::array<float, kFftLengthBy2Plus1> residual_scaling;
|
||||
aec_state.GetResidualEchoScaling(residual_scaling);
|
||||
for (size_t k = 0; k < R2->size(); ++k) {
|
||||
(*R2)[k] *= residual_scaling[k];
|
||||
if (residual_scaling[k] == 0.f) {
|
||||
R2_hold_counter_[k] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the echo is deemed inaudible, set the residual echo to zero.
|
||||
if (aec_state.TransparentMode()) {
|
||||
R2->fill(0.f);
|
||||
|
232
modules/audio_processing/aec3/stationarity_estimator.cc
Normal file
232
modules/audio_processing/aec3/stationarity_estimator.cc
Normal file
@ -0,0 +1,232 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/stationarity_estimator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomicops.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
constexpr float kMinNoisePower = 10.f;
|
||||
constexpr int kHangoverBlocks = kNumBlocksPerSecond / 20;
|
||||
constexpr int kNBlocksAverageInitPhase = 20;
|
||||
constexpr int kNBlocksInitialPhase = kNumBlocksPerSecond * 2.;
|
||||
constexpr size_t kLongWindowSize = 13;
|
||||
} // namespace
|
||||
|
||||
// Creates the estimator with a data dumper tagged with a per-instance id and
// index vectors that are created with kLongWindowSize elements each, and then
// resets the estimation state.
StationarityEstimator::StationarityEstimator()
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      idx_lookahead_(kLongWindowSize, 0),
      idx_lookback_(kLongWindowSize, 0) {
  // The circular buffer must have room for the full window plus one block.
  static_assert(StationarityEstimator::CircularBuffer::GetBufferSize() >=
                    (kLongWindowSize + 1),
                "Mismatch between the window size and the buffer size.");
  Reset();
}

StationarityEstimator::~StationarityEstimator() = default;
|
||||
|
||||
// Clears the stationarity flags and the hangover counters of all bands and
// resets the noise spectrum estimate.
void StationarityEstimator::Reset() {
  stationarity_flags_.fill(false);
  hangovers_.fill(0);
  noise_.Reset();
}
|
||||
|
||||
// Incorporates the power spectrum of block |block_number| into the noise
// estimate and stores it in the circular buffer. Blocks whose number is
// already present in the buffer are ignored.
void StationarityEstimator::Update(rtc::ArrayView<const float> spectrum,
                                   int block_number) {
  if (buffer_.IsBlockNumberAlreadyUpdated(block_number)) {
    return;
  }
  noise_.Update(spectrum);
  WriteInfoFrameInSlot(block_number, spectrum);
}
|
||||
|
||||
void StationarityEstimator::UpdateStationarityFlags(size_t current_block_number,
|
||||
size_t num_lookahead) {
|
||||
RTC_DCHECK_GE(idx_lookahead_.capacity(),
|
||||
std::min(num_lookahead + 1, kLongWindowSize));
|
||||
idx_lookahead_.resize(std::min(num_lookahead + 1, kLongWindowSize));
|
||||
idx_lookback_.resize(0);
|
||||
GetSlotsAheadBack(current_block_number);
|
||||
|
||||
for (size_t k = 0; k < stationarity_flags_.size(); ++k) {
|
||||
stationarity_flags_[k] = EstimateBandStationarity(k);
|
||||
}
|
||||
UpdateHangover();
|
||||
SmoothStationaryPerFreq();
|
||||
|
||||
data_dumper_->DumpRaw("aec3_stationarity_noise_spectrum", noise_.Spectrum());
|
||||
}
|
||||
|
||||
void StationarityEstimator::WriteInfoFrameInSlot(
|
||||
int block_number,
|
||||
rtc::ArrayView<const float> spectrum) {
|
||||
size_t slot = buffer_.SetBlockNumberInSlot(block_number);
|
||||
for (size_t k = 0; k < spectrum.size(); ++k) {
|
||||
buffer_.SetElementProperties(spectrum[k], slot, k);
|
||||
}
|
||||
}
|
||||
|
||||
bool StationarityEstimator::EstimateBandStationarity(size_t band) const {
|
||||
constexpr float kThrStationarity = 10.f;
|
||||
float acumPower = 0.f;
|
||||
for (auto slot : idx_lookahead_) {
|
||||
acumPower += buffer_.GetPowerBand(slot, band);
|
||||
}
|
||||
for (auto slot : idx_lookback_) {
|
||||
acumPower += buffer_.GetPowerBand(slot, band);
|
||||
}
|
||||
|
||||
// Generally windowSize is equal to kLongWindowSize
|
||||
float windowSize = idx_lookahead_.size() + idx_lookback_.size();
|
||||
float noise = windowSize * GetStationarityPowerBand(band);
|
||||
RTC_CHECK_LT(0.f, noise);
|
||||
bool stationary = acumPower < kThrStationarity * noise;
|
||||
data_dumper_->DumpRaw("aec3_stationarity_long_ratio", acumPower / noise);
|
||||
return stationary;
|
||||
}
|
||||
|
||||
bool StationarityEstimator::AreAllBandsStationary() {
|
||||
for (auto b : stationarity_flags_) {
|
||||
if (!b)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void StationarityEstimator::UpdateHangover() {
|
||||
bool reduce_hangover = AreAllBandsStationary();
|
||||
for (size_t k = 0; k < stationarity_flags_.size(); ++k) {
|
||||
if (!stationarity_flags_[k]) {
|
||||
hangovers_[k] = kHangoverBlocks;
|
||||
} else if (reduce_hangover) {
|
||||
hangovers_[k] = std::max(hangovers_[k] - 1, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void StationarityEstimator::GetSlotsAheadBack(size_t current_block_number) {
|
||||
for (size_t block = 0; block < idx_lookahead_.size(); ++block) {
|
||||
idx_lookahead_[block] = buffer_.GetSlotNumber(current_block_number + block);
|
||||
}
|
||||
size_t num_lookback_blocks;
|
||||
if (idx_lookahead_.size() >= kLongWindowSize) {
|
||||
RTC_CHECK_EQ(idx_lookahead_.size(), kLongWindowSize);
|
||||
num_lookback_blocks = 0;
|
||||
} else {
|
||||
num_lookback_blocks = kLongWindowSize - idx_lookahead_.size();
|
||||
}
|
||||
if (current_block_number < num_lookback_blocks) {
|
||||
idx_lookback_.resize(0);
|
||||
} else {
|
||||
for (size_t block = 0; block < num_lookback_blocks; ++block) {
|
||||
int block_number = current_block_number - block - 1;
|
||||
if (!buffer_.IsBlockNumberAlreadyUpdated(block_number)) {
|
||||
break;
|
||||
} else {
|
||||
RTC_DCHECK_GE(idx_lookback_.capacity(), idx_lookback_.size() + 1);
|
||||
idx_lookback_.push_back(buffer_.GetSlotNumber(block_number));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void StationarityEstimator::SmoothStationaryPerFreq() {
|
||||
std::array<bool, kFftLengthBy2Plus1> all_ahead_stationary_smooth;
|
||||
for (size_t k = 1; k < kFftLengthBy2Plus1 - 1; ++k) {
|
||||
all_ahead_stationary_smooth[k] = stationarity_flags_[k - 1] &&
|
||||
stationarity_flags_[k] &&
|
||||
stationarity_flags_[k + 1];
|
||||
}
|
||||
|
||||
all_ahead_stationary_smooth[0] = all_ahead_stationary_smooth[1];
|
||||
all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 1] =
|
||||
all_ahead_stationary_smooth[kFftLengthBy2Plus1 - 2];
|
||||
|
||||
stationarity_flags_ = all_ahead_stationary_smooth;
|
||||
}
|
||||
|
||||
int StationarityEstimator::instance_count_ = 0;
|
||||
|
||||
StationarityEstimator::NoiseSpectrum::NoiseSpectrum() {
|
||||
Reset();
|
||||
}
|
||||
|
||||
StationarityEstimator::NoiseSpectrum::~NoiseSpectrum() = default;
|
||||
|
||||
void StationarityEstimator::NoiseSpectrum::Reset() {
|
||||
block_counter_ = 0;
|
||||
noise_spectrum_.fill(kMinNoisePower);
|
||||
}
|
||||
|
||||
// Updates the noise power estimate with the power spectrum of a new block.
// During the first kNBlocksAverageInitPhase blocks the spectra are
// accumulated with a fixed weight; afterwards each band is updated through
// UpdateBandBySmoothing().
void StationarityEstimator::NoiseSpectrum::Update(
    rtc::ArrayView<const float> spectrum) {
  RTC_DCHECK_EQ(kFftLengthBy2Plus1, spectrum.size());
  ++block_counter_;
  const float alpha = GetAlpha();

  if (block_counter_ <= kNBlocksAverageInitPhase) {
    for (size_t band = 0; band < spectrum.size(); ++band) {
      noise_spectrum_[band] +=
          (1.f / kNBlocksAverageInitPhase) * spectrum[band];
    }
    return;
  }

  for (size_t band = 0; band < spectrum.size(); ++band) {
    noise_spectrum_[band] =
        UpdateBandBySmoothing(spectrum[band], noise_spectrum_[band], alpha);
  }
}
|
||||
|
||||
float StationarityEstimator::NoiseSpectrum::GetAlpha() const {
|
||||
constexpr float kAlpha = 0.004f;
|
||||
constexpr float kAlphaInit = 0.04f;
|
||||
constexpr float kTiltAlpha = (kAlphaInit - kAlpha) / kNBlocksInitialPhase;
|
||||
|
||||
if (block_counter_ > (kNBlocksInitialPhase + kNBlocksAverageInitPhase)) {
|
||||
return kAlpha;
|
||||
} else {
|
||||
return kAlphaInit -
|
||||
kTiltAlpha * (block_counter_ - kNBlocksAverageInitPhase);
|
||||
}
|
||||
}
|
||||
|
||||
float StationarityEstimator::NoiseSpectrum::UpdateBandBySmoothing(
|
||||
float power_band,
|
||||
float power_band_noise,
|
||||
float alpha) const {
|
||||
float power_band_noise_updated = power_band_noise;
|
||||
if (power_band_noise < power_band) {
|
||||
RTC_DCHECK_GT(power_band, 0.f);
|
||||
float alpha_inc = alpha * (power_band_noise / power_band);
|
||||
if (block_counter_ > kNBlocksInitialPhase) {
|
||||
if (10.f * power_band_noise < power_band) {
|
||||
alpha_inc *= 0.1f;
|
||||
}
|
||||
}
|
||||
power_band_noise_updated += alpha_inc * (power_band - power_band_noise);
|
||||
} else {
|
||||
power_band_noise_updated += alpha * (power_band - power_band_noise);
|
||||
power_band_noise_updated =
|
||||
std::max(power_band_noise_updated, kMinNoisePower);
|
||||
}
|
||||
return power_band_noise_updated;
|
||||
}
|
||||
|
||||
// Marks every slot of the buffer as not holding any block by storing -1 as
// its block number.
StationarityEstimator::CircularBuffer::CircularBuffer() {
  // Iterate by reference: iterating by value would only modify a temporary
  // copy of each element and leave the block numbers in the buffer
  // uninitialized.
  for (auto& slot : slots_) {
    slot.block_number_ = -1;
  }
}
|
||||
|
||||
} // namespace webrtc
|
159
modules/audio_processing/aec3/stationarity_estimator.h
Normal file
159
modules/audio_processing/aec3/stationarity_estimator.h
Normal file
@ -0,0 +1,159 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
class StationarityEstimator {
|
||||
public:
|
||||
StationarityEstimator();
|
||||
~StationarityEstimator();
|
||||
|
||||
// Reset the stationarity estimator.
|
||||
void Reset();
|
||||
|
||||
// Update the stationarity estimator.
|
||||
void Update(rtc::ArrayView<const float> spectrum, int block_number);
|
||||
|
||||
// Update the flag indicating whether this current frame is stationary. For
|
||||
// getting a more robust estimation, it looks at future and/or past frames.
|
||||
void UpdateStationarityFlags(size_t current_block_number,
|
||||
size_t num_lookahead);
|
||||
|
||||
// Returns true if the current band is stationary.
|
||||
bool IsBandStationary(size_t band) const {
|
||||
return stationarity_flags_[band] && (hangovers_[band] == 0);
|
||||
}
|
||||
|
||||
static constexpr size_t GetMaxNumLookAhead() {
|
||||
return CircularBuffer::GetBufferSize() - 2;
|
||||
}
|
||||
|
||||
private:
|
||||
// Returns the power of the stationary noise spectrum at a band.
|
||||
float GetStationarityPowerBand(size_t k) const { return noise_.Power(k); }
|
||||
|
||||
// Write into the slot the information about the current frame that
|
||||
// is needed for the stationarity detection.
|
||||
void WriteInfoFrameInSlot(int block_number,
|
||||
rtc::ArrayView<const float> spectrum);
|
||||
|
||||
// Get an estimation of the stationarity for the current band by looking
|
||||
// at the past/present/future available data.
|
||||
bool EstimateBandStationarity(size_t band) const;
|
||||
|
||||
// True if all bands at the current point are stationary.
|
||||
bool AreAllBandsStationary();
|
||||
|
||||
// Update the hangover depending on the stationary status of the current
|
||||
// frame.
|
||||
void UpdateHangover();
|
||||
|
||||
// Get the slots that contain past/present and future data.
|
||||
void GetSlotsAheadBack(size_t current_block_number);
|
||||
|
||||
// Smooth the stationarity detection by looking at neighbouring frequency
|
||||
// bands.
|
||||
void SmoothStationaryPerFreq();
|
||||
|
||||
class NoiseSpectrum {
|
||||
public:
|
||||
NoiseSpectrum();
|
||||
~NoiseSpectrum();
|
||||
|
||||
// Reset the noise power spectrum estimate state.
|
||||
void Reset();
|
||||
|
||||
// Update the noise power spectrum with a new frame.
|
||||
void Update(rtc::ArrayView<const float> spectrum);
|
||||
|
||||
// Get the noise estimation power spectrum.
|
||||
rtc::ArrayView<const float> Spectrum() const { return noise_spectrum_; }
|
||||
|
||||
// Get the noise power spectrum at a certain band.
|
||||
float Power(size_t band) const {
|
||||
RTC_DCHECK_LT(band, noise_spectrum_.size());
|
||||
return noise_spectrum_[band];
|
||||
}
|
||||
|
||||
private:
|
||||
// Get the update coefficient to be used for the current frame.
|
||||
float GetAlpha() const;
|
||||
|
||||
// Update the noise power spectrum at a certain band with a new frame.
|
||||
float UpdateBandBySmoothing(float power_band,
|
||||
float power_band_noise,
|
||||
float alpha) const;
|
||||
std::array<float, kFftLengthBy2Plus1> noise_spectrum_;
|
||||
size_t block_counter_;
|
||||
};
|
||||
|
||||
// The class circular buffer stores the data needed to take a decission
|
||||
// on whether the current frame is stationary by looking at data from the
|
||||
// future, present and/or past. This buffer stores that data that is
|
||||
// represented by the struct Element a bit bellow.
|
||||
class CircularBuffer {
|
||||
public:
|
||||
static constexpr int kCircularBufferSize = 16;
|
||||
struct Element {
|
||||
int block_number_;
|
||||
std::array<float, kFftLengthBy2Plus1> power_spectrum_;
|
||||
};
|
||||
CircularBuffer();
|
||||
|
||||
static constexpr int GetBufferSize() { return kCircularBufferSize; }
|
||||
|
||||
bool IsBlockNumberAlreadyUpdated(int block_number) const {
|
||||
size_t slot_number = GetSlotNumber(block_number);
|
||||
return slots_[slot_number].block_number_ == block_number;
|
||||
}
|
||||
|
||||
size_t GetSlotNumber(int block_number) const {
|
||||
return block_number & (kCircularBufferSize - 1);
|
||||
}
|
||||
size_t SetBlockNumberInSlot(int block_number) {
|
||||
size_t slot = GetSlotNumber(block_number);
|
||||
slots_[slot].block_number_ = block_number;
|
||||
return slot;
|
||||
}
|
||||
void SetElementProperties(float band_power, int slot, int band) {
|
||||
slots_[slot].power_spectrum_[band] = band_power;
|
||||
}
|
||||
float GetPowerBand(size_t slot, size_t band) const {
|
||||
return slots_[slot].power_spectrum_[band];
|
||||
}
|
||||
|
||||
private:
|
||||
std::array<Element, kCircularBufferSize> slots_;
|
||||
};
|
||||
static int instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
NoiseSpectrum noise_;
|
||||
std::vector<size_t> idx_lookahead_;
|
||||
std::vector<size_t> idx_lookback_;
|
||||
std::array<int, kFftLengthBy2Plus1> hangovers_;
|
||||
std::array<bool, kFftLengthBy2Plus1> stationarity_flags_;
|
||||
CircularBuffer buffer_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_STATIONARITY_ESTIMATOR_H_
|
@ -238,6 +238,8 @@ EchoCanceller3Config ParseAec3Parameters(const std::string& filename) {
|
||||
&cfg.echo_audibility.audibility_threshold_mf);
|
||||
ReadParam(section, "audibility_threshold_hf",
|
||||
&cfg.echo_audibility.audibility_threshold_hf);
|
||||
ReadParam(section, "use_stationary_properties",
|
||||
&cfg.echo_audibility.use_stationary_properties);
|
||||
}
|
||||
|
||||
if (rtc::GetValueFromJsonObject(root, "gain_updates", §ion)) {
|
||||
|
Reference in New Issue
Block a user