AEC3: Add multichannel support to the residual echo estimator
This CL adds support for multichannel in the residual echo estimator code. It also adds placeholder functionality in the surrounding code to ensure that the residual echo estimator receives the require inputs. The changes in the CL has been shown to be bitexact on a large set of mono recordings. Bug: webrtc:10913 Change-Id: I726128ca928648b1dcf36c5f479eb243f3ff3f96 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155361 Commit-Queue: Per Åhgren <peah@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/master@{#29400}
This commit is contained in:
@ -65,7 +65,7 @@ AecState::AecState(const EchoCanceller3Config& config,
|
|||||||
transparent_state_(config_),
|
transparent_state_(config_),
|
||||||
filter_quality_state_(config_),
|
filter_quality_state_(config_),
|
||||||
erl_estimator_(2 * kNumBlocksPerSecond),
|
erl_estimator_(2 * kNumBlocksPerSecond),
|
||||||
erle_estimator_(2 * kNumBlocksPerSecond, config_),
|
erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels),
|
||||||
filter_analyzer_(config_),
|
filter_analyzer_(config_),
|
||||||
echo_audibility_(
|
echo_audibility_(
|
||||||
config_.echo_audibility.use_stationarity_properties_at_init),
|
config_.echo_audibility.use_stationarity_properties_at_init),
|
||||||
@ -214,7 +214,7 @@ void AecState::Update(
|
|||||||
reverb_model_estimator_.Dump(data_dumper_.get());
|
reverb_model_estimator_.Dump(data_dumper_.get());
|
||||||
data_dumper_->DumpRaw("aec3_erl", Erl());
|
data_dumper_->DumpRaw("aec3_erl", Erl());
|
||||||
data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
|
data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
|
||||||
data_dumper_->DumpRaw("aec3_erle", Erle());
|
data_dumper_->DumpRaw("aec3_erle", Erle()[0]);
|
||||||
data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
|
data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
|
||||||
data_dumper_->DumpRaw("aec3_transparent_mode", TransparentMode());
|
data_dumper_->DumpRaw("aec3_transparent_mode", TransparentMode());
|
||||||
data_dumper_->DumpRaw("aec3_filter_delay", filter_analyzer_.DelayBlocks());
|
data_dumper_->DumpRaw("aec3_filter_delay", filter_analyzer_.DelayBlocks());
|
||||||
|
@ -68,12 +68,12 @@ class AecState {
|
|||||||
|
|
||||||
// Returns whether the stationary properties of the signals are used in the
|
// Returns whether the stationary properties of the signals are used in the
|
||||||
// aec.
|
// aec.
|
||||||
bool UseStationaryProperties() const {
|
bool UseStationarityProperties() const {
|
||||||
return config_.echo_audibility.use_stationarity_properties;
|
return config_.echo_audibility.use_stationarity_properties;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the ERLE.
|
// Returns the ERLE.
|
||||||
const std::array<float, kFftLengthBy2Plus1>& Erle() const {
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
|
||||||
return erle_estimator_.Erle();
|
return erle_estimator_.Erle();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,7 +170,7 @@ void RunNormalUsageTest(size_t num_render_channels,
|
|||||||
{
|
{
|
||||||
// Note that the render spectrum is built so it does not have energy in
|
// Note that the render spectrum is built so it does not have energy in
|
||||||
// the odd bands but just in the even bands.
|
// the odd bands but just in the even bands.
|
||||||
const auto& erle = state.Erle();
|
const auto& erle = state.Erle()[0];
|
||||||
EXPECT_EQ(erle[0], erle[1]);
|
EXPECT_EQ(erle[0], erle[1]);
|
||||||
constexpr size_t kLowFrequencyLimit = 32;
|
constexpr size_t kLowFrequencyLimit = 32;
|
||||||
for (size_t k = 2; k < kLowFrequencyLimit; k = k + 2) {
|
for (size_t k = 2; k < kLowFrequencyLimit; k = k + 2) {
|
||||||
@ -195,7 +195,7 @@ void RunNormalUsageTest(size_t num_render_channels,
|
|||||||
|
|
||||||
ASSERT_TRUE(state.UsableLinearEstimate());
|
ASSERT_TRUE(state.UsableLinearEstimate());
|
||||||
{
|
{
|
||||||
const auto& erle = state.Erle();
|
const auto& erle = state.Erle()[0];
|
||||||
EXPECT_EQ(erle[0], erle[1]);
|
EXPECT_EQ(erle[0], erle[1]);
|
||||||
constexpr size_t kLowFrequencyLimit = 32;
|
constexpr size_t kLowFrequencyLimit = 32;
|
||||||
for (size_t k = 1; k < kLowFrequencyLimit; ++k) {
|
for (size_t k = 1; k < kLowFrequencyLimit; ++k) {
|
||||||
|
@ -152,7 +152,7 @@ class EchoRemoverImpl final : public EchoRemover {
|
|||||||
std::vector<std::unique_ptr<ComfortNoiseGenerator>> cngs_;
|
std::vector<std::unique_ptr<ComfortNoiseGenerator>> cngs_;
|
||||||
SuppressionFilter suppression_filter_;
|
SuppressionFilter suppression_filter_;
|
||||||
RenderSignalAnalyzer render_signal_analyzer_;
|
RenderSignalAnalyzer render_signal_analyzer_;
|
||||||
std::vector<std::unique_ptr<ResidualEchoEstimator>> residual_echo_estimators_;
|
ResidualEchoEstimator residual_echo_estimator_;
|
||||||
bool echo_leakage_detected_ = false;
|
bool echo_leakage_detected_ = false;
|
||||||
AecState aec_state_;
|
AecState aec_state_;
|
||||||
EchoRemoverMetrics metrics_;
|
EchoRemoverMetrics metrics_;
|
||||||
@ -201,7 +201,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
|
|||||||
sample_rate_hz_,
|
sample_rate_hz_,
|
||||||
num_capture_channels_),
|
num_capture_channels_),
|
||||||
render_signal_analyzer_(config_),
|
render_signal_analyzer_(config_),
|
||||||
residual_echo_estimators_(num_capture_channels_),
|
residual_echo_estimator_(config_, num_render_channels),
|
||||||
aec_state_(config_, num_capture_channels_),
|
aec_state_(config_, num_capture_channels_),
|
||||||
e_old_(num_capture_channels_),
|
e_old_(num_capture_channels_),
|
||||||
y_old_(num_capture_channels_),
|
y_old_(num_capture_channels_),
|
||||||
@ -222,8 +222,6 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
|
|||||||
|
|
||||||
uint32_t cng_seed = 42;
|
uint32_t cng_seed = 42;
|
||||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||||
residual_echo_estimators_[ch] =
|
|
||||||
std::make_unique<ResidualEchoEstimator>(config_);
|
|
||||||
suppression_gains_[ch] = std::make_unique<SuppressionGain>(
|
suppression_gains_[ch] = std::make_unique<SuppressionGain>(
|
||||||
config_, optimization_, sample_rate_hz);
|
config_, optimization_, sample_rate_hz);
|
||||||
cngs_[ch] =
|
cngs_[ch] =
|
||||||
@ -400,11 +398,11 @@ void EchoRemoverImpl::ProcessCapture(
|
|||||||
std::array<float, kFftLengthBy2Plus1> G;
|
std::array<float, kFftLengthBy2Plus1> G;
|
||||||
G.fill(1.f);
|
G.fill(1.f);
|
||||||
|
|
||||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
||||||
// Estimate the residual echo power.
|
// Estimate the residual echo power.
|
||||||
residual_echo_estimators_[ch]->Estimate(aec_state_, *render_buffer,
|
residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
|
||||||
S2_linear[ch], Y2[ch], &R2[ch]);
|
R2);
|
||||||
|
|
||||||
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||||
// Estimate the comfort noise.
|
// Estimate the comfort noise.
|
||||||
cngs_[ch]->Compute(aec_state_, Y2[ch], &comfort_noise[ch],
|
cngs_[ch]->Compute(aec_state_, Y2[ch], &comfort_noise[ch],
|
||||||
&high_band_comfort_noise[ch]);
|
&high_band_comfort_noise[ch]);
|
||||||
@ -462,8 +460,6 @@ void EchoRemoverImpl::ProcessCapture(
|
|||||||
"aec3_X2",
|
"aec3_X2",
|
||||||
render_buffer->Spectrum(aec_state_.FilterDelayBlocks(), /*channel=*/0));
|
render_buffer->Spectrum(aec_state_.FilterDelayBlocks(), /*channel=*/0));
|
||||||
data_dumper_->DumpRaw("aec3_R2", R2[0]);
|
data_dumper_->DumpRaw("aec3_R2", R2[0]);
|
||||||
data_dumper_->DumpRaw("aec3_R2_reverb",
|
|
||||||
residual_echo_estimators_[0]->GetReverbPowerSpectrum());
|
|
||||||
data_dumper_->DumpRaw("aec3_filter_delay", aec_state_.FilterDelayBlocks());
|
data_dumper_->DumpRaw("aec3_filter_delay", aec_state_.FilterDelayBlocks());
|
||||||
data_dumper_->DumpRaw("aec3_capture_saturation",
|
data_dumper_->DumpRaw("aec3_capture_saturation",
|
||||||
aec_state_.SaturatedCapture() ? 1 : 0);
|
aec_state_.SaturatedCapture() ? 1 : 0);
|
||||||
|
@ -70,7 +70,7 @@ void EchoRemoverMetrics::Update(
|
|||||||
if (++block_counter_ <= kMetricsCollectionBlocks) {
|
if (++block_counter_ <= kMetricsCollectionBlocks) {
|
||||||
aec3::UpdateDbMetric(aec_state.Erl(), &erl_);
|
aec3::UpdateDbMetric(aec_state.Erl(), &erl_);
|
||||||
erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain());
|
erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain());
|
||||||
aec3::UpdateDbMetric(aec_state.Erle(), &erle_);
|
aec3::UpdateDbMetric(aec_state.Erle()[0], &erle_);
|
||||||
erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2());
|
erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2());
|
||||||
aec3::UpdateDbMetric(comfort_noise_spectrum, &comfort_noise_);
|
aec3::UpdateDbMetric(comfort_noise_spectrum, &comfort_noise_);
|
||||||
aec3::UpdateDbMetric(suppressor_gain, &suppressor_gain_);
|
aec3::UpdateDbMetric(suppressor_gain, &suppressor_gain_);
|
||||||
|
@ -16,12 +16,13 @@
|
|||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
|
||||||
ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks_,
|
ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks_,
|
||||||
const EchoCanceller3Config& config)
|
const EchoCanceller3Config& config,
|
||||||
|
size_t num_capture_channels)
|
||||||
: startup_phase_length_blocks__(startup_phase_length_blocks_),
|
: startup_phase_length_blocks__(startup_phase_length_blocks_),
|
||||||
use_signal_dependent_erle_(config.erle.num_sections > 1),
|
use_signal_dependent_erle_(config.erle.num_sections > 1),
|
||||||
fullband_erle_estimator_(config.erle.min, config.erle.max_l),
|
fullband_erle_estimator_(config.erle.min, config.erle.max_l),
|
||||||
subband_erle_estimator_(config),
|
subband_erle_estimator_(config, num_capture_channels),
|
||||||
signal_dependent_erle_estimator_(config) {
|
signal_dependent_erle_estimator_(config, num_capture_channels) {
|
||||||
Reset(true);
|
Reset(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,7 +33,8 @@ namespace webrtc {
|
|||||||
class ErleEstimator {
|
class ErleEstimator {
|
||||||
public:
|
public:
|
||||||
ErleEstimator(size_t startup_phase_length_blocks_,
|
ErleEstimator(size_t startup_phase_length_blocks_,
|
||||||
const EchoCanceller3Config& config);
|
const EchoCanceller3Config& config,
|
||||||
|
size_t num_capture_channels);
|
||||||
~ErleEstimator();
|
~ErleEstimator();
|
||||||
|
|
||||||
// Resets the fullband ERLE estimator and the subbands ERLE estimators.
|
// Resets the fullband ERLE estimator and the subbands ERLE estimators.
|
||||||
@ -50,10 +51,11 @@ class ErleEstimator {
|
|||||||
bool onset_detection);
|
bool onset_detection);
|
||||||
|
|
||||||
// Returns the most recent subband ERLE estimates.
|
// Returns the most recent subband ERLE estimates.
|
||||||
const std::array<float, kFftLengthBy2Plus1>& Erle() const {
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
|
||||||
return use_signal_dependent_erle_ ? signal_dependent_erle_estimator_.Erle()
|
return use_signal_dependent_erle_ ? signal_dependent_erle_estimator_.Erle()
|
||||||
: subband_erle_estimator_.Erle();
|
: subband_erle_estimator_.Erle();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the subband ERLE that are estimated during onsets. Used
|
// Returns the subband ERLE that are estimated during onsets. Used
|
||||||
// for logging/testing.
|
// for logging/testing.
|
||||||
rtc::ArrayView<const float> ErleOnsets() const {
|
rtc::ArrayView<const float> ErleOnsets() const {
|
||||||
|
@ -113,22 +113,23 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) {
|
|||||||
std::array<float, kFftLengthBy2Plus1> X2;
|
std::array<float, kFftLengthBy2Plus1> X2;
|
||||||
std::array<float, kFftLengthBy2Plus1> E2;
|
std::array<float, kFftLengthBy2Plus1> E2;
|
||||||
std::array<float, kFftLengthBy2Plus1> Y2;
|
std::array<float, kFftLengthBy2Plus1> Y2;
|
||||||
constexpr size_t kNumChannels = 1;
|
constexpr size_t kNumRenderChannels = 1;
|
||||||
|
constexpr size_t kNumCaptureChannels = 1;
|
||||||
constexpr int kSampleRateHz = 48000;
|
constexpr int kSampleRateHz = 48000;
|
||||||
constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
|
constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
|
||||||
|
|
||||||
EchoCanceller3Config config;
|
EchoCanceller3Config config;
|
||||||
std::vector<std::vector<std::vector<float>>> x(
|
std::vector<std::vector<std::vector<float>>> x(
|
||||||
kNumBands, std::vector<std::vector<float>>(
|
kNumBands, std::vector<std::vector<float>>(
|
||||||
kNumChannels, std::vector<float>(kBlockSize, 0.f)));
|
kNumRenderChannels, std::vector<float>(kBlockSize, 0.f)));
|
||||||
std::vector<std::array<float, kFftLengthBy2Plus1>> filter_frequency_response(
|
std::vector<std::array<float, kFftLengthBy2Plus1>> filter_frequency_response(
|
||||||
config.filter.main.length_blocks);
|
config.filter.main.length_blocks);
|
||||||
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
||||||
RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels));
|
RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
|
||||||
|
|
||||||
GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples);
|
GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples);
|
||||||
|
|
||||||
ErleEstimator estimator(0, config);
|
ErleEstimator estimator(0, config, kNumCaptureChannels);
|
||||||
|
|
||||||
FormFarendTimeFrame(&x);
|
FormFarendTimeFrame(&x);
|
||||||
render_delay_buffer->Insert(x);
|
render_delay_buffer->Insert(x);
|
||||||
@ -142,7 +143,7 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) {
|
|||||||
estimator.Update(*render_delay_buffer->GetRenderBuffer(),
|
estimator.Update(*render_delay_buffer->GetRenderBuffer(),
|
||||||
filter_frequency_response, X2, Y2, E2, true, true);
|
filter_frequency_response, X2, Y2, E2, true, true);
|
||||||
}
|
}
|
||||||
VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
|
VerifyErle(estimator.Erle()[0], std::pow(2.f, estimator.FullbandErleLog2()),
|
||||||
config.erle.max_l, config.erle.max_h);
|
config.erle.max_l, config.erle.max_h);
|
||||||
|
|
||||||
FormNearendFrame(&x, &X2, &E2, &Y2);
|
FormNearendFrame(&x, &X2, &E2, &Y2);
|
||||||
@ -154,12 +155,13 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) {
|
|||||||
estimator.Update(*render_delay_buffer->GetRenderBuffer(),
|
estimator.Update(*render_delay_buffer->GetRenderBuffer(),
|
||||||
filter_frequency_response, X2, Y2, E2, true, true);
|
filter_frequency_response, X2, Y2, E2, true, true);
|
||||||
}
|
}
|
||||||
VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
|
VerifyErle(estimator.Erle()[0], std::pow(2.f, estimator.FullbandErleLog2()),
|
||||||
config.erle.max_l, config.erle.max_h);
|
config.erle.max_l, config.erle.max_h);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ErleEstimator, VerifyErleTrackingOnOnsets) {
|
TEST(ErleEstimator, VerifyErleTrackingOnOnsets) {
|
||||||
constexpr size_t kNumChannels = 1;
|
constexpr size_t kNumRenderChannels = 1;
|
||||||
|
constexpr size_t kNumCaptureChannels = 1;
|
||||||
constexpr int kSampleRateHz = 48000;
|
constexpr int kSampleRateHz = 48000;
|
||||||
constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
|
constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
|
||||||
std::array<float, kFftLengthBy2Plus1> X2;
|
std::array<float, kFftLengthBy2Plus1> X2;
|
||||||
@ -168,16 +170,16 @@ TEST(ErleEstimator, VerifyErleTrackingOnOnsets) {
|
|||||||
EchoCanceller3Config config;
|
EchoCanceller3Config config;
|
||||||
std::vector<std::vector<std::vector<float>>> x(
|
std::vector<std::vector<std::vector<float>>> x(
|
||||||
kNumBands, std::vector<std::vector<float>>(
|
kNumBands, std::vector<std::vector<float>>(
|
||||||
kNumChannels, std::vector<float>(kBlockSize, 0.f)));
|
kNumRenderChannels, std::vector<float>(kBlockSize, 0.f)));
|
||||||
std::vector<std::array<float, kFftLengthBy2Plus1>> filter_frequency_response(
|
std::vector<std::array<float, kFftLengthBy2Plus1>> filter_frequency_response(
|
||||||
config.filter.main.length_blocks);
|
config.filter.main.length_blocks);
|
||||||
|
|
||||||
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
||||||
RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels));
|
RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
|
||||||
|
|
||||||
GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples);
|
GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples);
|
||||||
|
|
||||||
ErleEstimator estimator(0, config);
|
ErleEstimator estimator(0, config, kNumCaptureChannels);
|
||||||
|
|
||||||
FormFarendTimeFrame(&x);
|
FormFarendTimeFrame(&x);
|
||||||
render_delay_buffer->Insert(x);
|
render_delay_buffer->Insert(x);
|
||||||
@ -215,7 +217,7 @@ TEST(ErleEstimator, VerifyErleTrackingOnOnsets) {
|
|||||||
filter_frequency_response, X2, Y2, E2, true, true);
|
filter_frequency_response, X2, Y2, E2, true, true);
|
||||||
}
|
}
|
||||||
// Verifies that during ne activity, Erle converges to the Erle for onsets.
|
// Verifies that during ne activity, Erle converges to the Erle for onsets.
|
||||||
VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
|
VerifyErle(estimator.Erle()[0], std::pow(2.f, estimator.FullbandErleLog2()),
|
||||||
config.erle.min, config.erle.min);
|
config.erle.min, config.erle.min);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,10 +36,14 @@ void RenderReverbModel::Apply(const SpectrumBuffer& spectrum_buffer,
|
|||||||
int idx_past = spectrum_buffer.IncIndex(idx_at_delay);
|
int idx_past = spectrum_buffer.IncIndex(idx_at_delay);
|
||||||
const auto& X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
|
const auto& X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
|
||||||
RTC_DCHECK_EQ(X2.size(), reverb_power_spectrum.size());
|
RTC_DCHECK_EQ(X2.size(), reverb_power_spectrum.size());
|
||||||
std::copy(X2.begin(), X2.end(), reverb_power_spectrum.begin());
|
render_reverb_.UpdateReverbNoFreqShaping(
|
||||||
render_reverb_.AddReverbNoFreqShaping(
|
spectrum_buffer.buffer[idx_past][/*channel=*/0], 1.0f, reverb_decay);
|
||||||
spectrum_buffer.buffer[idx_past][/*channel=*/0], 1.0f, reverb_decay,
|
|
||||||
reverb_power_spectrum);
|
rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
|
||||||
|
render_reverb_.reverb();
|
||||||
|
for (size_t k = 0; k < X2.size(); ++k) {
|
||||||
|
reverb_power_spectrum[k] = X2[k] + reverb_power[k];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
@ -37,7 +37,7 @@ class RenderReverbModel {
|
|||||||
// Gets the reverberation spectrum that was added to the render spectrum for
|
// Gets the reverberation spectrum that was added to the render spectrum for
|
||||||
// computing the reverberation render spectrum.
|
// computing the reverberation render spectrum.
|
||||||
rtc::ArrayView<const float> GetReverbContributionPowerSpectrum() const {
|
rtc::ArrayView<const float> GetReverbContributionPowerSpectrum() const {
|
||||||
return render_reverb_.GetPowerSpectrum();
|
return render_reverb_.reverb();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -43,10 +43,114 @@ void GetRenderIndexesToAnalyze(
|
|||||||
*idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1);
|
*idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Estimates the residual echo power based on the echo return loss enhancement
|
||||||
|
// (ERLE) and the linear power estimate.
|
||||||
|
void LinearEstimate(
|
||||||
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||||
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
|
||||||
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||||
|
RTC_DCHECK_EQ(S2_linear.size(), erle.size());
|
||||||
|
RTC_DCHECK_EQ(S2_linear.size(), R2.size());
|
||||||
|
|
||||||
|
const size_t num_capture_channels = R2.size();
|
||||||
|
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||||
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
|
RTC_DCHECK_LT(0.f, erle[ch][k]);
|
||||||
|
R2[ch][k] = S2_linear[ch][k] / erle[ch][k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Estimates the residual echo power based on an uncertainty estimate of the
|
||||||
|
// echo return loss enhancement (ERLE) and the linear power estimate.
|
||||||
|
void LinearEstimate(
|
||||||
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||||
|
float erle_uncertainty,
|
||||||
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||||
|
RTC_DCHECK_EQ(S2_linear.size(), R2.size());
|
||||||
|
|
||||||
|
const size_t num_capture_channels = R2.size();
|
||||||
|
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||||
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
|
R2[ch][k] = S2_linear[ch][k] * erle_uncertainty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Estimates the residual echo power based on the estimate of the echo path
|
||||||
|
// gain.
|
||||||
|
void NonLinearEstimate(
|
||||||
|
float echo_path_gain,
|
||||||
|
const std::array<float, kFftLengthBy2Plus1>& X2,
|
||||||
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||||
|
const size_t num_capture_channels = R2.size();
|
||||||
|
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||||
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
|
R2[ch][k] = X2[k] * echo_path_gain;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Applies a soft noise gate to the echo generating power.
|
||||||
|
void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config,
|
||||||
|
rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
|
||||||
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
|
if (config.noise_gate_power > X2[k]) {
|
||||||
|
X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope *
|
||||||
|
(config.noise_gate_power - X2[k]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Estimates the echo generating signal power as gated maximal power over a
|
||||||
|
// time window.
|
||||||
|
void EchoGeneratingPower(size_t num_render_channels,
|
||||||
|
const SpectrumBuffer& spectrum_buffer,
|
||||||
|
const EchoCanceller3Config::EchoModel& echo_model,
|
||||||
|
int filter_delay_blocks,
|
||||||
|
rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
|
||||||
|
int idx_stop;
|
||||||
|
int idx_start;
|
||||||
|
GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks,
|
||||||
|
&idx_start, &idx_stop);
|
||||||
|
|
||||||
|
std::fill(X2.begin(), X2.end(), 0.f);
|
||||||
|
if (num_render_channels == 1) {
|
||||||
|
for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
|
||||||
|
for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
|
||||||
|
X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
|
||||||
|
std::array<float, kFftLengthBy2Plus1> render_power;
|
||||||
|
render_power.fill(0.f);
|
||||||
|
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||||
|
const auto& channel_power = spectrum_buffer.buffer[k][ch];
|
||||||
|
for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
|
||||||
|
render_power[j] += channel_power[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
|
||||||
|
X2[j] = std::max(X2[j], render_power[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Chooses the echo path gain to use.
|
||||||
|
float GetEchoPathGain(const AecState& aec_state,
|
||||||
|
const EchoCanceller3Config::EpStrength& config) {
|
||||||
|
float gain_amplitude =
|
||||||
|
aec_state.TransparentMode() ? 0.01f : config.default_gain;
|
||||||
|
return gain_amplitude * gain_amplitude;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config)
|
ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config,
|
||||||
: config_(config) {
|
size_t num_render_channels)
|
||||||
|
: config_(config), num_render_channels_(num_render_channels) {
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,72 +159,78 @@ ResidualEchoEstimator::~ResidualEchoEstimator() = default;
|
|||||||
void ResidualEchoEstimator::Estimate(
|
void ResidualEchoEstimator::Estimate(
|
||||||
const AecState& aec_state,
|
const AecState& aec_state,
|
||||||
const RenderBuffer& render_buffer,
|
const RenderBuffer& render_buffer,
|
||||||
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||||
const std::array<float, kFftLengthBy2Plus1>& Y2,
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||||
std::array<float, kFftLengthBy2Plus1>* R2) {
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||||
RTC_DCHECK(R2);
|
RTC_DCHECK_EQ(R2.size(), Y2.size());
|
||||||
|
RTC_DCHECK_EQ(R2.size(), S2_linear.size());
|
||||||
|
|
||||||
|
const size_t num_capture_channels = R2.size();
|
||||||
|
|
||||||
// Estimate the power of the stationary noise in the render signal.
|
// Estimate the power of the stationary noise in the render signal.
|
||||||
RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);
|
UpdateRenderNoisePower(render_buffer);
|
||||||
|
|
||||||
// Estimate the residual echo power.
|
// Estimate the residual echo power.
|
||||||
if (aec_state.UsableLinearEstimate()) {
|
if (aec_state.UsableLinearEstimate()) {
|
||||||
LinearEstimate(S2_linear, aec_state.Erle(), aec_state.ErleUncertainty(),
|
// When there is saturated echo, assume the same spectral content as is
|
||||||
R2);
|
// present in the microphone signal.
|
||||||
|
if (aec_state.SaturatedEcho()) {
|
||||||
|
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||||
|
std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
absl::optional<float> erle_uncertainty = aec_state.ErleUncertainty();
|
||||||
|
if (erle_uncertainty) {
|
||||||
|
LinearEstimate(S2_linear, *erle_uncertainty, R2);
|
||||||
|
} else {
|
||||||
|
LinearEstimate(S2_linear, aec_state.Erle(), R2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2);
|
||||||
|
} else {
|
||||||
|
const float echo_path_gain =
|
||||||
|
GetEchoPathGain(aec_state, config_.ep_strength);
|
||||||
|
|
||||||
// When there is saturated echo, assume the same spectral content as is
|
// When there is saturated echo, assume the same spectral content as is
|
||||||
// present in the microphone signal.
|
// present in the microphone signal.
|
||||||
if (aec_state.SaturatedEcho()) {
|
if (aec_state.SaturatedEcho()) {
|
||||||
std::copy(Y2.begin(), Y2.end(), R2->begin());
|
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||||
|
std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Adds the estimated unmodelled echo power to the residual echo power
|
|
||||||
// estimate.
|
|
||||||
echo_reverb_.AddReverb(
|
|
||||||
render_buffer.Spectrum(aec_state.FilterLengthBlocks() + 1,
|
|
||||||
/*channel=*/0),
|
|
||||||
aec_state.GetReverbFrequencyResponse(), aec_state.ReverbDecay(), *R2);
|
|
||||||
} else {
|
} else {
|
||||||
// Estimate the echo generating signal power.
|
// Estimate the echo generating signal power.
|
||||||
std::array<float, kFftLengthBy2Plus1> X2;
|
std::array<float, kFftLengthBy2Plus1> X2;
|
||||||
|
EchoGeneratingPower(num_render_channels_,
|
||||||
EchoGeneratingPower(render_buffer.GetSpectrumBuffer(), config_.echo_model,
|
render_buffer.GetSpectrumBuffer(), config_.echo_model,
|
||||||
aec_state.FilterDelayBlocks(),
|
aec_state.FilterDelayBlocks(), X2);
|
||||||
!aec_state.UseStationaryProperties(), &X2);
|
if (!aec_state.UseStationarityProperties()) {
|
||||||
|
ApplyNoiseGate(config_.echo_model, X2);
|
||||||
|
}
|
||||||
|
|
||||||
// Subtract the stationary noise power to avoid stationary noise causing
|
// Subtract the stationary noise power to avoid stationary noise causing
|
||||||
// excessive echo suppression.
|
// excessive echo suppression.
|
||||||
std::transform(X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
[&](float a, float b) {
|
X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k];
|
||||||
return std::max(
|
X2[k] = std::max(0.f, X2[k]);
|
||||||
0.f, a - config_.echo_model.stationary_gate_slope * b);
|
}
|
||||||
});
|
|
||||||
|
|
||||||
float echo_path_gain;
|
|
||||||
echo_path_gain =
|
|
||||||
aec_state.TransparentMode() ? 0.01f : config_.ep_strength.default_gain;
|
|
||||||
NonLinearEstimate(echo_path_gain, X2, R2);
|
NonLinearEstimate(echo_path_gain, X2, R2);
|
||||||
|
|
||||||
// When there is saturated echo, assume the same spectral content as is
|
|
||||||
// present in the microphone signal.
|
|
||||||
if (aec_state.SaturatedEcho()) {
|
|
||||||
std::copy(Y2.begin(), Y2.end(), R2->begin());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(aec_state.TransparentMode())) {
|
if (!aec_state.TransparentMode()) {
|
||||||
echo_reverb_.AddReverbNoFreqShaping(
|
AddReverb(ReverbType::kNonLinear, aec_state, render_buffer, R2);
|
||||||
render_buffer.Spectrum(aec_state.FilterDelayBlocks() + 1,
|
|
||||||
/*channel=*/0),
|
|
||||||
echo_path_gain * echo_path_gain, aec_state.ReverbDecay(), *R2);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (aec_state.UseStationaryProperties()) {
|
if (aec_state.UseStationarityProperties()) {
|
||||||
// Scale the echo according to echo audibility.
|
// Scale the echo according to echo audibility.
|
||||||
std::array<float, kFftLengthBy2Plus1> residual_scaling;
|
std::array<float, kFftLengthBy2Plus1> residual_scaling;
|
||||||
aec_state.GetResidualEchoScaling(residual_scaling);
|
aec_state.GetResidualEchoScaling(residual_scaling);
|
||||||
for (size_t k = 0; k < R2->size(); ++k) {
|
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||||
(*R2)[k] *= residual_scaling[k];
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
|
R2[ch][k] *= residual_scaling[k];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -131,94 +241,97 @@ void ResidualEchoEstimator::Reset() {
|
|||||||
X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
|
X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ResidualEchoEstimator::LinearEstimate(
|
void ResidualEchoEstimator::UpdateRenderNoisePower(
|
||||||
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
|
const RenderBuffer& render_buffer) {
|
||||||
const std::array<float, kFftLengthBy2Plus1>& erle,
|
std::array<float, kFftLengthBy2Plus1> render_power_data;
|
||||||
absl::optional<float> erle_uncertainty,
|
rtc::ArrayView<const float> render_power;
|
||||||
std::array<float, kFftLengthBy2Plus1>* R2) {
|
if (num_render_channels_ == 1) {
|
||||||
if (erle_uncertainty) {
|
render_power = render_buffer.Spectrum(0, /*channel=*/0);
|
||||||
for (size_t k = 0; k < R2->size(); ++k) {
|
|
||||||
(*R2)[k] = S2_linear[k] * *erle_uncertainty;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
|
render_power_data.fill(0.f);
|
||||||
[](float a, float b) {
|
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
|
||||||
RTC_DCHECK_LT(0.f, a);
|
const auto& channel_power = render_buffer.Spectrum(0, ch);
|
||||||
return b / a;
|
RTC_DCHECK_EQ(channel_power.size(), kFftLengthBy2Plus1);
|
||||||
});
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
|
render_power_data[k] += channel_power[k];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void ResidualEchoEstimator::NonLinearEstimate(
|
|
||||||
float echo_path_gain,
|
|
||||||
const std::array<float, kFftLengthBy2Plus1>& X2,
|
|
||||||
std::array<float, kFftLengthBy2Plus1>* R2) {
|
|
||||||
// Compute preliminary residual echo.
|
|
||||||
std::transform(X2.begin(), X2.end(), R2->begin(), [echo_path_gain](float a) {
|
|
||||||
return a * echo_path_gain * echo_path_gain;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
void ResidualEchoEstimator::EchoGeneratingPower(
|
|
||||||
const SpectrumBuffer& spectrum_buffer,
|
|
||||||
const EchoCanceller3Config::EchoModel& echo_model,
|
|
||||||
int filter_delay_blocks,
|
|
||||||
bool apply_noise_gating,
|
|
||||||
std::array<float, kFftLengthBy2Plus1>* X2) const {
|
|
||||||
int idx_stop, idx_start;
|
|
||||||
|
|
||||||
RTC_DCHECK(X2);
|
|
||||||
GetRenderIndexesToAnalyze(spectrum_buffer, config_.echo_model,
|
|
||||||
filter_delay_blocks, &idx_start, &idx_stop);
|
|
||||||
|
|
||||||
X2->fill(0.f);
|
|
||||||
for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
|
|
||||||
std::transform(X2->begin(), X2->end(),
|
|
||||||
spectrum_buffer.buffer[k][/*channel=*/0].begin(),
|
|
||||||
X2->begin(),
|
|
||||||
[](float a, float b) { return std::max(a, b); });
|
|
||||||
}
|
}
|
||||||
|
render_power = render_power_data;
|
||||||
if (apply_noise_gating) {
|
|
||||||
// Apply soft noise gate.
|
|
||||||
std::for_each(X2->begin(), X2->end(), [&](float& a) {
|
|
||||||
if (config_.echo_model.noise_gate_power > a) {
|
|
||||||
a = std::max(0.f, a - config_.echo_model.noise_gate_slope *
|
|
||||||
(config_.echo_model.noise_gate_power - a));
|
|
||||||
}
|
}
|
||||||
});
|
RTC_DCHECK_EQ(render_power.size(), kFftLengthBy2Plus1);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ResidualEchoEstimator::RenderNoisePower(
|
|
||||||
const RenderBuffer& render_buffer,
|
|
||||||
std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
|
|
||||||
std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) const {
|
|
||||||
RTC_DCHECK(X2_noise_floor);
|
|
||||||
RTC_DCHECK(X2_noise_floor_counter);
|
|
||||||
|
|
||||||
const auto render_power = render_buffer.Spectrum(0, /*channel=*/0);
|
|
||||||
RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
|
|
||||||
RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
|
|
||||||
|
|
||||||
// Estimate the stationary noise power in a minimum statistics manner.
|
// Estimate the stationary noise power in a minimum statistics manner.
|
||||||
for (size_t k = 0; k < render_power.size(); ++k) {
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
// Decrease rapidly.
|
// Decrease rapidly.
|
||||||
if (render_power[k] < (*X2_noise_floor)[k]) {
|
if (render_power[k] < X2_noise_floor_[k]) {
|
||||||
(*X2_noise_floor)[k] = render_power[k];
|
X2_noise_floor_[k] = render_power[k];
|
||||||
(*X2_noise_floor_counter)[k] = 0;
|
X2_noise_floor_counter_[k] = 0;
|
||||||
} else {
|
} else {
|
||||||
// Increase in a delayed, leaky manner.
|
// Increase in a delayed, leaky manner.
|
||||||
if ((*X2_noise_floor_counter)[k] >=
|
if (X2_noise_floor_counter_[k] >=
|
||||||
static_cast<int>(config_.echo_model.noise_floor_hold)) {
|
static_cast<int>(config_.echo_model.noise_floor_hold)) {
|
||||||
(*X2_noise_floor)[k] =
|
X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f,
|
||||||
std::max((*X2_noise_floor)[k] * 1.1f,
|
|
||||||
config_.echo_model.min_noise_floor_power);
|
config_.echo_model.min_noise_floor_power);
|
||||||
} else {
|
} else {
|
||||||
++(*X2_noise_floor_counter)[k];
|
++X2_noise_floor_counter_[k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adds the estimated power of the reverb to the residual echo power.
|
||||||
|
void ResidualEchoEstimator::AddReverb(
|
||||||
|
ReverbType reverb_type,
|
||||||
|
const AecState& aec_state,
|
||||||
|
const RenderBuffer& render_buffer,
|
||||||
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||||
|
const size_t num_capture_channels = R2.size();
|
||||||
|
|
||||||
|
// Choose reverb partition based on what type of echo power model is used.
|
||||||
|
const size_t first_reverb_partition = reverb_type == ReverbType::kLinear
|
||||||
|
? aec_state.FilterLengthBlocks() + 1
|
||||||
|
: aec_state.FilterDelayBlocks() + 1;
|
||||||
|
|
||||||
|
// Compute render power for the reverb.
|
||||||
|
std::array<float, kFftLengthBy2Plus1> render_power_data;
|
||||||
|
rtc::ArrayView<const float> render_power;
|
||||||
|
if (num_render_channels_ == 1) {
|
||||||
|
render_power =
|
||||||
|
render_buffer.Spectrum(first_reverb_partition, /*channel=*/0);
|
||||||
|
} else {
|
||||||
|
render_power_data.fill(0.f);
|
||||||
|
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
|
||||||
|
const auto& channel_power =
|
||||||
|
render_buffer.Spectrum(first_reverb_partition, ch);
|
||||||
|
RTC_DCHECK_EQ(channel_power.size(), kFftLengthBy2Plus1);
|
||||||
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
|
render_power_data[k] += channel_power[k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
render_power = render_power_data;
|
||||||
|
}
|
||||||
|
RTC_DCHECK_EQ(render_power.size(), kFftLengthBy2Plus1);
|
||||||
|
|
||||||
|
// Update the reverb estimate.
|
||||||
|
if (reverb_type == ReverbType::kLinear) {
|
||||||
|
echo_reverb_.UpdateReverb(render_power,
|
||||||
|
aec_state.GetReverbFrequencyResponse(),
|
||||||
|
aec_state.ReverbDecay());
|
||||||
|
} else {
|
||||||
|
const float echo_path_gain =
|
||||||
|
GetEchoPathGain(aec_state, config_.ep_strength);
|
||||||
|
echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain,
|
||||||
|
aec_state.ReverbDecay());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the reverb power.
|
||||||
|
rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
|
||||||
|
echo_reverb_.reverb();
|
||||||
|
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||||
|
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||||
|
R2[ch][k] += reverb_power[k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
@ -22,63 +22,47 @@
|
|||||||
#include "modules/audio_processing/aec3/reverb_model.h"
|
#include "modules/audio_processing/aec3/reverb_model.h"
|
||||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||||
#include "rtc_base/checks.h"
|
#include "rtc_base/checks.h"
|
||||||
#include "rtc_base/constructor_magic.h"
|
|
||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
|
||||||
class ResidualEchoEstimator {
|
class ResidualEchoEstimator {
|
||||||
public:
|
public:
|
||||||
explicit ResidualEchoEstimator(const EchoCanceller3Config& config);
|
ResidualEchoEstimator(const EchoCanceller3Config& config,
|
||||||
|
size_t num_render_channels);
|
||||||
~ResidualEchoEstimator();
|
~ResidualEchoEstimator();
|
||||||
|
|
||||||
void Estimate(const AecState& aec_state,
|
ResidualEchoEstimator(const ResidualEchoEstimator&) = delete;
|
||||||
const RenderBuffer& render_buffer,
|
ResidualEchoEstimator& operator=(const ResidualEchoEstimator&) = delete;
|
||||||
const std::array<float, kFftLengthBy2Plus1>& S2_linear,
|
|
||||||
const std::array<float, kFftLengthBy2Plus1>& Y2,
|
|
||||||
std::array<float, kFftLengthBy2Plus1>* R2);
|
|
||||||
|
|
||||||
// Returns the reverberant power spectrum contributions to the echo residual.
|
void Estimate(
|
||||||
rtc::ArrayView<const float> GetReverbPowerSpectrum() const {
|
const AecState& aec_state,
|
||||||
return echo_reverb_.GetPowerSpectrum();
|
const RenderBuffer& render_buffer,
|
||||||
}
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||||
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||||
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
enum class ReverbType { kLinear, kNonLinear };
|
||||||
|
|
||||||
// Resets the state.
|
// Resets the state.
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
// Estimates the residual echo power based on the echo return loss enhancement
|
|
||||||
// (ERLE) and the linear power estimate.
|
|
||||||
void LinearEstimate(const std::array<float, kFftLengthBy2Plus1>& S2_linear,
|
|
||||||
const std::array<float, kFftLengthBy2Plus1>& erle,
|
|
||||||
absl::optional<float> erle_uncertainty,
|
|
||||||
std::array<float, kFftLengthBy2Plus1>* R2);
|
|
||||||
|
|
||||||
// Estimates the residual echo power based on the estimate of the echo path
|
|
||||||
// gain.
|
|
||||||
void NonLinearEstimate(float echo_path_gain,
|
|
||||||
const std::array<float, kFftLengthBy2Plus1>& X2,
|
|
||||||
std::array<float, kFftLengthBy2Plus1>* R2);
|
|
||||||
|
|
||||||
// Estimates the echo generating signal power as gated maximal power over a
|
|
||||||
// time window.
|
|
||||||
void EchoGeneratingPower(const SpectrumBuffer& spectrum_buffer,
|
|
||||||
const EchoCanceller3Config::EchoModel& echo_model,
|
|
||||||
int filter_delay_blocks,
|
|
||||||
bool apply_noise_gating,
|
|
||||||
std::array<float, kFftLengthBy2Plus1>* X2) const;
|
|
||||||
|
|
||||||
// Updates estimate for the power of the stationary noise component in the
|
// Updates estimate for the power of the stationary noise component in the
|
||||||
// render signal.
|
// render signal.
|
||||||
void RenderNoisePower(
|
void UpdateRenderNoisePower(const RenderBuffer& render_buffer);
|
||||||
|
|
||||||
|
// Adds the estimated unmodelled echo power to the residual echo power
|
||||||
|
// estimate.
|
||||||
|
void AddReverb(ReverbType reverb_type,
|
||||||
|
const AecState& aec_state,
|
||||||
const RenderBuffer& render_buffer,
|
const RenderBuffer& render_buffer,
|
||||||
std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
|
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);
|
||||||
std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) const;
|
|
||||||
|
|
||||||
const EchoCanceller3Config config_;
|
const EchoCanceller3Config config_;
|
||||||
|
const size_t num_render_channels_;
|
||||||
std::array<float, kFftLengthBy2Plus1> X2_noise_floor_;
|
std::array<float, kFftLengthBy2Plus1> X2_noise_floor_;
|
||||||
std::array<int, kFftLengthBy2Plus1> X2_noise_floor_counter_;
|
std::array<int, kFftLengthBy2Plus1> X2_noise_floor_counter_;
|
||||||
ReverbModel echo_reverb_;
|
ReverbModel echo_reverb_;
|
||||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ResidualEchoEstimator);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
@ -20,55 +20,34 @@
|
|||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
|
||||||
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
|
TEST(ResidualEchoEstimator, BasicTest) {
|
||||||
|
for (size_t num_render_channels : {1, 2, 4}) {
|
||||||
// Verifies that the check for non-null output residual echo power works.
|
for (size_t num_capture_channels : {1, 2, 4}) {
|
||||||
TEST(ResidualEchoEstimator, NullResidualEchoPowerOutput) {
|
|
||||||
EchoCanceller3Config config;
|
|
||||||
AecState aec_state(config, 1);
|
|
||||||
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
|
||||||
RenderDelayBuffer::Create(config, 48000, 1));
|
|
||||||
std::vector<std::array<float, kFftLengthBy2Plus1>> H2;
|
|
||||||
std::array<float, kFftLengthBy2Plus1> S2_linear;
|
|
||||||
std::array<float, kFftLengthBy2Plus1> Y2;
|
|
||||||
EXPECT_DEATH(ResidualEchoEstimator(EchoCanceller3Config{})
|
|
||||||
.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(),
|
|
||||||
S2_linear, Y2, nullptr),
|
|
||||||
"");
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// TODO(peah): This test is broken in the sense that it not at all tests what it
|
|
||||||
// seems to test. Enable the test once that is adressed.
|
|
||||||
TEST(ResidualEchoEstimator, DISABLED_BasicTest) {
|
|
||||||
constexpr size_t kNumChannels = 1;
|
|
||||||
constexpr int kSampleRateHz = 48000;
|
constexpr int kSampleRateHz = 48000;
|
||||||
constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
|
constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
|
||||||
|
|
||||||
EchoCanceller3Config config;
|
EchoCanceller3Config config;
|
||||||
config.ep_strength.default_len = 0.f;
|
ResidualEchoEstimator estimator(config, num_render_channels);
|
||||||
ResidualEchoEstimator estimator(config);
|
AecState aec_state(config, num_render_channels);
|
||||||
AecState aec_state(config, kNumChannels);
|
|
||||||
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
|
||||||
RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels));
|
RenderDelayBuffer::Create(config, kSampleRateHz,
|
||||||
|
num_render_channels));
|
||||||
|
|
||||||
std::array<float, kFftLengthBy2Plus1> E2_main;
|
std::array<float, kFftLengthBy2Plus1> E2_main;
|
||||||
std::array<float, kFftLengthBy2Plus1> E2_shadow;
|
std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear(
|
||||||
std::array<float, kFftLengthBy2Plus1> S2_linear;
|
num_capture_channels);
|
||||||
std::array<float, kFftLengthBy2Plus1> S2_fallback;
|
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(
|
||||||
std::array<float, kFftLengthBy2Plus1> Y2;
|
num_capture_channels);
|
||||||
std::array<float, kFftLengthBy2Plus1> R2;
|
std::vector<std::array<float, kFftLengthBy2Plus1>> R2(
|
||||||
EchoPathVariability echo_path_variability(
|
num_capture_channels);
|
||||||
false, EchoPathVariability::DelayAdjustment::kNone, false);
|
|
||||||
std::vector<std::vector<std::vector<float>>> x(
|
std::vector<std::vector<std::vector<float>>> x(
|
||||||
kNumBands, std::vector<std::vector<float>>(
|
kNumBands,
|
||||||
kNumChannels, std::vector<float>(kBlockSize, 0.f)));
|
std::vector<std::vector<float>>(num_render_channels,
|
||||||
|
std::vector<float>(kBlockSize, 0.f)));
|
||||||
std::vector<std::array<float, kFftLengthBy2Plus1>> H2(10);
|
std::vector<std::array<float, kFftLengthBy2Plus1>> H2(10);
|
||||||
Random random_generator(42U);
|
Random random_generator(42U);
|
||||||
std::vector<SubtractorOutput> output(kNumChannels);
|
std::vector<SubtractorOutput> output(num_render_channels);
|
||||||
std::array<float, kBlockSize> y;
|
std::array<float, kBlockSize> y;
|
||||||
Aec3Fft fft;
|
|
||||||
absl::optional<DelayEstimate> delay_estimate;
|
absl::optional<DelayEstimate> delay_estimate;
|
||||||
|
|
||||||
for (auto& H2_k : H2) {
|
for (auto& H2_k : H2) {
|
||||||
@ -77,8 +56,8 @@ TEST(ResidualEchoEstimator, DISABLED_BasicTest) {
|
|||||||
H2[2].fill(10.f);
|
H2[2].fill(10.f);
|
||||||
H2[2][0] = 0.1f;
|
H2[2][0] = 0.1f;
|
||||||
|
|
||||||
std::vector<float> h(GetTimeDomainLength(config.filter.main.length_blocks),
|
std::vector<float> h(
|
||||||
0.f);
|
GetTimeDomainLength(config.filter.main.length_blocks), 0.f);
|
||||||
|
|
||||||
for (auto& subtractor_output : output) {
|
for (auto& subtractor_output : output) {
|
||||||
subtractor_output.Reset();
|
subtractor_output.Reset();
|
||||||
@ -87,31 +66,27 @@ TEST(ResidualEchoEstimator, DISABLED_BasicTest) {
|
|||||||
y.fill(0.f);
|
y.fill(0.f);
|
||||||
|
|
||||||
constexpr float kLevel = 10.f;
|
constexpr float kLevel = 10.f;
|
||||||
E2_shadow.fill(kLevel);
|
|
||||||
E2_main.fill(kLevel);
|
E2_main.fill(kLevel);
|
||||||
S2_linear.fill(kLevel);
|
S2_linear[0].fill(kLevel);
|
||||||
S2_fallback.fill(kLevel);
|
Y2[0].fill(kLevel);
|
||||||
Y2.fill(kLevel);
|
|
||||||
|
|
||||||
for (int k = 0; k < 1993; ++k) {
|
for (int k = 0; k < 1993; ++k) {
|
||||||
RandomizeSampleVector(&random_generator, x[0][0]);
|
RandomizeSampleVector(&random_generator, x[0][0]);
|
||||||
std::for_each(x[0][0].begin(), x[0][0].end(), [](float& a) { a /= 30.f; });
|
|
||||||
render_delay_buffer->Insert(x);
|
render_delay_buffer->Insert(x);
|
||||||
if (k == 0) {
|
if (k == 0) {
|
||||||
render_delay_buffer->Reset();
|
render_delay_buffer->Reset();
|
||||||
}
|
}
|
||||||
render_delay_buffer->PrepareCaptureProcessing();
|
render_delay_buffer->PrepareCaptureProcessing();
|
||||||
|
|
||||||
aec_state.HandleEchoPathChange(echo_path_variability);
|
|
||||||
aec_state.Update(delay_estimate, H2, h,
|
aec_state.Update(delay_estimate, H2, h,
|
||||||
*render_delay_buffer->GetRenderBuffer(), E2_main, Y2,
|
*render_delay_buffer->GetRenderBuffer(), E2_main,
|
||||||
output);
|
Y2[0], output);
|
||||||
|
|
||||||
estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(),
|
estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(),
|
||||||
S2_linear, Y2, &R2);
|
S2_linear, Y2, R2);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
std::for_each(R2.begin(), R2.end(),
|
|
||||||
[&](float a) { EXPECT_NEAR(kLevel, a, 0.1f); });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
@ -29,34 +29,7 @@ void ReverbModel::Reset() {
|
|||||||
reverb_.fill(0.);
|
reverb_.fill(0.);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ReverbModel::AddReverbNoFreqShaping(
|
void ReverbModel::UpdateReverbNoFreqShaping(
|
||||||
rtc::ArrayView<const float> power_spectrum,
|
|
||||||
float power_spectrum_scaling,
|
|
||||||
float reverb_decay,
|
|
||||||
rtc::ArrayView<float> reverb_power_spectrum) {
|
|
||||||
UpdateReverbContributionsNoFreqShaping(power_spectrum, power_spectrum_scaling,
|
|
||||||
reverb_decay);
|
|
||||||
|
|
||||||
// Add the power of the echo reverb to the residual echo power.
|
|
||||||
std::transform(reverb_power_spectrum.begin(), reverb_power_spectrum.end(),
|
|
||||||
reverb_.begin(), reverb_power_spectrum.begin(),
|
|
||||||
std::plus<float>());
|
|
||||||
}
|
|
||||||
|
|
||||||
void ReverbModel::AddReverb(rtc::ArrayView<const float> power_spectrum,
|
|
||||||
rtc::ArrayView<const float> power_spectrum_scaling,
|
|
||||||
float reverb_decay,
|
|
||||||
rtc::ArrayView<float> reverb_power_spectrum) {
|
|
||||||
UpdateReverbContributions(power_spectrum, power_spectrum_scaling,
|
|
||||||
reverb_decay);
|
|
||||||
|
|
||||||
// Add the power of the echo reverb to the residual echo power.
|
|
||||||
std::transform(reverb_power_spectrum.begin(), reverb_power_spectrum.end(),
|
|
||||||
reverb_.begin(), reverb_power_spectrum.begin(),
|
|
||||||
std::plus<float>());
|
|
||||||
}
|
|
||||||
|
|
||||||
void ReverbModel::UpdateReverbContributionsNoFreqShaping(
|
|
||||||
rtc::ArrayView<const float> power_spectrum,
|
rtc::ArrayView<const float> power_spectrum,
|
||||||
float power_spectrum_scaling,
|
float power_spectrum_scaling,
|
||||||
float reverb_decay) {
|
float reverb_decay) {
|
||||||
@ -69,9 +42,9 @@ void ReverbModel::UpdateReverbContributionsNoFreqShaping(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ReverbModel::UpdateReverbContributions(
|
void ReverbModel::UpdateReverb(
|
||||||
rtc::ArrayView<const float>& power_spectrum,
|
rtc::ArrayView<const float> power_spectrum,
|
||||||
rtc::ArrayView<const float>& power_spectrum_scaling,
|
rtc::ArrayView<const float> power_spectrum_scaling,
|
||||||
float reverb_decay) {
|
float reverb_decay) {
|
||||||
if (reverb_decay > 0) {
|
if (reverb_decay > 0) {
|
||||||
// Update the estimate of the reverberant power.
|
// Update the estimate of the reverberant power.
|
||||||
|
@ -28,37 +28,27 @@ class ReverbModel {
|
|||||||
// Resets the state.
|
// Resets the state.
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
// The methods AddReverbNoFreqShaping and AddReverb add the reverberation
|
// Returns the reverb.
|
||||||
// contribution to an input/output power spectrum
|
rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb() const {
|
||||||
// Before applying the exponential reverberant model, the input power spectrum
|
return reverb_;
|
||||||
// is pre-scaled. Use the method AddReverb when a different scaling should be
|
}
|
||||||
// applied per frequency and AddReverb_no_freq_shape if the same scaling
|
|
||||||
// should be used for all the frequencies.
|
|
||||||
void AddReverbNoFreqShaping(rtc::ArrayView<const float> power_spectrum,
|
|
||||||
float power_spectrum_scaling,
|
|
||||||
float reverb_decay,
|
|
||||||
rtc::ArrayView<float> reverb_power_spectrum);
|
|
||||||
|
|
||||||
void AddReverb(rtc::ArrayView<const float> power_spectrum,
|
// The methods UpdateReverbNoFreqShaping and UpdateReverb update the
|
||||||
rtc::ArrayView<const float> freq_response_tail,
|
// estimate of the reverberation contribution to an input/output power
|
||||||
float reverb_decay,
|
// spectrum. Before applying the exponential reverberant model, the input
|
||||||
rtc::ArrayView<float> reverb_power_spectrum);
|
// power spectrum is pre-scaled. Use the method UpdateReverb when a different
|
||||||
|
// scaling should be applied per frequency and UpdateReverb_no_freq_shape if
|
||||||
// Updates the reverberation contributions without applying any shaping of the
|
// the same scaling should be used for all the frequencies.
|
||||||
// spectrum.
|
void UpdateReverbNoFreqShaping(rtc::ArrayView<const float> power_spectrum,
|
||||||
void UpdateReverbContributionsNoFreqShaping(
|
|
||||||
rtc::ArrayView<const float> power_spectrum,
|
|
||||||
float power_spectrum_scaling,
|
float power_spectrum_scaling,
|
||||||
float reverb_decay);
|
float reverb_decay);
|
||||||
|
|
||||||
// Returns the current power spectrum reverberation contributions.
|
// Update the reverb based on new data.
|
||||||
rtc::ArrayView<const float> GetPowerSpectrum() const { return reverb_; }
|
void UpdateReverb(rtc::ArrayView<const float> power_spectrum,
|
||||||
|
rtc::ArrayView<const float> power_spectrum_scaling,
|
||||||
|
float reverb_decay);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Updates the reverberation contributions.
|
|
||||||
void UpdateReverbContributions(rtc::ArrayView<const float>& power_spectrum,
|
|
||||||
rtc::ArrayView<const float>& freq_resp_tail,
|
|
||||||
float reverb_decay);
|
|
||||||
|
|
||||||
std::array<float, kFftLengthBy2Plus1> reverb_;
|
std::array<float, kFftLengthBy2Plus1> reverb_;
|
||||||
};
|
};
|
||||||
|
@ -118,7 +118,8 @@ SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) {
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
SignalDependentErleEstimator::SignalDependentErleEstimator(
|
SignalDependentErleEstimator::SignalDependentErleEstimator(
|
||||||
const EchoCanceller3Config& config)
|
const EchoCanceller3Config& config,
|
||||||
|
size_t num_capture_channels)
|
||||||
: min_erle_(config.erle.min),
|
: min_erle_(config.erle.min),
|
||||||
num_sections_(config.erle.num_sections),
|
num_sections_(config.erle.num_sections),
|
||||||
num_blocks_(config.filter.main.length_blocks),
|
num_blocks_(config.filter.main.length_blocks),
|
||||||
@ -130,6 +131,7 @@ SignalDependentErleEstimator::SignalDependentErleEstimator(
|
|||||||
section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_,
|
section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_,
|
||||||
num_blocks_,
|
num_blocks_,
|
||||||
num_sections_)),
|
num_sections_)),
|
||||||
|
erle_(num_capture_channels),
|
||||||
S2_section_accum_(num_sections_),
|
S2_section_accum_(num_sections_),
|
||||||
erle_estimators_(num_sections_),
|
erle_estimators_(num_sections_),
|
||||||
correction_factors_(num_sections_) {
|
correction_factors_(num_sections_) {
|
||||||
@ -142,10 +144,12 @@ SignalDependentErleEstimator::SignalDependentErleEstimator(
|
|||||||
SignalDependentErleEstimator::~SignalDependentErleEstimator() = default;
|
SignalDependentErleEstimator::~SignalDependentErleEstimator() = default;
|
||||||
|
|
||||||
void SignalDependentErleEstimator::Reset() {
|
void SignalDependentErleEstimator::Reset() {
|
||||||
erle_.fill(min_erle_);
|
for (auto& erle : erle_) {
|
||||||
for (auto& erle : erle_estimators_) {
|
|
||||||
erle.fill(min_erle_);
|
erle.fill(min_erle_);
|
||||||
}
|
}
|
||||||
|
for (auto& erle_estimator : erle_estimators_) {
|
||||||
|
erle_estimator.fill(min_erle_);
|
||||||
|
}
|
||||||
erle_ref_.fill(min_erle_);
|
erle_ref_.fill(min_erle_);
|
||||||
for (auto& factor : correction_factors_) {
|
for (auto& factor : correction_factors_) {
|
||||||
factor.fill(1.0f);
|
factor.fill(1.0f);
|
||||||
@ -166,7 +170,7 @@ void SignalDependentErleEstimator::Update(
|
|||||||
rtc::ArrayView<const float> X2,
|
rtc::ArrayView<const float> X2,
|
||||||
rtc::ArrayView<const float> Y2,
|
rtc::ArrayView<const float> Y2,
|
||||||
rtc::ArrayView<const float> E2,
|
rtc::ArrayView<const float> E2,
|
||||||
rtc::ArrayView<const float> average_erle,
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
|
||||||
bool converged_filter) {
|
bool converged_filter) {
|
||||||
RTC_DCHECK_GT(num_sections_, 1);
|
RTC_DCHECK_GT(num_sections_, 1);
|
||||||
|
|
||||||
@ -187,8 +191,8 @@ void SignalDependentErleEstimator::Update(
|
|||||||
for (size_t k = 0; k < kFftLengthBy2; ++k) {
|
for (size_t k = 0; k < kFftLengthBy2; ++k) {
|
||||||
float correction_factor =
|
float correction_factor =
|
||||||
correction_factors_[n_active_sections[k]][band_to_subband_[k]];
|
correction_factors_[n_active_sections[k]][band_to_subband_[k]];
|
||||||
erle_[k] = rtc::SafeClamp(average_erle[k] * correction_factor, min_erle_,
|
erle_[0][k] = rtc::SafeClamp(average_erle[0][k] * correction_factor,
|
||||||
max_erle_[band_to_subband_[k]]);
|
min_erle_, max_erle_[band_to_subband_[k]]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,24 +29,28 @@ namespace webrtc {
|
|||||||
// this class receive as an input.
|
// this class receive as an input.
|
||||||
class SignalDependentErleEstimator {
|
class SignalDependentErleEstimator {
|
||||||
public:
|
public:
|
||||||
explicit SignalDependentErleEstimator(const EchoCanceller3Config& config);
|
SignalDependentErleEstimator(const EchoCanceller3Config& config,
|
||||||
|
size_t num_capture_channels);
|
||||||
|
|
||||||
~SignalDependentErleEstimator();
|
~SignalDependentErleEstimator();
|
||||||
|
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
// Returns the Erle per frequency subband.
|
// Returns the Erle per frequency subband.
|
||||||
const std::array<float, kFftLengthBy2Plus1>& Erle() const { return erle_; }
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
|
||||||
|
return erle_;
|
||||||
|
}
|
||||||
|
|
||||||
// Updates the Erle estimate. The Erle that is passed as an input is required
|
// Updates the Erle estimate. The Erle that is passed as an input is required
|
||||||
// to be an estimation of the average Erle achieved by the linear filter.
|
// to be an estimation of the average Erle achieved by the linear filter.
|
||||||
void Update(const RenderBuffer& render_buffer,
|
void Update(
|
||||||
|
const RenderBuffer& render_buffer,
|
||||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||||
filter_frequency_response,
|
filter_frequency_response,
|
||||||
rtc::ArrayView<const float> X2,
|
rtc::ArrayView<const float> X2,
|
||||||
rtc::ArrayView<const float> Y2,
|
rtc::ArrayView<const float> Y2,
|
||||||
rtc::ArrayView<const float> E2,
|
rtc::ArrayView<const float> E2,
|
||||||
rtc::ArrayView<const float> average_erle,
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
|
||||||
bool converged_filter);
|
bool converged_filter);
|
||||||
|
|
||||||
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
|
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
|
||||||
@ -80,7 +84,7 @@ class SignalDependentErleEstimator {
|
|||||||
const std::array<size_t, kFftLengthBy2Plus1> band_to_subband_;
|
const std::array<size_t, kFftLengthBy2Plus1> band_to_subband_;
|
||||||
const std::array<float, kSubbands> max_erle_;
|
const std::array<float, kSubbands> max_erle_;
|
||||||
const std::vector<size_t> section_boundaries_blocks_;
|
const std::vector<size_t> section_boundaries_blocks_;
|
||||||
std::array<float, kFftLengthBy2Plus1> erle_;
|
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
|
||||||
std::vector<std::array<float, kFftLengthBy2Plus1>> S2_section_accum_;
|
std::vector<std::array<float, kFftLengthBy2Plus1>> S2_section_accum_;
|
||||||
std::vector<std::array<float, kSubbands>> erle_estimators_;
|
std::vector<std::array<float, kSubbands>> erle_estimators_;
|
||||||
std::array<float, kSubbands> erle_ref_;
|
std::array<float, kSubbands> erle_ref_;
|
||||||
|
@ -112,6 +112,7 @@ void TestInputs::UpdateCurrentPowerSpectra() {
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
TEST(SignalDependentErleEstimator, SweepSettings) {
|
TEST(SignalDependentErleEstimator, SweepSettings) {
|
||||||
|
const size_t kNumCaptureChannels = 1;
|
||||||
EchoCanceller3Config cfg;
|
EchoCanceller3Config cfg;
|
||||||
size_t max_length_blocks = 50;
|
size_t max_length_blocks = 50;
|
||||||
for (size_t blocks = 0; blocks < max_length_blocks; blocks = blocks + 10) {
|
for (size_t blocks = 0; blocks < max_length_blocks; blocks = blocks + 10) {
|
||||||
@ -124,9 +125,12 @@ TEST(SignalDependentErleEstimator, SweepSettings) {
|
|||||||
cfg.delay.delay_headroom_samples = delay_headroom * kBlockSize;
|
cfg.delay.delay_headroom_samples = delay_headroom * kBlockSize;
|
||||||
cfg.erle.num_sections = num_sections;
|
cfg.erle.num_sections = num_sections;
|
||||||
if (EchoCanceller3Config::Validate(&cfg)) {
|
if (EchoCanceller3Config::Validate(&cfg)) {
|
||||||
SignalDependentErleEstimator s(cfg);
|
SignalDependentErleEstimator s(cfg, kNumCaptureChannels);
|
||||||
std::array<float, kFftLengthBy2Plus1> average_erle;
|
std::array<std::array<float, kFftLengthBy2Plus1>, kNumCaptureChannels>
|
||||||
average_erle.fill(cfg.erle.max_l);
|
average_erle;
|
||||||
|
for (auto& e : average_erle) {
|
||||||
|
e.fill(cfg.erle.max_l);
|
||||||
|
}
|
||||||
TestInputs inputs(cfg);
|
TestInputs inputs(cfg);
|
||||||
for (size_t n = 0; n < 10; ++n) {
|
for (size_t n = 0; n < 10; ++n) {
|
||||||
inputs.Update();
|
inputs.Update();
|
||||||
@ -140,6 +144,7 @@ TEST(SignalDependentErleEstimator, SweepSettings) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(SignalDependentErleEstimator, LongerRun) {
|
TEST(SignalDependentErleEstimator, LongerRun) {
|
||||||
|
const size_t kNumCaptureChannels = 1;
|
||||||
EchoCanceller3Config cfg;
|
EchoCanceller3Config cfg;
|
||||||
cfg.filter.main.length_blocks = 2;
|
cfg.filter.main.length_blocks = 2;
|
||||||
cfg.filter.main_initial.length_blocks = 1;
|
cfg.filter.main_initial.length_blocks = 1;
|
||||||
@ -147,9 +152,12 @@ TEST(SignalDependentErleEstimator, LongerRun) {
|
|||||||
cfg.delay.hysteresis_limit_blocks = 0;
|
cfg.delay.hysteresis_limit_blocks = 0;
|
||||||
cfg.erle.num_sections = 2;
|
cfg.erle.num_sections = 2;
|
||||||
EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true);
|
EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true);
|
||||||
std::array<float, kFftLengthBy2Plus1> average_erle;
|
std::array<std::array<float, kFftLengthBy2Plus1>, kNumCaptureChannels>
|
||||||
average_erle.fill(cfg.erle.max_l);
|
average_erle;
|
||||||
SignalDependentErleEstimator s(cfg);
|
for (auto& e : average_erle) {
|
||||||
|
e.fill(cfg.erle.max_l);
|
||||||
|
}
|
||||||
|
SignalDependentErleEstimator s(cfg, kNumCaptureChannels);
|
||||||
TestInputs inputs(cfg);
|
TestInputs inputs(cfg);
|
||||||
for (size_t n = 0; n < 200; ++n) {
|
for (size_t n = 0; n < 200; ++n) {
|
||||||
inputs.Update();
|
inputs.Update();
|
||||||
|
@ -40,17 +40,21 @@ bool EnableMinErleDuringOnsets() {
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config)
|
SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config,
|
||||||
|
size_t num_capture_channels)
|
||||||
: min_erle_(config.erle.min),
|
: min_erle_(config.erle.min),
|
||||||
max_erle_(SetMaxErleBands(config.erle.max_l, config.erle.max_h)),
|
max_erle_(SetMaxErleBands(config.erle.max_l, config.erle.max_h)),
|
||||||
use_min_erle_during_onsets_(EnableMinErleDuringOnsets()) {
|
use_min_erle_during_onsets_(EnableMinErleDuringOnsets()),
|
||||||
|
erle_(num_capture_channels) {
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
SubbandErleEstimator::~SubbandErleEstimator() = default;
|
SubbandErleEstimator::~SubbandErleEstimator() = default;
|
||||||
|
|
||||||
void SubbandErleEstimator::Reset() {
|
void SubbandErleEstimator::Reset() {
|
||||||
erle_.fill(min_erle_);
|
for (auto& erle : erle_) {
|
||||||
|
erle.fill(min_erle_);
|
||||||
|
}
|
||||||
erle_onsets_.fill(min_erle_);
|
erle_onsets_.fill(min_erle_);
|
||||||
coming_onset_.fill(true);
|
coming_onset_.fill(true);
|
||||||
hold_counters_.fill(0);
|
hold_counters_.fill(0);
|
||||||
@ -74,8 +78,10 @@ void SubbandErleEstimator::Update(rtc::ArrayView<const float> X2,
|
|||||||
DecreaseErlePerBandForLowRenderSignals();
|
DecreaseErlePerBandForLowRenderSignals();
|
||||||
}
|
}
|
||||||
|
|
||||||
erle_[0] = erle_[1];
|
for (auto& erle : erle_) {
|
||||||
erle_[kFftLengthBy2] = erle_[kFftLengthBy2 - 1];
|
erle[0] = erle[1];
|
||||||
|
erle[kFftLengthBy2] = erle[kFftLengthBy2 - 1];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SubbandErleEstimator::Dump(
|
void SubbandErleEstimator::Dump(
|
||||||
@ -116,10 +122,11 @@ void SubbandErleEstimator::UpdateBands(bool onset_detection) {
|
|||||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||||
if (is_erle_updated[k]) {
|
if (is_erle_updated[k]) {
|
||||||
float alpha = 0.05f;
|
float alpha = 0.05f;
|
||||||
if (new_erle[k] < erle_[k]) {
|
if (new_erle[k] < erle_[0][k]) {
|
||||||
alpha = accum_spectra_.low_render_energy_[k] ? 0.f : 0.1f;
|
alpha = accum_spectra_.low_render_energy_[k] ? 0.f : 0.1f;
|
||||||
}
|
}
|
||||||
erle_[k] = rtc::SafeClamp(erle_[k] + alpha * (new_erle[k] - erle_[k]),
|
erle_[0][k] =
|
||||||
|
rtc::SafeClamp(erle_[0][k] + alpha * (new_erle[k] - erle_[0][k]),
|
||||||
min_erle_, max_erle_[k]);
|
min_erle_, max_erle_[k]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -129,9 +136,9 @@ void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() {
|
|||||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||||
hold_counters_[k]--;
|
hold_counters_[k]--;
|
||||||
if (hold_counters_[k] <= (kBlocksForOnsetDetection - kBlocksToHoldErle)) {
|
if (hold_counters_[k] <= (kBlocksForOnsetDetection - kBlocksToHoldErle)) {
|
||||||
if (erle_[k] > erle_onsets_[k]) {
|
if (erle_[0][k] > erle_onsets_[k]) {
|
||||||
erle_[k] = std::max(erle_onsets_[k], 0.97f * erle_[k]);
|
erle_[0][k] = std::max(erle_onsets_[k], 0.97f * erle_[0][k]);
|
||||||
RTC_DCHECK_LE(min_erle_, erle_[k]);
|
RTC_DCHECK_LE(min_erle_, erle_[0][k]);
|
||||||
}
|
}
|
||||||
if (hold_counters_[k] <= 0) {
|
if (hold_counters_[k] <= 0) {
|
||||||
coming_onset_[k] = true;
|
coming_onset_[k] = true;
|
||||||
|
@ -27,7 +27,8 @@ namespace webrtc {
|
|||||||
// Estimates the echo return loss enhancement for each frequency subband.
|
// Estimates the echo return loss enhancement for each frequency subband.
|
||||||
class SubbandErleEstimator {
|
class SubbandErleEstimator {
|
||||||
public:
|
public:
|
||||||
explicit SubbandErleEstimator(const EchoCanceller3Config& config);
|
SubbandErleEstimator(const EchoCanceller3Config& config,
|
||||||
|
size_t num_capture_channels);
|
||||||
~SubbandErleEstimator();
|
~SubbandErleEstimator();
|
||||||
|
|
||||||
// Resets the ERLE estimator.
|
// Resets the ERLE estimator.
|
||||||
@ -41,7 +42,9 @@ class SubbandErleEstimator {
|
|||||||
bool onset_detection);
|
bool onset_detection);
|
||||||
|
|
||||||
// Returns the ERLE estimate.
|
// Returns the ERLE estimate.
|
||||||
const std::array<float, kFftLengthBy2Plus1>& Erle() const { return erle_; }
|
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
|
||||||
|
return erle_;
|
||||||
|
}
|
||||||
|
|
||||||
// Returns the ERLE estimate at onsets.
|
// Returns the ERLE estimate at onsets.
|
||||||
rtc::ArrayView<const float> ErleOnsets() const { return erle_onsets_; }
|
rtc::ArrayView<const float> ErleOnsets() const { return erle_onsets_; }
|
||||||
@ -69,7 +72,7 @@ class SubbandErleEstimator {
|
|||||||
const std::array<float, kFftLengthBy2Plus1> max_erle_;
|
const std::array<float, kFftLengthBy2Plus1> max_erle_;
|
||||||
const bool use_min_erle_during_onsets_;
|
const bool use_min_erle_during_onsets_;
|
||||||
AccumulatedSpectra accum_spectra_;
|
AccumulatedSpectra accum_spectra_;
|
||||||
std::array<float, kFftLengthBy2Plus1> erle_;
|
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
|
||||||
std::array<float, kFftLengthBy2Plus1> erle_onsets_;
|
std::array<float, kFftLengthBy2Plus1> erle_onsets_;
|
||||||
std::array<bool, kFftLengthBy2Plus1> coming_onset_;
|
std::array<bool, kFftLengthBy2Plus1> coming_onset_;
|
||||||
std::array<int, kFftLengthBy2Plus1> hold_counters_;
|
std::array<int, kFftLengthBy2Plus1> hold_counters_;
|
||||||
|
Reference in New Issue
Block a user