diff --git a/modules/audio_processing/aec3/adaptive_fir_filter.cc b/modules/audio_processing/aec3/adaptive_fir_filter.cc index c6665b2322..024b605527 100644 --- a/modules/audio_processing/aec3/adaptive_fir_filter.cc +++ b/modules/audio_processing/aec3/adaptive_fir_filter.cc @@ -136,11 +136,11 @@ void UpdateErlEstimator_SSE2( void AdaptPartitions(const RenderBuffer& render_buffer, const FftData& G, rtc::ArrayView H) { - rtc::ArrayView render_buffer_data = + rtc::ArrayView> render_buffer_data = render_buffer.GetFftBuffer(); size_t index = render_buffer.Position(); for (auto& H_j : H) { - const FftData& X = render_buffer_data[index]; + const FftData& X = render_buffer_data[index][/*channel=*/0]; for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { H_j.re[k] += X.re[k] * G.re[k] + X.im[k] * G.im[k]; H_j.im[k] += X.re[k] * G.im[k] - X.im[k] * G.re[k]; @@ -155,23 +155,25 @@ void AdaptPartitions(const RenderBuffer& render_buffer, void AdaptPartitions_NEON(const RenderBuffer& render_buffer, const FftData& G, rtc::ArrayView H) { - rtc::ArrayView render_buffer_data = + rtc::ArrayView> render_buffer_data = render_buffer.GetFftBuffer(); const int lim1 = std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); const int lim2 = H.size(); constexpr int kNumFourBinBands = kFftLengthBy2 / 4; FftData* H_j = &H[0]; - const FftData* X = &render_buffer_data[render_buffer.Position()]; + const std::vector* X_channels = + &render_buffer_data[render_buffer.Position()]; int limit = lim1; int j = 0; do { - for (; j < limit; ++j, ++H_j, ++X) { + for (; j < limit; ++j, ++H_j, ++X_channels) { + const FftData& X = (*X_channels)[/*channel=*/0]; for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { const float32x4_t G_re = vld1q_f32(&G.re[k]); const float32x4_t G_im = vld1q_f32(&G.im[k]); - const float32x4_t X_re = vld1q_f32(&X->re[k]); - const float32x4_t X_im = vld1q_f32(&X->im[k]); + const float32x4_t X_re = vld1q_f32(&X.re[k]); + const float32x4_t X_im = vld1q_f32(&X.im[k]); const float32x4_t H_re = vld1q_f32(&H_j->re[k]); const float32x4_t H_im = vld1q_f32(&H_j->im[k]); const float32x4_t a = vmulq_f32(X_re, G_re); @@ -186,23 +188,24 @@ void AdaptPartitions_NEON(const RenderBuffer& render_buffer, } } - X = &render_buffer_data[0]; + X_channels = &render_buffer_data[0]; limit = lim2; } while (j < lim2); H_j = &H[0]; - X = &render_buffer_data[render_buffer.Position()]; + X_channels = &render_buffer_data[render_buffer.Position()]; limit = lim1; j = 0; do { - for (; j < limit; ++j, ++H_j, ++X) { - H_j->re[kFftLengthBy2] += X->re[kFftLengthBy2] * G.re[kFftLengthBy2] + - X->im[kFftLengthBy2] * G.im[kFftLengthBy2]; - H_j->im[kFftLengthBy2] += X->re[kFftLengthBy2] * G.im[kFftLengthBy2] - - X->im[kFftLengthBy2] * G.re[kFftLengthBy2]; + for (; j < limit; ++j, ++H_j, ++X_channels) { + const FftData& X = (*X_channels)[/*channel=*/0]; + H_j->re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] + + X.im[kFftLengthBy2] * G.im[kFftLengthBy2]; + H_j->im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] - + X.im[kFftLengthBy2] * G.re[kFftLengthBy2]; } - X = &render_buffer_data[0]; + X_channels = &render_buffer_data[0]; limit = lim2; } while (j < lim2); } @@ -213,14 +216,14 @@ void AdaptPartitions_NEON(const RenderBuffer& render_buffer, void AdaptPartitions_SSE2(const RenderBuffer& render_buffer, const FftData& G, rtc::ArrayView H) { - rtc::ArrayView render_buffer_data = + rtc::ArrayView> render_buffer_data = render_buffer.GetFftBuffer(); const int lim1 = std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); const int lim2 = H.size(); constexpr int kNumFourBinBands = kFftLengthBy2 / 4; FftData* H_j; - const FftData* X; + const std::vector* X_channels; int limit; int j; for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { @@ -228,13 +231,14 @@ void AdaptPartitions_SSE2(const RenderBuffer& render_buffer, const __m128 G_im = _mm_loadu_ps(&G.im[k]); H_j = &H[0]; - X = &render_buffer_data[render_buffer.Position()]; + X_channels = &render_buffer_data[render_buffer.Position()]; limit = lim1; j = 0; do { - for (; j < limit; ++j, ++H_j, ++X) { - const __m128 X_re = _mm_loadu_ps(&X->re[k]); - const __m128 X_im = _mm_loadu_ps(&X->im[k]); + for (; j < limit; ++j, ++H_j, ++X_channels) { + const FftData& X = (*X_channels)[/*channel=*/0]; + const __m128 X_re = _mm_loadu_ps(&X.re[k]); + const __m128 X_im = _mm_loadu_ps(&X.im[k]); const __m128 H_re = _mm_loadu_ps(&H_j->re[k]); const __m128 H_im = _mm_loadu_ps(&H_j->im[k]); const __m128 a = _mm_mul_ps(X_re, G_re); @@ -249,24 +253,25 @@ void AdaptPartitions_SSE2(const RenderBuffer& render_buffer, _mm_storeu_ps(&H_j->im[k], h); } - X = &render_buffer_data[0]; + X_channels = &render_buffer_data[0]; limit = lim2; } while (j < lim2); } H_j = &H[0]; - X = &render_buffer_data[render_buffer.Position()]; + X_channels = &render_buffer_data[render_buffer.Position()]; limit = lim1; j = 0; do { - for (; j < limit; ++j, ++H_j, ++X) { - H_j->re[kFftLengthBy2] += X->re[kFftLengthBy2] * G.re[kFftLengthBy2] + - X->im[kFftLengthBy2] * G.im[kFftLengthBy2]; - H_j->im[kFftLengthBy2] += X->re[kFftLengthBy2] * G.im[kFftLengthBy2] - - X->im[kFftLengthBy2] * G.re[kFftLengthBy2]; + for (; j < limit; ++j, ++H_j, ++X_channels) { + const FftData& X = (*X_channels)[/*channel=*/0]; + H_j->re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] + + X.im[kFftLengthBy2] * G.im[kFftLengthBy2]; + H_j->im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] - + X.im[kFftLengthBy2] * G.re[kFftLengthBy2]; } - X = &render_buffer_data[0]; + X_channels = &render_buffer_data[0]; limit = lim2; } while (j < lim2); } @@ -279,11 +284,11 @@ void ApplyFilter(const RenderBuffer& render_buffer, S->re.fill(0.f); S->im.fill(0.f); - rtc::ArrayView render_buffer_data = + rtc::ArrayView> render_buffer_data = render_buffer.GetFftBuffer(); size_t index = render_buffer.Position(); for (auto& H_j : H) { - const FftData& X = render_buffer_data[index]; + const FftData& X = render_buffer_data[index][0]; for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { S->re[k] += X.re[k] * H_j.re[k] - X.im[k] * H_j.im[k]; S->im[k] += X.re[k] * H_j.im[k] + X.im[k] * H_j.re[k]; @@ -300,22 +305,24 @@ void ApplyFilter_NEON(const RenderBuffer& render_buffer, RTC_DCHECK_GE(H.size(), H.size() - 1); S->Clear(); - rtc::ArrayView render_buffer_data = + rtc::ArrayView> render_buffer_data = render_buffer.GetFftBuffer(); const int lim1 = std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); const int lim2 = H.size(); constexpr int kNumFourBinBands = kFftLengthBy2 / 4; const FftData* H_j = &H[0]; - const FftData* X = &render_buffer_data[render_buffer.Position()]; + const std::vector* X_channels = + &render_buffer_data[render_buffer.Position()]; int j = 0; int limit = lim1; do { - for (; j < limit; ++j, ++H_j, ++X) { + for (; j < limit; ++j, ++H_j, ++X_channels) { + const FftData& X = (*X_channels)[/*channel=*/0]; for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { - const float32x4_t X_re = vld1q_f32(&X->re[k]); - const float32x4_t X_im = vld1q_f32(&X->im[k]); + const float32x4_t X_re = vld1q_f32(&X.re[k]); + const float32x4_t X_im = vld1q_f32(&X.im[k]); const float32x4_t H_re = vld1q_f32(&H_j->re[k]); const float32x4_t H_im = vld1q_f32(&H_j->im[k]); const float32x4_t S_re = vld1q_f32(&S->re[k]); @@ -331,22 +338,23 @@ void ApplyFilter_NEON(const RenderBuffer& render_buffer, } } limit = lim2; - X = &render_buffer_data[0]; + X_channels = &render_buffer_data[0]; } while (j < lim2); H_j = &H[0]; - X = &render_buffer_data[render_buffer.Position()]; + X_channels = &render_buffer_data[render_buffer.Position()]; j = 0; limit = lim1; do { - for (; j < limit; ++j, ++H_j, ++X) { - S->re[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->re[kFftLengthBy2] - - X->im[kFftLengthBy2] * H_j->im[kFftLengthBy2]; - S->im[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->im[kFftLengthBy2] + - X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2]; + for (; j < limit; ++j, ++H_j, ++X_channels) { + const FftData& X = (*X_channels)[/*channel=*/0]; + S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_j->re[kFftLengthBy2] - + X.im[kFftLengthBy2] * H_j->im[kFftLengthBy2]; + S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_j->im[kFftLengthBy2] + + X.im[kFftLengthBy2] * H_j->re[kFftLengthBy2]; } limit = lim2; - X = &render_buffer_data[0]; + X_channels = &render_buffer_data[0]; } while (j < lim2); } #endif @@ -360,22 +368,24 @@ void ApplyFilter_SSE2(const RenderBuffer& render_buffer, S->re.fill(0.f); S->im.fill(0.f); - rtc::ArrayView render_buffer_data = + rtc::ArrayView> render_buffer_data = render_buffer.GetFftBuffer(); const int lim1 = std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); const int lim2 = H.size(); constexpr int kNumFourBinBands = kFftLengthBy2 / 4; const FftData* H_j = &H[0]; - const FftData* X = &render_buffer_data[render_buffer.Position()]; + const std::vector* X_channels = + &render_buffer_data[render_buffer.Position()]; int j = 0; int limit = lim1; do { - for (; j < limit; ++j, ++H_j, ++X) { + for (; j < limit; ++j, ++H_j, ++X_channels) { + const FftData& X = (*X_channels)[/*channel=*/0]; for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) { - const __m128 X_re = _mm_loadu_ps(&X->re[k]); - const __m128 X_im = _mm_loadu_ps(&X->im[k]); + const __m128 X_re = _mm_loadu_ps(&X.re[k]); + const __m128 X_im = _mm_loadu_ps(&X.im[k]); const __m128 H_re = _mm_loadu_ps(&H_j->re[k]); const __m128 H_im = _mm_loadu_ps(&H_j->im[k]); const __m128 S_re = _mm_loadu_ps(&S->re[k]); @@ -393,22 +403,23 @@ void ApplyFilter_SSE2(const RenderBuffer& render_buffer, } } limit = lim2; - X = &render_buffer_data[0]; + X_channels = &render_buffer_data[0]; } while (j < lim2); H_j = &H[0]; - X = &render_buffer_data[render_buffer.Position()]; + X_channels = &render_buffer_data[render_buffer.Position()]; j = 0; limit = lim1; do { - for (; j < limit; ++j, ++H_j, ++X) { - S->re[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->re[kFftLengthBy2] - - X->im[kFftLengthBy2] * H_j->im[kFftLengthBy2]; - S->im[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->im[kFftLengthBy2] + - X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2]; + for (; j < limit; ++j, ++H_j, ++X_channels) { + const FftData& X = (*X_channels)[/*channel=*/0]; + S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_j->re[kFftLengthBy2] - + X.im[kFftLengthBy2] * H_j->im[kFftLengthBy2]; + S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_j->im[kFftLengthBy2] + + X.im[kFftLengthBy2] * H_j->re[kFftLengthBy2]; } limit = lim2; - X = &render_buffer_data[0]; + X_channels = &render_buffer_data[0]; } while (j < lim2); } #endif diff --git a/modules/audio_processing/aec3/fft_buffer.cc b/modules/audio_processing/aec3/fft_buffer.cc index 379ef7c521..1ce2d31d8f 100644 --- a/modules/audio_processing/aec3/fft_buffer.cc +++ b/modules/audio_processing/aec3/fft_buffer.cc @@ -12,9 +12,13 @@ namespace webrtc { -FftBuffer::FftBuffer(size_t size) : size(static_cast(size)), buffer(size) { - for (auto& b : buffer) { - b.Clear(); +FftBuffer::FftBuffer(size_t size, size_t num_channels) + : size(static_cast(size)), + buffer(size, std::vector(num_channels)) { + for (auto& block : buffer) { + for (auto& channel_fft_data : block) { + channel_fft_data.Clear(); + } } } diff --git a/modules/audio_processing/aec3/fft_buffer.h b/modules/audio_processing/aec3/fft_buffer.h index a367f9eb45..4187315863 100644 --- a/modules/audio_processing/aec3/fft_buffer.h +++ b/modules/audio_processing/aec3/fft_buffer.h @@ -23,7 +23,7 @@ namespace webrtc { // Struct for bundling a circular buffer of FftData objects together with the // read and write indices. struct FftBuffer { - explicit FftBuffer(size_t size); + FftBuffer(size_t size, size_t num_channels); ~FftBuffer(); int IncIndex(int index) const { @@ -50,7 +50,7 @@ struct FftBuffer { void DecReadIndex() { read = DecIndex(read); } const int size; - std::vector buffer; + std::vector> buffer; int write = 0; int read = 0; }; diff --git a/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc b/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc index 56569a2b88..f721fd8606 100644 --- a/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc +++ b/modules/audio_processing/aec3/mock/mock_render_delay_buffer.cc @@ -22,7 +22,7 @@ MockRenderDelayBuffer::MockRenderDelayBuffer(int sample_rate_hz, spectrum_buffer_(block_buffer_.buffer.size(), num_channels, kFftLengthBy2Plus1), - fft_buffer_(block_buffer_.buffer.size()), + fft_buffer_(block_buffer_.buffer.size(), num_channels), render_buffer_(&block_buffer_, &spectrum_buffer_, &fft_buffer_), downsampled_render_buffer_(GetDownSampledBufferSize(4, 4)) { ON_CALL(*this, GetRenderBuffer()) diff --git a/modules/audio_processing/aec3/render_buffer.h b/modules/audio_processing/aec3/render_buffer.h index 5099a13c41..d44abd9df4 100644 --- a/modules/audio_processing/aec3/render_buffer.h +++ b/modules/audio_processing/aec3/render_buffer.h @@ -52,7 +52,7 @@ class RenderBuffer { } // Returns the circular fft buffer. - rtc::ArrayView GetFftBuffer() const { + rtc::ArrayView> GetFftBuffer() const { return fft_buffer_->buffer; } diff --git a/modules/audio_processing/aec3/render_buffer_unittest.cc b/modules/audio_processing/aec3/render_buffer_unittest.cc index 64f317f189..dca1e2130f 100644 --- a/modules/audio_processing/aec3/render_buffer_unittest.cc +++ b/modules/audio_processing/aec3/render_buffer_unittest.cc @@ -29,14 +29,14 @@ TEST(RenderBuffer, NullExternalFftBuffer) { // Verifies the check for non-null spectrum buffer. TEST(RenderBuffer, NullExternalSpectrumBuffer) { - FftBuffer fft_buffer(10); + FftBuffer fft_buffer(10, 1); BlockBuffer block_buffer(10, 3, 1, kBlockSize); EXPECT_DEATH(RenderBuffer(&block_buffer, nullptr, &fft_buffer), ""); } // Verifies the check for non-null block buffer. TEST(RenderBuffer, NullExternalBlockBuffer) { - FftBuffer fft_buffer(10); + FftBuffer fft_buffer(10, 1); SpectrumBuffer spectrum_buffer(10, 1, kFftLengthBy2Plus1); EXPECT_DEATH(RenderBuffer(nullptr, &spectrum_buffer, &fft_buffer), ""); } diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc index 55dced0720..96f8409d81 100644 --- a/modules/audio_processing/aec3/render_delay_buffer.cc +++ b/modules/audio_processing/aec3/render_delay_buffer.cc @@ -128,7 +128,7 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config, num_render_channels, kBlockSize), spectra_(blocks_.buffer.size(), num_render_channels, kFftLengthBy2Plus1), - ffts_(blocks_.buffer.size()), + ffts_(blocks_.buffer.size(), num_render_channels), delay_(config_.delay.default_delay), echo_remover_buffer_(&blocks_, &spectra_, &ffts_), low_rate_(GetDownSampledBufferSize(down_sampling_factor_, @@ -139,6 +139,10 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config, buffer_headroom_(config.filter.main.length_blocks) { RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size()); RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size()); + for (size_t i = 0; i < blocks_.buffer.size(); ++i) { + RTC_DCHECK_EQ(blocks_.buffer[i][0].size(), ffts_.buffer[i].size()); + RTC_DCHECK_EQ(spectra_.buffer[i].size(), ffts_.buffer[i].size()); + } Reset(); } @@ -379,11 +383,12 @@ void RenderDelayBufferImpl::InsertBlock( data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(), 16000 / down_sampling_factor_, 1); std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write); - fft_.PaddedFft(block[0][0], b.buffer[previous_write][0][0], - &f.buffer[f.write]); - // TODO(http://bugs.webrtc.org/10913): Loop over all channels when FftBuffer - // supports multi-channel. - f.buffer[f.write].Spectrum(optimization_, s.buffer[s.write][/*channel=*/0]); + for (size_t channel = 0; channel < block[0].size(); ++channel) { + fft_.PaddedFft(block[0][channel], b.buffer[previous_write][0][channel], + &f.buffer[f.write][channel]); + f.buffer[f.write][channel].Spectrum(optimization_, + s.buffer[s.write][channel]); + } } bool RenderDelayBufferImpl::DetectActiveRender( diff --git a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc index aefa2cb15d..300f6b18c7 100644 --- a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc +++ b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc @@ -141,7 +141,7 @@ std::string ProduceDebugText(size_t delay, int filter_length_blocks) { // Verifies that the check for non-null output gain parameter works. TEST(ShadowFilterUpdateGain, NullDataOutputGain) { ApmDataDumper data_dumper(42); - FftBuffer fft_buffer(1); + FftBuffer fft_buffer(1, 1); RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); FftData E; const EchoCanceller3Config::Filter::ShadowConfiguration& config = { @@ -159,7 +159,8 @@ TEST(ShadowFilterUpdateGain, GainCausesFilterToConverge) { std::vector blocks_with_echo_path_changes; std::vector blocks_with_saturation; - for (size_t num_render_channels : {1, 2, 8}) { + // TODO(http://bugs.webrtc.org/10913): Test multiple render channel counts. + for (size_t num_render_channels : {1}) { for (size_t filter_length_blocks : {12, 20, 30}) { for (size_t delay_samples : {0, 64, 150, 200, 301}) { SCOPED_TRACE(ProduceDebugText(delay_samples, filter_length_blocks)); @@ -190,7 +191,8 @@ TEST(ShadowFilterUpdateGain, GainCausesFilterToConverge) { // Verifies that the magnitude of the gain on average decreases for a // persistently exciting signal. TEST(ShadowFilterUpdateGain, DecreasingGain) { - for (size_t num_render_channels : {1, 2, 8}) { + // TODO(http://bugs.webrtc.org/10913): Test multiple render channel counts. + for (size_t num_render_channels : {1}) { for (size_t filter_length_blocks : {12, 20, 30}) { SCOPED_TRACE(ProduceDebugText(filter_length_blocks)); std::vector blocks_with_echo_path_changes; @@ -232,7 +234,8 @@ TEST(ShadowFilterUpdateGain, SaturationBehavior) { for (int k = 99; k < 200; ++k) { blocks_with_saturation.push_back(k); } - for (size_t num_render_channels : {1, 2, 8}) { + // TODO(http://bugs.webrtc.org/10913): Test multiple render channel counts. + for (size_t num_render_channels : {1}) { for (size_t filter_length_blocks : {12, 20, 30}) { SCOPED_TRACE(ProduceDebugText(filter_length_blocks));