Add multi-channel to FftBuffer

All channels are populated by RenderDelayBuffer, but all other
dependent modules are hardcoded to do their regular mono processing
on the first channel.

Bug: webrtc:10913
Tested: Bitexactness on a large set of aecdumps
Change-Id: I11d11aa0ad3da0f244c0ec020d2c9f0f4a735834
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/151640
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29079}
This commit is contained in:
Sam Zackrisson
2019-09-05 15:03:07 +02:00
committed by Commit Bot
parent 5b728cca77
commit cfb9497299
8 changed files with 99 additions and 76 deletions

View File

@ -136,11 +136,11 @@ void UpdateErlEstimator_SSE2(
void AdaptPartitions(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H) {
rtc::ArrayView<const FftData> render_buffer_data =
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
size_t index = render_buffer.Position();
for (auto& H_j : H) {
const FftData& X = render_buffer_data[index];
const FftData& X = render_buffer_data[index][/*channel=*/0];
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
H_j.re[k] += X.re[k] * G.re[k] + X.im[k] * G.im[k];
H_j.im[k] += X.re[k] * G.im[k] - X.im[k] * G.re[k];
@ -155,23 +155,25 @@ void AdaptPartitions(const RenderBuffer& render_buffer,
void AdaptPartitions_NEON(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H) {
rtc::ArrayView<const FftData> render_buffer_data =
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
const int lim1 =
std::min(render_buffer_data.size() - render_buffer.Position(), H.size());
const int lim2 = H.size();
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
FftData* H_j = &H[0];
const FftData* X = &render_buffer_data[render_buffer.Position()];
const std::vector<FftData>* X_channels =
&render_buffer_data[render_buffer.Position()];
int limit = lim1;
int j = 0;
do {
for (; j < limit; ++j, ++H_j, ++X) {
for (; j < limit; ++j, ++H_j, ++X_channels) {
const FftData& X = (*X_channels)[/*channel=*/0];
for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
const float32x4_t G_re = vld1q_f32(&G.re[k]);
const float32x4_t G_im = vld1q_f32(&G.im[k]);
const float32x4_t X_re = vld1q_f32(&X->re[k]);
const float32x4_t X_im = vld1q_f32(&X->im[k]);
const float32x4_t X_re = vld1q_f32(&X.re[k]);
const float32x4_t X_im = vld1q_f32(&X.im[k]);
const float32x4_t H_re = vld1q_f32(&H_j->re[k]);
const float32x4_t H_im = vld1q_f32(&H_j->im[k]);
const float32x4_t a = vmulq_f32(X_re, G_re);
@ -186,23 +188,24 @@ void AdaptPartitions_NEON(const RenderBuffer& render_buffer,
}
}
X = &render_buffer_data[0];
X_channels = &render_buffer_data[0];
limit = lim2;
} while (j < lim2);
H_j = &H[0];
X = &render_buffer_data[render_buffer.Position()];
X_channels = &render_buffer_data[render_buffer.Position()];
limit = lim1;
j = 0;
do {
for (; j < limit; ++j, ++H_j, ++X) {
H_j->re[kFftLengthBy2] += X->re[kFftLengthBy2] * G.re[kFftLengthBy2] +
X->im[kFftLengthBy2] * G.im[kFftLengthBy2];
H_j->im[kFftLengthBy2] += X->re[kFftLengthBy2] * G.im[kFftLengthBy2] -
X->im[kFftLengthBy2] * G.re[kFftLengthBy2];
for (; j < limit; ++j, ++H_j, ++X_channels) {
const FftData& X = (*X_channels)[/*channel=*/0];
H_j->re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
H_j->im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
}
X = &render_buffer_data[0];
X_channels = &render_buffer_data[0];
limit = lim2;
} while (j < lim2);
}
@ -213,14 +216,14 @@ void AdaptPartitions_NEON(const RenderBuffer& render_buffer,
void AdaptPartitions_SSE2(const RenderBuffer& render_buffer,
const FftData& G,
rtc::ArrayView<FftData> H) {
rtc::ArrayView<const FftData> render_buffer_data =
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
const int lim1 =
std::min(render_buffer_data.size() - render_buffer.Position(), H.size());
const int lim2 = H.size();
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
FftData* H_j;
const FftData* X;
const std::vector<FftData>* X_channels;
int limit;
int j;
for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
@ -228,13 +231,14 @@ void AdaptPartitions_SSE2(const RenderBuffer& render_buffer,
const __m128 G_im = _mm_loadu_ps(&G.im[k]);
H_j = &H[0];
X = &render_buffer_data[render_buffer.Position()];
X_channels = &render_buffer_data[render_buffer.Position()];
limit = lim1;
j = 0;
do {
for (; j < limit; ++j, ++H_j, ++X) {
const __m128 X_re = _mm_loadu_ps(&X->re[k]);
const __m128 X_im = _mm_loadu_ps(&X->im[k]);
for (; j < limit; ++j, ++H_j, ++X_channels) {
const FftData& X = (*X_channels)[/*channel=*/0];
const __m128 X_re = _mm_loadu_ps(&X.re[k]);
const __m128 X_im = _mm_loadu_ps(&X.im[k]);
const __m128 H_re = _mm_loadu_ps(&H_j->re[k]);
const __m128 H_im = _mm_loadu_ps(&H_j->im[k]);
const __m128 a = _mm_mul_ps(X_re, G_re);
@ -249,24 +253,25 @@ void AdaptPartitions_SSE2(const RenderBuffer& render_buffer,
_mm_storeu_ps(&H_j->im[k], h);
}
X = &render_buffer_data[0];
X_channels = &render_buffer_data[0];
limit = lim2;
} while (j < lim2);
}
H_j = &H[0];
X = &render_buffer_data[render_buffer.Position()];
X_channels = &render_buffer_data[render_buffer.Position()];
limit = lim1;
j = 0;
do {
for (; j < limit; ++j, ++H_j, ++X) {
H_j->re[kFftLengthBy2] += X->re[kFftLengthBy2] * G.re[kFftLengthBy2] +
X->im[kFftLengthBy2] * G.im[kFftLengthBy2];
H_j->im[kFftLengthBy2] += X->re[kFftLengthBy2] * G.im[kFftLengthBy2] -
X->im[kFftLengthBy2] * G.re[kFftLengthBy2];
for (; j < limit; ++j, ++H_j, ++X_channels) {
const FftData& X = (*X_channels)[/*channel=*/0];
H_j->re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
H_j->im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
}
X = &render_buffer_data[0];
X_channels = &render_buffer_data[0];
limit = lim2;
} while (j < lim2);
}
@ -279,11 +284,11 @@ void ApplyFilter(const RenderBuffer& render_buffer,
S->re.fill(0.f);
S->im.fill(0.f);
rtc::ArrayView<const FftData> render_buffer_data =
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
size_t index = render_buffer.Position();
for (auto& H_j : H) {
const FftData& X = render_buffer_data[index];
const FftData& X = render_buffer_data[index][0];
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
S->re[k] += X.re[k] * H_j.re[k] - X.im[k] * H_j.im[k];
S->im[k] += X.re[k] * H_j.im[k] + X.im[k] * H_j.re[k];
@ -300,22 +305,24 @@ void ApplyFilter_NEON(const RenderBuffer& render_buffer,
RTC_DCHECK_GE(H.size(), H.size() - 1);
S->Clear();
rtc::ArrayView<const FftData> render_buffer_data =
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
const int lim1 =
std::min(render_buffer_data.size() - render_buffer.Position(), H.size());
const int lim2 = H.size();
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
const FftData* H_j = &H[0];
const FftData* X = &render_buffer_data[render_buffer.Position()];
const std::vector<FftData>* X_channels =
&render_buffer_data[render_buffer.Position()];
int j = 0;
int limit = lim1;
do {
for (; j < limit; ++j, ++H_j, ++X) {
for (; j < limit; ++j, ++H_j, ++X_channels) {
const FftData& X = (*X_channels)[/*channel=*/0];
for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
const float32x4_t X_re = vld1q_f32(&X->re[k]);
const float32x4_t X_im = vld1q_f32(&X->im[k]);
const float32x4_t X_re = vld1q_f32(&X.re[k]);
const float32x4_t X_im = vld1q_f32(&X.im[k]);
const float32x4_t H_re = vld1q_f32(&H_j->re[k]);
const float32x4_t H_im = vld1q_f32(&H_j->im[k]);
const float32x4_t S_re = vld1q_f32(&S->re[k]);
@ -331,22 +338,23 @@ void ApplyFilter_NEON(const RenderBuffer& render_buffer,
}
}
limit = lim2;
X = &render_buffer_data[0];
X_channels = &render_buffer_data[0];
} while (j < lim2);
H_j = &H[0];
X = &render_buffer_data[render_buffer.Position()];
X_channels = &render_buffer_data[render_buffer.Position()];
j = 0;
limit = lim1;
do {
for (; j < limit; ++j, ++H_j, ++X) {
S->re[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->re[kFftLengthBy2] -
X->im[kFftLengthBy2] * H_j->im[kFftLengthBy2];
S->im[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->im[kFftLengthBy2] +
X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2];
for (; j < limit; ++j, ++H_j, ++X_channels) {
const FftData& X = (*X_channels)[/*channel=*/0];
S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_j->re[kFftLengthBy2] -
X.im[kFftLengthBy2] * H_j->im[kFftLengthBy2];
S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_j->im[kFftLengthBy2] +
X.im[kFftLengthBy2] * H_j->re[kFftLengthBy2];
}
limit = lim2;
X = &render_buffer_data[0];
X_channels = &render_buffer_data[0];
} while (j < lim2);
}
#endif
@ -360,22 +368,24 @@ void ApplyFilter_SSE2(const RenderBuffer& render_buffer,
S->re.fill(0.f);
S->im.fill(0.f);
rtc::ArrayView<const FftData> render_buffer_data =
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
render_buffer.GetFftBuffer();
const int lim1 =
std::min(render_buffer_data.size() - render_buffer.Position(), H.size());
const int lim2 = H.size();
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
const FftData* H_j = &H[0];
const FftData* X = &render_buffer_data[render_buffer.Position()];
const std::vector<FftData>* X_channels =
&render_buffer_data[render_buffer.Position()];
int j = 0;
int limit = lim1;
do {
for (; j < limit; ++j, ++H_j, ++X) {
for (; j < limit; ++j, ++H_j, ++X_channels) {
const FftData& X = (*X_channels)[/*channel=*/0];
for (int k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
const __m128 X_re = _mm_loadu_ps(&X->re[k]);
const __m128 X_im = _mm_loadu_ps(&X->im[k]);
const __m128 X_re = _mm_loadu_ps(&X.re[k]);
const __m128 X_im = _mm_loadu_ps(&X.im[k]);
const __m128 H_re = _mm_loadu_ps(&H_j->re[k]);
const __m128 H_im = _mm_loadu_ps(&H_j->im[k]);
const __m128 S_re = _mm_loadu_ps(&S->re[k]);
@ -393,22 +403,23 @@ void ApplyFilter_SSE2(const RenderBuffer& render_buffer,
}
}
limit = lim2;
X = &render_buffer_data[0];
X_channels = &render_buffer_data[0];
} while (j < lim2);
H_j = &H[0];
X = &render_buffer_data[render_buffer.Position()];
X_channels = &render_buffer_data[render_buffer.Position()];
j = 0;
limit = lim1;
do {
for (; j < limit; ++j, ++H_j, ++X) {
S->re[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->re[kFftLengthBy2] -
X->im[kFftLengthBy2] * H_j->im[kFftLengthBy2];
S->im[kFftLengthBy2] += X->re[kFftLengthBy2] * H_j->im[kFftLengthBy2] +
X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2];
for (; j < limit; ++j, ++H_j, ++X_channels) {
const FftData& X = (*X_channels)[/*channel=*/0];
S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_j->re[kFftLengthBy2] -
X.im[kFftLengthBy2] * H_j->im[kFftLengthBy2];
S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_j->im[kFftLengthBy2] +
X.im[kFftLengthBy2] * H_j->re[kFftLengthBy2];
}
limit = lim2;
X = &render_buffer_data[0];
X_channels = &render_buffer_data[0];
} while (j < lim2);
}
#endif

View File

@ -12,9 +12,13 @@
namespace webrtc {
FftBuffer::FftBuffer(size_t size) : size(static_cast<int>(size)), buffer(size) {
for (auto& b : buffer) {
b.Clear();
FftBuffer::FftBuffer(size_t size, size_t num_channels)
: size(static_cast<int>(size)),
buffer(size, std::vector<FftData>(num_channels)) {
for (auto& block : buffer) {
for (auto& channel_fft_data : block) {
channel_fft_data.Clear();
}
}
}

View File

@ -23,7 +23,7 @@ namespace webrtc {
// Struct for bundling a circular buffer of FftData objects together with the
// read and write indices.
struct FftBuffer {
explicit FftBuffer(size_t size);
FftBuffer(size_t size, size_t num_channels);
~FftBuffer();
int IncIndex(int index) const {
@ -50,7 +50,7 @@ struct FftBuffer {
void DecReadIndex() { read = DecIndex(read); }
const int size;
std::vector<FftData> buffer;
std::vector<std::vector<FftData>> buffer;
int write = 0;
int read = 0;
};

View File

@ -22,7 +22,7 @@ MockRenderDelayBuffer::MockRenderDelayBuffer(int sample_rate_hz,
spectrum_buffer_(block_buffer_.buffer.size(),
num_channels,
kFftLengthBy2Plus1),
fft_buffer_(block_buffer_.buffer.size()),
fft_buffer_(block_buffer_.buffer.size(), num_channels),
render_buffer_(&block_buffer_, &spectrum_buffer_, &fft_buffer_),
downsampled_render_buffer_(GetDownSampledBufferSize(4, 4)) {
ON_CALL(*this, GetRenderBuffer())

View File

@ -52,7 +52,7 @@ class RenderBuffer {
}
// Returns the circular fft buffer.
rtc::ArrayView<const FftData> GetFftBuffer() const {
rtc::ArrayView<const std::vector<FftData>> GetFftBuffer() const {
return fft_buffer_->buffer;
}

View File

@ -29,14 +29,14 @@ TEST(RenderBuffer, NullExternalFftBuffer) {
// Verifies the check for non-null spectrum buffer.
TEST(RenderBuffer, NullExternalSpectrumBuffer) {
FftBuffer fft_buffer(10);
FftBuffer fft_buffer(10, 1);
BlockBuffer block_buffer(10, 3, 1, kBlockSize);
EXPECT_DEATH(RenderBuffer(&block_buffer, nullptr, &fft_buffer), "");
}
// Verifies the check for non-null block buffer.
TEST(RenderBuffer, NullExternalBlockBuffer) {
FftBuffer fft_buffer(10);
FftBuffer fft_buffer(10, 1);
SpectrumBuffer spectrum_buffer(10, 1, kFftLengthBy2Plus1);
EXPECT_DEATH(RenderBuffer(nullptr, &spectrum_buffer, &fft_buffer), "");
}

View File

@ -128,7 +128,7 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
num_render_channels,
kBlockSize),
spectra_(blocks_.buffer.size(), num_render_channels, kFftLengthBy2Plus1),
ffts_(blocks_.buffer.size()),
ffts_(blocks_.buffer.size(), num_render_channels),
delay_(config_.delay.default_delay),
echo_remover_buffer_(&blocks_, &spectra_, &ffts_),
low_rate_(GetDownSampledBufferSize(down_sampling_factor_,
@ -139,6 +139,10 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
buffer_headroom_(config.filter.main.length_blocks) {
RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size());
RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size());
for (size_t i = 0; i < blocks_.buffer.size(); ++i) {
RTC_DCHECK_EQ(blocks_.buffer[i][0].size(), ffts_.buffer[i].size());
RTC_DCHECK_EQ(spectra_.buffer[i].size(), ffts_.buffer[i].size());
}
Reset();
}
@ -379,11 +383,12 @@ void RenderDelayBufferImpl::InsertBlock(
data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(),
16000 / down_sampling_factor_, 1);
std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);
fft_.PaddedFft(block[0][0], b.buffer[previous_write][0][0],
&f.buffer[f.write]);
// TODO(http://bugs.webrtc.org/10913): Loop over all channels when FftBuffer
// supports multi-channel.
f.buffer[f.write].Spectrum(optimization_, s.buffer[s.write][/*channel=*/0]);
for (size_t channel = 0; channel < block[0].size(); ++channel) {
fft_.PaddedFft(block[0][channel], b.buffer[previous_write][0][channel],
&f.buffer[f.write][channel]);
f.buffer[f.write][channel].Spectrum(optimization_,
s.buffer[s.write][channel]);
}
}
bool RenderDelayBufferImpl::DetectActiveRender(

View File

@ -141,7 +141,7 @@ std::string ProduceDebugText(size_t delay, int filter_length_blocks) {
// Verifies that the check for non-null output gain parameter works.
TEST(ShadowFilterUpdateGain, NullDataOutputGain) {
ApmDataDumper data_dumper(42);
FftBuffer fft_buffer(1);
FftBuffer fft_buffer(1, 1);
RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
FftData E;
const EchoCanceller3Config::Filter::ShadowConfiguration& config = {
@ -159,7 +159,8 @@ TEST(ShadowFilterUpdateGain, GainCausesFilterToConverge) {
std::vector<int> blocks_with_echo_path_changes;
std::vector<int> blocks_with_saturation;
for (size_t num_render_channels : {1, 2, 8}) {
// TODO(http://bugs.webrtc.org/10913): Test multiple render channel counts.
for (size_t num_render_channels : {1}) {
for (size_t filter_length_blocks : {12, 20, 30}) {
for (size_t delay_samples : {0, 64, 150, 200, 301}) {
SCOPED_TRACE(ProduceDebugText(delay_samples, filter_length_blocks));
@ -190,7 +191,8 @@ TEST(ShadowFilterUpdateGain, GainCausesFilterToConverge) {
// Verifies that the magnitude of the gain on average decreases for a
// persistently exciting signal.
TEST(ShadowFilterUpdateGain, DecreasingGain) {
for (size_t num_render_channels : {1, 2, 8}) {
// TODO(http://bugs.webrtc.org/10913): Test multiple render channel counts.
for (size_t num_render_channels : {1}) {
for (size_t filter_length_blocks : {12, 20, 30}) {
SCOPED_TRACE(ProduceDebugText(filter_length_blocks));
std::vector<int> blocks_with_echo_path_changes;
@ -232,7 +234,8 @@ TEST(ShadowFilterUpdateGain, SaturationBehavior) {
for (int k = 99; k < 200; ++k) {
blocks_with_saturation.push_back(k);
}
for (size_t num_render_channels : {1, 2, 8}) {
// TODO(http://bugs.webrtc.org/10913): Test multiple render channel counts.
for (size_t num_render_channels : {1}) {
for (size_t filter_length_blocks : {12, 20, 30}) {
SCOPED_TRACE(ProduceDebugText(filter_length_blocks));