AEC3: Downmix multichannel signals before delay estimation

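Multichannel signals are downmixed to mono (by averaging the channels)
before decimation and delay estimation. This is useful when not all
channels carry audio content, since previously only the first channel
was used for delay estimation. The feature can be toggled in the AEC3
configuration via `delay.downmix_before_delay_estimation`.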

Bug: webrtc:10913
Change-Id: I7d40edf7732bb51fec69e7f3ca063d821c5069c4
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/151762
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29126}
This commit is contained in:
Gustaf Ullberg
2019-09-10 09:36:43 +02:00
committed by Commit Bot
parent d181ee798d
commit ee84d39fce
15 changed files with 89 additions and 55 deletions

View File

@ -165,7 +165,7 @@ void BlockProcessorImpl::ProcessCapture(
// alignment.
estimated_delay_ = delay_controller_->GetDelay(
render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(),
(*capture_block)[0][0]);
(*capture_block)[0]);
if (estimated_delay_) {
bool delay_change =

View File

@ -69,14 +69,32 @@ Decimator::Decimator(size_t down_sampling_factor)
down_sampling_factor_ == 8);
}
void Decimator::Decimate(rtc::ArrayView<const float> in,
void Decimator::Decimate(const std::vector<std::vector<float>>& in,
bool downmix,
rtc::ArrayView<float> out) {
RTC_DCHECK_EQ(kBlockSize, in.size());
RTC_DCHECK_EQ(kBlockSize, in[0].size());
RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size());
std::array<float, kBlockSize> in_downmixed;
std::array<float, kBlockSize> x;
// Mix channels before decimation.
std::copy(in[0].begin(), in[0].end(), in_downmixed.begin());
if (downmix && in.size() > 1) {
for (size_t channel = 1; channel < in.size(); channel++) {
const auto& data = in[channel];
for (size_t i = 0; i < kBlockSize; i++) {
in_downmixed[i] += data[i];
}
}
const float one_by_num_channels = 1.f / in.size();
for (size_t i = 0; i < kBlockSize; i++) {
in_downmixed[i] *= one_by_num_channels;
}
}
// Limit the frequency content of the signal to avoid aliasing.
anti_aliasing_filter_.Process(in, x);
anti_aliasing_filter_.Process(in_downmixed, x);
// Reduce the impact of near-end noise.
noise_reduction_filter_.Process(x);

View File

@ -12,6 +12,7 @@
#define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
@ -26,7 +27,9 @@ class Decimator {
explicit Decimator(size_t down_sampling_factor);
// Downsamples the signal.
void Decimate(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);
void Decimate(const std::vector<std::vector<float>>& in,
bool downmix,
rtc::ArrayView<float> out);
private:
const size_t down_sampling_factor_;

View File

@ -15,6 +15,7 @@
#include <algorithm>
#include <array>
#include <cmath>
#include <cstring>
#include <numeric>
#include <string>
#include <vector>
@ -57,10 +58,11 @@ void ProduceDecimatedSinusoidalOutputPower(int sample_rate_hz,
for (size_t k = 0; k < kNumBlocks; ++k) {
std::vector<float> sub_block(sub_block_size);
decimator.Decimate(
rtc::ArrayView<const float>(&input[k * kBlockSize], kBlockSize),
sub_block);
std::vector<std::vector<float>> input_multichannel(
1, std::vector<float>(kBlockSize));
memcpy(input_multichannel[0].data(), &input[k * kBlockSize],
kBlockSize * sizeof(float));
decimator.Decimate(input_multichannel, true, sub_block);
std::copy(sub_block.begin(), sub_block.end(),
output.begin() + k * sub_block_size);
@ -105,24 +107,24 @@ TEST(Decimator, NoLeakageFromUpperFrequencies) {
// Verifies the check for the input size.
TEST(Decimator, WrongInputSize) {
Decimator decimator(4);
std::vector<float> x(std::vector<float>(kBlockSize - 1, 0.f));
std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize - 1, 0.f));
std::array<float, kBlockSize / 4> x_downsampled;
EXPECT_DEATH(decimator.Decimate(x, x_downsampled), "");
EXPECT_DEATH(decimator.Decimate(x, true, x_downsampled), "");
}
// Verifies the check for non-null output parameter.
TEST(Decimator, NullOutput) {
Decimator decimator(4);
std::vector<float> x(std::vector<float>(kBlockSize, 0.f));
EXPECT_DEATH(decimator.Decimate(x, nullptr), "");
std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f));
EXPECT_DEATH(decimator.Decimate(x, true, nullptr), "");
}
// Verifies the check for the output size.
TEST(Decimator, WrongOutputSize) {
Decimator decimator(4);
std::vector<float> x(std::vector<float>(kBlockSize, 0.f));
std::vector<std::vector<float>> x(1, std::vector<float>(kBlockSize, 0.f));
std::array<float, kBlockSize / 4 - 1> x_downsampled;
EXPECT_DEATH(decimator.Decimate(x, x_downsampled), "");
EXPECT_DEATH(decimator.Decimate(x, true, x_downsampled), "");
}
// Verifies the check for the correct downsampling factor.

View File

@ -42,7 +42,8 @@ EchoPathDelayEstimator::EchoPathDelayEstimator(
config.delay.delay_candidate_detection_threshold),
matched_filter_lag_aggregator_(data_dumper_,
matched_filter_.GetMaxFilterLag(),
config.delay.delay_selection_thresholds) {
config.delay.delay_selection_thresholds),
downmix_(config.delay.downmix_before_delay_estimation) {
RTC_DCHECK(data_dumper);
RTC_DCHECK(down_sampling_factor_ > 0);
}
@ -55,15 +56,13 @@ void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) {
absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
const DownsampledRenderBuffer& render_buffer,
rtc::ArrayView<const float> capture) {
RTC_DCHECK_EQ(kBlockSize, capture.size());
const std::vector<std::vector<float>>& capture) {
RTC_DCHECK_EQ(kBlockSize, capture[0].size());
std::array<float, kBlockSize> downsampled_capture_data;
rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
sub_block_size_);
data_dumper_->DumpWav("aec3_capture_decimator_input", capture.size(),
capture.data(), 16000, 1);
capture_decimator_.Decimate(capture, downsampled_capture);
capture_decimator_.Decimate(capture, downmix_, downsampled_capture);
data_dumper_->DumpWav("aec3_capture_decimator_output",
downsampled_capture.size(), downsampled_capture.data(),
16000 / down_sampling_factor_, 1);

View File

@ -42,7 +42,7 @@ class EchoPathDelayEstimator {
// Produce a delay estimate if one is available.
absl::optional<DelayEstimate> EstimateDelay(
const DownsampledRenderBuffer& render_buffer,
rtc::ArrayView<const float> capture);
const std::vector<std::vector<float>>& capture);
// Log delay estimator properties.
void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
@ -65,6 +65,7 @@ class EchoPathDelayEstimator {
absl::optional<DelayEstimate> old_aggregated_lag_;
size_t consistent_estimate_counter_ = 0;
ClockdriftDetector clockdrift_detector_;
bool downmix_;
// Internal reset method with more granularity.
void Reset(bool reset_lag_aggregator, bool reset_delay_confidence);

View File

@ -47,7 +47,7 @@ TEST(EchoPathDelayEstimator, BasicApiCalls) {
std::vector<std::vector<std::vector<float>>> render(
kNumBands, std::vector<std::vector<float>>(
kNumChannels, std::vector<float>(kBlockSize)));
std::vector<float> capture(kBlockSize);
std::vector<std::vector<float>> capture(1, std::vector<float>(kBlockSize));
for (size_t k = 0; k < 100; ++k) {
render_delay_buffer->Insert(render);
estimator.EstimateDelay(render_delay_buffer->GetDownsampledRenderBuffer(),
@ -66,7 +66,7 @@ TEST(EchoPathDelayEstimator, DelayEstimation) {
std::vector<std::vector<std::vector<float>>> render(
kNumBands, std::vector<std::vector<float>>(
kNumChannels, std::vector<float>(kBlockSize)));
std::vector<float> capture(kBlockSize);
std::vector<std::vector<float>> capture(1, std::vector<float>(kBlockSize));
ApmDataDumper data_dumper(0);
constexpr size_t kDownSamplingFactors[] = {2, 4, 8};
for (auto down_sampling_factor : kDownSamplingFactors) {
@ -83,7 +83,7 @@ TEST(EchoPathDelayEstimator, DelayEstimation) {
absl::optional<DelayEstimate> estimated_delay_samples;
for (size_t k = 0; k < (500 + (delay_samples) / kBlockSize); ++k) {
RandomizeSampleVector(&random_generator, render[0][0]);
signal_delay_buffer.Delay(render[0][0], capture);
signal_delay_buffer.Delay(render[0][0], capture[0]);
render_delay_buffer->Insert(render);
if (k == 0) {
@ -125,7 +125,7 @@ TEST(EchoPathDelayEstimator, NoDelayEstimatesForLowLevelRenderSignals) {
std::vector<std::vector<std::vector<float>>> render(
kNumBands, std::vector<std::vector<float>>(
kNumChannels, std::vector<float>(kBlockSize)));
std::vector<float> capture(kBlockSize);
std::vector<std::vector<float>> capture(1, std::vector<float>(kBlockSize));
ApmDataDumper data_dumper(0);
EchoPathDelayEstimator estimator(&data_dumper, config);
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
@ -136,7 +136,7 @@ TEST(EchoPathDelayEstimator, NoDelayEstimatesForLowLevelRenderSignals) {
for (auto& render_k : render[0][0]) {
render_k *= 100.f / 32767.f;
}
std::copy(render[0][0].begin(), render[0][0].end(), capture.begin());
std::copy(render[0][0].begin(), render[0][0].end(), capture[0].begin());
render_delay_buffer->Insert(render);
render_delay_buffer->PrepareCaptureProcessing();
EXPECT_FALSE(estimator.EstimateDelay(
@ -155,7 +155,7 @@ TEST(EchoPathDelayEstimator, DISABLED_WrongRenderBlockSize) {
EchoPathDelayEstimator estimator(&data_dumper, config);
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(config, 48000, 1));
std::vector<float> capture(kBlockSize);
std::vector<std::vector<float>> capture(1, std::vector<float>(kBlockSize));
EXPECT_DEATH(estimator.EstimateDelay(
render_delay_buffer->GetDownsampledRenderBuffer(), capture),
"");
@ -170,7 +170,8 @@ TEST(EchoPathDelayEstimator, WrongCaptureBlockSize) {
EchoPathDelayEstimator estimator(&data_dumper, config);
std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
RenderDelayBuffer::Create(config, 48000, 1));
std::vector<float> capture(std::vector<float>(kBlockSize - 1));
std::vector<std::vector<float>> capture(1,
std::vector<float>(kBlockSize - 1));
EXPECT_DEATH(estimator.EstimateDelay(
render_delay_buffer->GetDownsampledRenderBuffer(), capture),
"");

View File

@ -150,8 +150,8 @@ TEST(MatchedFilter, LagEstimation) {
std::vector<std::vector<std::vector<float>>> render(
kNumBands, std::vector<std::vector<float>>(
kNumChannels, std::vector<float>(kBlockSize, 0.f)));
std::array<float, kBlockSize> capture;
capture.fill(0.f);
std::vector<std::vector<float>> capture(
1, std::vector<float>(kBlockSize, 0.f));
ApmDataDumper data_dumper(0);
for (size_t delay_samples : {5, 64, 150, 200, 800, 1000}) {
SCOPED_TRACE(ProduceDebugText(delay_samples, down_sampling_factor));
@ -177,7 +177,7 @@ TEST(MatchedFilter, LagEstimation) {
RandomizeSampleVector(&random_generator, render[band][channel]);
}
}
signal_delay_buffer.Delay(render[0][0], capture);
signal_delay_buffer.Delay(render[0][0], capture[0]);
render_delay_buffer->Insert(render);
if (k == 0) {
@ -188,7 +188,7 @@ TEST(MatchedFilter, LagEstimation) {
std::array<float, kBlockSize> downsampled_capture_data;
rtc::ArrayView<float> downsampled_capture(
downsampled_capture_data.data(), sub_block_size);
capture_decimator.Decimate(capture, downsampled_capture);
capture_decimator.Decimate(capture, true, downsampled_capture);
filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(),
downsampled_capture);
}
@ -312,8 +312,8 @@ TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) {
std::vector<std::vector<std::vector<float>>> render(
kNumBands, std::vector<std::vector<float>>(
kNumChannels, std::vector<float>(kBlockSize, 0.f)));
std::array<float, kBlockSize> capture;
capture.fill(0.f);
std::vector<std::vector<float>> capture(
1, std::vector<float>(kBlockSize, 0.f));
ApmDataDumper data_dumper(0);
EchoCanceller3Config config;
MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size,
@ -332,11 +332,11 @@ TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) {
for (auto& render_k : render[0][0]) {
render_k *= 149.f / 32767.f;
}
std::copy(render[0][0].begin(), render[0][0].end(), capture.begin());
std::copy(render[0][0].begin(), render[0][0].end(), capture[0].begin());
std::array<float, kBlockSize> downsampled_capture_data;
rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
sub_block_size);
capture_decimator.Decimate(capture, downsampled_capture);
capture_decimator.Decimate(capture, true, downsampled_capture);
filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(),
downsampled_capture);
}

View File

@ -31,7 +31,7 @@ class MockRenderDelayController : public RenderDelayController {
absl::optional<DelayEstimate>(
const DownsampledRenderBuffer& render_buffer,
size_t render_delay_buffer_delay,
rtc::ArrayView<const float> capture));
const std::vector<std::vector<float>>& capture));
MOCK_CONST_METHOD0(HasClockdrift, bool());
};

View File

@ -377,9 +377,8 @@ void RenderDelayBufferImpl::InsertBlock(
std::copy(block[k].begin(), block[k].end(), b.buffer[b.write][k].begin());
}
data_dumper_->DumpWav("aec3_render_decimator_input", block[0][0].size(),
block[0][0].data(), 16000, 1);
render_decimator_.Decimate(block[0][0], ds);
render_decimator_.Decimate(block[0],
config_.delay.downmix_before_delay_estimation, ds);
data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(),
16000 / down_sampling_factor_, 1);
std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);

View File

@ -41,7 +41,7 @@ class RenderDelayControllerImpl final : public RenderDelayController {
absl::optional<DelayEstimate> GetDelay(
const DownsampledRenderBuffer& render_buffer,
size_t render_delay_buffer_delay,
rtc::ArrayView<const float> capture) override;
const std::vector<std::vector<float>>& capture) override;
bool HasClockdrift() const override;
private:
@ -118,8 +118,8 @@ void RenderDelayControllerImpl::LogRenderCall() {}
absl::optional<DelayEstimate> RenderDelayControllerImpl::GetDelay(
const DownsampledRenderBuffer& render_buffer,
size_t render_delay_buffer_delay,
rtc::ArrayView<const float> capture) {
RTC_DCHECK_EQ(kBlockSize, capture.size());
const std::vector<std::vector<float>>& capture) {
RTC_DCHECK_EQ(kBlockSize, capture[0].size());
++capture_call_counter_;
auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture);

View File

@ -39,7 +39,7 @@ class RenderDelayController {
virtual absl::optional<DelayEstimate> GetDelay(
const DownsampledRenderBuffer& render_buffer,
size_t render_delay_buffer_delay,
rtc::ArrayView<const float> capture) = 0;
const std::vector<std::vector<float>>& capture) = 0;
// Returns true if clockdrift has been detected.
virtual bool HasClockdrift() const = 0;

View File

@ -46,7 +46,7 @@ constexpr size_t kDownSamplingFactors[] = {2, 4, 8};
// Verifies the output of GetDelay when there are no AnalyzeRender calls.
TEST(RenderDelayController, NoRenderSignal) {
std::vector<float> block(kBlockSize, 0.f);
std::vector<std::vector<float>> block(1, std::vector<float>(kBlockSize, 0.f));
EchoCanceller3Config config;
for (size_t num_matched_filters = 4; num_matched_filters == 10;
num_matched_filters++) {
@ -73,7 +73,8 @@ TEST(RenderDelayController, NoRenderSignal) {
// Verifies the basic API call sequence.
TEST(RenderDelayController, BasicApiCalls) {
constexpr size_t kNumChannels = 1;
std::vector<float> capture_block(kBlockSize, 0.f);
std::vector<std::vector<float>> capture_block(
1, std::vector<float>(kBlockSize, 0.f));
absl::optional<DelayEstimate> delay_blocks;
for (size_t num_matched_filters = 4; num_matched_filters == 10;
num_matched_filters++) {
@ -109,7 +110,8 @@ TEST(RenderDelayController, BasicApiCalls) {
// simple timeshifts between the signals.
TEST(RenderDelayController, Alignment) {
Random random_generator(42U);
std::vector<float> capture_block(kBlockSize, 0.f);
std::vector<std::vector<float>> capture_block(
1, std::vector<float>(kBlockSize, 0.f));
for (size_t num_matched_filters = 4; num_matched_filters == 10;
num_matched_filters++) {
for (auto down_sampling_factor : kDownSamplingFactors) {
@ -140,7 +142,7 @@ TEST(RenderDelayController, Alignment) {
render_block[band][channel]);
}
}
signal_delay_buffer.Delay(render_block[0][0], capture_block);
signal_delay_buffer.Delay(render_block[0][0], capture_block[0]);
render_delay_buffer->Insert(render_block);
render_delay_buffer->PrepareCaptureProcessing();
delay_blocks = delay_controller->GetDelay(
@ -200,7 +202,7 @@ TEST(RenderDelayController, NonCausalAlignment) {
render_delay_buffer->PrepareCaptureProcessing();
delay_blocks = delay_controller->GetDelay(
render_delay_buffer->GetDownsampledRenderBuffer(),
render_delay_buffer->Delay(), capture_block[0][0]);
render_delay_buffer->Delay(), capture_block[0]);
}
ASSERT_FALSE(delay_blocks);
@ -215,7 +217,8 @@ TEST(RenderDelayController, NonCausalAlignment) {
TEST(RenderDelayController, AlignmentWithJitter) {
Random random_generator(42U);
constexpr size_t kNumRenderChannels = 1;
std::vector<float> capture_block(kBlockSize, 0.f);
std::vector<std::vector<float>> capture_block(
1, std::vector<float>(kBlockSize, 0.f));
for (size_t num_matched_filters = 4; num_matched_filters == 10;
num_matched_filters++) {
for (auto down_sampling_factor : kDownSamplingFactors) {
@ -240,10 +243,10 @@ TEST(RenderDelayController, AlignmentWithJitter) {
j <
(1000 + delay_samples / kBlockSize) / kMaxTestJitterBlocks + 1;
++j) {
std::vector<std::vector<float>> capture_block_buffer;
std::vector<std::vector<std::vector<float>>> capture_block_buffer;
for (size_t k = 0; k < (kMaxTestJitterBlocks - 1); ++k) {
RandomizeSampleVector(&random_generator, render_block[0][0]);
signal_delay_buffer.Delay(render_block[0][0], capture_block);
signal_delay_buffer.Delay(render_block[0][0], capture_block[0]);
capture_block_buffer.push_back(capture_block);
render_delay_buffer->Insert(render_block);
}
@ -297,7 +300,8 @@ TEST(RenderDelayController, InitialHeadroom) {
// Verifies the check for the capture signal block size.
TEST(RenderDelayController, WrongCaptureSize) {
std::vector<float> block(kBlockSize - 1, 0.f);
std::vector<std::vector<float>> block(
1, std::vector<float>(kBlockSize - 1, 0.f));
EchoCanceller3Config config;
for (auto rate : {16000, 32000, 48000}) {
SCOPED_TRACE(ProduceDebugText(rate));