Add flag to APM to force multichannel even with AEC3

Currently, APM fakes multichannel in two ways:
 - With injected AECs, capture processing is only performed on the left
channel. The result is copied into the other channels.
 - With multichannel render audio, all channels are mixed into one
before analysing.

This CL adds a flag to disable these behaviors, ensuring proper
multichannel processing happens throughout the APM pipeline.

Adds killswitches to separately disable render / capture multichannel.

Additionally, AEC3 currently crashes when running with multichannel.
This CL adds the missing pieces to at least have it run without
triggering any DCHECKs, including making the high-pass filter properly
handle multichannel.

Bug: webrtc:10913, webrtc:10907
Change-Id: I38795bf8f312b959fcc816a056fba2c68d4e424d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/152483
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29248}
This commit is contained in:
Sam Zackrisson
2019-09-20 07:50:35 +02:00
committed by Commit Bot
parent e24557f51e
commit feee1e4c36
11 changed files with 173 additions and 56 deletions

View File

@ -198,9 +198,9 @@ EchoCanceller3::RenderWriter::RenderWriter(
EchoCanceller3::RenderWriter::~RenderWriter() = default;
void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
RTC_DCHECK_EQ(1, input.num_channels());
RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band());
RTC_DCHECK_EQ(num_bands_, input.num_bands());
RTC_DCHECK_EQ(num_channels_, input.num_channels());
// TODO(bugs.webrtc.org/8759) Temporary work-around.
if (num_bands_ != input.num_bands())
@ -211,9 +211,7 @@ void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
CopyBufferIntoFrame(input, num_bands_, num_channels_,
&render_queue_input_frame_);
for (size_t channel = 0; channel < num_channels_; ++channel) {
high_pass_filter_.Process(render_queue_input_frame_[0][channel]);
}
high_pass_filter_.Process(&render_queue_input_frame_[0]);
static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
}
@ -321,7 +319,6 @@ void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
RTC_DCHECK(capture);
RTC_DCHECK_EQ(1u, capture->num_channels());
RTC_DCHECK_EQ(num_bands_, capture->num_bands());
RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);

View File

@ -215,7 +215,7 @@ class EchoCanceller3Tester {
std::unique_ptr<BlockProcessor>(
new RenderTransportVerificationProcessor(num_bands_)));
std::vector<float> render_input;
std::vector<std::vector<float>> render_input(1);
std::vector<float> capture_output;
for (size_t frame_index = 0; frame_index < kNumFramesToProcess;
++frame_index) {
@ -227,7 +227,7 @@ class EchoCanceller3Tester {
&render_buffer_.split_bands(0)[0], 0);
for (size_t k = 0; k < frame_length_; ++k) {
render_input.push_back(render_buffer_.split_bands(0)[0][k]);
render_input[0].push_back(render_buffer_.split_bands(0)[0][k]);
}
aec3.AnalyzeRender(&render_buffer_);
aec3.ProcessCapture(&capture_buffer_, false);
@ -236,10 +236,10 @@ class EchoCanceller3Tester {
}
}
HighPassFilter hp_filter(1);
hp_filter.Process(render_input);
hp_filter.Process(&render_input);
EXPECT_TRUE(
VerifyOutputFrameBitexactness(render_input, capture_output, -64));
VerifyOutputFrameBitexactness(render_input[0], capture_output, -64));
}
// Verifies that information about echo path changes are properly propagated
@ -492,7 +492,7 @@ class EchoCanceller3Tester {
std::unique_ptr<BlockProcessor>(
new RenderTransportVerificationProcessor(num_bands_)));
std::vector<float> render_input;
std::vector<std::vector<float>> render_input(1);
std::vector<float> capture_output;
for (size_t frame_index = 0; frame_index < kRenderTransferQueueSizeFrames;
@ -508,7 +508,7 @@ class EchoCanceller3Tester {
}
for (size_t k = 0; k < frame_length_; ++k) {
render_input.push_back(render_buffer_.split_bands(0)[0][k]);
render_input[0].push_back(render_buffer_.split_bands(0)[0][k]);
}
aec3.AnalyzeRender(&render_buffer_);
}
@ -529,10 +529,10 @@ class EchoCanceller3Tester {
}
}
HighPassFilter hp_filter(1);
hp_filter.Process(render_input);
hp_filter.Process(&render_input);
EXPECT_TRUE(
VerifyOutputFrameBitexactness(render_input, capture_output, -64));
VerifyOutputFrameBitexactness(render_input[0], capture_output, -64));
}
// This test verifies that a buffer overrun in the render swapqueue is

View File

@ -48,6 +48,7 @@
#include "rtc_base/ref_counted_object.h"
#include "rtc_base/time_utils.h"
#include "rtc_base/trace_event.h"
#include "system_wrappers/include/field_trial.h"
#include "system_wrappers/include/metrics.h"
#define RETURN_ON_ERR(expr) \
@ -348,8 +349,12 @@ AudioProcessing* AudioProcessingBuilder::Create(const webrtc::Config& config) {
}
AudioProcessingImpl::AudioProcessingImpl(const webrtc::Config& config)
: AudioProcessingImpl(config, nullptr, nullptr, nullptr, nullptr, nullptr) {
}
: AudioProcessingImpl(config,
/*capture_post_processor=*/nullptr,
/*render_pre_processor=*/nullptr,
/*echo_control_factory=*/nullptr,
/*echo_detector=*/nullptr,
/*capture_analyzer=*/nullptr) {}
int AudioProcessingImpl::instance_count_ = 0;
@ -382,13 +387,17 @@ AudioProcessingImpl::AudioProcessingImpl(
/* enabled= */ false,
/* enabled_agc2_level_estimator= */ false,
/* digital_adaptive_disabled= */ false,
/* analyze_before_aec= */ false),
/* analyze_before_aec= */ false,
#else
config.Get<ExperimentalAgc>().enabled,
config.Get<ExperimentalAgc>().enabled_agc2_level_estimator,
config.Get<ExperimentalAgc>().digital_adaptive_disabled,
config.Get<ExperimentalAgc>().analyze_before_aec),
config.Get<ExperimentalAgc>().analyze_before_aec,
#endif
!field_trial::IsEnabled(
"WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
!field_trial::IsEnabled(
"WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch")),
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
capture_(false),
#else
@ -630,10 +639,18 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
RTC_DCHECK_NE(8000, render_processing_rate);
// Always downmix the render stream to mono for analysis. This has been
// demonstrated to work well for AEC in most practical scenarios.
if (submodule_states_.RenderMultiBandSubModulesActive()) {
formats_.render_processing_format = StreamConfig(render_processing_rate, 1);
// By default, downmix the render stream to mono for analysis. This has been
// demonstrated to work well for AEC in most practical scenarios.
const bool experimental_multi_channel_render =
config_.pipeline.experimental_multi_channel &&
constants_.experimental_multi_channel_render_support;
int render_processing_num_channels =
experimental_multi_channel_render
? formats_.api_format.reverse_input_stream().num_channels()
: 1;
formats_.render_processing_format =
StreamConfig(render_processing_rate, render_processing_num_channels);
} else {
formats_.render_processing_format = StreamConfig(
formats_.api_format.reverse_input_stream().sample_rate_hz(),
@ -658,6 +675,10 @@ void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
rtc::CritScope cs_render(&crit_render_);
rtc::CritScope cs_capture(&crit_capture_);
const bool pipeline_config_changed =
config_.pipeline.experimental_multi_channel !=
config.pipeline.experimental_multi_channel;
const bool aec_config_changed =
config_.echo_canceller.enabled != config.echo_canceller.enabled ||
config_.echo_canceller.use_legacy_aec !=
@ -733,6 +754,12 @@ void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
private_submodules_->voice_detector->Initialize(
proc_split_sample_rate_hz());
}
// Reinitialization must happen after all submodule configuration to avoid
// additional reinitializations on the next capture / render processing call.
if (pipeline_config_changed) {
InitializeLocked(formats_.api_format);
}
}
void AudioProcessingImpl::ApplyAgc1Config(
@ -809,7 +836,14 @@ size_t AudioProcessingImpl::num_input_channels() const {
size_t AudioProcessingImpl::num_proc_channels() const {
// Used as callback from submodules, hence locking is not allowed.
return capture_nonlocked_.echo_controller_enabled ? 1 : num_output_channels();
const bool experimental_multi_channel_capture =
config_.pipeline.experimental_multi_channel &&
constants_.experimental_multi_channel_capture_support;
if (capture_nonlocked_.echo_controller_enabled &&
!experimental_multi_channel_capture) {
return 1;
}
return num_output_channels();
}
size_t AudioProcessingImpl::num_output_channels() const {
@ -1338,7 +1372,11 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
capture_buffer->SplitIntoFrequencyBands();
}
if (private_submodules_->echo_controller) {
const bool experimental_multi_channel_capture =
config_.pipeline.experimental_multi_channel &&
constants_.experimental_multi_channel_capture_support;
if (private_submodules_->echo_controller &&
!experimental_multi_channel_capture) {
// Force down-mixing of the number of channels after the detection of
// capture signal saturation.
// TODO(peah): Look into ensuring that this kind of tampering with the
@ -1846,8 +1884,8 @@ void AudioProcessingImpl::InitializeEchoController() {
echo_control_factory_->Create(proc_sample_rate_hz());
} else {
private_submodules_->echo_controller = std::make_unique<EchoCanceller3>(
EchoCanceller3Config(), proc_sample_rate_hz(),
/*num_render_channels=*/1, /*num_capture_channels=*/1);
EchoCanceller3Config(), proc_sample_rate_hz(), num_reverse_channels(),
num_proc_channels());
}
capture_nonlocked_.echo_controller_enabled = true;

View File

@ -354,9 +354,10 @@ class AudioProcessingImpl : public AudioProcessing {
bool use_experimental_agc,
bool use_experimental_agc_agc2_level_estimation,
bool use_experimental_agc_agc2_digital_adaptive,
bool use_experimental_agc_process_before_aec)
: // Format of processing streams at input/output call sites.
agc_startup_min_volume(agc_startup_min_volume),
bool use_experimental_agc_process_before_aec,
bool experimental_multi_channel_render_support,
bool experimental_multi_channel_capture_support)
: agc_startup_min_volume(agc_startup_min_volume),
agc_clipped_level_min(agc_clipped_level_min),
use_experimental_agc(use_experimental_agc),
use_experimental_agc_agc2_level_estimation(
@ -364,14 +365,19 @@ class AudioProcessingImpl : public AudioProcessing {
use_experimental_agc_agc2_digital_adaptive(
use_experimental_agc_agc2_digital_adaptive),
use_experimental_agc_process_before_aec(
use_experimental_agc_process_before_aec) {}
use_experimental_agc_process_before_aec),
experimental_multi_channel_render_support(
experimental_multi_channel_render_support),
experimental_multi_channel_capture_support(
experimental_multi_channel_capture_support) {}
int agc_startup_min_volume;
int agc_clipped_level_min;
bool use_experimental_agc;
bool use_experimental_agc_agc2_level_estimation;
bool use_experimental_agc_agc2_digital_adaptive;
bool use_experimental_agc_process_before_aec;
bool experimental_multi_channel_render_support;
bool experimental_multi_channel_capture_support;
} constants_;
struct ApmCaptureState {

View File

@ -45,9 +45,11 @@ void HighPassFilter::Process(AudioBuffer* audio) {
}
}
void HighPassFilter::Process(rtc::ArrayView<float> audio) {
RTC_DCHECK_EQ(filters_.size(), 1);
filters_[0]->Process(audio);
// Filters `audio` in-place, one channel at a time. `audio` is indexed as
// [channel][sample]; one filter instance per channel keeps per-channel
// filter state independent across calls.
void HighPassFilter::Process(std::vector<std::vector<float>>* audio) {
// The number of filter instances must match the number of channels;
// callers change channel count via Reset(num_channels).
RTC_DCHECK_EQ(filters_.size(), audio->size());
for (size_t k = 0; k < audio->size(); ++k) {
filters_[k]->Process((*audio)[k]);
}
}
void HighPassFilter::Reset() {

View File

@ -30,9 +30,7 @@ class HighPassFilter {
HighPassFilter& operator=(const HighPassFilter&) = delete;
void Process(AudioBuffer* audio);
// Only to be used when the number of channels are 1.
// TODO(peah): Add support for more channels.
void Process(rtc::ArrayView<float> audio);
void Process(std::vector<std::vector<float>>* audio);
void Reset();
void Reset(size_t num_channels);

View File

@ -20,10 +20,12 @@
namespace webrtc {
namespace {
// Process one frame of data and produce the output.
std::vector<float> ProcessOneFrame(const std::vector<float>& frame_input,
const StreamConfig& stream_config,
HighPassFilter* high_pass_filter) {
// Process one frame of data via the AudioBuffer interface and produce the
// output.
std::vector<float> ProcessOneFrameAsAudioBuffer(
const std::vector<float>& frame_input,
const StreamConfig& stream_config,
HighPassFilter* high_pass_filter) {
AudioBuffer audio_buffer(
stream_config.sample_rate_hz(), stream_config.num_channels(),
stream_config.sample_rate_hz(), stream_config.num_channels(),
@ -37,9 +39,40 @@ std::vector<float> ProcessOneFrame(const std::vector<float>& frame_input,
return frame_output;
}
// Processes one frame of data via the vector interface and produces the
// filtered frame as an interleaved vector.
std::vector<float> ProcessOneFrameAsVector(
    const std::vector<float>& frame_input,
    const StreamConfig& stream_config,
    HighPassFilter* high_pass_filter) {
  // Deinterleave the input into one contiguous vector per channel, which is
  // the layout HighPassFilter::Process(std::vector<std::vector<float>>*)
  // expects.
  std::vector<std::vector<float>> process_vector(
      stream_config.num_channels(),
      std::vector<float>(stream_config.num_frames()));
  for (size_t k = 0; k < stream_config.num_frames(); ++k) {
    for (size_t channel = 0; channel < stream_config.num_channels();
         ++channel) {
      process_vector[channel][k] =
          frame_input[k * stream_config.num_channels() + channel];
    }
  }
  high_pass_filter->Process(&process_vector);
  // Re-interleave the filtered channels into a single output vector.
  std::vector<float> output;
  output.reserve(frame_input.size());
  for (size_t k = 0; k < stream_config.num_frames(); ++k) {
    for (size_t channel = 0; channel < stream_config.num_channels();
         ++channel) {
      output.push_back(process_vector[channel][k]);
    }
  }
  // Return the full interleaved frame. Returning process_vector[0] (as the
  // original code did) would drop every channel but the first and leave
  // `output` unused.
  return output;
}
// Processes a specified amount of frames, verifies the results and reports
// any errors.
void RunBitexactnessTest(int num_channels,
bool use_audio_buffer_interface,
const std::vector<float>& input,
const std::vector<float>& reference) {
const StreamConfig stream_config(16000, num_channels, false);
@ -55,8 +88,13 @@ void RunBitexactnessTest(int num_channels,
stream_config.num_channels() * frame_no,
input.begin() + stream_config.num_frames() *
stream_config.num_channels() * (frame_no + 1));
output = ProcessOneFrame(frame_input, stream_config, &high_pass_filter);
if (use_audio_buffer_interface) {
output = ProcessOneFrameAsAudioBuffer(frame_input, stream_config,
&high_pass_filter);
} else {
output = ProcessOneFrameAsVector(frame_input, stream_config,
&high_pass_filter);
}
}
// Form vector to compare the reference to. Only the last frame processed
@ -92,19 +130,36 @@ std::vector<float> CreateVector(const rtc::ArrayView<const float>& array_view) {
}
} // namespace
TEST(HighPassFilterAccuracyTest, Reset) {
TEST(HighPassFilterAccuracyTest, ResetWithAudioBufferInterface) {
const StreamConfig stream_config_stereo(16000, 2, false);
const StreamConfig stream_config_mono(16000, 1, false);
std::vector<float> x_mono(160, 1.f);
std::vector<float> x_stereo(320, 1.f);
HighPassFilter lc(1);
std::vector<float> y = ProcessOneFrame(x_mono, stream_config_mono, &lc);
lc.Reset(2);
y = ProcessOneFrame(x_stereo, stream_config_stereo, &lc);
lc.Reset(1);
y = ProcessOneFrame(x_mono, stream_config_mono, &lc);
lc.Reset();
y = ProcessOneFrame(x_mono, stream_config_mono, &lc);
HighPassFilter hpf(1);
std::vector<float> y =
ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
hpf.Reset(2);
y = ProcessOneFrameAsAudioBuffer(x_stereo, stream_config_stereo, &hpf);
hpf.Reset(1);
y = ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
hpf.Reset();
y = ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf);
}
// Smoke test for the vector-based Process() interface: verifies that the
// filter can be Reset() to a different channel count (1 -> 2 -> 1) and
// processed without triggering the channel-count DCHECK. Output values are
// not checked here; bitexactness is covered by the accuracy tests below.
TEST(HighPassFilterAccuracyTest, ResetWithVectorInterface) {
const StreamConfig stream_config_stereo(16000, 2, false);
const StreamConfig stream_config_mono(16000, 1, false);
std::vector<float> x_mono(160, 1.f);
std::vector<float> x_stereo(320, 1.f);
HighPassFilter hpf(1);
std::vector<float> y =
ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
// Reset(2) must reallocate per-channel filter state before stereo input.
hpf.Reset(2);
y = ProcessOneFrameAsVector(x_stereo, stream_config_stereo, &hpf);
hpf.Reset(1);
y = ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
// Reset() with no argument keeps the current channel count.
hpf.Reset();
y = ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf);
}
TEST(HighPassFilterAccuracyTest, MonoInitial) {
@ -140,9 +195,12 @@ TEST(HighPassFilterAccuracyTest, MonoInitial) {
0.073214f, -0.373256f, -0.115394f, 0.102109f,
0.976217f, 0.702270f, -0.457697f, 0.757116f};
RunBitexactnessTest(
1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
CreateVector(rtc::ArrayView<const float>(kReference)));
for (bool use_audio_buffer_interface : {true, false}) {
RunBitexactnessTest(
1, use_audio_buffer_interface,
CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
CreateVector(rtc::ArrayView<const float>(kReference)));
}
}
TEST(HighPassFilterAccuracyTest, MonoConverged) {
@ -232,9 +290,12 @@ TEST(HighPassFilterAccuracyTest, MonoConverged) {
0.127212f, 0.147464f, -0.221733f, -0.004484f,
-0.535107f, 0.385999f, -0.116346f, -0.265302f};
RunBitexactnessTest(
1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
CreateVector(rtc::ArrayView<const float>(kReference)));
for (bool use_audio_buffer_interface : {true, false}) {
RunBitexactnessTest(
1, use_audio_buffer_interface,
CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
CreateVector(rtc::ArrayView<const float>(kReference)));
}
}
} // namespace webrtc

View File

@ -254,6 +254,9 @@ class AudioProcessing : public rtc::RefCountInterface {
// default rate is currently selected based on the CPU architecture, but
// that logic may change.
int maximum_internal_processing_rate;
// Force multi-channel processing on playout and capture audio. This is an
// experimental feature, and is likely to change without warning.
bool experimental_multi_channel = false;
} pipeline;
// Enabled the pre-amplifier. It amplifies the capture signal

View File

@ -377,6 +377,11 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
if (settings_.use_ts) {
config.Set<ExperimentalNs>(new ExperimentalNs(*settings_.use_ts));
}
if (settings_.experimental_multi_channel) {
apm_config.pipeline.experimental_multi_channel =
*settings_.experimental_multi_channel;
}
if (settings_.use_agc2) {
apm_config.gain_controller2.enabled = *settings_.use_agc2;
if (settings_.agc2_fixed_gain_db) {

View File

@ -85,6 +85,7 @@ struct SimulationSettings {
absl::optional<bool> use_refined_adaptive_filter;
int initial_mic_level;
bool simulate_mic_gain = false;
absl::optional<bool> experimental_multi_channel;
absl::optional<int> simulated_mic_kind;
bool report_performance = false;
absl::optional<std::string> performance_report_output_filename;

View File

@ -215,6 +215,10 @@ ABSL_FLAG(int,
simulate_mic_gain,
0,
"Activate (1) or deactivate(0) the analog mic gain simulation");
ABSL_FLAG(int,
experimental_multi_channel,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) multi-channel audio in APM pipeline");
ABSL_FLAG(int,
simulated_mic_kind,
kParameterNotSpecifiedValue,
@ -437,6 +441,8 @@ SimulationSettings CreateSettings() {
SetSettingIfSpecified(absl::GetFlag(FLAGS_aec_settings),
&settings.aec_settings_filename);
settings.initial_mic_level = absl::GetFlag(FLAGS_initial_mic_level);
SetSettingIfFlagSet(absl::GetFlag(FLAGS_experimental_multi_channel),
&settings.experimental_multi_channel);
settings.simulate_mic_gain = absl::GetFlag(FLAGS_simulate_mic_gain);
SetSettingIfSpecified(absl::GetFlag(FLAGS_simulated_mic_kind),
&settings.simulated_mic_kind);