diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc index cf953ae205..85b2dc7630 100644 --- a/modules/audio_processing/aec3/echo_canceller3.cc +++ b/modules/audio_processing/aec3/echo_canceller3.cc @@ -198,9 +198,9 @@ EchoCanceller3::RenderWriter::RenderWriter( EchoCanceller3::RenderWriter::~RenderWriter() = default; void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) { - RTC_DCHECK_EQ(1, input.num_channels()); RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band()); RTC_DCHECK_EQ(num_bands_, input.num_bands()); + RTC_DCHECK_EQ(num_channels_, input.num_channels()); // TODO(bugs.webrtc.org/8759) Temporary work-around. if (num_bands_ != input.num_bands()) @@ -211,9 +211,7 @@ void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) { CopyBufferIntoFrame(input, num_bands_, num_channels_, &render_queue_input_frame_); - for (size_t channel = 0; channel < num_channels_; ++channel) { - high_pass_filter_.Process(render_queue_input_frame_[0][channel]); - } + high_pass_filter_.Process(&render_queue_input_frame_[0]); static_cast(render_transfer_queue_->Insert(&render_queue_input_frame_)); } @@ -321,7 +319,6 @@ void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) { void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) { RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); RTC_DCHECK(capture); - RTC_DCHECK_EQ(1u, capture->num_channels()); RTC_DCHECK_EQ(num_bands_, capture->num_bands()); RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band()); RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_); diff --git a/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/modules/audio_processing/aec3/echo_canceller3_unittest.cc index a2f3367394..c7a71c34fc 100644 --- a/modules/audio_processing/aec3/echo_canceller3_unittest.cc +++ b/modules/audio_processing/aec3/echo_canceller3_unittest.cc @@ -215,7 +215,7 @@ class EchoCanceller3Tester { std::unique_ptr( new RenderTransportVerificationProcessor(num_bands_))); - std::vector render_input; + std::vector> render_input(1); std::vector capture_output; for (size_t frame_index = 0; frame_index < kNumFramesToProcess; ++frame_index) { @@ -227,7 +227,7 @@ class EchoCanceller3Tester { &render_buffer_.split_bands(0)[0], 0); for (size_t k = 0; k < frame_length_; ++k) { - render_input.push_back(render_buffer_.split_bands(0)[0][k]); + render_input[0].push_back(render_buffer_.split_bands(0)[0][k]); } aec3.AnalyzeRender(&render_buffer_); aec3.ProcessCapture(&capture_buffer_, false); @@ -236,10 +236,10 @@ class EchoCanceller3Tester { } } HighPassFilter hp_filter(1); - hp_filter.Process(render_input); + hp_filter.Process(&render_input); EXPECT_TRUE( - VerifyOutputFrameBitexactness(render_input, capture_output, -64)); + VerifyOutputFrameBitexactness(render_input[0], capture_output, -64)); } // Verifies that information about echo path changes are properly propagated @@ -492,7 +492,7 @@ class EchoCanceller3Tester { std::unique_ptr( new RenderTransportVerificationProcessor(num_bands_))); - std::vector render_input; + std::vector> render_input(1); std::vector capture_output; for (size_t frame_index = 0; frame_index < kRenderTransferQueueSizeFrames; @@ -508,7 +508,7 @@ class EchoCanceller3Tester { } for (size_t k = 0; k < frame_length_; ++k) { - render_input.push_back(render_buffer_.split_bands(0)[0][k]); + render_input[0].push_back(render_buffer_.split_bands(0)[0][k]); } aec3.AnalyzeRender(&render_buffer_); } @@ -529,10 +529,10 @@ class EchoCanceller3Tester { } } HighPassFilter hp_filter(1); - hp_filter.Process(render_input); + hp_filter.Process(&render_input); EXPECT_TRUE( - VerifyOutputFrameBitexactness(render_input, capture_output, -64)); + VerifyOutputFrameBitexactness(render_input[0], capture_output, -64)); } // This test verifies that a buffer overrun in the render swapqueue is diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index cdc37c698a..b1187fab1f 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -48,6 +48,7 @@ #include "rtc_base/ref_counted_object.h" #include "rtc_base/time_utils.h" #include "rtc_base/trace_event.h" +#include "system_wrappers/include/field_trial.h" #include "system_wrappers/include/metrics.h" #define RETURN_ON_ERR(expr) \ @@ -348,8 +349,12 @@ AudioProcessing* AudioProcessingBuilder::Create(const webrtc::Config& config) { } AudioProcessingImpl::AudioProcessingImpl(const webrtc::Config& config) - : AudioProcessingImpl(config, nullptr, nullptr, nullptr, nullptr, nullptr) { -} + : AudioProcessingImpl(config, + /*capture_post_processor=*/nullptr, + /*render_pre_processor=*/nullptr, + /*echo_control_factory=*/nullptr, + /*echo_detector=*/nullptr, + /*capture_analyzer=*/nullptr) {} int AudioProcessingImpl::instance_count_ = 0; @@ -382,13 +387,17 @@ AudioProcessingImpl::AudioProcessingImpl( /* enabled= */ false, /* enabled_agc2_level_estimator= */ false, /* digital_adaptive_disabled= */ false, - /* analyze_before_aec= */ false), + /* analyze_before_aec= */ false, #else config.Get().enabled, config.Get().enabled_agc2_level_estimator, config.Get().digital_adaptive_disabled, - config.Get().analyze_before_aec), + config.Get().analyze_before_aec, #endif + !field_trial::IsEnabled( + "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"), + !field_trial::IsEnabled( + "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch")), #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) capture_(false), #else @@ -630,10 +639,18 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { RTC_DCHECK_NE(8000, render_processing_rate); - // Always downmix the render stream to mono for analysis. This has been - // demonstrated to work well for AEC in most practical scenarios. if (submodule_states_.RenderMultiBandSubModulesActive()) { - formats_.render_processing_format = StreamConfig(render_processing_rate, 1); + // By default, downmix the render stream to mono for analysis. This has been + // demonstrated to work well for AEC in most practical scenarios. + const bool experimental_multi_channel_render = + config_.pipeline.experimental_multi_channel && + constants_.experimental_multi_channel_render_support; + int render_processing_num_channels = + experimental_multi_channel_render + ? formats_.api_format.reverse_input_stream().num_channels() + : 1; + formats_.render_processing_format = + StreamConfig(render_processing_rate, render_processing_num_channels); } else { formats_.render_processing_format = StreamConfig( formats_.api_format.reverse_input_stream().sample_rate_hz(), @@ -658,6 +675,10 @@ void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) { rtc::CritScope cs_render(&crit_render_); rtc::CritScope cs_capture(&crit_capture_); + const bool pipeline_config_changed = + config_.pipeline.experimental_multi_channel != + config.pipeline.experimental_multi_channel; + const bool aec_config_changed = config_.echo_canceller.enabled != config.echo_canceller.enabled || config_.echo_canceller.use_legacy_aec != @@ -733,6 +754,12 @@ void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) { private_submodules_->voice_detector->Initialize( proc_split_sample_rate_hz()); } + + // Reinitialization must happen after all submodule configuration to avoid + // additional reinitializations on the next capture / render processing call. + if (pipeline_config_changed) { + InitializeLocked(formats_.api_format); + } } void AudioProcessingImpl::ApplyAgc1Config( @@ -809,7 +836,14 @@ size_t AudioProcessingImpl::num_input_channels() const { size_t AudioProcessingImpl::num_proc_channels() const { // Used as callback from submodules, hence locking is not allowed. - return capture_nonlocked_.echo_controller_enabled ? 1 : num_output_channels(); + const bool experimental_multi_channel_capture = + config_.pipeline.experimental_multi_channel && + constants_.experimental_multi_channel_capture_support; + if (capture_nonlocked_.echo_controller_enabled && + !experimental_multi_channel_capture) { + return 1; + } + return num_output_channels(); } size_t AudioProcessingImpl::num_output_channels() const { @@ -1338,7 +1372,11 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer->SplitIntoFrequencyBands(); } - if (private_submodules_->echo_controller) { + const bool experimental_multi_channel_capture = + config_.pipeline.experimental_multi_channel && + constants_.experimental_multi_channel_capture_support; + if (private_submodules_->echo_controller && + !experimental_multi_channel_capture) { // Force down-mixing of the number of channels after the detection of // capture signal saturation. // TODO(peah): Look into ensuring that this kind of tampering with the @@ -1846,8 +1884,8 @@ void AudioProcessingImpl::InitializeEchoController() { echo_control_factory_->Create(proc_sample_rate_hz()); } else { private_submodules_->echo_controller = std::make_unique( - EchoCanceller3Config(), proc_sample_rate_hz(), - /*num_render_channels=*/1, /*num_capture_channels=*/1); + EchoCanceller3Config(), proc_sample_rate_hz(), num_reverse_channels(), + num_proc_channels()); } capture_nonlocked_.echo_controller_enabled = true; diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index 6bda06a002..4d5b3681d7 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -354,9 +354,10 @@ class AudioProcessingImpl : public AudioProcessing { bool use_experimental_agc, bool use_experimental_agc_agc2_level_estimation, bool use_experimental_agc_agc2_digital_adaptive, - bool use_experimental_agc_process_before_aec) - : // Format of processing streams at input/output call sites. - agc_startup_min_volume(agc_startup_min_volume), + bool use_experimental_agc_process_before_aec, + bool experimental_multi_channel_render_support, + bool experimental_multi_channel_capture_support) + : agc_startup_min_volume(agc_startup_min_volume), agc_clipped_level_min(agc_clipped_level_min), use_experimental_agc(use_experimental_agc), use_experimental_agc_agc2_level_estimation( @@ -364,14 +365,19 @@ class AudioProcessingImpl : public AudioProcessing { use_experimental_agc_agc2_digital_adaptive( use_experimental_agc_agc2_digital_adaptive), use_experimental_agc_process_before_aec( - use_experimental_agc_process_before_aec) {} + use_experimental_agc_process_before_aec), + experimental_multi_channel_render_support( + experimental_multi_channel_render_support), + experimental_multi_channel_capture_support( + experimental_multi_channel_capture_support) {} int agc_startup_min_volume; int agc_clipped_level_min; bool use_experimental_agc; bool use_experimental_agc_agc2_level_estimation; bool use_experimental_agc_agc2_digital_adaptive; bool use_experimental_agc_process_before_aec; - + bool experimental_multi_channel_render_support; + bool experimental_multi_channel_capture_support; } constants_; struct ApmCaptureState { diff --git a/modules/audio_processing/high_pass_filter.cc b/modules/audio_processing/high_pass_filter.cc index 306bcbd776..bd953e319d 100644 --- a/modules/audio_processing/high_pass_filter.cc +++ b/modules/audio_processing/high_pass_filter.cc @@ -45,9 +45,11 @@ void HighPassFilter::Process(AudioBuffer* audio) { } } -void HighPassFilter::Process(rtc::ArrayView audio) { - RTC_DCHECK_EQ(filters_.size(), 1); - filters_[0]->Process(audio); +void HighPassFilter::Process(std::vector>* audio) { + RTC_DCHECK_EQ(filters_.size(), audio->size()); + for (size_t k = 0; k < audio->size(); ++k) { + filters_[k]->Process((*audio)[k]); + } } void HighPassFilter::Reset() { diff --git a/modules/audio_processing/high_pass_filter.h b/modules/audio_processing/high_pass_filter.h index b0682061bf..87105cebaf 100644 --- a/modules/audio_processing/high_pass_filter.h +++ b/modules/audio_processing/high_pass_filter.h @@ -30,9 +30,7 @@ class HighPassFilter { HighPassFilter& operator=(const HighPassFilter&) = delete; void Process(AudioBuffer* audio); - // Only to be used when the number of channels are 1. - // TODO(peah): Add support for more channels. - void Process(rtc::ArrayView audio); + void Process(std::vector>* audio); void Reset(); void Reset(size_t num_channels); diff --git a/modules/audio_processing/high_pass_filter_unittest.cc b/modules/audio_processing/high_pass_filter_unittest.cc index 4025454d18..56ccb950b2 100644 --- a/modules/audio_processing/high_pass_filter_unittest.cc +++ b/modules/audio_processing/high_pass_filter_unittest.cc @@ -20,10 +20,12 @@ namespace webrtc { namespace { -// Process one frame of data and produce the output. -std::vector ProcessOneFrame(const std::vector& frame_input, - const StreamConfig& stream_config, - HighPassFilter* high_pass_filter) { +// Process one frame of data via the AudioBuffer interface and produce the +// output. +std::vector ProcessOneFrameAsAudioBuffer( + const std::vector& frame_input, + const StreamConfig& stream_config, + HighPassFilter* high_pass_filter) { AudioBuffer audio_buffer( stream_config.sample_rate_hz(), stream_config.num_channels(), stream_config.sample_rate_hz(), stream_config.num_channels(), @@ -37,9 +39,40 @@ std::vector ProcessOneFrame(const std::vector& frame_input, return frame_output; } +// Process one frame of data via the vector interface and produce the output. +std::vector ProcessOneFrameAsVector( + const std::vector& frame_input, + const StreamConfig& stream_config, + HighPassFilter* high_pass_filter) { + std::vector> process_vector( + stream_config.num_channels(), + std::vector(stream_config.num_frames())); + + for (size_t k = 0; k < stream_config.num_frames(); ++k) { + for (size_t channel = 0; channel < stream_config.num_channels(); + ++channel) { + process_vector[channel][k] = + frame_input[k * stream_config.num_channels() + channel]; + } + } + + high_pass_filter->Process(&process_vector); + + std::vector output; + for (size_t k = 0; k < stream_config.num_frames(); ++k) { + for (size_t channel = 0; channel < stream_config.num_channels(); + ++channel) { + output.push_back(process_vector[channel][k]); + } + } + + return process_vector[0]; +} + // Processes a specified amount of frames, verifies the results and reports // any errors. void RunBitexactnessTest(int num_channels, + bool use_audio_buffer_interface, const std::vector& input, const std::vector& reference) { const StreamConfig stream_config(16000, num_channels, false); @@ -55,8 +88,13 @@ void RunBitexactnessTest(int num_channels, stream_config.num_channels() * frame_no, input.begin() + stream_config.num_frames() * stream_config.num_channels() * (frame_no + 1)); - - output = ProcessOneFrame(frame_input, stream_config, &high_pass_filter); + if (use_audio_buffer_interface) { + output = ProcessOneFrameAsAudioBuffer(frame_input, stream_config, + &high_pass_filter); + } else { + output = ProcessOneFrameAsVector(frame_input, stream_config, + &high_pass_filter); + } } // Form vector to compare the reference to. Only the last frame processed @@ -92,19 +130,36 @@ std::vector CreateVector(const rtc::ArrayView& array_view) { } } // namespace -TEST(HighPassFilterAccuracyTest, Reset) { +TEST(HighPassFilterAccuracyTest, ResetWithAudioBufferInterface) { const StreamConfig stream_config_stereo(16000, 2, false); const StreamConfig stream_config_mono(16000, 1, false); std::vector x_mono(160, 1.f); std::vector x_stereo(320, 1.f); - HighPassFilter lc(1); - std::vector y = ProcessOneFrame(x_mono, stream_config_mono, &lc); - lc.Reset(2); - y = ProcessOneFrame(x_stereo, stream_config_stereo, &lc); - lc.Reset(1); - y = ProcessOneFrame(x_mono, stream_config_mono, &lc); - lc.Reset(); - y = ProcessOneFrame(x_mono, stream_config_mono, &lc); + HighPassFilter hpf(1); + std::vector y = + ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf); + hpf.Reset(2); + y = ProcessOneFrameAsAudioBuffer(x_stereo, stream_config_stereo, &hpf); + hpf.Reset(1); + y = ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf); + hpf.Reset(); + y = ProcessOneFrameAsAudioBuffer(x_mono, stream_config_mono, &hpf); +} + +TEST(HighPassFilterAccuracyTest, ResetWithVectorInterface) { + const StreamConfig stream_config_stereo(16000, 2, false); + const StreamConfig stream_config_mono(16000, 1, false); + std::vector x_mono(160, 1.f); + std::vector x_stereo(320, 1.f); + HighPassFilter hpf(1); + std::vector y = + ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf); + hpf.Reset(2); + y = ProcessOneFrameAsVector(x_stereo, stream_config_stereo, &hpf); + hpf.Reset(1); + y = ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf); + hpf.Reset(); + y = ProcessOneFrameAsVector(x_mono, stream_config_mono, &hpf); } TEST(HighPassFilterAccuracyTest, MonoInitial) { @@ -140,9 +195,12 @@ TEST(HighPassFilterAccuracyTest, MonoInitial) { 0.073214f, -0.373256f, -0.115394f, 0.102109f, 0.976217f, 0.702270f, -0.457697f, 0.757116f}; - RunBitexactnessTest( - 1, CreateVector(rtc::ArrayView(kReferenceInput)), - CreateVector(rtc::ArrayView(kReference))); + for (bool use_audio_buffer_interface : {true, false}) { + RunBitexactnessTest( + 1, use_audio_buffer_interface, + CreateVector(rtc::ArrayView(kReferenceInput)), + CreateVector(rtc::ArrayView(kReference))); + } } TEST(HighPassFilterAccuracyTest, MonoConverged) { @@ -232,9 +290,12 @@ TEST(HighPassFilterAccuracyTest, MonoConverged) { 0.127212f, 0.147464f, -0.221733f, -0.004484f, -0.535107f, 0.385999f, -0.116346f, -0.265302f}; - RunBitexactnessTest( - 1, CreateVector(rtc::ArrayView(kReferenceInput)), - CreateVector(rtc::ArrayView(kReference))); + for (bool use_audio_buffer_interface : {true, false}) { + RunBitexactnessTest( + 1, use_audio_buffer_interface, + CreateVector(rtc::ArrayView(kReferenceInput)), + CreateVector(rtc::ArrayView(kReference))); + } } } // namespace webrtc diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index 114bfcd402..e063e95499 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -254,6 +254,9 @@ class AudioProcessing : public rtc::RefCountInterface { // default rate is currently selected based on the CPU architecture, but // that logic may change. int maximum_internal_processing_rate; + // Force multi-channel processing on playout and capture audio. This is an + // experimental feature, and is likely to change without warning. + bool experimental_multi_channel = false; } pipeline; // Enabled the pre-amplifier. It amplifies the capture signal diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index e3534cc471..e0b7730084 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -377,6 +377,11 @@ void AudioProcessingSimulator::CreateAudioProcessor() { if (settings_.use_ts) { config.Set(new ExperimentalNs(*settings_.use_ts)); } + if (settings_.experimental_multi_channel) { + apm_config.pipeline.experimental_multi_channel = + *settings_.experimental_multi_channel; + } + if (settings_.use_agc2) { apm_config.gain_controller2.enabled = *settings_.use_agc2; if (settings_.agc2_fixed_gain_db) { diff --git a/modules/audio_processing/test/audio_processing_simulator.h b/modules/audio_processing/test/audio_processing_simulator.h index d4915939e1..270cdcc0e0 100644 --- a/modules/audio_processing/test/audio_processing_simulator.h +++ b/modules/audio_processing/test/audio_processing_simulator.h @@ -85,6 +85,7 @@ struct SimulationSettings { absl::optional use_refined_adaptive_filter; int initial_mic_level; bool simulate_mic_gain = false; + absl::optional experimental_multi_channel; absl::optional simulated_mic_kind; bool report_performance = false; absl::optional performance_report_output_filename; diff --git a/modules/audio_processing/test/audioproc_float_impl.cc b/modules/audio_processing/test/audioproc_float_impl.cc index a96641b416..d24b881968 100644 --- a/modules/audio_processing/test/audioproc_float_impl.cc +++ b/modules/audio_processing/test/audioproc_float_impl.cc @@ -215,6 +215,10 @@ ABSL_FLAG(int, simulate_mic_gain, 0, "Activate (1) or deactivate(0) the analog mic gain simulation"); +ABSL_FLAG(int, + experimental_multi_channel, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) multi-channel audio in APM pipeline"); ABSL_FLAG(int, simulated_mic_kind, kParameterNotSpecifiedValue, @@ -437,6 +441,8 @@ SimulationSettings CreateSettings() { SetSettingIfSpecified(absl::GetFlag(FLAGS_aec_settings), &settings.aec_settings_filename); settings.initial_mic_level = absl::GetFlag(FLAGS_initial_mic_level); + SetSettingIfFlagSet(absl::GetFlag(FLAGS_experimental_multi_channel), + &settings.experimental_multi_channel); settings.simulate_mic_gain = absl::GetFlag(FLAGS_simulate_mic_gain); SetSettingIfSpecified(absl::GetFlag(FLAGS_simulated_mic_kind), &settings.simulated_mic_kind);