From d2b9740f484235d9aee0e26a14742f00d46cc68e Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Thu, 9 Aug 2018 14:23:11 +0200 Subject: [PATCH] APM: render pre-processor moved before echo detector queuing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Any modification of the render stream now happens *before* the echo detector enqueues render stream frames. In this way, there is no impact of the render pre-processor on the echo likelihood metric. Bug: webrtc:9591 Change-Id: I9b5e339e892796a0d0cd072fdd45d35ec89d8802 Reviewed-on: https://webrtc-review.googlesource.com/93031 Reviewed-by: Per Åhgren Commit-Queue: Alessio Bazzica Cr-Commit-Position: refs/heads/master@{#24251} --- .../audio_processing/audio_processing_impl.cc | 10 +- .../audio_processing_impl_unittest.cc | 148 +++++++++++++++--- 2 files changed, 134 insertions(+), 24 deletions(-) diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 5e2bad31ae..4865afa45b 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -1099,10 +1099,10 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame"); { // Acquire the capture lock in order to safely call the function - // that retrieves the render side data. This function accesses apm + // that retrieves the render side data. This function accesses APM // getters that need the capture lock held when being called. // The lock needs to be released as - // public_submodules_->echo_control_mobile->is_enabled() aquires this lock + // public_submodules_->echo_control_mobile->is_enabled() acquires this lock // as well. rtc::CritScope cs_capture(&crit_capture_); EmptyQueuedRenderAudio(); @@ -1481,14 +1481,14 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { int AudioProcessingImpl::ProcessRenderStreamLocked() { AudioBuffer* render_buffer = render_.render_audio.get(); // For brevity. - QueueNonbandedRenderAudio(render_buffer); - HandleRenderRuntimeSettings(); if (private_submodules_->render_pre_processor) { private_submodules_->render_pre_processor->Process(render_buffer); } + QueueNonbandedRenderAudio(render_buffer); + if (submodule_states_.RenderMultiBandSubModulesActive() && SampleRateSupportsMultiBand( formats_.render_processing_format.sample_rate_hz())) { @@ -1506,7 +1506,7 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() { QueueBandedRenderAudio(render_buffer); } - // TODO(peah): Perform the queueing ínside QueueRenderAudiuo(). + // TODO(peah): Perform the queuing inside QueueRenderAudiuo(). if (private_submodules_->echo_controller) { private_submodules_->echo_controller->AnalyzeRender(render_buffer); } diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc index 3ac5e71291..8926f0223f 100644 --- a/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/modules/audio_processing/audio_processing_impl_unittest.cc @@ -10,7 +10,10 @@ #include "modules/audio_processing/audio_processing_impl.h" +#include "modules/audio_processing/include/audio_processing.h" #include "modules/audio_processing/test/test_utils.h" +#include "rtc_base/refcountedobject.h" +#include "rtc_base/scoped_ref_ptr.h" #include "test/gmock.h" #include "test/gtest.h" @@ -33,24 +36,83 @@ class MockInitialize : public AudioProcessingImpl { MOCK_CONST_METHOD0(Release, rtc::RefCountReleaseStatus()); }; -void GenerateFixedFrame(int16_t audio_level, - size_t input_rate, - size_t num_channels, - AudioFrame* fixed_frame) { +void InitializeAudioFrame(size_t input_rate, + size_t num_channels, + AudioFrame* frame) { const size_t samples_per_input_channel = rtc::CheckedDivExact( input_rate, static_cast(rtc::CheckedDivExact( 1000, AudioProcessing::kChunkSizeMs))); - fixed_frame->samples_per_channel_ = samples_per_input_channel; - fixed_frame->sample_rate_hz_ = input_rate; - fixed_frame->num_channels_ = num_channels; - RTC_DCHECK_LE(samples_per_input_channel * num_channels, AudioFrame::kMaxDataSizeSamples); - for (size_t i = 0; i < samples_per_input_channel * num_channels; ++i) { - fixed_frame->mutable_data()[i] = audio_level; + frame->samples_per_channel_ = samples_per_input_channel; + frame->sample_rate_hz_ = input_rate; + frame->num_channels_ = num_channels; +} + +void FillFixedFrame(int16_t audio_level, AudioFrame* frame) { + const size_t num_samples = frame->samples_per_channel_ * frame->num_channels_; + for (size_t i = 0; i < num_samples; ++i) { + frame->mutable_data()[i] = audio_level; } } +// Mocks EchoDetector and records the first samples of the last analyzed render +// stream frame. Used to check what data is read by an EchoDetector +// implementation injected into an APM. +class TestEchoDetector : public EchoDetector { + public: + TestEchoDetector() + : analyze_render_audio_called_(false), + last_render_audio_first_sample_(0.f) {} + ~TestEchoDetector() override = default; + void AnalyzeRenderAudio(rtc::ArrayView render_audio) override { + last_render_audio_first_sample_ = render_audio[0]; + analyze_render_audio_called_ = true; + } + void AnalyzeCaptureAudio(rtc::ArrayView capture_audio) override { + } + void Initialize(int capture_sample_rate_hz, + int num_capture_channels, + int render_sample_rate_hz, + int num_render_channels) override {} + EchoDetector::Metrics GetMetrics() const override { return {}; } + // Returns true if AnalyzeRenderAudio() has been called at least once. + bool analyze_render_audio_called() const { + return analyze_render_audio_called_; + } + // Returns the first sample of the last analyzed render frame. + float last_render_audio_first_sample() const { + return last_render_audio_first_sample_; + } + + private: + bool analyze_render_audio_called_; + float last_render_audio_first_sample_; +}; + +// Mocks CustomProcessing and applies ProcessSample() to all the samples. +// Meant to be injected into an APM to modify samples in a known and detectable +// way. +class TestRenderPreProcessor : public CustomProcessing { + public: + TestRenderPreProcessor() = default; + ~TestRenderPreProcessor() = default; + void Initialize(int sample_rate_hz, int num_channels) override {} + void Process(AudioBuffer* audio) override { + for (size_t k = 0; k < audio->num_channels(); ++k) { + rtc::ArrayView channel_view(audio->channels_f()[k], + audio->num_frames()); + std::transform(channel_view.begin(), channel_view.end(), + channel_view.begin(), ProcessSample); + } + }; + std::string ToString() const override { return "TestRenderPreProcessor"; } + void SetRuntimeSetting(AudioProcessing::RuntimeSetting setting) override {} + // Modifies a sample. This member is used in Process() to modify a frame and + // it is publicly visible to enable tests. + static constexpr float ProcessSample(float x) { return 2.f * x; } +}; + } // namespace TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) { @@ -98,26 +160,74 @@ TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) { apm->ApplyConfig(apm_config); AudioFrame frame; - constexpr int16_t audio_level = 10000; - constexpr size_t input_rate = 48000; - constexpr size_t num_channels = 2; + constexpr int16_t kAudioLevel = 10000; + constexpr size_t kSampleRateHz = 48000; + constexpr size_t kNumChannels = 2; + InitializeAudioFrame(kSampleRateHz, kNumChannels, &frame); - GenerateFixedFrame(audio_level, input_rate, num_channels, &frame); + FillFixedFrame(kAudioLevel, &frame); apm->ProcessStream(&frame); - EXPECT_EQ(frame.data()[100], audio_level) + EXPECT_EQ(frame.data()[100], kAudioLevel) << "With factor 1, frame shouldn't be modified."; - constexpr float gain_factor = 2.f; + constexpr float kGainFactor = 2.f; apm->SetRuntimeSetting( - AudioProcessing::RuntimeSetting::CreateCapturePreGain(gain_factor)); + AudioProcessing::RuntimeSetting::CreateCapturePreGain(kGainFactor)); // Process for two frames to have time to ramp up gain. for (int i = 0; i < 2; ++i) { - GenerateFixedFrame(audio_level, input_rate, num_channels, &frame); + FillFixedFrame(kAudioLevel, &frame); apm->ProcessStream(&frame); } - EXPECT_EQ(frame.data()[100], gain_factor * audio_level) + EXPECT_EQ(frame.data()[100], kGainFactor * kAudioLevel) << "Frame should be amplified."; } +TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) { + // Make sure that signal changes caused by a render pre-processing sub-module + // take place before any echo detector analysis. + rtc::scoped_refptr test_echo_detector( + new rtc::RefCountedObject()); + std::unique_ptr test_render_pre_processor( + new TestRenderPreProcessor()); + // Create APM injecting the test echo detector and render pre-processor. + std::unique_ptr apm( + AudioProcessingBuilder() + .SetEchoDetector(test_echo_detector) + .SetRenderPreProcessing(std::move(test_render_pre_processor)) + .Create()); + webrtc::AudioProcessing::Config apm_config; + apm_config.pre_amplifier.enabled = true; + apm_config.residual_echo_detector.enabled = true; + apm->ApplyConfig(apm_config); + + constexpr int16_t kAudioLevel = 1000; + constexpr int kSampleRateHz = 16000; + constexpr size_t kNumChannels = 1; + AudioFrame frame; + InitializeAudioFrame(kSampleRateHz, kNumChannels, &frame); + + constexpr float kAudioLevelFloat = static_cast(kAudioLevel); + constexpr float kExpectedPreprocessedAudioLevel = + TestRenderPreProcessor::ProcessSample(kAudioLevelFloat); + ASSERT_NE(kAudioLevelFloat, kExpectedPreprocessedAudioLevel); + + // Analyze a render stream frame. + FillFixedFrame(kAudioLevel, &frame); + ASSERT_EQ(AudioProcessing::Error::kNoError, + apm->ProcessReverseStream(&frame)); + // Trigger a call to in EchoDetector::AnalyzeRenderAudio() via + // ProcessStream(). + FillFixedFrame(kAudioLevel, &frame); + ASSERT_EQ(AudioProcessing::Error::kNoError, apm->ProcessStream(&frame)); + // Regardless of how the call to in EchoDetector::AnalyzeRenderAudio() is + // triggered, the line below checks that the call has occurred. If not, the + // APM implementation may have changed and this test might need to be adapted. + ASSERT_TRUE(test_echo_detector->analyze_render_audio_called()); + // Check that the data read in EchoDetector::AnalyzeRenderAudio() is that + // produced by the render pre-processor. + EXPECT_EQ(kExpectedPreprocessedAudioLevel, + test_echo_detector->last_render_audio_first_sample()); +} + } // namespace webrtc