From dc5522b4bf4d729c2dfe269f84a5148212c57a88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Per=20=C3=85hgren?=
Date: Thu, 19 Mar 2020 14:55:58 +0100
Subject: [PATCH] APM: Removing the redundant VAD output from the integer API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This CL removes the redundant VAD output from the newly introduced
integer API in AudioProcessing.

Bug: webrtc:5298
Change-Id: Iad2b1b97ada7f4863139655526c110e326c6788a
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170824
Commit-Queue: Per Åhgren
Reviewed-by: Sam Zackrisson
Cr-Commit-Position: refs/heads/master@{#30832}
---
 audio/audio_state_unittest.cc                  |  5 +-
 .../aec_dump/aec_dump_integration_test.cc      |  5 +-
 .../audio_processing/audio_processing_impl.cc  | 13 +---
 .../audio_processing/audio_processing_impl.h   |  3 +-
 .../audio_processing_impl_locking_unittest.cc  |  3 +-
 .../audio_processing_impl_unittest.cc          | 41 ++++------
 .../audio_processing_unittest.cc               | 77 ++++++++-----------
 .../include/audio_frame_proxies.cc             | 16 ++--
 .../include/audio_processing.h                 |  5 +-
 .../include/mock_audio_processing.h            |  5 +-
 .../test/audio_processing_simulator.cc         |  9 +--
 modules/audio_processing/test/test_utils.h     |  3 -
 12 files changed, 67 insertions(+), 118 deletions(-)

diff --git a/audio/audio_state_unittest.cc b/audio/audio_state_unittest.cc
index 229a24d169..2a1018c120 100644
--- a/audio/audio_state_unittest.cc
+++ b/audio/audio_state_unittest.cc
@@ -25,6 +25,7 @@ namespace test {
 namespace {
 
 using ::testing::_;
+using ::testing::Matcher;
 
 constexpr int kSampleRate = 16000;
 constexpr int kNumberOfChannels = 1;
@@ -122,7 +123,7 @@ TEST(AudioStateTest, RecordedAudioArrivesAtSingleStream) {
       static_cast(audio_state->audio_processing());
   EXPECT_CALL(*ap, set_stream_delay_ms(0));
   EXPECT_CALL(*ap, set_stream_key_pressed(false));
-  EXPECT_CALL(*ap, ProcessStream(_, _, _, _, _));
+  EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher(_)));
 
   constexpr int kSampleRate = 16000;
   constexpr size_t kNumChannels = 2;
@@ -172,7 +173,7 @@ TEST(AudioStateTest, RecordedAudioArrivesAtMultipleStreams) {
       static_cast(audio_state->audio_processing());
   EXPECT_CALL(*ap, set_stream_delay_ms(5));
   EXPECT_CALL(*ap, set_stream_key_pressed(true));
-  EXPECT_CALL(*ap, ProcessStream(_, _, _, _, _));
+  EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher(_)));
 
   constexpr int kSampleRate = 16000;
   constexpr size_t kNumChannels = 1;
diff --git a/modules/audio_processing/aec_dump/aec_dump_integration_test.cc b/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
index 7b1f218e03..6d6b46655a 100644
--- a/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
+++ b/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
@@ -74,8 +74,6 @@ TEST(AecDumpIntegration, CaptureStreamShouldBeLoggedOnceEveryProcessStream) {
   constexpr int kNumSamplesPerChannel = kNumSampleRateHz / 100;
   std::array frame;
   frame.fill(0.f);
-  webrtc::AudioProcessing::VoiceDetectionResult vad_result =
-      webrtc::AudioProcessing::VoiceDetectionResult::kNotAvailable;
   webrtc::StreamConfig stream_config(kNumSampleRateHz, kNumChannels,
                                      /*has_keyboard=*/false);
@@ -93,6 +91,5 @@ TEST(AecDumpIntegration, CaptureStreamShouldBeLoggedOnceEveryProcessStream) {
       .Times(Exactly(1));
 
   apm->AttachAecDump(std::move(mock_aec_dump));
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     &vad_result);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 }
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index c19d810fa2..ca73fcf900 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -1057,8 +1057,7 @@ void AudioProcessingImpl::EmptyQueuedRenderAudio() {
 int AudioProcessingImpl::ProcessStream(const int16_t* const src,
                                        const StreamConfig& input_config,
                                        const StreamConfig& output_config,
-                                       int16_t* const dest,
-                                       VoiceDetectionResult* vad_result) {
+                                       int16_t* const dest) {
   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
   RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
@@ -1082,16 +1081,6 @@ int AudioProcessingImpl::ProcessStream(const int16_t* const src,
     }
   }
 
-  if (vad_result) {
-    if (capture_.stats.voice_detected) {
-      *vad_result = *capture_.stats.voice_detected
-                        ? VoiceDetectionResult::kDetected
-                        : VoiceDetectionResult::kNotDetected;
-    } else {
-      *vad_result = VoiceDetectionResult::kNotAvailable;
-    }
-  }
-
   if (aec_dump_) {
     RecordProcessedCaptureStream(dest, output_config);
   }
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 4cc55c7791..28a8829a4d 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -87,8 +87,7 @@ class AudioProcessingImpl : public AudioProcessing {
   int ProcessStream(const int16_t* const src,
                     const StreamConfig& input_config,
                     const StreamConfig& output_config,
-                    int16_t* const dest,
-                    VoiceDetectionResult* vad_state) override;
+                    int16_t* const dest) override;
   int ProcessStream(const float* const* src,
                     const StreamConfig& input_config,
                     const StreamConfig& output_config,
diff --git a/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/modules/audio_processing/audio_processing_impl_locking_unittest.cc
index a4607164c2..f1e049d44a 100644
--- a/modules/audio_processing/audio_processing_impl_locking_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_locking_unittest.cc
@@ -668,8 +668,7 @@ void CaptureProcessor::CallApmCaptureSide() {
     case CaptureApiImpl::ProcessStreamImplInteger:
       result = apm_->ProcessStream(frame_data_.frame.data(), input_stream_config,
-                                   output_stream_config, frame_data_.frame.data(),
-                                   /*vad_result*/ nullptr);
+                                   output_stream_config, frame_data_.frame.data());
       break;
     case CaptureApiImpl::ProcessStreamImplFloat:
       result = apm_->ProcessStream(&frame_data_.input_frame[0],
diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc
index afc3ae56b1..a441e2f208 100644
--- a/modules/audio_processing/audio_processing_impl_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_unittest.cc
@@ -141,23 +141,20 @@ TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {
   StreamConfig config(16000, 1, /*has_keyboard=*/false);
   // Call with the default parameters; there should be an init.
   EXPECT_CALL(mock, InitializeLocked()).Times(0);
-  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
-                                  /*vad_result=*/nullptr));
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
   EXPECT_NOERR(
       mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
 
   // New sample rate. (Only impacts ProcessStream).
   config = StreamConfig(32000, 1, /*has_keyboard=*/false);
   EXPECT_CALL(mock, InitializeLocked()).Times(1);
-  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
-                                  /*vad_result=*/nullptr));
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
 
   // New number of channels.
   // TODO(peah): Investigate why this causes 2 inits.
   config = StreamConfig(32000, 2, /*has_keyboard=*/false);
   EXPECT_CALL(mock, InitializeLocked()).Times(2);
-  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
-                                  /*vad_result=*/nullptr));
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
   // ProcessStream sets num_channels_ == num_output_channels.
   EXPECT_NOERR(
       mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
@@ -183,8 +180,7 @@ TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
   std::array frame;
   StreamConfig config(kSampleRateHz, kNumChannels, /*has_keyboard=*/false);
   frame.fill(kAudioLevel);
-  apm->ProcessStream(frame.data(), config, config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
   EXPECT_EQ(frame[100], kAudioLevel)
       << "With factor 1, frame shouldn't be modified.";
@@ -195,8 +191,7 @@ TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
   // Process for two frames to have time to ramp up gain.
   for (int i = 0; i < 2; ++i) {
     frame.fill(kAudioLevel);
-    apm->ProcessStream(frame.data(), config, config, frame.data(),
-                       /*vad_result=*/nullptr);
+    apm->ProcessStream(frame.data(), config, config, frame.data());
   }
   EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
       << "Frame should be amplified.";
@@ -234,8 +229,7 @@ TEST(AudioProcessingImplTest,
   EXPECT_CALL(*echo_control_mock,
               ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
       .Times(1);
-  apm->ProcessStream(frame.data(), config, config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
 
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock,
@@ -243,8 +237,7 @@ TEST(AudioProcessingImplTest,
       .Times(1);
   apm->SetRuntimeSetting(
       AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.f));
-  apm->ProcessStream(frame.data(), config, config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
 }
 
 TEST(AudioProcessingImplTest,
@@ -281,8 +274,7 @@ TEST(AudioProcessingImplTest,
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock, ProcessCapture(NotNull(), testing::_, false))
       .Times(1);
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 
   // Force an analog gain change if it did not happen.
   if (initial_analog_gain == apm->recommended_stream_analog_level()) {
@@ -292,8 +284,7 @@ TEST(AudioProcessingImplTest,
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock, ProcessCapture(NotNull(), testing::_, true))
       .Times(1);
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 }
 
 TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
@@ -326,8 +317,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
   EXPECT_CALL(*echo_control_mock,
               ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
       .Times(1);
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock,
@@ -335,8 +325,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
       .Times(1);
   apm->SetRuntimeSetting(
       AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock,
@@ -344,8 +333,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
      .Times(1);
   apm->SetRuntimeSetting(
       AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock,
@@ -353,8 +341,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
      .Times(1);
   apm->SetRuntimeSetting(
       AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(100));
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 }
 
 TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
@@ -406,7 +393,7 @@ TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
   frame.fill(kAudioLevel);
   ASSERT_EQ(AudioProcessing::Error::kNoError,
             apm->ProcessStream(frame.data(), stream_config, stream_config,
-                               frame.data(), /*vad_result=*/nullptr));
+                               frame.data()));
   // Regardless of how the call to in EchoDetector::AnalyzeRenderAudio() is
   // triggered, the line below checks that the call has occurred. If not, the
   // APM implementation may have changed and this test might need to be adapted.
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index a320fcd940..cdca7c3524 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -561,7 +561,7 @@ int ApmTest::ProcessStreamChooser(Format format) {
         frame_.data.data(),
         StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
         StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-        frame_.data.data(), &frame_.vad_activity);
+        frame_.data.data());
   }
   return apm_->ProcessStream(
       float_cb_->channels(),
@@ -646,8 +646,7 @@ void ApmTest::ProcessDelayVerificationTest(int delay_ms,
                                    process_frame->num_channels),
                       StreamConfig(process_frame->sample_rate_hz,
                                    process_frame->num_channels),
-                      process_frame->data.data(),
-                      &process_frame->vad_activity));
+                      process_frame->data.data()));
     frame = frame_queue.front();
     frame_queue.pop();
     delete frame;
@@ -753,7 +752,7 @@ void ApmTest::TestChangingChannelsInt16Interface(
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
     EXPECT_EQ(expected_return,
               apm_->ProcessReverseStream(
                   frame_.data.data(),
@@ -1052,7 +1051,7 @@ void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
     out_analog_level = apm_->recommended_stream_analog_level();
   }
 
@@ -1088,7 +1087,7 @@ void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
     out_analog_level = apm_->recommended_stream_analog_level();
   }
 
@@ -1108,7 +1107,7 @@ void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
     out_analog_level = apm_->recommended_stream_analog_level();
     // Check that AGC respected the manually adjusted volume.
     EXPECT_LT(out_analog_level, highest_level_reached);
@@ -1154,7 +1153,7 @@ TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
     EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
     EXPECT_EQ(apm_->kNoError,
               apm_->ProcessReverseStream(
@@ -1222,8 +1221,6 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
                                    revframe_.data.data()));
 
     CopyLeftToRightChannel(frame_.data.data(), frame_.samples_per_channel);
-    frame_.vad_activity =
-        AudioProcessing::VoiceDetectionResult::kNotAvailable;
 
     ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0));
     apm_->set_stream_analog_level(analog_level);
@@ -1232,7 +1229,7 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
     analog_level = apm_->recommended_stream_analog_level();
     VerifyChannelsAreEqual(frame_.data.data(), frame_.samples_per_channel);
 
@@ -1253,13 +1250,13 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   EXPECT_EQ(apm_->kNoError,
             apm_->ProcessStream(
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
 
   // 2. Only the level estimator is enabled...
@@ -1273,13 +1270,13 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
  EXPECT_EQ(apm_->kNoError,
            apm_->ProcessStream(
                frame_.data.data(),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-               frame_.data.data(), &frame_.vad_activity));
+               frame_.data.data()));
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
   apm_config.level_estimation.enabled = false;
   apm_->ApplyConfig(apm_config);
@@ -1294,13 +1291,13 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
  EXPECT_EQ(apm_->kNoError,
            apm_->ProcessStream(
                frame_.data.data(),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-               frame_.data.data(), &frame_.vad_activity));
+               frame_.data.data()));
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
   apm_config.voice_detection.enabled = false;
   apm_->ApplyConfig(apm_config);
@@ -1316,13 +1313,13 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
  EXPECT_EQ(apm_->kNoError,
            apm_->ProcessStream(
                frame_.data.data(),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-               frame_.data.data(), &frame_.vad_activity));
+               frame_.data.data()));
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
   apm_config.voice_detection.enabled = false;
   apm_config.level_estimation.enabled = false;
@@ -1344,7 +1341,7 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   EXPECT_FALSE(FrameDataAreEqual(frame_, frame_copy));
 }
 
@@ -1535,7 +1532,7 @@ TEST_F(ApmTest, DebugDump) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   EXPECT_EQ(apm_->kNoError,
             apm_->ProcessReverseStream(
                 revframe_.data.data(),
@@ -1584,7 +1581,7 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   apm_->DetachAecDump();
 
   // Verify the file has been written.
@@ -1671,9 +1668,6 @@ TEST_F(ApmTest, Process) {
                       StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
                       revframe_.data.data()));
 
-    frame_.vad_activity =
-        AudioProcessing::VoiceDetectionResult::kNotAvailable;
-
     EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
     apm_->set_stream_analog_level(analog_level);
 
@@ -1682,7 +1676,7 @@ TEST_F(ApmTest, Process) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
 
     // Ensure the frame was downmixed properly.
     EXPECT_EQ(static_cast(test->num_output_channels()),
@@ -2473,7 +2467,7 @@ TEST(ApmConfiguration, EnablePostProcessing) {
   apm->ProcessStream(audio.data.data(),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
-                     audio.data.data(), &audio.vad_activity);
+                     audio.data.data());
 }
 
 TEST(ApmConfiguration, EnablePreProcessing) {
@@ -2517,7 +2511,7 @@ TEST(ApmConfiguration, EnableCaptureAnalyzer) {
   apm->ProcessStream(audio.data.data(),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
-                     audio.data.data(), &audio.vad_activity);
+                     audio.data.data());
 }
 
 TEST(ApmConfiguration, PreProcessingReceivesRuntimeSettings) {
@@ -2581,7 +2575,7 @@ TEST(ApmConfiguration, EchoControlInjection) {
   apm->ProcessStream(audio.data.data(),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
-                     audio.data.data(), &audio.vad_activity);
+                     audio.data.data());
   apm->ProcessReverseStream(
       audio.data.data(), StreamConfig(audio.sample_rate_hz, audio.num_channels),
       StreamConfig(audio.sample_rate_hz, audio.num_channels),
@@ -2589,7 +2583,7 @@ TEST(ApmConfiguration, EchoControlInjection) {
   apm->ProcessStream(audio.data.data(),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
-                     audio.data.data(), &audio.vad_activity);
+                     audio.data.data());
 }
 
 std::unique_ptr CreateApm(bool mobile_aec) {
@@ -2660,7 +2654,7 @@ TEST(MAYBE_ApmStatistics, AECEnabledTest) {
             frame.data.data(),
             StreamConfig(frame.sample_rate_hz, frame.num_channels),
            StreamConfig(frame.sample_rate_hz, frame.num_channels),
-            frame.data.data(), &frame.vad_activity),
+            frame.data.data()),
         0);
   }
 
@@ -2708,7 +2702,7 @@ TEST(MAYBE_ApmStatistics, AECMEnabledTest) {
            frame.data.data(),
            StreamConfig(frame.sample_rate_hz, frame.num_channels),
            StreamConfig(frame.sample_rate_hz, frame.num_channels),
-           frame.data.data(), &frame.vad_activity),
+           frame.data.data()),
        0);
   }
 
@@ -2754,7 +2748,7 @@ TEST(ApmStatistics, ReportOutputRmsDbfs) {
       apm->ProcessStream(frame.data.data(),
                          StreamConfig(frame.sample_rate_hz, frame.num_channels),
                          StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                         frame.data.data(), &frame.vad_activity),
+                         frame.data.data()),
       0);
 
   EXPECT_FALSE(apm->GetStatistics().output_rms_dbfs);
@@ -2765,7 +2759,7 @@ TEST(ApmStatistics, ReportOutputRmsDbfs) {
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
   auto stats = apm->GetStatistics();
   EXPECT_TRUE(stats.output_rms_dbfs);
@@ -2778,7 +2772,7 @@ TEST(ApmStatistics, ReportOutputRmsDbfs) {
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
   EXPECT_FALSE(apm->GetStatistics().output_rms_dbfs);
 }
@@ -2807,10 +2801,8 @@ TEST(ApmStatistics, ReportHasVoice) {
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
-  EXPECT_EQ(frame.vad_activity,
-            AudioProcessing::VoiceDetectionResult::kNotAvailable);
   EXPECT_FALSE(apm->GetStatistics().voice_detected);
 
   // If enabled, metrics should be reported.
@@ -2820,25 +2812,20 @@ TEST(ApmStatistics, ReportHasVoice) {
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
   auto stats = apm->GetStatistics();
-  EXPECT_EQ(frame.vad_activity,
-            AudioProcessing::VoiceDetectionResult::kDetected);
  EXPECT_TRUE(stats.voice_detected);
 
  // If re-disabled, the value is again not reported.
-  frame.vad_activity = AudioProcessing::VoiceDetectionResult::kNotAvailable;
  config.voice_detection.enabled = false;
  apm->ApplyConfig(config);
  EXPECT_EQ(
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
-  EXPECT_EQ(frame.vad_activity,
-            AudioProcessing::VoiceDetectionResult::kNotAvailable);
  EXPECT_FALSE(apm->GetStatistics().voice_detected);
 }
diff --git a/modules/audio_processing/include/audio_frame_proxies.cc b/modules/audio_processing/include/audio_frame_proxies.cc
index b298702535..b960e72e86 100644
--- a/modules/audio_processing/include/audio_frame_proxies.cc
+++ b/modules/audio_processing/include/audio_frame_proxies.cc
@@ -26,17 +26,15 @@ int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
                                    /*has_keyboard=*/false);
   RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());
 
-  AudioProcessing::VoiceDetectionResult vad_result =
-      AudioProcessing::VoiceDetectionResult::kNotAvailable;
-
   int result = ap->ProcessStream(frame->data(), input_config, output_config,
-                                 frame->mutable_data(), &vad_result);
+                                 frame->mutable_data());
 
-  if (vad_result != AudioProcessing::VoiceDetectionResult::kNotAvailable) {
-    frame->vad_activity_ =
-        vad_result == AudioProcessing::VoiceDetectionResult::kDetected
-            ? AudioFrame::VADActivity::kVadActive
-            : AudioFrame::VADActivity::kVadPassive;
+  AudioProcessingStats stats = ap->GetStatistics();
+
+  if (stats.voice_detected) {
+    frame->vad_activity_ = *stats.voice_detected
+                               ? AudioFrame::VADActivity::kVadActive
+                               : AudioFrame::VADActivity::kVadPassive;
   }
 
   return result;
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index fa943c43b5..953ccebaba 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -369,8 +369,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
     kStereoAndKeyboard
   };
 
-  enum class VoiceDetectionResult { kNotAvailable, kDetected, kNotDetected };
-
   // Specifies the properties of a setting to be passed to AudioProcessing at
   // runtime.
   class RuntimeSetting {
@@ -543,8 +541,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
   virtual int ProcessStream(const int16_t* const src,
                             const StreamConfig& input_config,
                             const StreamConfig& output_config,
-                            int16_t* const dest,
-                            VoiceDetectionResult* vad_result) = 0;
+                            int16_t* const dest) = 0;
 
   // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
   // |src| points to a channel buffer, arranged according to |input_stream|. At
diff --git a/modules/audio_processing/include/mock_audio_processing.h b/modules/audio_processing/include/mock_audio_processing.h
index 518087a264..f3e936185f 100644
--- a/modules/audio_processing/include/mock_audio_processing.h
+++ b/modules/audio_processing/include/mock_audio_processing.h
@@ -82,12 +82,11 @@ class MockAudioProcessing : public ::testing::NiceMock {
   MOCK_METHOD1(set_output_will_be_muted, void(bool muted));
   MOCK_METHOD1(SetRuntimeSetting, void(RuntimeSetting setting));
   MOCK_METHOD1(ProcessStream, int(AudioFrame* frame));
-  MOCK_METHOD5(ProcessStream,
+  MOCK_METHOD4(ProcessStream,
                int(const int16_t* const src,
                    const StreamConfig& input_config,
                    const StreamConfig& output_config,
-                   int16_t* const dest,
-                   VoiceDetectionResult* const vad_result));
+                   int16_t* const dest));
   MOCK_METHOD7(ProcessStream,
                int(const float* const* src,
                    size_t samples_per_channel,
diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc
index b1d7150dfa..a37a83f1e3 100644
--- a/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/modules/audio_processing/test/audio_processing_simulator.cc
@@ -178,11 +178,10 @@ void AudioProcessingSimulator::ProcessStream(bool fixed_interface) {
     {
       const auto st = ScopedTimer(&api_call_statistics_,
                                   ApiCallStatistics::CallType::kCapture);
-      AudioProcessing::VoiceDetectionResult vad_result;
-      RTC_CHECK_EQ(AudioProcessing::kNoError,
-                   ap_->ProcessStream(fwd_frame_.data.data(), fwd_frame_.config,
-                                      fwd_frame_.config, fwd_frame_.data.data(),
-                                      &vad_result));
+      RTC_CHECK_EQ(
+          AudioProcessing::kNoError,
+          ap_->ProcessStream(fwd_frame_.data.data(), fwd_frame_.config,
+                             fwd_frame_.config, fwd_frame_.data.data()));
     }
     fwd_frame_.CopyTo(out_buf_.get());
   } else {
diff --git a/modules/audio_processing/test/test_utils.h b/modules/audio_processing/test/test_utils.h
index 24679e2a00..e2d243eec4 100644
--- a/modules/audio_processing/test/test_utils.h
+++ b/modules/audio_processing/test/test_utils.h
@@ -54,14 +54,12 @@ struct Int16FrameData {
     sample_rate_hz = 0;
     num_channels = 0;
     samples_per_channel = 0;
-    vad_activity = AudioProcessing::VoiceDetectionResult::kNotAvailable;
    data.fill(0);
  }

  void CopyFrom(const Int16FrameData& src) {
    samples_per_channel = src.samples_per_channel;
    sample_rate_hz = src.sample_rate_hz;
-    vad_activity = src.vad_activity;
    num_channels = src.num_channels;

    const size_t length = samples_per_channel * num_channels;
@@ -72,7 +70,6 @@ struct Int16FrameData {
  int32_t sample_rate_hz;
  size_t num_channels;
  size_t samples_per_channel;
-  AudioProcessing::VoiceDetectionResult vad_activity;
 };

 // Reads ChannelBuffers from a provided WavReader.
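
Usage sketch (illustrative; not part of the patch): the migration this CL implies for callers of the integer ProcessStream() overload is to read voice activity from GetStatistics() instead of a VoiceDetectionResult out-parameter, mirroring the updated audio_frame_proxies.cc. The helper name ProcessAndReadVad, the 16 kHz mono configuration, and the assumption of an already-configured AudioProcessing instance with voice detection enabled are illustrative only.

    #include <array>
    #include <cstdint>

    #include "modules/audio_processing/include/audio_processing.h"

    // Processes one 10 ms mono frame in place and returns true if the APM
    // statistics report detected voice for that frame.
    bool ProcessAndReadVad(webrtc::AudioProcessing* apm, int16_t* audio_10ms) {
      webrtc::StreamConfig config(/*sample_rate_hz=*/16000, /*num_channels=*/1,
                                  /*has_keyboard=*/false);
      // The integer overload now takes only src, the two stream configs and dest.
      if (apm->ProcessStream(audio_10ms, config, config, audio_10ms) !=
          webrtc::AudioProcessing::kNoError) {
        return false;
      }
      // Voice activity is exposed via the statistics; voice_detected is unset
      // when the voice detection submodule is disabled.
      webrtc::AudioProcessingStats stats = apm->GetStatistics();
      return stats.voice_detected && *stats.voice_detected;
    }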