From dc5522b4bf4d729c2dfe269f84a5148212c57a88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Per=20=C3=85hgren?=
Date: Thu, 19 Mar 2020 14:55:58 +0100
Subject: [PATCH] APM: Removing the redundant VAD output from the integer API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This CL removes the redundant VAD output from the newly introduced
integer API in AudioProcessing.

Bug: webrtc:5298
Change-Id: Iad2b1b97ada7f4863139655526c110e326c6788a
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170824
Commit-Queue: Per Åhgren
Reviewed-by: Sam Zackrisson
Cr-Commit-Position: refs/heads/master@{#30832}
---
 audio/audio_state_unittest.cc                  |  5 +-
 .../aec_dump/aec_dump_integration_test.cc      |  5 +-
 .../audio_processing/audio_processing_impl.cc  | 13 +---
 .../audio_processing/audio_processing_impl.h   |  3 +-
 .../audio_processing_impl_locking_unittest.cc  |  3 +-
 .../audio_processing_impl_unittest.cc          | 41 ++++------
 .../audio_processing_unittest.cc               | 77 ++++++++-----------
 .../include/audio_frame_proxies.cc             | 16 ++--
 .../include/audio_processing.h                 |  5 +-
 .../include/mock_audio_processing.h            |  5 +-
 .../test/audio_processing_simulator.cc         |  9 +--
 modules/audio_processing/test/test_utils.h     |  3 -
 12 files changed, 67 insertions(+), 118 deletions(-)

diff --git a/audio/audio_state_unittest.cc b/audio/audio_state_unittest.cc
index 229a24d169..2a1018c120 100644
--- a/audio/audio_state_unittest.cc
+++ b/audio/audio_state_unittest.cc
@@ -25,6 +25,7 @@ namespace test {
 namespace {
 
 using ::testing::_;
+using ::testing::Matcher;
 
 constexpr int kSampleRate = 16000;
 constexpr int kNumberOfChannels = 1;
@@ -122,7 +123,7 @@ TEST(AudioStateTest, RecordedAudioArrivesAtSingleStream) {
       static_cast(audio_state->audio_processing());
   EXPECT_CALL(*ap, set_stream_delay_ms(0));
   EXPECT_CALL(*ap, set_stream_key_pressed(false));
-  EXPECT_CALL(*ap, ProcessStream(_, _, _, _, _));
+  EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher(_)));
 
   constexpr int kSampleRate = 16000;
   constexpr size_t kNumChannels = 2;
@@ -172,7 +173,7 @@ TEST(AudioStateTest, RecordedAudioArrivesAtMultipleStreams) {
       static_cast(audio_state->audio_processing());
   EXPECT_CALL(*ap, set_stream_delay_ms(5));
   EXPECT_CALL(*ap, set_stream_key_pressed(true));
-  EXPECT_CALL(*ap, ProcessStream(_, _, _, _, _));
+  EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher(_)));
 
   constexpr int kSampleRate = 16000;
   constexpr size_t kNumChannels = 1;
diff --git a/modules/audio_processing/aec_dump/aec_dump_integration_test.cc b/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
index 7b1f218e03..6d6b46655a 100644
--- a/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
+++ b/modules/audio_processing/aec_dump/aec_dump_integration_test.cc
@@ -74,8 +74,6 @@ TEST(AecDumpIntegration, CaptureStreamShouldBeLoggedOnceEveryProcessStream) {
   constexpr int kNumSamplesPerChannel = kNumSampleRateHz / 100;
   std::array frame;
   frame.fill(0.f);
-  webrtc::AudioProcessing::VoiceDetectionResult vad_result =
-      webrtc::AudioProcessing::VoiceDetectionResult::kNotAvailable;
   webrtc::StreamConfig stream_config(kNumSampleRateHz, kNumChannels,
                                      /*has_keyboard=*/false);
@@ -93,6 +91,5 @@ TEST(AecDumpIntegration, CaptureStreamShouldBeLoggedOnceEveryProcessStream) {
       .Times(Exactly(1));
 
   apm->AttachAecDump(std::move(mock_aec_dump));
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     &vad_result);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 }
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index c19d810fa2..ca73fcf900 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -1057,8 +1057,7 @@ void AudioProcessingImpl::EmptyQueuedRenderAudio() {
 int AudioProcessingImpl::ProcessStream(const int16_t* const src,
                                        const StreamConfig& input_config,
                                        const StreamConfig& output_config,
-                                       int16_t* const dest,
-                                       VoiceDetectionResult* vad_result) {
+                                       int16_t* const dest) {
   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
   RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
@@ -1082,16 +1081,6 @@ int AudioProcessingImpl::ProcessStream(const int16_t* const src,
     }
   }
 
-  if (vad_result) {
-    if (capture_.stats.voice_detected) {
-      *vad_result = *capture_.stats.voice_detected
-                        ? VoiceDetectionResult::kDetected
-                        : VoiceDetectionResult::kNotDetected;
-    } else {
-      *vad_result = VoiceDetectionResult::kNotAvailable;
-    }
-  }
-
   if (aec_dump_) {
     RecordProcessedCaptureStream(dest, output_config);
   }
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 4cc55c7791..28a8829a4d 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -87,8 +87,7 @@ class AudioProcessingImpl : public AudioProcessing {
   int ProcessStream(const int16_t* const src,
                     const StreamConfig& input_config,
                     const StreamConfig& output_config,
-                    int16_t* const dest,
-                    VoiceDetectionResult* vad_state) override;
+                    int16_t* const dest) override;
   int ProcessStream(const float* const* src,
                     const StreamConfig& input_config,
                     const StreamConfig& output_config,
diff --git a/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/modules/audio_processing/audio_processing_impl_locking_unittest.cc
index a4607164c2..f1e049d44a 100644
--- a/modules/audio_processing/audio_processing_impl_locking_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_locking_unittest.cc
@@ -668,8 +668,7 @@ void CaptureProcessor::CallApmCaptureSide() {
     case CaptureApiImpl::ProcessStreamImplInteger:
       result = apm_->ProcessStream(frame_data_.frame.data(), input_stream_config,
-                                   output_stream_config, frame_data_.frame.data(),
-                                   /*vad_result*/ nullptr);
+                                   output_stream_config, frame_data_.frame.data());
       break;
     case CaptureApiImpl::ProcessStreamImplFloat:
       result = apm_->ProcessStream(&frame_data_.input_frame[0],
diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc
index afc3ae56b1..a441e2f208 100644
--- a/modules/audio_processing/audio_processing_impl_unittest.cc
+++ b/modules/audio_processing/audio_processing_impl_unittest.cc
@@ -141,23 +141,20 @@ TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {
   StreamConfig config(16000, 1, /*has_keyboard=*/false);
   // Call with the default parameters; there should be an init.
   EXPECT_CALL(mock, InitializeLocked()).Times(0);
-  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
-                                  /*vad_result=*/nullptr));
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
   EXPECT_NOERR(
       mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
 
   // New sample rate. (Only impacts ProcessStream).
   config = StreamConfig(32000, 1, /*has_keyboard=*/false);
   EXPECT_CALL(mock, InitializeLocked()).Times(1);
-  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
-                                  /*vad_result=*/nullptr));
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
 
   // New number of channels.
   // TODO(peah): Investigate why this causes 2 inits.
   config = StreamConfig(32000, 2, /*has_keyboard=*/false);
   EXPECT_CALL(mock, InitializeLocked()).Times(2);
-  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
-                                  /*vad_result=*/nullptr));
+  EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
   // ProcessStream sets num_channels_ == num_output_channels.
   EXPECT_NOERR(
       mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
@@ -183,8 +180,7 @@ TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
   std::array frame;
   StreamConfig config(kSampleRateHz, kNumChannels, /*has_keyboard=*/false);
   frame.fill(kAudioLevel);
-  apm->ProcessStream(frame.data(), config, config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
   EXPECT_EQ(frame[100], kAudioLevel)
       << "With factor 1, frame shouldn't be modified.";
@@ -195,8 +191,7 @@ TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
   // Process for two frames to have time to ramp up gain.
   for (int i = 0; i < 2; ++i) {
     frame.fill(kAudioLevel);
-    apm->ProcessStream(frame.data(), config, config, frame.data(),
-                       /*vad_result=*/nullptr);
+    apm->ProcessStream(frame.data(), config, config, frame.data());
   }
   EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
       << "Frame should be amplified.";
@@ -234,8 +229,7 @@ TEST(AudioProcessingImplTest,
   EXPECT_CALL(*echo_control_mock,
               ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
       .Times(1);
-  apm->ProcessStream(frame.data(), config, config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
 
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock,
@@ -243,8 +237,7 @@ TEST(AudioProcessingImplTest,
       .Times(1);
   apm->SetRuntimeSetting(
       AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.f));
-  apm->ProcessStream(frame.data(), config, config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), config, config, frame.data());
 }
 
 TEST(AudioProcessingImplTest,
@@ -281,8 +274,7 @@ TEST(AudioProcessingImplTest,
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock, ProcessCapture(NotNull(), testing::_, false))
       .Times(1);
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 
   // Force an analog gain change if it did not happen.
   if (initial_analog_gain == apm->recommended_stream_analog_level()) {
@@ -292,8 +284,7 @@ TEST(AudioProcessingImplTest,
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock, ProcessCapture(NotNull(), testing::_, true))
       .Times(1);
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 }
 
 TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
@@ -326,8 +317,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
   EXPECT_CALL(*echo_control_mock,
               ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
       .Times(1);
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock,
@@ -335,8 +325,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
       .Times(1);
   apm->SetRuntimeSetting(
       AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock,
@@ -344,8 +333,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
      .Times(1);
   apm->SetRuntimeSetting(
       AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 
   EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
   EXPECT_CALL(*echo_control_mock,
@@ -353,8 +341,7 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
      .Times(1);
   apm->SetRuntimeSetting(
       AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(100));
-  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
-                     /*vad_result=*/nullptr);
+  apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
 }
 
 TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
@@ -406,7 +393,7 @@ TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
   frame.fill(kAudioLevel);
   ASSERT_EQ(AudioProcessing::Error::kNoError,
             apm->ProcessStream(frame.data(), stream_config, stream_config,
-                               frame.data(), /*vad_result=*/nullptr));
+                               frame.data()));
   // Regardless of how the call to in EchoDetector::AnalyzeRenderAudio() is
   // triggered, the line below checks that the call has occurred. If not, the
   // APM implementation may have changed and this test might need to be adapted.
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index a320fcd940..cdca7c3524 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -561,7 +561,7 @@ int ApmTest::ProcessStreamChooser(Format format) {
         frame_.data.data(),
         StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
         StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-        frame_.data.data(), &frame_.vad_activity);
+        frame_.data.data());
   }
   return apm_->ProcessStream(
       float_cb_->channels(),
@@ -646,8 +646,7 @@ void ApmTest::ProcessDelayVerificationTest(int delay_ms,
                                    process_frame->num_channels),
                       StreamConfig(process_frame->sample_rate_hz,
                                    process_frame->num_channels),
-                      process_frame->data.data(),
-                      &process_frame->vad_activity));
+                      process_frame->data.data()));
     frame = frame_queue.front();
     frame_queue.pop();
     delete frame;
@@ -753,7 +752,7 @@ void ApmTest::TestChangingChannelsInt16Interface(
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
     EXPECT_EQ(expected_return,
               apm_->ProcessReverseStream(
                   frame_.data.data(),
@@ -1052,7 +1051,7 @@ void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
     out_analog_level = apm_->recommended_stream_analog_level();
   }
 
@@ -1088,7 +1087,7 @@ void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
     out_analog_level = apm_->recommended_stream_analog_level();
   }
 
@@ -1108,7 +1107,7 @@ void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
     out_analog_level = apm_->recommended_stream_analog_level();
     // Check that AGC respected the manually adjusted volume.
     EXPECT_LT(out_analog_level, highest_level_reached);
@@ -1154,7 +1153,7 @@ TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
     EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
     EXPECT_EQ(apm_->kNoError,
               apm_->ProcessReverseStream(
@@ -1222,8 +1221,6 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
                                    revframe_.data.data()));
 
     CopyLeftToRightChannel(frame_.data.data(), frame_.samples_per_channel);
-    frame_.vad_activity =
-        AudioProcessing::VoiceDetectionResult::kNotAvailable;
 
     ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0));
     apm_->set_stream_analog_level(analog_level);
@@ -1232,7 +1229,7 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
     analog_level = apm_->recommended_stream_analog_level();
     VerifyChannelsAreEqual(frame_.data.data(), frame_.samples_per_channel);
 
@@ -1253,13 +1250,13 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   EXPECT_EQ(apm_->kNoError,
             apm_->ProcessStream(
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
 
   // 2. Only the level estimator is enabled...
@@ -1273,13 +1270,13 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
  EXPECT_EQ(apm_->kNoError,
            apm_->ProcessStream(
                frame_.data.data(),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-               frame_.data.data(), &frame_.vad_activity));
+               frame_.data.data()));
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
   apm_config.level_estimation.enabled = false;
   apm_->ApplyConfig(apm_config);
@@ -1294,13 +1291,13 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
  EXPECT_EQ(apm_->kNoError,
            apm_->ProcessStream(
                frame_.data.data(),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-               frame_.data.data(), &frame_.vad_activity));
+               frame_.data.data()));
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
   apm_config.voice_detection.enabled = false;
   apm_->ApplyConfig(apm_config);
@@ -1316,13 +1313,13 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
  EXPECT_EQ(apm_->kNoError,
            apm_->ProcessStream(
                frame_.data.data(),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-               frame_.data.data(), &frame_.vad_activity));
+               frame_.data.data()));
   EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
   apm_config.voice_detection.enabled = false;
   apm_config.level_estimation.enabled = false;
@@ -1344,7 +1341,7 @@ TEST_F(ApmTest, SplittingFilter) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   EXPECT_FALSE(FrameDataAreEqual(frame_, frame_copy));
 }
 
@@ -1535,7 +1532,7 @@ TEST_F(ApmTest, DebugDump) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   EXPECT_EQ(apm_->kNoError,
             apm_->ProcessReverseStream(
                 revframe_.data.data(),
@@ -1584,7 +1581,7 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) {
                 frame_.data.data(),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                 StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data(), &frame_.vad_activity));
+                frame_.data.data()));
   apm_->DetachAecDump();
 
   // Verify the file has been written.
@@ -1671,9 +1668,6 @@ TEST_F(ApmTest, Process) {
                       StreamConfig(revframe_.sample_rate_hz, revframe_.num_channels),
                       revframe_.data.data()));
 
-    frame_.vad_activity =
-        AudioProcessing::VoiceDetectionResult::kNotAvailable;
-
     EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
     apm_->set_stream_analog_level(analog_level);
 
@@ -1682,7 +1676,7 @@ TEST_F(ApmTest, Process) {
                   frame_.data.data(),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
                   StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                  frame_.data.data(), &frame_.vad_activity));
+                  frame_.data.data()));
 
     // Ensure the frame was downmixed properly.
     EXPECT_EQ(static_cast(test->num_output_channels()),
@@ -2473,7 +2467,7 @@ TEST(ApmConfiguration, EnablePostProcessing) {
   apm->ProcessStream(audio.data.data(),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
-                     audio.data.data(), &audio.vad_activity);
+                     audio.data.data());
 }
 
 TEST(ApmConfiguration, EnablePreProcessing) {
@@ -2517,7 +2511,7 @@ TEST(ApmConfiguration, EnableCaptureAnalyzer) {
   apm->ProcessStream(audio.data.data(),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
-                     audio.data.data(), &audio.vad_activity);
+                     audio.data.data());
 }
 
 TEST(ApmConfiguration, PreProcessingReceivesRuntimeSettings) {
@@ -2581,7 +2575,7 @@ TEST(ApmConfiguration, EchoControlInjection) {
   apm->ProcessStream(audio.data.data(),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
-                     audio.data.data(), &audio.vad_activity);
+                     audio.data.data());
   apm->ProcessReverseStream(
       audio.data.data(), StreamConfig(audio.sample_rate_hz, audio.num_channels),
       StreamConfig(audio.sample_rate_hz, audio.num_channels),
@@ -2589,7 +2583,7 @@ TEST(ApmConfiguration, EchoControlInjection) {
   apm->ProcessStream(audio.data.data(),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
                      StreamConfig(audio.sample_rate_hz, audio.num_channels),
-                     audio.data.data(), &audio.vad_activity);
+                     audio.data.data());
 }
 
 std::unique_ptr CreateApm(bool mobile_aec) {
@@ -2660,7 +2654,7 @@ TEST(MAYBE_ApmStatistics, AECEnabledTest) {
             frame.data.data(),
             StreamConfig(frame.sample_rate_hz, frame.num_channels),
            StreamConfig(frame.sample_rate_hz, frame.num_channels),
-            frame.data.data(), &frame.vad_activity),
+            frame.data.data()),
         0);
   }
 
@@ -2708,7 +2702,7 @@ TEST(MAYBE_ApmStatistics, AECMEnabledTest) {
            frame.data.data(),
            StreamConfig(frame.sample_rate_hz, frame.num_channels),
            StreamConfig(frame.sample_rate_hz, frame.num_channels),
-           frame.data.data(), &frame.vad_activity),
+           frame.data.data()),
        0);
   }
 
@@ -2754,7 +2748,7 @@ TEST(ApmStatistics, ReportOutputRmsDbfs) {
       apm->ProcessStream(frame.data.data(),
                          StreamConfig(frame.sample_rate_hz, frame.num_channels),
                          StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                         frame.data.data(), &frame.vad_activity),
+                         frame.data.data()),
       0);
 
   EXPECT_FALSE(apm->GetStatistics().output_rms_dbfs);
@@ -2765,7 +2759,7 @@ TEST(ApmStatistics, ReportOutputRmsDbfs) {
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
   auto stats = apm->GetStatistics();
   EXPECT_TRUE(stats.output_rms_dbfs);
@@ -2778,7 +2772,7 @@ TEST(ApmStatistics, ReportOutputRmsDbfs) {
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
   EXPECT_FALSE(apm->GetStatistics().output_rms_dbfs);
 }
@@ -2807,10 +2801,8 @@ TEST(ApmStatistics, ReportHasVoice) {
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
-  EXPECT_EQ(frame.vad_activity,
-            AudioProcessing::VoiceDetectionResult::kNotAvailable);
   EXPECT_FALSE(apm->GetStatistics().voice_detected);
 
   // If enabled, metrics should be reported.
@@ -2820,25 +2812,20 @@ TEST(ApmStatistics, ReportHasVoice) {
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
   auto stats = apm->GetStatistics();
-  EXPECT_EQ(frame.vad_activity,
-            AudioProcessing::VoiceDetectionResult::kDetected);
  EXPECT_TRUE(stats.voice_detected);
 
  // If re-disabled, the value is again not reported.
-  frame.vad_activity = AudioProcessing::VoiceDetectionResult::kNotAvailable;
  config.voice_detection.enabled = false;
  apm->ApplyConfig(config);
  EXPECT_EQ(
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                        frame.data.data(), &frame.vad_activity),
+                        frame.data.data()),
      0);
-  EXPECT_EQ(frame.vad_activity,
-            AudioProcessing::VoiceDetectionResult::kNotAvailable);
  EXPECT_FALSE(apm->GetStatistics().voice_detected);
 }
diff --git a/modules/audio_processing/include/audio_frame_proxies.cc b/modules/audio_processing/include/audio_frame_proxies.cc
index b298702535..b960e72e86 100644
--- a/modules/audio_processing/include/audio_frame_proxies.cc
+++ b/modules/audio_processing/include/audio_frame_proxies.cc
@@ -26,17 +26,15 @@ int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
                                    /*has_keyboard=*/false);
   RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());
 
-  AudioProcessing::VoiceDetectionResult vad_result =
-      AudioProcessing::VoiceDetectionResult::kNotAvailable;
-
   int result = ap->ProcessStream(frame->data(), input_config, output_config,
-                                 frame->mutable_data(), &vad_result);
+                                 frame->mutable_data());
 
-  if (vad_result != AudioProcessing::VoiceDetectionResult::kNotAvailable) {
-    frame->vad_activity_ =
-        vad_result == AudioProcessing::VoiceDetectionResult::kDetected
-            ? AudioFrame::VADActivity::kVadActive
-            : AudioFrame::VADActivity::kVadPassive;
+  AudioProcessingStats stats = ap->GetStatistics();
+
+  if (stats.voice_detected) {
+    frame->vad_activity_ = *stats.voice_detected
+                               ? AudioFrame::VADActivity::kVadActive
+                               : AudioFrame::VADActivity::kVadPassive;
   }
 
   return result;
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index fa943c43b5..953ccebaba 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -369,8 +369,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
     kStereoAndKeyboard
   };
 
-  enum class VoiceDetectionResult { kNotAvailable, kDetected, kNotDetected };
-
   // Specifies the properties of a setting to be passed to AudioProcessing at
   // runtime.
   class RuntimeSetting {
@@ -543,8 +541,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
   virtual int ProcessStream(const int16_t* const src,
                             const StreamConfig& input_config,
                             const StreamConfig& output_config,
-                            int16_t* const dest,
-                            VoiceDetectionResult* vad_result) = 0;
+                            int16_t* const dest) = 0;
 
   // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
   // |src| points to a channel buffer, arranged according to |input_stream|. At
diff --git a/modules/audio_processing/include/mock_audio_processing.h b/modules/audio_processing/include/mock_audio_processing.h
index 518087a264..f3e936185f 100644
--- a/modules/audio_processing/include/mock_audio_processing.h
+++ b/modules/audio_processing/include/mock_audio_processing.h
@@ -82,12 +82,11 @@ class MockAudioProcessing : public ::testing::NiceMock {
   MOCK_METHOD1(set_output_will_be_muted, void(bool muted));
   MOCK_METHOD1(SetRuntimeSetting, void(RuntimeSetting setting));
   MOCK_METHOD1(ProcessStream, int(AudioFrame* frame));
-  MOCK_METHOD5(ProcessStream,
+  MOCK_METHOD4(ProcessStream,
                int(const int16_t* const src,
                    const StreamConfig& input_config,
                    const StreamConfig& output_config,
-                   int16_t* const dest,
-                   VoiceDetectionResult* const vad_result));
+                   int16_t* const dest));
   MOCK_METHOD7(ProcessStream,
                int(const float* const* src,
                    size_t samples_per_channel,
diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc
index b1d7150dfa..a37a83f1e3 100644
--- a/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/modules/audio_processing/test/audio_processing_simulator.cc
@@ -178,11 +178,10 @@ void AudioProcessingSimulator::ProcessStream(bool fixed_interface) {
     {
       const auto st = ScopedTimer(&api_call_statistics_,
                                   ApiCallStatistics::CallType::kCapture);
-      AudioProcessing::VoiceDetectionResult vad_result;
-      RTC_CHECK_EQ(AudioProcessing::kNoError,
-                   ap_->ProcessStream(fwd_frame_.data.data(), fwd_frame_.config,
-                                      fwd_frame_.config, fwd_frame_.data.data(),
-                                      &vad_result));
+      RTC_CHECK_EQ(
+          AudioProcessing::kNoError,
+          ap_->ProcessStream(fwd_frame_.data.data(), fwd_frame_.config,
+                             fwd_frame_.config, fwd_frame_.data.data()));
     }
     fwd_frame_.CopyTo(out_buf_.get());
   } else {
diff --git a/modules/audio_processing/test/test_utils.h b/modules/audio_processing/test/test_utils.h
index 24679e2a00..e2d243eec4 100644
--- a/modules/audio_processing/test/test_utils.h
+++ b/modules/audio_processing/test/test_utils.h
@@ -54,14 +54,12 @@ struct Int16FrameData {
     sample_rate_hz = 0;
     num_channels = 0;
     samples_per_channel = 0;
-    vad_activity = AudioProcessing::VoiceDetectionResult::kNotAvailable;
    data.fill(0);
  }

  void CopyFrom(const Int16FrameData& src) {
    samples_per_channel = src.samples_per_channel;
    sample_rate_hz = src.sample_rate_hz;
-    vad_activity = src.vad_activity;
    num_channels = src.num_channels;

    const size_t length = samples_per_channel * num_channels;
@@ -72,7 +70,6 @@ struct Int16FrameData {
  int32_t sample_rate_hz;
  size_t num_channels;
  size_t samples_per_channel;
-  AudioProcessing::VoiceDetectionResult vad_activity;
 };

 // Reads ChannelBuffers from a provided WavReader.
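
Usage sketch (illustrative; not part of the patch): the migration this CL implies for callers of the integer ProcessStream() overload is to read voice activity from GetStatistics() instead of a VoiceDetectionResult out-parameter, mirroring the updated audio_frame_proxies.cc. The helper name ProcessAndReadVad, the 16 kHz mono configuration, and the assumption of an already-configured AudioProcessing instance with voice detection enabled are illustrative only.

    #include <array>
    #include <cstdint>

    #include "modules/audio_processing/include/audio_processing.h"

    // Processes one 10 ms mono frame in place and returns true if the APM
    // statistics report detected voice for that frame.
    bool ProcessAndReadVad(webrtc::AudioProcessing* apm, int16_t* audio_10ms) {
      webrtc::StreamConfig config(/*sample_rate_hz=*/16000, /*num_channels=*/1,
                                  /*has_keyboard=*/false);
      // The integer overload now takes only src, the two stream configs and dest.
      if (apm->ProcessStream(audio_10ms, config, config, audio_10ms) !=
          webrtc::AudioProcessing::kNoError) {
        return false;
      }
      // Voice activity is exposed via the statistics; voice_detected is unset
      // when the voice detection submodule is disabled.
      webrtc::AudioProcessingStats stats = apm->GetStatistics();
      return stats.voice_detected && *stats.voice_detected;
    }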