Remove unused APM voice activity detection sub-module

API changes: - webrtc::AudioProcessing::Config::VoiceDetection removed - webrtc::AudioProcessingStats::voice_detected deprecated - cricket::AudioOptions::typing_detection deprecated - webrtc::StatsReport::StatsValueName::kStatsValueNameTypingNoiseState deprecated PSA: https://groups.google.com/g/discuss-webrtc/c/7X6uwmJarE0 Bug: webrtc:11226,webrtc:11292 Change-Id: I8d008b56708cf62961b9857ec052b59fda3b41bf Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/250666 Reviewed-by: Harald Alvestrand <hta@webrtc.org> Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Reviewed-by: Björn Terelius <terelius@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/main@{#35975}
2022-02-11 10:51:01 +01:00
parent cc5532f95a
commit b4e06d032e
23 changed files with 18 additions and 483 deletions
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@ -190,7 +190,6 @@ void EnableAllAPComponents(AudioProcessing* ap) {
  apm_config.noise_suppression.enabled = true;

  apm_config.high_pass_filter.enabled = true;
-  apm_config.voice_detection.enabled = true;
  apm_config.pipeline.maximum_internal_processing_rate = 48000;
  ap->ApplyConfig(apm_config);
 }
@ -1226,7 +1225,6 @@ TEST_F(ApmTest, AllProcessingDisabledByDefault) {
  EXPECT_FALSE(config.high_pass_filter.enabled);
  EXPECT_FALSE(config.gain_controller1.enabled);
  EXPECT_FALSE(config.noise_suppression.enabled);
-  EXPECT_FALSE(config.voice_detection.enabled);
 }

 TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) {
@ -1367,48 +1365,6 @@ TEST_F(ApmTest, SplittingFilter) {
  EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
  apm_->ApplyConfig(apm_config);

-  // 3. Only GetStatistics-reporting VAD is enabled...
-  SetFrameTo(&frame_, 1000);
-  frame_copy.CopyFrom(frame_);
-  apm_config.voice_detection.enabled = true;
-  apm_->ApplyConfig(apm_config);
-  EXPECT_EQ(apm_->kNoError,
-            apm_->ProcessStream(
-                frame_.data.data(),
-                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data()));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->ProcessStream(
-                frame_.data.data(),
-                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data()));
-  EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
-  apm_config.voice_detection.enabled = false;
-  apm_->ApplyConfig(apm_config);
-
-  // 4. The VAD is enabled...
-  SetFrameTo(&frame_, 1000);
-  frame_copy.CopyFrom(frame_);
-  apm_config.voice_detection.enabled = true;
-  apm_->ApplyConfig(apm_config);
-  EXPECT_EQ(apm_->kNoError,
-            apm_->ProcessStream(
-                frame_.data.data(),
-                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data()));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->ProcessStream(
-                frame_.data.data(),
-                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
-                frame_.data.data()));
-  EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
-  apm_config.voice_detection.enabled = false;
-  apm_->ApplyConfig(apm_config);
-
  // Check the test is valid. We should have distortion from the filter
  // when AEC is enabled (which won't affect the audio).
  apm_config.echo_canceller.enabled = true;
@ -1736,7 +1692,6 @@ TEST_F(ApmTest, Process) {
         static_cast<size_t>(test->num_reverse_channels()), true);

    int frame_count = 0;
-    int has_voice_count = 0;
    int analog_level = 127;
    int analog_level_average = 0;
    int max_output_average = 0;
@ -1772,8 +1727,6 @@ TEST_F(ApmTest, Process) {
      analog_level = apm_->recommended_stream_analog_level();
      analog_level_average += analog_level;
      AudioProcessingStats stats = apm_->GetStatistics();
-      EXPECT_TRUE(stats.voice_detected);
-      has_voice_count += *stats.voice_detected ? 1 : 0;

      size_t frame_size = frame_.samples_per_channel * frame_.num_channels;
      size_t write_count =
@ -1829,33 +1782,23 @@ TEST_F(ApmTest, Process) {

    if (!absl::GetFlag(FLAGS_write_apm_ref_data)) {
      const int kIntNear = 1;
-      // When running the test on a N7 we get a {2, 6} difference of
-      // `has_voice_count` and `max_output_average` is up to 18 higher.
-      // All numbers being consistently higher on N7 compare to ref_data.
+      // All numbers being consistently higher on N7 compare to the reference
+      // data.
      // TODO(bjornv): If we start getting more of these offsets on Android we
      // should consider a different approach. Either using one slack for all,
      // or generate a separate android reference.
 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
-      const int kHasVoiceCountOffset = 3;
-      const int kHasVoiceCountNear = 8;
      const int kMaxOutputAverageOffset = 9;
      const int kMaxOutputAverageNear = 26;
 #else
-      const int kHasVoiceCountOffset = 0;
-      const int kHasVoiceCountNear = kIntNear;
      const int kMaxOutputAverageOffset = 0;
      const int kMaxOutputAverageNear = kIntNear;
 #endif
-      EXPECT_NEAR(test->has_voice_count(),
-                  has_voice_count - kHasVoiceCountOffset, kHasVoiceCountNear);
-
      EXPECT_NEAR(test->analog_level_average(), analog_level_average, kIntNear);
      EXPECT_NEAR(test->max_output_average(),
                  max_output_average - kMaxOutputAverageOffset,
                  kMaxOutputAverageNear);
    } else {
-      test->set_has_voice_count(has_voice_count);
-
      test->set_analog_level_average(analog_level_average);
      test->set_max_output_average(max_output_average);
    }
@ -2685,7 +2628,6 @@ rtc::scoped_refptr<AudioProcessing> CreateApm(bool mobile_aec) {
  apm_config.echo_canceller.enabled = true;
  apm_config.echo_canceller.mobile_mode = mobile_aec;
  apm_config.noise_suppression.enabled = false;
-  apm_config.voice_detection.enabled = false;
  apm->ApplyConfig(apm_config);
  return apm;
 }
@ -2794,10 +2736,9 @@ TEST(MAYBE_ApmStatistics, AECMEnabledTest) {
  EXPECT_FALSE(stats.echo_return_loss_enhancement.has_value());
 }

-TEST(ApmStatistics, ReportHasVoice) {
+TEST(ApmStatistics, DoNotReportVoiceDetectedStat) {
  ProcessingConfig processing_config = {
      {{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}};
-  AudioProcessing::Config config;

  // Set up an audioframe.
  Int16FrameData frame;
@ -2814,37 +2755,14 @@ TEST(ApmStatistics, ReportHasVoice) {
      AudioProcessingBuilderForTesting().Create();
  apm->Initialize(processing_config);

-  // If not enabled, no metric should be reported.
+  // No metric should be reported.
  EXPECT_EQ(
      apm->ProcessStream(frame.data.data(),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
                         frame.data.data()),
      0);
-  EXPECT_FALSE(apm->GetStatistics().voice_detected);
-
-  // If enabled, metrics should be reported.
-  config.voice_detection.enabled = true;
-  apm->ApplyConfig(config);
-  EXPECT_EQ(
-      apm->ProcessStream(frame.data.data(),
-                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                         frame.data.data()),
-      0);
-  auto stats = apm->GetStatistics();
-  EXPECT_TRUE(stats.voice_detected);
-
-  // If re-disabled, the value is again not reported.
-  config.voice_detection.enabled = false;
-  apm->ApplyConfig(config);
-  EXPECT_EQ(
-      apm->ProcessStream(frame.data.data(),
-                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                         StreamConfig(frame.sample_rate_hz, frame.num_channels),
-                         frame.data.data()),
-      0);
-  EXPECT_FALSE(apm->GetStatistics().voice_detected);
+  EXPECT_FALSE(apm->GetStatistics().voice_detected.has_value());
 }

 TEST(ApmStatistics, GetStatisticsReportsNoEchoDetectorStatsWhenDisabled) {