Stop using the beamformer inside APM

Removes the usage of an injected/enabled beamformer in APM, and marks the API parts as deprecated. Initialization and process calls are removed, and all enabled/disabled flags are replaced by assuming no beamforming. Additionally, an AGC test relying on the beamformer as a VAD is removed. Bug: webrtc:9402 Change-Id: I0d3d0b9773da083ce43c28045db9a77278f59f95 Reviewed-on: https://webrtc-review.googlesource.com/83341 Reviewed-by: Minyue Li <minyue@webrtc.org> Commit-Queue: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/master@{#23643}
2018-06-14 10:11:35 +02:00
parent 431abd989b
commit 9394f6fda1
5 changed files with 12 additions and 171 deletions
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@ -166,7 +166,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
    bool residual_echo_detector_enabled,
    bool noise_suppressor_enabled,
    bool intelligibility_enhancer_enabled,
-    bool beamformer_enabled,
    bool adaptive_gain_controller_enabled,
    bool gain_controller2_enabled,
    bool pre_amplifier_enabled,
@ -184,7 +183,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
  changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
  changed |=
      (intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_);
-  changed |= (beamformer_enabled != beamformer_enabled_);
  changed |=
      (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
  changed |=
@ -202,7 +200,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
    residual_echo_detector_enabled_ = residual_echo_detector_enabled;
    noise_suppressor_enabled_ = noise_suppressor_enabled;
    intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled;
-    beamformer_enabled_ = beamformer_enabled;
    adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
    gain_controller2_enabled_ = gain_controller2_enabled;
    pre_amplifier_enabled_ = pre_amplifier_enabled;
@ -231,8 +228,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
    const {
  return low_cut_filter_enabled_ || echo_canceller_enabled_ ||
         mobile_echo_controller_enabled_ || noise_suppressor_enabled_ ||
-         beamformer_enabled_ || adaptive_gain_controller_enabled_ ||
-         echo_controller_enabled_;
+         adaptive_gain_controller_enabled_ || echo_controller_enabled_;
 }

 bool AudioProcessingImpl::ApmSubmoduleStates::CaptureFullBandProcessingActive()
@ -388,14 +384,11 @@ AudioProcessingImpl::AudioProcessingImpl(
                 config.Get<ExperimentalAgc>().enabled),
 #endif
 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
-      capture_(false,
+      capture_(false),
 #else
-      capture_(config.Get<ExperimentalNs>().enabled,
+      capture_(config.Get<ExperimentalNs>().enabled),
 #endif
-               config.Get<Beamforming>().array_geometry,
-               config.Get<Beamforming>().target_direction),
-      capture_nonlocked_(config.Get<Beamforming>().enabled,
-                         config.Get<Intelligibility>().enabled) {
+      capture_nonlocked_(config.Get<Intelligibility>().enabled) {
  {
    rtc::CritScope cs_render(&crit_render_);
    rtc::CritScope cs_capture(&crit_capture_);
@ -509,11 +502,6 @@ int AudioProcessingImpl::MaybeInitialize(
 int AudioProcessingImpl::InitializeLocked() {
  UpdateActiveSubmoduleStates();

-  const int capture_audiobuffer_num_channels =
-      capture_nonlocked_.beamformer_enabled
-          ? formats_.api_format.input_stream().num_channels()
-          : formats_.api_format.output_stream().num_channels();
-
  const int render_audiobuffer_num_output_frames =
      formats_.api_format.reverse_output_stream().num_frames() == 0
          ? formats_.render_processing_format.num_frames()
@ -544,7 +532,7 @@ int AudioProcessingImpl::InitializeLocked() {
      new AudioBuffer(formats_.api_format.input_stream().num_frames(),
                      formats_.api_format.input_stream().num_channels(),
                      capture_nonlocked_.capture_processing_format.num_frames(),
-                      capture_audiobuffer_num_channels,
+                      formats_.api_format.output_stream().num_channels(),
                      formats_.api_format.output_stream().num_frames()));

  public_submodules_->echo_cancellation->Initialize(
@ -575,7 +563,6 @@ int AudioProcessingImpl::InitializeLocked() {
    public_submodules_->gain_control_for_experimental_agc->Initialize();
  }
  InitializeTransient();
-  InitializeBeamformer();
 #if WEBRTC_INTELLIGIBILITY_ENHANCER
  InitializeIntelligibility();
 #endif
@ -615,11 +602,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
    return kBadNumberChannelsError;
  }

-  if (capture_nonlocked_.beamformer_enabled &&
-      num_in_channels != capture_.array_geometry.size()) {
-    return kBadNumberChannelsError;
-  }
-
  formats_.api_format = config;

  int capture_processing_rate = FindNativeProcessRateToUse(
@ -735,18 +717,6 @@ void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) {
    InitializeIntelligibility();
  }
 #endif
-
-#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
-  if (capture_nonlocked_.beamformer_enabled !=
-          config.Get<Beamforming>().enabled) {
-    capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;
-    if (config.Get<Beamforming>().array_geometry.size() > 1) {
-      capture_.array_geometry = config.Get<Beamforming>().array_geometry;
-    }
-    capture_.target_direction = config.Get<Beamforming>().target_direction;
-    InitializeBeamformer();
-  }
-#endif  // WEBRTC_ANDROID_PLATFORM_BUILD
 }

 int AudioProcessingImpl::proc_sample_rate_hz() const {
@ -771,10 +741,7 @@ size_t AudioProcessingImpl::num_input_channels() const {

 size_t AudioProcessingImpl::num_proc_channels() const {
  // Used as callback from submodules, hence locking is not allowed.
-  return (capture_nonlocked_.beamformer_enabled ||
-          capture_nonlocked_.echo_controller_enabled)
-             ? 1
-             : num_output_channels();
+  return capture_nonlocked_.echo_controller_enabled ? 1 : num_output_channels();
 }

 size_t AudioProcessingImpl::num_output_channels() const {
@ -1265,13 +1232,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
    capture_buffer->set_num_channels(1);
  }

-  if (capture_nonlocked_.beamformer_enabled) {
-    private_submodules_->beamformer->AnalyzeChunk(
-        *capture_buffer->split_data_f());
-    // Discards all channels by the leftmost one.
-    capture_buffer->set_num_channels(1);
-  }
-
  // TODO(peah): Move the AEC3 low-cut filter to this place.
  if (private_submodules_->low_cut_filter &&
      !private_submodules_->echo_controller) {
@ -1334,16 +1294,10 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
        capture_buffer, stream_delay_ms()));
  }

-  if (capture_nonlocked_.beamformer_enabled) {
-    private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f());
-  }
-
  public_submodules_->voice_detection->ProcessCaptureAudio(capture_buffer);

  if (constants_.use_experimental_agc &&
-      public_submodules_->gain_control->is_enabled() &&
-      (!capture_nonlocked_.beamformer_enabled ||
-       private_submodules_->beamformer->is_target_present())) {
+      public_submodules_->gain_control->is_enabled()) {
    private_submodules_->agc_manager->Process(
        capture_buffer->split_bands_const(0)[kBand0To8kHz],
        capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate);
@ -1811,7 +1765,6 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
      config_.residual_echo_detector.enabled,
      public_submodules_->noise_suppression->is_enabled(),
      capture_nonlocked_.intelligibility_enabled,
-      capture_nonlocked_.beamformer_enabled,
      public_submodules_->gain_control->is_enabled(),
      config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
      capture_nonlocked_.echo_controller_enabled,
@ -1832,17 +1785,6 @@ void AudioProcessingImpl::InitializeTransient() {
  }
 }

-void AudioProcessingImpl::InitializeBeamformer() {
-  if (capture_nonlocked_.beamformer_enabled) {
-    if (!private_submodules_->beamformer) {
-      private_submodules_->beamformer.reset(new NonlinearBeamformer(
-          capture_.array_geometry, 1u, capture_.target_direction));
-    }
-    private_submodules_->beamformer->Initialize(kChunkSizeMs,
-                                                capture_nonlocked_.split_rate);
-  }
-}
-
 void AudioProcessingImpl::InitializeIntelligibility() {
 #if WEBRTC_INTELLIGIBILITY_ENHANCER
  if (capture_nonlocked_.intelligibility_enabled) {
@ -2102,9 +2044,7 @@ void AudioProcessingImpl::RecordAudioProcessingState() {
 }

 AudioProcessingImpl::ApmCaptureState::ApmCaptureState(
-    bool transient_suppressor_enabled,
-    const std::vector<Point>& array_geometry,
-    SphericalPointf target_direction)
+    bool transient_suppressor_enabled)
    : aec_system_delay_jumps(-1),
      delay_offset_ms(0),
      was_stream_delay_set(false),
@ -2114,8 +2054,6 @@ AudioProcessingImpl::ApmCaptureState::ApmCaptureState(
      output_will_be_muted(false),
      key_pressed(false),
      transient_suppressor_enabled(transient_suppressor_enabled),
-      array_geometry(array_geometry),
-      target_direction(target_direction),
      capture_processing_format(kSampleRate16kHz),
      split_rate(kSampleRate16kHz),
      echo_path_gain_change(false) {}
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@ -185,7 +185,6 @@ class AudioProcessingImpl : public AudioProcessing {
                bool residual_echo_detector_enabled,
                bool noise_suppressor_enabled,
                bool intelligibility_enhancer_enabled,
-                bool beamformer_enabled,
                bool adaptive_gain_controller_enabled,
                bool gain_controller2_enabled,
                bool pre_amplifier_enabled,
@ -209,7 +208,6 @@ class AudioProcessingImpl : public AudioProcessing {
    bool residual_echo_detector_enabled_ = false;
    bool noise_suppressor_enabled_ = false;
    bool intelligibility_enhancer_enabled_ = false;
-    bool beamformer_enabled_ = false;
    bool adaptive_gain_controller_enabled_ = false;
    bool gain_controller2_enabled_ = false;
    bool pre_amplifier_enabled_ = false;
@ -370,9 +368,7 @@ class AudioProcessingImpl : public AudioProcessing {
  } constants_;

  struct ApmCaptureState {
-    ApmCaptureState(bool transient_suppressor_enabled,
-                    const std::vector<Point>& array_geometry,
-                    SphericalPointf target_direction);
+    ApmCaptureState(bool transient_suppressor_enabled);
    ~ApmCaptureState();
    int aec_system_delay_jumps;
    int delay_offset_ms;
@ -383,8 +379,6 @@ class AudioProcessingImpl : public AudioProcessing {
    bool output_will_be_muted;
    bool key_pressed;
    bool transient_suppressor_enabled;
-    std::vector<Point> array_geometry;
-    SphericalPointf target_direction;
    std::unique_ptr<AudioBuffer> capture_audio;
    // Only the rate and samples fields of capture_processing_format_ are used
    // because the capture processing number of channels is mutable and is
@ -395,12 +389,10 @@ class AudioProcessingImpl : public AudioProcessing {
  } capture_ RTC_GUARDED_BY(crit_capture_);

  struct ApmCaptureNonLockedState {
-    ApmCaptureNonLockedState(bool beamformer_enabled,
-                             bool intelligibility_enabled)
+    ApmCaptureNonLockedState(bool intelligibility_enabled)
        : capture_processing_format(kSampleRate16kHz),
          split_rate(kSampleRate16kHz),
          stream_delay_ms(0),
-          beamformer_enabled(beamformer_enabled),
          intelligibility_enabled(intelligibility_enabled) {}
    // Only the rate and samples fields of capture_processing_format_ are used
    // because the forward processing number of channels is mutable and is
@ -408,7 +400,6 @@ class AudioProcessingImpl : public AudioProcessing {
    StreamConfig capture_processing_format;
    int split_rate;
    int stream_delay_ms;
-    bool beamformer_enabled;
    bool intelligibility_enabled;
    bool echo_controller_enabled = false;
  } capture_nonlocked_;
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@ -1300,95 +1300,6 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) {
  }
 }

-#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS)
-TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
-  const int kSampleRateHz = 16000;
-  const size_t kSamplesPerChannel =
-      static_cast<size_t>(AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000);
-  const size_t kNumInputChannels = 2;
-  const size_t kNumOutputChannels = 1;
-  const size_t kNumChunks = 700;
-  const float kScaleFactor = 0.25f;
-  Config config;
-  std::vector<webrtc::Point> geometry;
-  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
-  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
-  config.Set<Beamforming>(new Beamforming(true, geometry));
-  testing::NiceMock<MockNonlinearBeamformer>* beamformer =
-      new testing::NiceMock<MockNonlinearBeamformer>(geometry, 1u);
-  std::unique_ptr<AudioProcessing> apm(
-      AudioProcessingBuilder()
-          .SetNonlinearBeamformer(
-              std::unique_ptr<webrtc::NonlinearBeamformer>(beamformer))
-          .Create(config));
-  EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
-  ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels);
-  ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels);
-  const size_t max_length = kSamplesPerChannel * std::max(kNumInputChannels,
-                                                          kNumOutputChannels);
-  std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]);
-  std::unique_ptr<float[]> float_data(new float[max_length]);
-  std::string filename = ResourceFilePath("far", kSampleRateHz);
-  FILE* far_file = fopen(filename.c_str(), "rb");
-  ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n";
-  const int kDefaultVolume = apm->gain_control()->stream_analog_level();
-  const int kDefaultCompressionGain =
-      apm->gain_control()->compression_gain_db();
-  bool is_target = false;
-  EXPECT_CALL(*beamformer, is_target_present())
-      .WillRepeatedly(testing::ReturnPointee(&is_target));
-  for (size_t i = 0; i < kNumChunks; ++i) {
-    ASSERT_TRUE(ReadChunk(far_file,
-                          int_data.get(),
-                          float_data.get(),
-                          &src_buf));
-    for (size_t j = 0; j < kNumInputChannels; ++j) {
-      for (size_t k = 0; k < kSamplesPerChannel; ++k) {
-        src_buf.channels()[j][k] *= kScaleFactor;
-      }
-    }
-    EXPECT_EQ(kNoErr,
-              apm->ProcessStream(src_buf.channels(),
-                                 src_buf.num_frames(),
-                                 kSampleRateHz,
-                                 LayoutFromChannels(src_buf.num_channels()),
-                                 kSampleRateHz,
-                                 LayoutFromChannels(dest_buf.num_channels()),
-                                 dest_buf.channels()));
-  }
-  EXPECT_EQ(kDefaultVolume,
-            apm->gain_control()->stream_analog_level());
-  EXPECT_EQ(kDefaultCompressionGain,
-            apm->gain_control()->compression_gain_db());
-  rewind(far_file);
-  is_target = true;
-  for (size_t i = 0; i < kNumChunks; ++i) {
-    ASSERT_TRUE(ReadChunk(far_file,
-                          int_data.get(),
-                          float_data.get(),
-                          &src_buf));
-    for (size_t j = 0; j < kNumInputChannels; ++j) {
-      for (size_t k = 0; k < kSamplesPerChannel; ++k) {
-        src_buf.channels()[j][k] *= kScaleFactor;
-      }
-    }
-    EXPECT_EQ(kNoErr,
-              apm->ProcessStream(src_buf.channels(),
-                                 src_buf.num_frames(),
-                                 kSampleRateHz,
-                                 LayoutFromChannels(src_buf.num_channels()),
-                                 kSampleRateHz,
-                                 LayoutFromChannels(dest_buf.num_channels()),
-                                 dest_buf.channels()));
-  }
-  EXPECT_LT(kDefaultVolume,
-            apm->gain_control()->stream_analog_level());
-  EXPECT_LT(kDefaultCompressionGain,
-            apm->gain_control()->compression_gain_db());
-  ASSERT_EQ(0, fclose(far_file));
-}
-#endif
-
 TEST_F(ApmTest, NoiseSuppression) {
  // Test valid suppression levels.
  NoiseSuppression::Level level[] = {
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@ -674,6 +674,7 @@ class AudioProcessingBuilder {
  AudioProcessingBuilder& SetRenderPreProcessing(
      std::unique_ptr<CustomProcessing> render_pre_processing);
  // The AudioProcessingBuilder takes ownership of the nonlinear beamformer.
+  RTC_DEPRECATED
  AudioProcessingBuilder& SetNonlinearBeamformer(
      std::unique_ptr<NonlinearBeamformer> nonlinear_beamformer);
  // The AudioProcessingBuilder takes ownership of the echo_detector.
--- a/modules/audio_processing/include/config.h
+++ b/modules/audio_processing/include/config.h
@ -30,7 +30,7 @@ enum class ConfigOptionID {
  kDelayAgnostic,
  kExperimentalAgc,
  kExperimentalNs,
-  kBeamforming,
+  kBeamforming,  // Deprecated
  kIntelligibility,
  kEchoCanceller3,  // Deprecated
  kAecRefinedAdaptiveFilter,