AGC2: prepare to move speech level estimator into GainController2

- build target isolated
- `AdaptiveModeLevelEstimator` renamed to `SpeechLevelEstimator`

Bug: webrtc:7494
Change-Id: If16caec2269b2ed1b2ee27c3687a8f8875f55c8c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/280441
Reviewed-by: Hanna Silen <silen@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38469}
2022-10-25 15:28:07 +02:00
parent 8d7273357d
commit d89dff767c
7 changed files with 79 additions and 48 deletions
--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@ -421,6 +421,7 @@ if (rtc_include_tests) {
        "agc2:input_volume_controller_unittests",
        "agc2:input_volume_stats_reporter_unittests",
        "agc2:noise_estimator_unittests",
+        "agc2:speech_level_estimator_unittest",
        "agc2:test_utils",
        "agc2:vad_wrapper_unittests",
        "agc2/rnn_vad:unittests",
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@ -15,14 +15,36 @@ group("agc2") {
  ]
 }

+rtc_library("speech_level_estimator") {
+  sources = [
+    "speech_level_estimator.cc",
+    "speech_level_estimator.h",
+  ]
+
+  visibility = [
+    "..:gain_controller2",
+    "./*",
+  ]
+
+  configs += [ "..:apm_debug_dump" ]
+
+  deps = [
+    ":common",
+    "..:api",
+    "..:apm_logging",
+    "../../../api:array_view",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:logging",
+    "../../../rtc_base:safe_minmax",
+  ]
+}
+
 rtc_library("adaptive_digital") {
  sources = [
    "adaptive_digital_gain_applier.cc",
    "adaptive_digital_gain_applier.h",
    "adaptive_digital_gain_controller.cc",
    "adaptive_digital_gain_controller.h",
-    "adaptive_mode_level_estimator.cc",
-    "adaptive_mode_level_estimator.h",
    "saturation_protector.cc",
    "saturation_protector.h",
    "saturation_protector_buffer.cc",
@ -38,10 +60,9 @@ rtc_library("adaptive_digital") {

  deps = [
    ":common",
-    ":cpu_features",
    ":gain_applier",
    ":noise_level_estimator",
-    ":vad_wrapper",
+    ":speech_level_estimator",
    "..:api",
    "..:apm_logging",
    "..:audio_frame_view",
@ -244,13 +265,27 @@ rtc_library("cpu_features") {
  ]
 }

+rtc_library("speech_level_estimator_unittest") {
+  testonly = true
+  configs += [ "..:apm_debug_dump" ]
+
+  sources = [ "speech_level_estimator_unittest.cc" ]
+  deps = [
+    ":common",
+    ":speech_level_estimator",
+    "..:api",
+    "..:apm_logging",
+    "../../../rtc_base:gunit_helpers",
+    "../../../test:test_support",
+  ]
+}
+
 rtc_library("adaptive_digital_unittests") {
  testonly = true
  configs += [ "..:apm_debug_dump" ]

  sources = [
    "adaptive_digital_gain_applier_unittest.cc",
-    "adaptive_mode_level_estimator_unittest.cc",
    "gain_applier_unittest.cc",
    "saturation_protector_buffer_unittest.cc",
    "saturation_protector_unittest.cc",
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc
@ -13,7 +13,6 @@
 #include <algorithm>

 #include "common_audio/include/audio_util.h"
-#include "modules/audio_processing/agc2/vad_wrapper.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
--- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h
@ -15,9 +15,9 @@

 #include "absl/types/optional.h"
 #include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
-#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
 #include "modules/audio_processing/agc2/noise_level_estimator.h"
 #include "modules/audio_processing/agc2/saturation_protector.h"
+#include "modules/audio_processing/agc2/speech_level_estimator.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
 #include "modules/audio_processing/include/audio_processing.h"

@ -56,7 +56,7 @@ class AdaptiveDigitalGainController {
  absl::optional<float> GetSpeechLevelDbfsIfConfident() const;

 private:
-  AdaptiveModeLevelEstimator speech_level_estimator_;
+  SpeechLevelEstimator speech_level_estimator_;
  AdaptiveDigitalGainApplier gain_controller_;
  ApmDataDumper* const apm_data_dumper_;
  std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
+#include "modules/audio_processing/agc2/speech_level_estimator.h"

 #include "modules/audio_processing/agc2/agc2_common.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
@ -32,19 +32,19 @@ float GetInitialSpeechLevelEstimateDbfs(

 }  // namespace

-bool AdaptiveModeLevelEstimator::LevelEstimatorState::operator==(
-    const AdaptiveModeLevelEstimator::LevelEstimatorState& b) const {
+bool SpeechLevelEstimator::LevelEstimatorState::operator==(
+    const SpeechLevelEstimator::LevelEstimatorState& b) const {
  return time_to_confidence_ms == b.time_to_confidence_ms &&
         level_dbfs.numerator == b.level_dbfs.numerator &&
         level_dbfs.denominator == b.level_dbfs.denominator;
 }

-float AdaptiveModeLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
+float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
  RTC_DCHECK_NE(denominator, 0.f);
  return numerator / denominator;
 }

-AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
+SpeechLevelEstimator::SpeechLevelEstimator(
    ApmDataDumper* apm_data_dumper,
    const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
    : apm_data_dumper_(apm_data_dumper),
@ -57,9 +57,9 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
  Reset();
 }

-void AdaptiveModeLevelEstimator::Update(float rms_dbfs,
-                                        float peak_dbfs,
-                                        float speech_probability) {
+void SpeechLevelEstimator::Update(float rms_dbfs,
+                                  float peak_dbfs,
+                                  float speech_probability) {
  RTC_DCHECK_GT(rms_dbfs, -150.0f);
  RTC_DCHECK_LT(rms_dbfs, 50.0f);
  RTC_DCHECK_GT(peak_dbfs, -150.0f);
@ -113,7 +113,7 @@ void AdaptiveModeLevelEstimator::Update(float rms_dbfs,
  DumpDebugData();
 }

-bool AdaptiveModeLevelEstimator::IsConfident() const {
+bool SpeechLevelEstimator::IsConfident() const {
  if (adjacent_speech_frames_threshold_ == 1) {
    // Ignore `reliable_state_` when a single frame is enough to update the
    // level estimate (because it is not used).
@ -129,21 +129,21 @@ bool AdaptiveModeLevelEstimator::IsConfident() const {
          preliminary_state_.time_to_confidence_ms == 0);
 }

-void AdaptiveModeLevelEstimator::Reset() {
+void SpeechLevelEstimator::Reset() {
  ResetLevelEstimatorState(preliminary_state_);
  ResetLevelEstimatorState(reliable_state_);
  level_dbfs_ = initial_speech_level_dbfs_;
  num_adjacent_speech_frames_ = 0;
 }

-void AdaptiveModeLevelEstimator::ResetLevelEstimatorState(
+void SpeechLevelEstimator::ResetLevelEstimatorState(
    LevelEstimatorState& state) const {
  state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs;
  state.level_dbfs.numerator = initial_speech_level_dbfs_;
  state.level_dbfs.denominator = 1.0f;
 }

-void AdaptiveModeLevelEstimator::DumpDebugData() const {
+void SpeechLevelEstimator::DumpDebugData() const {
  apm_data_dumper_->DumpRaw(
      "agc2_adaptive_level_estimator_num_adjacent_speech_frames",
      num_adjacent_speech_frames_);
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
@ -8,29 +8,29 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
-#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_

 #include <stddef.h>

 #include <type_traits>

 #include "modules/audio_processing/agc2/agc2_common.h"
-#include "modules/audio_processing/agc2/vad_wrapper.h"
 #include "modules/audio_processing/include/audio_processing.h"

 namespace webrtc {
 class ApmDataDumper;

-// Level estimator for the digital adaptive gain controller.
-class AdaptiveModeLevelEstimator {
+// Active speech level estimator based on the analysis of the following
+// framewise properties: RMS level (dBFS), peak level (dBFS), speech
+// probability.
+class SpeechLevelEstimator {
 public:
-  AdaptiveModeLevelEstimator(
+  SpeechLevelEstimator(
      ApmDataDumper* apm_data_dumper,
      const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
-  AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete;
-  AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) =
-      delete;
+  SpeechLevelEstimator(const SpeechLevelEstimator&) = delete;
+  SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete;

  // Updates the level estimation.
  void Update(float rms_dbfs, float peak_dbfs, float speech_probability);
@ -74,4 +74,4 @@ class AdaptiveModeLevelEstimator {

 }  // namespace webrtc

-#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc
@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
+#include "modules/audio_processing/agc2/speech_level_estimator.h"

 #include <memory>

@ -36,7 +36,7 @@ void RunOnConstantLevel(int num_iterations,
                        float rms_dbfs,
                        float peak_dbfs,
                        float speech_probability,
-                        AdaptiveModeLevelEstimator& level_estimator) {
+                        SpeechLevelEstimator& level_estimator) {
  for (int i = 0; i < num_iterations; ++i) {
    level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability);
  }
@ -57,7 +57,7 @@ constexpr float kMaxSpeechProbability = 1.0f;
 struct TestLevelEstimator {
  explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
      : data_dumper(0),
-        estimator(std::make_unique<AdaptiveModeLevelEstimator>(
+        estimator(std::make_unique<SpeechLevelEstimator>(
            &data_dumper,
            GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
        initial_speech_level_dbfs(estimator->level_dbfs()),
@ -70,14 +70,14 @@ struct TestLevelEstimator {
           "level is wide enough for the tests";
  }
  ApmDataDumper data_dumper;
-  std::unique_ptr<AdaptiveModeLevelEstimator> estimator;
+  std::unique_ptr<SpeechLevelEstimator> estimator;
  const float initial_speech_level_dbfs;
  const float level_rms_dbfs;
  const float level_peak_dbfs;
 };

 // Checks that the level estimator converges to a constant input speech level.
-TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
+TEST(GainController2SpeechLevelEstimator, LevelStabilizes) {
  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
                     level_estimator.level_rms_dbfs,
@ -93,7 +93,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {

 // Checks that the level controller does not become confident when too few
 // speech frames are observed.
-TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {
+TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
                     level_estimator.level_rms_dbfs,
@ -104,7 +104,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {

 // Checks that the level controller becomes confident when enough speech frames
 // are observed.
-TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
+TEST(GainController2SpeechLevelEstimator, IsConfident) {
  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
                     level_estimator.level_rms_dbfs,
@ -115,8 +115,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {

 // Checks that the estimated level is not affected by the level of non-speech
 // frames.
-TEST(GainController2AdaptiveModeLevelEstimator,
-     EstimatorIgnoresNonSpeechFrames) {
+TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) {
  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
  // Simulate speech.
  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
@ -134,8 +133,7 @@ TEST(GainController2AdaptiveModeLevelEstimator,
 }

 // Checks the convergence speed of the estimator before it becomes confident.
-TEST(GainController2AdaptiveModeLevelEstimator,
-     ConvergenceSpeedBeforeConfidence) {
+TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) {
  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
  RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
                     level_estimator.level_rms_dbfs,
@ -147,8 +145,7 @@ TEST(GainController2AdaptiveModeLevelEstimator,
 }

 // Checks the convergence speed of the estimator after it becomes confident.
-TEST(GainController2AdaptiveModeLevelEstimator,
-     ConvergenceSpeedAfterConfidence) {
+TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) {
  TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
  // Reach confidence using the initial level estimate.
  RunOnConstantLevel(
@ -173,14 +170,13 @@ TEST(GainController2AdaptiveModeLevelEstimator,
              kConvergenceSpeedTestsLevelTolerance);
 }

-class AdaptiveModeLevelEstimatorParametrization
+class SpeechLevelEstimatorParametrization
    : public ::testing::TestWithParam<int> {
 protected:
  int adjacent_speech_frames_threshold() const { return GetParam(); }
 };

-TEST_P(AdaptiveModeLevelEstimatorParametrization,
-       DoNotAdaptToShortSpeechSegments) {
+TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) {
  TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
  const float initial_level = level_estimator.estimator->level_dbfs();
  ASSERT_LT(initial_level, level_estimator.level_peak_dbfs);
@ -197,7 +193,7 @@ TEST_P(AdaptiveModeLevelEstimatorParametrization,
  EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
 }

-TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
+TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
  TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
  const float initial_level = level_estimator.estimator->level_dbfs();
  ASSERT_LT(initial_level, level_estimator.level_peak_dbfs);
@ -210,7 +206,7 @@ TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
 }

 INSTANTIATE_TEST_SUITE_P(GainController2,
-                         AdaptiveModeLevelEstimatorParametrization,
+                         SpeechLevelEstimatorParametrization,
                         ::testing::Values(1, 9, 17));

 }  // namespace