AGC2: max output noise level now part of config

Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ic42f09dc13560494963cdcd338a0c52a729e108d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186266
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32282}
2020-10-01 17:16:56 +02:00
parent c082eba758
commit 9a625e7aef
6 changed files with 43 additions and 27 deletions
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@ -30,6 +30,7 @@ void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,

 constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
 constexpr float kMaxGainChangePerSecondDb = 3.f;
+constexpr float kMaxOutputNoiseLevelDbfs = -50.f;

 }  // namespace

@ -37,7 +38,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
    : speech_level_estimator_(apm_data_dumper),
      gain_applier_(apm_data_dumper,
                    kGainApplierAdjacentSpeechFramesThreshold,
-                    kMaxGainChangePerSecondDb),
+                    kMaxGainChangePerSecondDb,
+                    kMaxOutputNoiseLevelDbfs),
      apm_data_dumper_(apm_data_dumper),
      noise_level_estimator_(apm_data_dumper) {
  RTC_DCHECK(apm_data_dumper);
@ -56,7 +58,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
      gain_applier_(
          apm_data_dumper,
          config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
-          config.adaptive_digital.max_gain_change_db_per_second),
+          config.adaptive_digital.max_gain_change_db_per_second,
+          config.adaptive_digital.max_output_noise_level_dbfs),
      apm_data_dumper_(apm_data_dumper),
      noise_level_estimator_(apm_data_dumper) {
  RTC_DCHECK(apm_data_dumper);
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@ -44,12 +44,16 @@ float ComputeGainDb(float input_level_dbfs) {
  return 0.f;
 }

-// We require 'gain + noise_level <= kMaxNoiseLevelDbfs'.
+// Returns `target_gain` capped by the noise headroom, i.e., the gain that
+// raises the input noise level to `max_output_noise_level_dbfs`. The headroom
+// is floored at 0 dB, so this cap never attenuates noise above that level.
 float LimitGainByNoise(float target_gain,
                       float input_noise_level_dbfs,
-                       ApmDataDumper* apm_data_dumper) {
-  const float noise_headroom_db = kMaxNoiseLevelDbfs - input_noise_level_dbfs;
-  apm_data_dumper->DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
+                       float max_output_noise_level_dbfs,
+                       ApmDataDumper& apm_data_dumper) {
+  const float noise_headroom_db =
+      max_output_noise_level_dbfs - input_noise_level_dbfs;
+  apm_data_dumper.DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
  return std::min(target_gain, std::max(noise_headroom_db, 0.f));
 }

@ -89,7 +93,8 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
    ApmDataDumper* apm_data_dumper,
    int adjacent_speech_frames_threshold,
-    float max_gain_change_db_per_second)
+    float max_gain_change_db_per_second,
+    float max_output_noise_level_dbfs)
    : apm_data_dumper_(apm_data_dumper),
      gain_applier_(
          /*hard_clip_samples=*/false,
@ -97,11 +102,14 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
      max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
                                   kFrameDurationMs / 1000.f),
+      max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
      calls_since_last_gain_log_(0),
      frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
      last_gain_db_(kInitialAdaptiveDigitalGainDb) {
  RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
  RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
+  RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f);
+  RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f);
 }

 void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
@ -126,7 +134,8 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,

  const float target_gain_db = LimitGainByLowConfidence(
      LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
-                       info.input_noise_level_dbfs, apm_data_dumper_),
+                       info.input_noise_level_dbfs,
+                       max_output_noise_level_dbfs_, *apm_data_dumper_),
      last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);

  // Forbid increasing the gain until enough adjacent speech frames are
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@ -34,12 +34,15 @@ class AdaptiveDigitalGainApplier {
    bool estimate_is_confident;
  };

+  // Ctor.
  // `adjacent_speech_frames_threshold` indicates how many speech frames are
  // required before a gain increase is allowed. `max_gain_change_db_per_second`
  // limits the adaptation speed (uniformly operated across frames).
+  // `max_output_noise_level_dbfs` limits the output noise level.
  AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
                             int adjacent_speech_frames_threshold,
-                             float max_gain_change_db_per_second);
+                             float max_gain_change_db_per_second,
+                             float max_output_noise_level_dbfs);
  AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
  AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
      delete;
@ -54,6 +57,7 @@ class AdaptiveDigitalGainApplier {

  const int adjacent_speech_frames_threshold_;
  const float max_gain_change_db_per_10ms_;
+  const float max_output_noise_level_dbfs_;

  int calls_since_last_gain_log_;
  int frames_to_gain_increase_allowed_;
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@ -36,14 +36,18 @@ constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f};
 constexpr float kMaxGainChangePerSecondDb = 3.f;
 constexpr float kMaxGainChangePerFrameDb =
    kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
+constexpr float kMaxOutputNoiseLevelDbfs = -50.f;

 // Helper to instance `AdaptiveDigitalGainApplier`.
 struct GainApplierHelper {
  GainApplierHelper()
+      : GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {}
+  explicit GainApplierHelper(int adjacent_speech_frames_threshold)
      : apm_data_dumper(0),
        gain_applier(&apm_data_dumper,
-                     /*adjacent_speech_frames_threshold=*/1,
-                     kMaxGainChangePerSecondDb) {}
+                     adjacent_speech_frames_threshold,
+                     kMaxGainChangePerSecondDb,
+                     kMaxOutputNoiseLevelDbfs) {}
  ApmDataDumper apm_data_dumper;
  AdaptiveDigitalGainApplier gain_applier;
 };
@ -185,7 +189,8 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
      kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
  constexpr int num_frames = 50;

-  ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";
+  ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
+      << "kWithNoiseDbfs is too low";

  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
@ -223,7 +228,8 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
      kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
  constexpr int num_frames = 50;

-  ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";
+  ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
+      << "kWithNoiseDbfs is too low";

  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
@ -252,10 +258,8 @@ class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
 TEST_P(AdaptiveDigitalGainApplierTest,
       DoNotIncreaseGainWithTooFewSpeechFrames) {
  const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
-                                          adjacent_speech_frames_threshold,
-                                          kMaxGainChangePerFrameDb);
+  GainApplierHelper helper(adjacent_speech_frames_threshold);
+
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.input_level_dbfs = -25.0;

@ -263,7 +267,7 @@ TEST_P(AdaptiveDigitalGainApplierTest,
  for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
-    gain_applier.Process(info, audio.float_frame_view());
+    helper.gain_applier.Process(info, audio.float_frame_view());
    const float gain = audio.float_frame_view().channel(0)[0];
    if (i > 0) {
      EXPECT_EQ(prev_gain, gain);  // No gain increase.
@ -274,23 +278,21 @@ TEST_P(AdaptiveDigitalGainApplierTest,

 TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
  const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
-                                          adjacent_speech_frames_threshold,
-                                          kMaxGainChangePerFrameDb);
+  GainApplierHelper helper(adjacent_speech_frames_threshold);
+
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.input_level_dbfs = -25.0;

  float prev_gain = 0.f;
  for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
-    gain_applier.Process(info, audio.float_frame_view());
+    helper.gain_applier.Process(info, audio.float_frame_view());
    prev_gain = audio.float_frame_view().channel(0)[0];
  }

  // Process one more speech frame.
  VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
-  gain_applier.Process(info, audio.float_frame_view());
+  helper.gain_applier.Process(info, audio.float_frame_view());

  // The gain has increased.
  EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@ -32,9 +32,6 @@ constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
 // At what limiter levels should we start decreasing the adaptive digital gain.
 constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;

-// This parameter must be tuned together with the noise estimator.
-constexpr float kMaxNoiseLevelDbfs = -50.f;
-
 // This is the threshold for speech. Speech frames are used for updating the
 // speech level, measuring the amount of speech, and decide when to allow target
 // gain reduction.
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@ -354,6 +354,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
        float extra_saturation_margin_db = 2.f;
        int gain_applier_adjacent_speech_frames_threshold = 1;
        float max_gain_change_db_per_second = 3.f;
+        float max_output_noise_level_dbfs = -50.f;
      } adaptive_digital;
    } gain_controller2;