AGC2 AdaptiveDigitalGainApplier and AdaptiveAgc code improvements

This CL was written in preparation for the next CL in the chain and it contains the following changes:
- SignalWithLevels -> AdaptiveDigitalGainApplier::FrameInfo
- Frame view removed from AdaptiveDigitalGainApplier::FrameInfo
- AdaptiveDigitalGainApplier::Process now gets side info as const& to avoid unnecessary copies
- AdaptiveAgc::Process: `last_audio_level` renamed to `limiter_envelope` to better reflect what that actually is
- Missing class/method docstrings added

Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ie25dcd389d6eed74ea9a65f0720eeb8f20f0096b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186040
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32251}
2020-09-30 13:07:57 +02:00
parent 8845f7e32b
commit d5e6f413ab
5 changed files with 115 additions and 123 deletions
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@ -16,6 +16,18 @@
 #include "rtc_base/checks.h"

 namespace webrtc {
+namespace {
+
+void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,
+                   ApmDataDumper& dumper) {
+  dumper.DumpRaw("agc2_vad_probability", info.vad_result.speech_probability);
+  dumper.DumpRaw("agc2_vad_rms_dbfs", info.vad_result.rms_dbfs);
+  dumper.DumpRaw("agc2_vad_peak_dbfs", info.vad_result.peak_dbfs);
+  dumper.DumpRaw("agc2_noise_estimate_dbfs", info.input_noise_level_dbfs);
+  dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs);
+}
+
+}  // namespace

 AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
    : speech_level_estimator_(apm_data_dumper),
@ -40,37 +52,17 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,

 AdaptiveAgc::~AdaptiveAgc() = default;

-void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
-                          float last_audio_level) {
-  auto signal_with_levels = SignalWithLevels(float_frame);
-  signal_with_levels.vad_result = vad_.AnalyzeFrame(float_frame);
-  apm_data_dumper_->DumpRaw("agc2_vad_probability",
-                            signal_with_levels.vad_result.speech_probability);
-  apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs",
-                            signal_with_levels.vad_result.rms_dbfs);
-  apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs",
-                            signal_with_levels.vad_result.peak_dbfs);
-
-  speech_level_estimator_.Update(signal_with_levels.vad_result);
-
-  signal_with_levels.input_level_dbfs = speech_level_estimator_.level_dbfs();
-
-  signal_with_levels.input_noise_level_dbfs =
-      noise_level_estimator_.Analyze(float_frame);
-
-  apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs",
-                            signal_with_levels.input_noise_level_dbfs);
-
-  signal_with_levels.limiter_audio_level_dbfs =
-      last_audio_level > 0 ? FloatS16ToDbfs(last_audio_level) : -90.f;
-  apm_data_dumper_->DumpRaw("agc2_last_limiter_audio_level",
-                            signal_with_levels.limiter_audio_level_dbfs);
-
-  signal_with_levels.estimate_is_confident =
-      speech_level_estimator_.IsConfident();
-
-  // The gain applier applies the gain.
-  gain_applier_.Process(signal_with_levels);
+void AdaptiveAgc::Process(AudioFrameView<float> frame, float limiter_envelope) {
+  AdaptiveDigitalGainApplier::FrameInfo info;
+  info.vad_result = vad_.AnalyzeFrame(frame);
+  speech_level_estimator_.Update(info.vad_result);
+  info.input_level_dbfs = speech_level_estimator_.level_dbfs();
+  info.input_noise_level_dbfs = noise_level_estimator_.Analyze(frame);
+  info.limiter_envelope_dbfs =
+      limiter_envelope > 0 ? FloatS16ToDbfs(limiter_envelope) : -90.f;
+  info.estimate_is_confident = speech_level_estimator_.IsConfident();
+  DumpDebugData(info, *apm_data_dumper_);
+  gain_applier_.Process(info, frame);
 }

 void AdaptiveAgc::Reset() {
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h
@ -21,6 +21,8 @@
 namespace webrtc {
 class ApmDataDumper;

+// Adaptive digital gain controller.
+// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`.
 class AdaptiveAgc {
 public:
  explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
@ -28,7 +30,10 @@ class AdaptiveAgc {
              const AudioProcessing::Config::GainController2& config);
  ~AdaptiveAgc();

-  void Process(AudioFrameView<float> float_frame, float last_audio_level);
+  // Analyzes `frame` and applies a digital adaptive gain to it. Takes into
+  // account the envelope measured by the limiter.
+  // TODO(crbug.com/webrtc/7494): Make the class depend on the limiter.
+  void Process(AudioFrameView<float> frame, float limiter_envelope);
  void Reset();

 private:
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@ -85,43 +85,40 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
 }
 }  // namespace

-SignalWithLevels::SignalWithLevels(AudioFrameView<float> float_frame)
-    : float_frame(float_frame) {}
-SignalWithLevels::SignalWithLevels(const SignalWithLevels&) = default;
-
 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
    ApmDataDumper* apm_data_dumper)
-    : gain_applier_(false, DbToRatio(last_gain_db_)),
-      apm_data_dumper_(apm_data_dumper) {}
+    : apm_data_dumper_(apm_data_dumper),
+      gain_applier_(
+          /*hard_clip_samples=*/false,
+          /*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
+      calls_since_last_gain_log_(0),
+      gain_increase_allowed_(true),
+      last_gain_db_(kInitialAdaptiveDigitalGainDb) {}

-void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) {
+void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
+                                         AudioFrameView<float> frame) {
+  RTC_DCHECK_GE(info.input_level_dbfs, -150.f);
+  RTC_DCHECK_GE(frame.num_channels(), 1);
+  RTC_DCHECK_GE(frame.samples_per_channel(), 1);
+
+  // Log every second.
  calls_since_last_gain_log_++;
  if (calls_since_last_gain_log_ == 100) {
    calls_since_last_gain_log_ = 0;
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
                                last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
-                                -signal_with_levels.input_noise_level_dbfs, 0,
-                                100, 101);
+                                -info.input_noise_level_dbfs, 0, 100, 101);
  }

-  signal_with_levels.input_level_dbfs =
-      std::min(signal_with_levels.input_level_dbfs, 0.f);
-
-  RTC_DCHECK_GE(signal_with_levels.input_level_dbfs, -150.f);
-  RTC_DCHECK_GE(signal_with_levels.float_frame.num_channels(), 1);
-  RTC_DCHECK_GE(signal_with_levels.float_frame.samples_per_channel(), 1);
-
  const float target_gain_db = LimitGainByLowConfidence(
-      LimitGainByNoise(ComputeGainDb(signal_with_levels.input_level_dbfs),
-                       signal_with_levels.input_noise_level_dbfs,
-                       apm_data_dumper_),
-      last_gain_db_, signal_with_levels.limiter_audio_level_dbfs,
-      signal_with_levels.estimate_is_confident);
+      LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
+                       info.input_noise_level_dbfs, apm_data_dumper_),
+      last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);

  // Forbid increasing the gain when there is no speech.
-  gain_increase_allowed_ = signal_with_levels.vad_result.speech_probability >
-                           kVadConfidenceThreshold;
+  gain_increase_allowed_ =
+      info.vad_result.speech_probability > kVadConfidenceThreshold;

  const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
      target_gain_db, last_gain_db_, gain_increase_allowed_);
@ -137,7 +134,7 @@ void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) {
    gain_applier_.SetGainFactor(
        DbToRatio(last_gain_db_ + gain_change_this_frame_db));
  }
-  gain_applier_.ApplyGain(signal_with_levels.float_frame);
+  gain_applier_.ApplyGain(frame);

  // Remember that the gain has changed for the next iteration.
  last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@ -11,7 +11,6 @@
 #ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
 #define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_

-#include "modules/audio_processing/agc2/agc2_common.h"
 #include "modules/audio_processing/agc2/gain_applier.h"
 #include "modules/audio_processing/agc2/vad_with_level.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
@ -20,36 +19,38 @@ namespace webrtc {

 class ApmDataDumper;

-struct SignalWithLevels {
-  SignalWithLevels(AudioFrameView<float> float_frame);
-  SignalWithLevels(const SignalWithLevels&);
-
-  float input_level_dbfs = -1.f;
-  float input_noise_level_dbfs = -1.f;
-  VadLevelAnalyzer::Result vad_result;
-  float limiter_audio_level_dbfs = -1.f;
-  bool estimate_is_confident = false;
-  AudioFrameView<float> float_frame;
-};
-
+// Part of the adaptive digital controller that applies a digital adaptive gain.
+// The gain is updated towards a target. The logic decides when gain updates are
+// allowed, it controls the adaptation speed and caps the target based on the
+// estimated noise level and the speech level estimate confidence.
 class AdaptiveDigitalGainApplier {
 public:
+  // Information about a frame to process; fields have no default values.
+  struct FrameInfo {
+    float input_level_dbfs;        // Estimated speech plus noise level.
+    float input_noise_level_dbfs;  // Estimated noise level.
+    VadLevelAnalyzer::Result vad_result;  // VAD result for the frame.
+    float limiter_envelope_dbfs;  // Envelope level from the limiter.
+    bool estimate_is_confident;  // Speech level estimator confidence.
+  };
+
   explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
-  // Decide what gain to apply.
-  void Process(SignalWithLevels signal_with_levels);
+  AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
+  AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
+      delete;
+
+  // Analyzes `info`, updates the digital gain and applies it to `frame`.
+  void Process(const FrameInfo& info, AudioFrameView<float> frame);

 private:
-  float last_gain_db_ = kInitialAdaptiveDigitalGainDb;
+  ApmDataDumper* const apm_data_dumper_;
   GainApplier gain_applier_;
-  int calls_since_last_gain_log_ = 0;

-  // For some combinations of noise and speech probability, increasing
-  // the level is not allowed. Since we may get VAD results in bursts,
-  // we keep track of this variable until the next VAD results come
-  // in.
-  bool gain_increase_allowed_ = true;
-  ApmDataDumper* apm_data_dumper_ = nullptr;
+  int calls_since_last_gain_log_;  // Process() calls since last histogram log.
+  bool gain_increase_allowed_;  // False while the VAD detects no speech.
+  float last_gain_db_;  // Gain in dB set by the most recent Process() call.
 };
+
 }  // namespace webrtc

 #endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@ -20,6 +20,7 @@

 namespace webrtc {
 namespace {
+
 // Constants used in place of estimated noise levels.
 constexpr float kNoNoiseDbfs = -90.f;
 constexpr float kWithNoiseDbfs = -20.f;
@ -36,31 +37,25 @@ float RunOnConstantLevel(int num_iterations,

  for (int i = 0; i < num_iterations; ++i) {
    VectorFloatFrame fake_audio(1, 1, 1.f);
-    SignalWithLevels signal_with_levels(fake_audio.float_frame_view());
-    signal_with_levels.input_level_dbfs = input_level_dbfs;
-    signal_with_levels.input_noise_level_dbfs = kNoNoiseDbfs;
-    signal_with_levels.vad_result = vad_level;
-    signal_with_levels.limiter_audio_level_dbfs = -2.f;
-    signal_with_levels.estimate_is_confident = true;
-    gain_applier->Process(signal_with_levels);
+    AdaptiveDigitalGainApplier::FrameInfo info;
+    info.input_level_dbfs = input_level_dbfs;
+    info.input_noise_level_dbfs = kNoNoiseDbfs;
+    info.vad_result = vad_level;
+    info.limiter_envelope_dbfs = -2.f;
+    info.estimate_is_confident = true;
+    gain_applier->Process(info, fake_audio.float_frame_view());
    gain_linear = fake_audio.float_frame_view().channel(0)[0];
  }
  return gain_linear;
 }

-// Returns 'SignalWithLevels' for typical GainApplier behavior. Voice on, no
-// noise, low limiter, confident level.
-SignalWithLevels TestSignalWithLevel(AudioFrameView<float> float_frame) {
-  SignalWithLevels result(float_frame);
-  result.input_level_dbfs = -1;
-  result.input_noise_level_dbfs = kNoNoiseDbfs;
-  result.vad_result = kVadSpeech;
-  result.estimate_is_confident = true;
-  result.limiter_audio_level_dbfs = -2.f;
-  return result;
-}
-
-}  // namespace
+// Default test frame info: voice on, no noise, low limiter, confident level.
+constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
+    /*input_level_dbfs=*/-1.f,
+    /*input_noise_level_dbfs=*/kNoNoiseDbfs,
+    /*vad_result=*/kVadSpeech,
+    /*limiter_envelope_dbfs=*/-2.f,
+    /*estimate_is_confident=*/true};

 TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
   ApmDataDumper apm_data_dumper(0);
@ -68,9 +63,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {

   // Make one call with reasonable audio level values and settings.
   VectorFloatFrame fake_audio(2, 480, 10000.f);
-  auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
-  signal_with_level.input_level_dbfs = -5.0;
-  gain_applier.Process(signal_with_level);
+  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  info.input_level_dbfs = -5.f;  // Override the level in `kFrameInfo`.
+  gain_applier.Process(info, fake_audio.float_frame_view());
 }

 // Check that the output is -kHeadroom dBFS.
@ -120,9 +115,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame fake_audio(1, 1, 1.f);
-    auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
-    signal_with_level.input_level_dbfs = initial_level_dbfs;
-    gain_applier.Process(signal_with_level);
+    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+    info.input_level_dbfs = initial_level_dbfs;
+    gain_applier.Process(info, fake_audio.float_frame_view());
    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
              kMaxChangePerFrameLinear);
@ -133,9 +128,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame fake_audio(1, 1, 1.f);
-    auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
-    signal_with_level.input_level_dbfs = 0.f;
-    gain_applier.Process(signal_with_level);
+    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+    info.input_level_dbfs = 0.f;
+    gain_applier.Process(info, fake_audio.float_frame_view());
    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
              kMaxChangePerFrameLinear);
@ -151,9 +146,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
  constexpr int num_samples = 480;

  VectorFloatFrame fake_audio(1, num_samples, 1.f);
-  auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
-  signal_with_level.input_level_dbfs = initial_level_dbfs;
-  gain_applier.Process(signal_with_level);
+  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  info.input_level_dbfs = initial_level_dbfs;
+  gain_applier.Process(info, fake_audio.float_frame_view());
  float maximal_difference = 0.f;
  float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
  for (const auto& x : fake_audio.float_frame_view().channel(0)) {
@ -182,10 +177,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {

  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
    VectorFloatFrame fake_audio(1, num_samples, 1.f);
-    auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
-    signal_with_level.input_level_dbfs = initial_level_dbfs;
-    signal_with_level.input_noise_level_dbfs = kWithNoiseDbfs;
-    gain_applier.Process(signal_with_level);
+    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+    info.input_level_dbfs = initial_level_dbfs;
+    info.input_noise_level_dbfs = kWithNoiseDbfs;
+    gain_applier.Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (i > num_initial_frames) {
@ -204,9 +199,9 @@ TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {

  // Make one call with positive audio level values and settings.
  VectorFloatFrame fake_audio(2, 480, 10000.f);
-  auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
-  signal_with_level.input_level_dbfs = 5.0f;
-  gain_applier.Process(signal_with_level);
+  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  info.input_level_dbfs = 5.f;
+  gain_applier.Process(info, fake_audio.float_frame_view());
 }

 TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
@ -223,11 +218,11 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {

  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
    VectorFloatFrame fake_audio(1, num_samples, 1.f);
-    auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
-    signal_with_level.input_level_dbfs = initial_level_dbfs;
-    signal_with_level.limiter_audio_level_dbfs = 1.f;
-    signal_with_level.estimate_is_confident = false;
-    gain_applier.Process(signal_with_level);
+    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+    info.input_level_dbfs = initial_level_dbfs;
+    info.limiter_envelope_dbfs = 1.f;
+    info.estimate_is_confident = false;
+    gain_applier.Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (i > num_initial_frames) {
@ -239,4 +234,6 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
    }
  }
 }
+
+}  // namespace
 }  // namespace webrtc