Allow AGC2 level estimation in AgcManagerDirect.

This CL does the following: (1) Adds a new AdaptiveModeLevelEstimatorAgc implementation of the Agc interface. The new implementation differs from webrtc::Agc in that it (a) uses the AGC2 speech level estimator in GetRmsErrorDb, whereas webrtc::Agc implements its own with the help of webrtc::LoudnessHistogram, and (b) doesn't forget its past at every GetRmsErrorDb call. (2) Makes AgcManagerDirect use AdaptiveModeLevelEstimatorAgc instead of webrtc::Agc if the use_agc2_level_estimation flag is set. Bug: webrtc:7494 Change-Id: I8df3f52e322d433eb5ce5297f4236af2f1877b04 Reviewed-on: https://webrtc-review.googlesource.com/86603 Commit-Queue: Alex Loiko <aleloi@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/master@{#23875}
2018-07-06 15:35:42 +02:00
parent 9a94057a79
commit 2ffafa8244
7 changed files with 164 additions and 20 deletions
--- a/modules/audio_processing/agc/BUILD.gn
+++ b/modules/audio_processing/agc/BUILD.gn
@ -26,6 +26,7 @@ rtc_source_set("agc") {
    "../../../rtc_base:macromagic",
    "../../../rtc_base:safe_minmax",
    "../../../system_wrappers:metrics_api",
+    "../agc2:level_estimation_agc",
    "../vad",
  ]
 }
--- a/modules/audio_processing/agc/agc_manager_direct.cc
+++ b/modules/audio_processing/agc/agc_manager_direct.cc
@ -17,6 +17,7 @@
 #endif

 #include "modules/audio_processing/agc/gain_map_internal.h"
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
 #include "modules/audio_processing/include/gain_control.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
@ -164,7 +165,10 @@ AgcManagerDirect::AgcManagerDirect(Agc* agc,
      file_postproc_(new DebugFile("agc_postproc.pcm")) {
  instance_counter_++;
  if (use_agc2_level_estimation_) {
-    RTC_NOTREACHED() << "Agc2 level estimation not implemented.";
+    RTC_DCHECK(!agc);
+    agc_.reset(new AdaptiveModeLevelEstimatorAgc(data_dumper_.get()));
+  } else {
+    RTC_DCHECK(agc);
  }
  if (use_agc2_digital_adaptive_) {
    RTC_NOTREACHED() << "Agc2 digital adaptive not implemented.";
--- a/modules/audio_processing/agc/agc_manager_direct.h
+++ b/modules/audio_processing/agc/agc_manager_direct.h
@ -52,22 +52,6 @@ class AgcManagerDirect final {
                   bool use_agc2_level_estimation,
                   bool use_agc2_digital_adaptive);

-  // Dependency injection for testing. Don't delete |agc| as the memory is owned
-  // by the manager.
-  AgcManagerDirect(Agc* agc,
-                   GainControl* gctrl,
-                   VolumeCallbacks* volume_callbacks,
-                   int startup_min_level,
-                   int clipped_level_min);
-
-  // Most general c-tor.
-  AgcManagerDirect(Agc* agc,
-                   GainControl* gctrl,
-                   VolumeCallbacks* volume_callbacks,
-                   int startup_min_level,
-                   int clipped_level_min,
-                   bool use_agc2_level_estimation,
-                   bool use_agc2_digital_adaptive);
  ~AgcManagerDirect();

  int Initialize();
@ -85,6 +69,25 @@ class AgcManagerDirect final {
  float voice_probability();

 private:
+  friend class AgcManagerDirectTest;
+
+  // Dependency injection for testing. Don't delete |agc| as the memory is owned
+  // by the manager.
+  AgcManagerDirect(Agc* agc,
+                   GainControl* gctrl,
+                   VolumeCallbacks* volume_callbacks,
+                   int startup_min_level,
+                   int clipped_level_min);
+
+  // Most general c-tor.
+  AgcManagerDirect(Agc* agc,
+                   GainControl* gctrl,
+                   VolumeCallbacks* volume_callbacks,
+                   int startup_min_level,
+                   int clipped_level_min,
+                   bool use_agc2_level_estimation,
+                   bool use_agc2_digital_adaptive);
+
  // Sets a new microphone level, after first checking that it hasn't been
  // updated by the user, in which case no action is taken.
  void SetLevel(int new_level);
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@ -15,6 +15,32 @@ group("agc2") {
  ]
 }

+# Agc-interface wrapper around the AGC2 speech level estimator
+# (AdaptiveModeLevelEstimatorAgc); depended on by the "../agc:agc" target.
+rtc_source_set("level_estimation_agc") {
+  sources = [
+    "adaptive_mode_level_estimator_agc.cc",
+    "adaptive_mode_level_estimator_agc.h",
+  ]
+  configs += [ "..:apm_debug_dump" ]
+  deps = [
+    ":adaptive_digital",
+    ":common",
+    ":gain_applier",
+    ":noise_level_estimator",
+    ":rnn_vad_with_level",
+    "..:aec_core",
+    "..:apm_logging",
+    "..:audio_frame_view",
+    "../../..:typedefs",
+    "../../../api:array_view",
+    "../../../common_audio",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:rtc_base_approved",
+    "../../../rtc_base:safe_minmax",
+    "../agc:level_estimation",
+    "../vad",
+  ]
+}
+
 rtc_source_set("adaptive_digital") {
  sources = [
    "adaptive_agc.cc",
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc
@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+
+// Builds the estimator. |apm_data_dumper| is forwarded to the underlying
+// AdaptiveModeLevelEstimator; the target level starts at kDefaultLevelDbfs.
+AdaptiveModeLevelEstimatorAgc::AdaptiveModeLevelEstimatorAgc(
+    ApmDataDumper* apm_data_dumper)
+    : level_estimator_(apm_data_dumper) {
+  set_target_level_dbfs(kDefaultLevelDbfs);
+}
+
+// Runs the AGC2 VAD on one frame and feeds the result to the speech level
+// estimator.
+// |audio| must be mono; in a multi-channel stream, provide the first (usually
+// left) channel. |length| is the number of samples in |audio|.
+// |sample_rate_hz| is not used by this implementation.
+void AdaptiveModeLevelEstimatorAgc::Process(const int16_t* audio,
+                                            size_t length,
+                                            int sample_rate_hz) {
+  // The VAD consumes float samples through an AudioFrameView<const float>, so
+  // widen the int16 input first.
+  std::vector<float> float_audio_frame(audio, audio + length);
+  // data() is well defined for an empty frame, whereas &v[0] is not.
+  const float* const first_channel = float_audio_frame.data();
+  AudioFrameView<const float> frame_view(&first_channel, 1 /* num channels */,
+                                         length);
+  const auto vad_prob = agc2_vad_.AnalyzeFrame(frame_view);
+  latest_voice_probability_ = vad_prob.speech_probability;
+  // Only frames above the speech threshold count towards the time that
+  // GetRmsErrorDb() requires before it reports an estimate.
+  if (latest_voice_probability_ > kVadConfidenceThreshold) {
+    time_in_ms_since_last_estimate_ += kFrameDurationMs;
+  }
+  level_estimator_.UpdateEstimation(vad_prob);
+}
+
+// Reports the gap between the target level and the latest speech level
+// estimate, in dB (target minus estimate, rounded with floor(x + 0.5)).
+// Returns true when an update is available. Returns false, leaving |error|
+// untouched, until more than kTimeUntilConfidentMs of speech has accumulated
+// since the previous reported estimate; the caller should then take no action.
+bool AdaptiveModeLevelEstimatorAgc::GetRmsErrorDb(int* error) {
+  const bool update_available =
+      time_in_ms_since_last_estimate_ > kTimeUntilConfidentMs;
+  if (update_available) {
+    const auto level_gap_db =
+        target_level_dbfs() - level_estimator_.LatestLevelEstimate();
+    *error = std::floor(level_gap_db + 0.5f);
+    // Start a fresh accumulation window for the next report.
+    time_in_ms_since_last_estimate_ = 0;
+  }
+  return update_available;
+}
+
+// Resets the underlying speech level estimator.
+// NOTE(review): the accumulated speech time and the cached voice probability
+// are not cleared here - confirm that this is intended.
+void AdaptiveModeLevelEstimatorAgc::Reset() {
+  level_estimator_.Reset();
+}
+
+// Returns the speech probability the VAD reported for the most recently
+// processed frame (0 if no frame has been processed yet).
+float AdaptiveModeLevelEstimatorAgc::voice_probability() const {
+  return latest_voice_probability_;
+}
+
+}  // namespace webrtc
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h
@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
+
+#include <vector>
+
+#include "modules/audio_processing/agc/agc.h"
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
+#include "modules/audio_processing/agc2/vad_with_level.h"
+
+namespace webrtc {
+// Agc implementation that derives its level error from the AGC2
+// AdaptiveModeLevelEstimator, driven by the AGC2 VAD (VadWithLevel).
+class AdaptiveModeLevelEstimatorAgc : public Agc {
+ public:
+  // |apm_data_dumper| is handed to the level estimator.
+  explicit AdaptiveModeLevelEstimatorAgc(ApmDataDumper* apm_data_dumper);
+
+  // |audio| must be mono; in a multi-channel stream, provide the first (usually
+  // left) channel.
+  void Process(const int16_t* audio,
+               size_t length,
+               int sample_rate_hz) override;
+
+  // Retrieves the difference between the target RMS level and the current
+  // signal RMS level in dB. Returns true if an update is available and false
+  // otherwise, in which case |error| should be ignored and no action taken.
+  bool GetRmsErrorDb(int* error) override;
+  // Resets the level estimator.
+  void Reset() override;
+
+  // Speech probability of the last frame passed to Process().
+  float voice_probability() const override;
+
+ private:
+  // Amount of speech (ms) that must be exceeded since the last reported
+  // estimate before GetRmsErrorDb() reports a new one.
+  static constexpr int kTimeUntilConfidentMs = 700;
+  // Target level set in the constructor.
+  static constexpr int kDefaultLevelDbfs = 0;
+  // Speech time accumulated by Process(); zeroed when GetRmsErrorDb() reports.
+  int32_t time_in_ms_since_last_estimate_ = 0;
+  AdaptiveModeLevelEstimator level_estimator_;
+  VadWithLevel agc2_vad_;
+  // Updated from the VAD result on every Process() call.
+  float latest_voice_probability_ = 0.f;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@ -38,9 +38,9 @@ constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
 // This parameter must be tuned together with the noise estimator.
 constexpr float kMaxNoiseLevelDbfs = -50.f;

-// Used in the Level Estimator for deciding when to update the speech
-// level estimate. Also used in the adaptive digital gain applier to
-// decide when to allow target gain reduction.
+// This is the threshold for speech. Speech frames are used for updating the
+// speech level, measuring the amount of speech, and deciding when to allow
+// target gain reduction.
 constexpr float kVadConfidenceThreshold = 0.4f;

 // The amount of 'memory' of the Level Estimator. Decides leak factors.