AGC2-fixed-digital: Level Estimator

This CL adds the Level Estimator of the new gain controller. The Level
Estimator divides a 10 ms input frame into kSubFramesInFrame=20 sub-frames,
takes the maximal absolute sample value in every sub-frame, and then applies
attack/decay smoothing. The smoothed values are the final level estimates.

The results will be used together with the InterpolatedGainCurve (see CL
https://webrtc-review.googlesource.com/c/src/+/51920). For every level
estimate value, a gain is looked up with
InterpolatedGainCurve::LookUpGainToApply and then applied to the signal.
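
A minimal sketch of that flow, for orientation only (not part of this CL):
InterpolatedGainCurve lives in the referenced CL, only the LookUpGainToApply
name is taken from there, and its exact signature as well as the stepwise
per-sub-frame gain application are assumptions here.

#include <array>
#include <cstddef>

#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include "modules/audio_processing/include/audio_frame_view.h"

namespace webrtc {

// Sketch: estimate levels, look up one gain per sub-frame, scale the samples.
template <typename GainCurve>
void ApplyFixedDigitalGain(FixedDigitalLevelEstimator* level_estimator,
                           const GainCurve& gain_curve,
                           AudioFrameView<float> frame) {
  // 20 level estimates per 10 ms frame, in the same FloatS16 scale as the input.
  const std::array<float, kSubFramesInFrame> level =
      level_estimator->ComputeLevel(frame);
  // The estimator DCHECKs that the frame length is a multiple of kSubFramesInFrame.
  const size_t samples_per_sub_frame =
      frame.samples_per_channel() / kSubFramesInFrame;
  for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
    // Assumed signature: float LookUpGainToApply(float level).
    const float gain = gain_curve.LookUpGainToApply(level[sub_frame]);
    for (size_t ch = 0; ch < frame.num_channels(); ++ch) {
      auto channel = frame.channel(ch);
      for (size_t i = 0; i < samples_per_sub_frame; ++i) {
        // Simplified: a real gain applier would interpolate the gain between
        // sub-frames instead of applying it stepwise.
        channel[sub_frame * samples_per_sub_frame + i] *= gain;
      }
    }
  }
}

}  // namespace webrtc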

Bug: webrtc:7949
Change-Id: I2b4b3894a3e945d3dd916ce516c79abacb2b18b1
Reviewed-on: https://webrtc-review.googlesource.com/52381
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22054}
Author: Alex Loiko
Date: 2018-02-16 12:39:00 +01:00
Committed by: Commit Bot
parent e551dddf8b
commit 153f11e1b4
10 changed files with 419 additions and 1 deletion

View File

@@ -549,6 +549,7 @@ if (rtc_include_tests) {
"agc/loudness_histogram_unittest.cc",
"agc/mock_agc.h",
"audio_buffer_unittest.cc",
"audio_frame_view_unittest.cc",
"beamformer/array_util_unittest.cc",
"beamformer/complex_matrix_unittest.cc",
"beamformer/covariance_matrix_generator_unittest.cc",
@@ -557,6 +558,7 @@ if (rtc_include_tests) {
"beamformer/mock_nonlinear_beamformer.h",
"config_unittest.cc",
"echo_cancellation_impl_unittest.cc",
"gain_controller2_unittest.cc",
"splitting_filter_unittest.cc",
"test/fake_recording_device_unittest.cc",
"transient/dyadic_decimator_unittest.cc",
@@ -576,6 +578,7 @@ if (rtc_include_tests) {
":aec_core",
":analog_mic_simulation",
":apm_logging",
":audio_frame_view",
":audio_processing",
":audioproc_test_utils",
":mocks",
@@ -677,7 +680,6 @@ if (rtc_include_tests) {
"echo_detector/moving_max_unittest.cc",
"echo_detector/normalized_covariance_estimator_unittest.cc",
"gain_control_unittest.cc",
"gain_controller2_unittest.cc",
"level_controller/level_controller_unittest.cc",
"level_estimator_unittest.cc",
"low_cut_filter_unittest.cc",

View File

@@ -11,6 +11,8 @@ import("../../../webrtc.gni")
rtc_source_set("agc2") {
sources = [
"agc2_common.h",
"fixed_digital_level_estimator.cc",
"fixed_digital_level_estimator.h",
"fixed_gain_controller.cc",
"fixed_gain_controller.h",
]
@@ -32,6 +34,8 @@ rtc_source_set("fixed_digital_unittests") {
configs += [ "..:apm_debug_dump" ]
sources = [
"agc2_testing_common.h",
"fixed_digital_level_estimator_unittest.cc",
"fixed_gain_controller_unittest.cc",
"vector_float_frame.cc",
"vector_float_frame.h",
@@ -41,6 +45,7 @@ rtc_source_set("fixed_digital_unittests") {
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
"../../../common_audio",
"../../../rtc_base:rtc_base_tests_utils",
]
}

View File

@@ -11,11 +11,26 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_
#include "rtc_base/basictypes.h"
namespace webrtc {
constexpr float kMinSampleValue = -32768.f;
constexpr float kMaxSampleValue = 32767.f;
constexpr size_t kFrameDurationMs = 10;
constexpr size_t kSubFramesInFrame = 20;
constexpr float kAttackFilterConstant = 0.f;
constexpr size_t kMaximalNumberOfSamplesPerChannel = 480;
// This is computed from kDecayMs by
// 10 ** (-1/20 * subframe_duration / kDecayMs).
// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
// kDecayMs is defined in agc2_testing_common.h
constexpr float kDecayFilterConstant = 0.9998848773724686f;
// TODO(aleloi): add the other constants as more AGC2 components are
// added.
} // namespace webrtc
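
As a quick check of the constant above (a standalone sketch, not part of the
CL), the formula from the comment can be evaluated directly with
kFrameDurationMs = 10, kSubFramesInFrame = 20 (a 0.5 ms sub-frame) and
kDecayMs = 500 from agc2_testing_common.h:

#include <cmath>
#include <cstdio>

int main() {
  const double sub_frame_duration_ms = 10.0 / 20.0;  // kFrameDurationMs / kSubFramesInFrame.
  const double decay_ms = 500.0;                     // kDecayMs.
  const double decay_constant =
      std::pow(10.0, -1.0 / 20.0 * sub_frame_duration_ms / decay_ms);
  std::printf("%.16f\n", decay_constant);  // ~0.99988488, matching kDecayFilterConstant.
  return 0;
}

Equivalently, the smoothed level decays by 1 dB per kDecayMs milliseconds of
silence, which is what the TimeToDecrease* tests in
fixed_digital_level_estimator_unittest.cc below verify.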

View File

@@ -0,0 +1,21 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
namespace webrtc {
// Level Estimator test params.
constexpr float kDecayMs = 500.f;
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_

View File

@@ -0,0 +1,100 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include <algorithm>
#include <cmath>
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
namespace webrtc {
FixedDigitalLevelEstimator::FixedDigitalLevelEstimator(
size_t sample_rate_hz,
ApmDataDumper* apm_data_dumper)
: apm_data_dumper_(apm_data_dumper) {
SetSampleRate(sample_rate_hz);
CheckParameterCombination();
RTC_DCHECK(apm_data_dumper_);
apm_data_dumper_->DumpRaw("agc2_level_estimator_samplerate", sample_rate_hz);
}
void FixedDigitalLevelEstimator::CheckParameterCombination() {
RTC_DCHECK_GT(samples_in_frame_, 0);
RTC_DCHECK_LE(kSubFramesInFrame, samples_in_frame_);
RTC_DCHECK_EQ(samples_in_frame_ % kSubFramesInFrame, 0);
RTC_DCHECK_GT(samples_in_sub_frame_, 1);
}
std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel(
const AudioFrameView<const float>& float_frame) {
RTC_DCHECK_GT(float_frame.num_channels(), 0);
RTC_DCHECK_EQ(float_frame.samples_per_channel(), samples_in_frame_);
// Compute max envelope without smoothing.
std::array<float, kSubFramesInFrame> envelope{};
for (size_t channel_idx = 0; channel_idx < float_frame.num_channels();
++channel_idx) {
const auto channel = float_frame.channel(channel_idx);
for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
for (size_t sample_in_sub_frame = 0;
sample_in_sub_frame < samples_in_sub_frame_; ++sample_in_sub_frame) {
envelope[sub_frame] =
std::max(envelope[sub_frame],
std::abs(channel[sub_frame * samples_in_sub_frame_ +
sample_in_sub_frame]));
}
}
}
// Make sure envelope increases happen one step earlier so that the
// corresponding *gain decrease* doesn't miss a sudden signal
// increase due to interpolation.
for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame - 1; ++sub_frame) {
if (envelope[sub_frame] < envelope[sub_frame + 1]) {
envelope[sub_frame] = envelope[sub_frame + 1];
}
}
// Add attack / decay smoothing.
for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
const float envelope_value = envelope[sub_frame];
if (envelope_value > filter_state_level_) {
envelope[sub_frame] = envelope_value * (1 - kAttackFilterConstant) +
filter_state_level_ * kAttackFilterConstant;
} else {
envelope[sub_frame] = envelope_value * (1 - kDecayFilterConstant) +
filter_state_level_ * kDecayFilterConstant;
}
filter_state_level_ = envelope[sub_frame];
// Dump data for debug.
RTC_DCHECK(apm_data_dumper_);
const auto channel = float_frame.channel(0);
apm_data_dumper_->DumpRaw("agc2_level_estimator_samples",
samples_in_sub_frame_,
&channel[sub_frame * samples_in_sub_frame_]);
apm_data_dumper_->DumpRaw("agc2_level_estimator_level",
envelope[sub_frame]);
}
return envelope;
}
void FixedDigitalLevelEstimator::SetSampleRate(size_t sample_rate_hz) {
samples_in_frame_ = rtc::CheckedDivExact(sample_rate_hz * kFrameDurationMs,
static_cast<size_t>(1000));
samples_in_sub_frame_ =
rtc::CheckedDivExact(samples_in_frame_, kSubFramesInFrame);
CheckParameterCombination();
}
} // namespace webrtc

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
#include <array>
#include <vector>
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "rtc_base/constructormagic.h"
namespace webrtc {
class ApmDataDumper;
// Produces a smooth signal level estimate from an input audio
// stream. The estimate smoothing is done through exponential
// filtering.
class FixedDigitalLevelEstimator {
public:
FixedDigitalLevelEstimator(size_t sample_rate_hz,
ApmDataDumper* apm_data_dumper);
// The input is assumed to be in FloatS16 format. Scaled input will
// produce similarly scaled output. A frame of
// length kFrameDurationMs=10 ms produces kSubFramesInFrame=20 level
// estimates in the same scale.
std::array<float, kSubFramesInFrame> ComputeLevel(
const AudioFrameView<const float>& float_frame);
// Rate may be changed at any time (but not concurrently) from the
// value passed to the constructor. The class is not thread safe.
void SetSampleRate(size_t sample_rate_hz);
private:
void CheckParameterCombination();
ApmDataDumper* const apm_data_dumper_;
float filter_state_level_ = 0.f;
size_t samples_in_frame_;
size_t samples_in_sub_frame_;
RTC_DISALLOW_COPY_AND_ASSIGN(FixedDigitalLevelEstimator);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
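
For reference, a minimal usage sketch of the API above (assumptions: 48 kHz
mono input already in FloatS16 scale; the ApmDataDumper instance id 0 is
arbitrary):

#include <array>

#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"

int main() {
  webrtc::ApmDataDumper data_dumper(0);
  webrtc::FixedDigitalLevelEstimator estimator(48000, &data_dumper);
  // One 10 ms mono frame at 48 kHz: 480 samples, FloatS16 scale.
  std::array<float, 480> samples{};
  float* channels[] = {samples.data()};
  const webrtc::AudioFrameView<const float> frame(channels, 1, samples.size());
  const std::array<float, webrtc::kSubFramesInFrame> level =
      estimator.ComputeLevel(frame);  // 20 sub-frame level estimates.
  // With an all-zero frame and no prior state, all estimates are 0.
  static_cast<void>(level);
  return 0;
}

SetSampleRate() can be called later to switch rates; subsequent frames must
then match the new rate, since ComputeLevel checks the frame length against it.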

View File

@@ -0,0 +1,158 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include <limits>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/agc2_testing_common.h"
#include "modules/audio_processing/agc2/vector_float_frame.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"
namespace webrtc {
namespace {
constexpr float kInputLevel = 10000.f;
// Run audio at specified settings through the level estimator, and
// verify that the output level falls within the bounds.
void TestLevelEstimator(int sample_rate_hz,
int num_channels,
float input_level_linear_scale,
float expected_min,
float expected_max) {
ApmDataDumper apm_data_dumper(0);
FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
const VectorFloatFrame vectors_with_float_frame(
num_channels, rtc::CheckedDivExact(sample_rate_hz, 100),
input_level_linear_scale);
for (int i = 0; i < 500; ++i) {
const auto level = level_estimator.ComputeLevel(
vectors_with_float_frame.float_frame_view());
// Give the estimator some time to ramp up.
if (i < 50) {
continue;
}
for (const auto& x : level) {
EXPECT_LE(expected_min, x);
EXPECT_LE(x, expected_max);
}
}
}
// Returns time it takes for the level estimator to decrease its level
// estimate by 'level_reduction_db'.
float TimeMsToDecreaseLevel(int sample_rate_hz,
int num_channels,
float input_level_db,
float level_reduction_db) {
const float input_level = DbfsToFloatS16(input_level_db);
RTC_DCHECK_GT(level_reduction_db, 0);
const VectorFloatFrame vectors_with_float_frame(
num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), input_level);
ApmDataDumper apm_data_dumper(0);
FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
// Give the LevelEstimator plenty of time to ramp up and stabilize
float last_level = 0.f;
for (int i = 0; i < 500; ++i) {
const auto level_envelope = level_estimator.ComputeLevel(
vectors_with_float_frame.float_frame_view());
last_level = *level_envelope.rbegin();
}
// Set input to 0.
VectorFloatFrame vectors_with_zero_float_frame(
num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), 0);
const float reduced_level_linear =
DbfsToFloatS16(input_level_db - level_reduction_db);
int sub_frames_until_level_reduction = 0;
while (last_level > reduced_level_linear) {
const auto level_envelope = level_estimator.ComputeLevel(
vectors_with_zero_float_frame.float_frame_view());
for (const auto& v : level_envelope) {
EXPECT_LT(v, last_level);
sub_frames_until_level_reduction++;
last_level = v;
if (last_level <= reduced_level_linear) {
break;
}
}
}
return static_cast<float>(sub_frames_until_level_reduction) *
kFrameDurationMs / kSubFramesInFrame;
}
} // namespace
TEST(AutomaticGainController2LevelEstimator, EstimatorShouldNotCrash) {
TestLevelEstimator(8000, 1, 0, std::numeric_limits<float>::lowest(),
std::numeric_limits<float>::max());
}
TEST(AutomaticGainController2LevelEstimator,
EstimatorShouldEstimateConstantLevel) {
TestLevelEstimator(10000, 1, kInputLevel, kInputLevel * 0.99,
kInputLevel * 1.01);
}
TEST(AutomaticGainController2LevelEstimator,
EstimatorShouldEstimateConstantLevelForManyChannels) {
constexpr size_t num_channels = 10;
TestLevelEstimator(20000, num_channels, kInputLevel, kInputLevel * 0.99,
kInputLevel * 1.01);
}
TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForLowLevel) {
constexpr float kLevelReductionDb = 25;
constexpr float kInitialLowLevel = -40;
constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;
const float time_to_decrease =
TimeMsToDecreaseLevel(22000, 1, kInitialLowLevel, kLevelReductionDb);
EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
}
TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForFullScaleLevel) {
constexpr float kLevelReductionDb = 25;
constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;
const float time_to_decrease =
TimeMsToDecreaseLevel(26000, 1, 0, kLevelReductionDb);
EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
}
TEST(AutomaticGainController2LevelEstimator,
TimeToDecreaseForMultipleChannels) {
constexpr float kLevelReductionDb = 25;
constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;
constexpr size_t kNumChannels = 10;
const float time_to_decrease =
TimeMsToDecreaseLevel(28000, kNumChannels, 0, kLevelReductionDb);
EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
}
} // namespace webrtc

View File

@@ -25,6 +25,9 @@ class VectorFloatFrame {
int samples_per_channel,
float start_value);
const AudioFrameView<float>& float_frame_view() { return float_frame_view_; }
AudioFrameView<const float> float_frame_view() const {
return float_frame_view_;
}
~VectorFloatFrame();

View File

@@ -0,0 +1,49 @@
/*
* Copyright 2018 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/audio_buffer.h"
#include "test/gtest.h"
TEST(AudioFrameTest, ConstructFromAudioBuffer) {
constexpr int kSampleRateHz = 48000;
constexpr int kNumChannels = 2;
constexpr float kFloatConstant = 1272.f;
constexpr float kIntConstant = 17252;
const webrtc::StreamConfig stream_config(kSampleRateHz, kNumChannels, false);
webrtc::AudioBuffer buffer(
stream_config.num_frames(), stream_config.num_channels(),
stream_config.num_frames(), stream_config.num_channels(),
stream_config.num_frames());
AudioFrameView<float> non_const_view(
buffer.channels_f(), buffer.num_channels(), buffer.num_frames());
// Modification is allowed.
non_const_view.channel(0)[0] = kFloatConstant;
EXPECT_EQ(buffer.channels_f()[0][0], kFloatConstant);
AudioFrameView<const float> const_view(
buffer.channels_f(), buffer.num_channels(), buffer.num_frames());
// Modification is not allowed.
// const_view.channel(0)[0] = kFloatConstant;
// Assignment is allowed.
AudioFrameView<const float> other_const_view = non_const_view;
static_cast<void>(other_const_view);
// But not the other way.
// non_const_view = other_const_view;
AudioFrameView<int16_t> non_const_int16_view(
buffer.channels(), buffer.num_channels(), buffer.num_frames());
non_const_int16_view.channel(0)[0] = kIntConstant;
EXPECT_EQ(buffer.channels()[0][0], kIntConstant);
}

View File

@@ -27,6 +27,14 @@ class AudioFrameView {
num_channels_(num_channels),
channel_size_(channel_size) {}
// Implicit cast to allow converting Frame<float> to
// Frame<const float>.
template <class U>
AudioFrameView(AudioFrameView<U> other)
: audio_samples_(other.data()),
num_channels_(other.num_channels()),
channel_size_(other.samples_per_channel()) {}
AudioFrameView() = delete;
size_t num_channels() const { return num_channels_; }
@@ -45,6 +53,8 @@ class AudioFrameView {
return rtc::ArrayView<const T>(audio_samples_[idx], channel_size_);
}
T* const* data() { return audio_samples_; }
private:
T* const* audio_samples_;
size_t num_channels_;