AGC2-fixed-digital: Level Estimator

This CL adds the Level Estimator of the new gain controller. The Level
Estimator divides a 10 ms input frame into kSubFramesInFrame=20 sub-frames,
takes the maximal absolute sample value in every sub-frame, and then applies
attack/decay smoothing. The smoothed values are the final level estimates.

The results will be used together with the InterpolatedGainCurve (see CL
https://webrtc-review.googlesource.com/c/src/+/51920). For every level
estimate value, a gain is looked up with
InterpolatedGainCurve::LookUpGainToApply and then applied to the signal.
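
A minimal sketch of that flow, for orientation only (not part of this CL):
InterpolatedGainCurve lives in the referenced CL, only the LookUpGainToApply
name is taken from there, and its exact signature as well as the stepwise
per-sub-frame gain application are assumptions here.

#include <array>
#include <cstddef>

#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include "modules/audio_processing/include/audio_frame_view.h"

namespace webrtc {

// Sketch: estimate levels, look up one gain per sub-frame, scale the samples.
template <typename GainCurve>
void ApplyFixedDigitalGain(FixedDigitalLevelEstimator* level_estimator,
                           const GainCurve& gain_curve,
                           AudioFrameView<float> frame) {
  // 20 level estimates per 10 ms frame, in the same FloatS16 scale as the input.
  const std::array<float, kSubFramesInFrame> level =
      level_estimator->ComputeLevel(frame);
  // The estimator DCHECKs that the frame length is a multiple of kSubFramesInFrame.
  const size_t samples_per_sub_frame =
      frame.samples_per_channel() / kSubFramesInFrame;
  for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
    // Assumed signature: float LookUpGainToApply(float level).
    const float gain = gain_curve.LookUpGainToApply(level[sub_frame]);
    for (size_t ch = 0; ch < frame.num_channels(); ++ch) {
      auto channel = frame.channel(ch);
      for (size_t i = 0; i < samples_per_sub_frame; ++i) {
        // Simplified: a real gain applier would interpolate the gain between
        // sub-frames instead of applying it stepwise.
        channel[sub_frame * samples_per_sub_frame + i] *= gain;
      }
    }
  }
}

}  // namespace webrtc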

Bug: webrtc:7949
Change-Id: I2b4b3894a3e945d3dd916ce516c79abacb2b18b1
Reviewed-on: https://webrtc-review.googlesource.com/52381
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22054}
Author: Alex Loiko
Date: 2018-02-16 12:39:00 +01:00
Committed by: Commit Bot
parent e551dddf8b
commit 153f11e1b4
10 changed files with 419 additions and 1 deletion

View File

@@ -549,6 +549,7 @@ if (rtc_include_tests) {
"agc/loudness_histogram_unittest.cc",
"agc/mock_agc.h",
"audio_buffer_unittest.cc",
"audio_frame_view_unittest.cc",
"beamformer/array_util_unittest.cc",
"beamformer/complex_matrix_unittest.cc",
"beamformer/covariance_matrix_generator_unittest.cc",
@@ -557,6 +558,7 @@ if (rtc_include_tests) {
"beamformer/mock_nonlinear_beamformer.h",
"config_unittest.cc",
"echo_cancellation_impl_unittest.cc",
"gain_controller2_unittest.cc",
"splitting_filter_unittest.cc",
"test/fake_recording_device_unittest.cc",
"transient/dyadic_decimator_unittest.cc",
@@ -576,6 +578,7 @@ if (rtc_include_tests) {
":aec_core",
":analog_mic_simulation",
":apm_logging",
":audio_frame_view",
":audio_processing",
":audioproc_test_utils",
":mocks",
@@ -677,7 +680,6 @@ if (rtc_include_tests) {
"echo_detector/moving_max_unittest.cc",
"echo_detector/normalized_covariance_estimator_unittest.cc",
"gain_control_unittest.cc",
"gain_controller2_unittest.cc",
"level_controller/level_controller_unittest.cc",
"level_estimator_unittest.cc",
"low_cut_filter_unittest.cc",

View File

@@ -11,6 +11,8 @@ import("../../../webrtc.gni")
rtc_source_set("agc2") {
sources = [
"agc2_common.h",
"fixed_digital_level_estimator.cc",
"fixed_digital_level_estimator.h",
"fixed_gain_controller.cc",
"fixed_gain_controller.h",
]
@@ -32,6 +34,8 @@ rtc_source_set("fixed_digital_unittests") {
configs += [ "..:apm_debug_dump" ]
sources = [
"agc2_testing_common.h",
"fixed_digital_level_estimator_unittest.cc",
"fixed_gain_controller_unittest.cc",
"vector_float_frame.cc",
"vector_float_frame.h",
@@ -41,6 +45,7 @@ rtc_source_set("fixed_digital_unittests") {
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
"../../../common_audio",
"../../../rtc_base:rtc_base_tests_utils",
]
}

View File

@@ -11,11 +11,26 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_
#include "rtc_base/basictypes.h"
namespace webrtc {
constexpr float kMinSampleValue = -32768.f;
constexpr float kMaxSampleValue = 32767.f;
constexpr size_t kFrameDurationMs = 10;
constexpr size_t kSubFramesInFrame = 20;
constexpr float kAttackFilterConstant = 0.f;
constexpr size_t kMaximalNumberOfSamplesPerChannel = 480;
// This is computed from kDecayMs by
// 10 ** (-1/20 * subframe_duration / kDecayMs).
// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
// kDecayMs is defined in agc2_testing_common.h
constexpr float kDecayFilterConstant = 0.9998848773724686f;
// TODO(aleloi): add the other constants as more AGC2 components are
// added.
} // namespace webrtc
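
As a quick check of the constant above (a standalone sketch, not part of the
CL), the formula from the comment can be evaluated directly with
kFrameDurationMs = 10, kSubFramesInFrame = 20 (a 0.5 ms sub-frame) and
kDecayMs = 500 from agc2_testing_common.h:

#include <cmath>
#include <cstdio>

int main() {
  const double sub_frame_duration_ms = 10.0 / 20.0;  // kFrameDurationMs / kSubFramesInFrame.
  const double decay_ms = 500.0;                     // kDecayMs.
  const double decay_constant =
      std::pow(10.0, -1.0 / 20.0 * sub_frame_duration_ms / decay_ms);
  std::printf("%.16f\n", decay_constant);  // ~0.99988488, matching kDecayFilterConstant.
  return 0;
}

Equivalently, the smoothed level decays by 1 dB per kDecayMs milliseconds of
silence, which is what the TimeToDecrease* tests in
fixed_digital_level_estimator_unittest.cc below verify.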

View File

@@ -0,0 +1,21 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_
namespace webrtc {
// Level Estimator test params.
constexpr float kDecayMs = 500.f;
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_AGC2_TESTING_COMMON_H_

View File

@@ -0,0 +1,100 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include <algorithm>
#include <cmath>
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
namespace webrtc {
FixedDigitalLevelEstimator::FixedDigitalLevelEstimator(
size_t sample_rate_hz,
ApmDataDumper* apm_data_dumper)
: apm_data_dumper_(apm_data_dumper) {
SetSampleRate(sample_rate_hz);
CheckParameterCombination();
RTC_DCHECK(apm_data_dumper_);
apm_data_dumper_->DumpRaw("agc2_level_estimator_samplerate", sample_rate_hz);
}
void FixedDigitalLevelEstimator::CheckParameterCombination() {
RTC_DCHECK_GT(samples_in_frame_, 0);
RTC_DCHECK_LE(kSubFramesInFrame, samples_in_frame_);
RTC_DCHECK_EQ(samples_in_frame_ % kSubFramesInFrame, 0);
RTC_DCHECK_GT(samples_in_sub_frame_, 1);
}
std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel(
const AudioFrameView<const float>& float_frame) {
RTC_DCHECK_GT(float_frame.num_channels(), 0);
RTC_DCHECK_EQ(float_frame.samples_per_channel(), samples_in_frame_);
// Compute max envelope without smoothing.
std::array<float, kSubFramesInFrame> envelope{};
for (size_t channel_idx = 0; channel_idx < float_frame.num_channels();
++channel_idx) {
const auto channel = float_frame.channel(channel_idx);
for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
for (size_t sample_in_sub_frame = 0;
sample_in_sub_frame < samples_in_sub_frame_; ++sample_in_sub_frame) {
envelope[sub_frame] =
std::max(envelope[sub_frame],
std::abs(channel[sub_frame * samples_in_sub_frame_ +
sample_in_sub_frame]));
}
}
}
// Make sure envelope increases happen one step earlier so that the
// corresponding *gain decrease* doesn't miss a sudden signal
// increase due to interpolation.
for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame - 1; ++sub_frame) {
if (envelope[sub_frame] < envelope[sub_frame + 1]) {
envelope[sub_frame] = envelope[sub_frame + 1];
}
}
// Add attack / decay smoothing.
for (size_t sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
const float envelope_value = envelope[sub_frame];
if (envelope_value > filter_state_level_) {
envelope[sub_frame] = envelope_value * (1 - kAttackFilterConstant) +
filter_state_level_ * kAttackFilterConstant;
} else {
envelope[sub_frame] = envelope_value * (1 - kDecayFilterConstant) +
filter_state_level_ * kDecayFilterConstant;
}
filter_state_level_ = envelope[sub_frame];
// Dump data for debug.
RTC_DCHECK(apm_data_dumper_);
const auto channel = float_frame.channel(0);
apm_data_dumper_->DumpRaw("agc2_level_estimator_samples",
samples_in_sub_frame_,
&channel[sub_frame * samples_in_sub_frame_]);
apm_data_dumper_->DumpRaw("agc2_level_estimator_level",
envelope[sub_frame]);
}
return envelope;
}
void FixedDigitalLevelEstimator::SetSampleRate(size_t sample_rate_hz) {
samples_in_frame_ = rtc::CheckedDivExact(sample_rate_hz * kFrameDurationMs,
static_cast<size_t>(1000));
samples_in_sub_frame_ =
rtc::CheckedDivExact(samples_in_frame_, kSubFramesInFrame);
CheckParameterCombination();
}
} // namespace webrtc

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
#include <array>
#include <vector>
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "rtc_base/constructormagic.h"
namespace webrtc {
class ApmDataDumper;
// Produces a smooth signal level estimate from an input audio
// stream. The estimate smoothing is done through exponential
// filtering.
class FixedDigitalLevelEstimator {
public:
FixedDigitalLevelEstimator(size_t sample_rate_hz,
ApmDataDumper* apm_data_dumper);
// The input is assumed to be in FloatS16 format. Scaled input will
// produce similarly scaled output. A frame of
// length kFrameDurationMs=10 ms produces kSubFramesInFrame=20 level
// estimates in the same scale.
std::array<float, kSubFramesInFrame> ComputeLevel(
const AudioFrameView<const float>& float_frame);
// Rate may be changed at any time (but not concurrently) from the
// value passed to the constructor. The class is not thread safe.
void SetSampleRate(size_t sample_rate_hz);
private:
void CheckParameterCombination();
ApmDataDumper* const apm_data_dumper_;
float filter_state_level_ = 0.f;
size_t samples_in_frame_;
size_t samples_in_sub_frame_;
RTC_DISALLOW_COPY_AND_ASSIGN(FixedDigitalLevelEstimator);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_FIXED_DIGITAL_LEVEL_ESTIMATOR_H_
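
For reference, a minimal usage sketch of the API above (assumptions: 48 kHz
mono input already in FloatS16 scale; the ApmDataDumper instance id 0 is
arbitrary):

#include <array>

#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"

int main() {
  webrtc::ApmDataDumper data_dumper(0);
  webrtc::FixedDigitalLevelEstimator estimator(48000, &data_dumper);
  // One 10 ms mono frame at 48 kHz: 480 samples, FloatS16 scale.
  std::array<float, 480> samples{};
  float* channels[] = {samples.data()};
  const webrtc::AudioFrameView<const float> frame(channels, 1, samples.size());
  const std::array<float, webrtc::kSubFramesInFrame> level =
      estimator.ComputeLevel(frame);  // 20 sub-frame level estimates.
  // With an all-zero frame and no prior state, all estimates are 0.
  static_cast<void>(level);
  return 0;
}

SetSampleRate() can be called later to switch rates; subsequent frames must
then match the new rate, since ComputeLevel checks the frame length against it.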

View File

@@ -0,0 +1,158 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include <limits>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/agc2_testing_common.h"
#include "modules/audio_processing/agc2/vector_float_frame.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"
namespace webrtc {
namespace {
constexpr float kInputLevel = 10000.f;
// Run audio at specified settings through the level estimator, and
// verify that the output level falls within the bounds.
void TestLevelEstimator(int sample_rate_hz,
int num_channels,
float input_level_linear_scale,
float expected_min,
float expected_max) {
ApmDataDumper apm_data_dumper(0);
FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
const VectorFloatFrame vectors_with_float_frame(
num_channels, rtc::CheckedDivExact(sample_rate_hz, 100),
input_level_linear_scale);
for (int i = 0; i < 500; ++i) {
const auto level = level_estimator.ComputeLevel(
vectors_with_float_frame.float_frame_view());
// Give the estimator some time to ramp up.
if (i < 50) {
continue;
}
for (const auto& x : level) {
EXPECT_LE(expected_min, x);
EXPECT_LE(x, expected_max);
}
}
}
// Returns time it takes for the level estimator to decrease its level
// estimate by 'level_reduction_db'.
float TimeMsToDecreaseLevel(int sample_rate_hz,
int num_channels,
float input_level_db,
float level_reduction_db) {
const float input_level = DbfsToFloatS16(input_level_db);
RTC_DCHECK_GT(level_reduction_db, 0);
const VectorFloatFrame vectors_with_float_frame(
num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), input_level);
ApmDataDumper apm_data_dumper(0);
FixedDigitalLevelEstimator level_estimator(sample_rate_hz, &apm_data_dumper);
// Give the LevelEstimator plenty of time to ramp up and stabilize
float last_level = 0.f;
for (int i = 0; i < 500; ++i) {
const auto level_envelope = level_estimator.ComputeLevel(
vectors_with_float_frame.float_frame_view());
last_level = *level_envelope.rbegin();
}
// Set input to 0.
VectorFloatFrame vectors_with_zero_float_frame(
num_channels, rtc::CheckedDivExact(sample_rate_hz, 100), 0);
const float reduced_level_linear =
DbfsToFloatS16(input_level_db - level_reduction_db);
int sub_frames_until_level_reduction = 0;
while (last_level > reduced_level_linear) {
const auto level_envelope = level_estimator.ComputeLevel(
vectors_with_zero_float_frame.float_frame_view());
for (const auto& v : level_envelope) {
EXPECT_LT(v, last_level);
sub_frames_until_level_reduction++;
last_level = v;
if (last_level <= reduced_level_linear) {
break;
}
}
}
return static_cast<float>(sub_frames_until_level_reduction) *
kFrameDurationMs / kSubFramesInFrame;
}
} // namespace
TEST(AutomaticGainController2LevelEstimator, EstimatorShouldNotCrash) {
TestLevelEstimator(8000, 1, 0, std::numeric_limits<float>::lowest(),
std::numeric_limits<float>::max());
}
TEST(AutomaticGainController2LevelEstimator,
EstimatorShouldEstimateConstantLevel) {
TestLevelEstimator(10000, 1, kInputLevel, kInputLevel * 0.99,
kInputLevel * 1.01);
}
TEST(AutomaticGainController2LevelEstimator,
EstimatorShouldEstimateConstantLevelForManyChannels) {
constexpr size_t num_channels = 10;
TestLevelEstimator(20000, num_channels, kInputLevel, kInputLevel * 0.99,
kInputLevel * 1.01);
}
TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForLowLevel) {
constexpr float kLevelReductionDb = 25;
constexpr float kInitialLowLevel = -40;
constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;
const float time_to_decrease =
TimeMsToDecreaseLevel(22000, 1, kInitialLowLevel, kLevelReductionDb);
EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
}
TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForFullScaleLevel) {
constexpr float kLevelReductionDb = 25;
constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;
const float time_to_decrease =
TimeMsToDecreaseLevel(26000, 1, 0, kLevelReductionDb);
EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
}
TEST(AutomaticGainController2LevelEstimator,
TimeToDecreaseForMultipleChannels) {
constexpr float kLevelReductionDb = 25;
constexpr float kExpectedTime = kLevelReductionDb * kDecayMs;
constexpr size_t kNumChannels = 10;
const float time_to_decrease =
TimeMsToDecreaseLevel(28000, kNumChannels, 0, kLevelReductionDb);
EXPECT_LE(kExpectedTime * 0.9, time_to_decrease);
EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
}
} // namespace webrtc

View File

@@ -25,6 +25,9 @@ class VectorFloatFrame {
int samples_per_channel,
float start_value);
const AudioFrameView<float>& float_frame_view() { return float_frame_view_; }
AudioFrameView<const float> float_frame_view() const {
return float_frame_view_;
}
~VectorFloatFrame();

View File

@@ -0,0 +1,49 @@
/*
* Copyright 2018 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/audio_buffer.h"
#include "test/gtest.h"
TEST(AudioFrameTest, ConstructFromAudioBuffer) {
constexpr int kSampleRateHz = 48000;
constexpr int kNumChannels = 2;
constexpr float kFloatConstant = 1272.f;
constexpr float kIntConstant = 17252;
const webrtc::StreamConfig stream_config(kSampleRateHz, kNumChannels, false);
webrtc::AudioBuffer buffer(
stream_config.num_frames(), stream_config.num_channels(),
stream_config.num_frames(), stream_config.num_channels(),
stream_config.num_frames());
AudioFrameView<float> non_const_view(
buffer.channels_f(), buffer.num_channels(), buffer.num_frames());
// Modification is allowed.
non_const_view.channel(0)[0] = kFloatConstant;
EXPECT_EQ(buffer.channels_f()[0][0], kFloatConstant);
AudioFrameView<const float> const_view(
buffer.channels_f(), buffer.num_channels(), buffer.num_frames());
// Modification is not allowed.
// const_view.channel(0)[0] = kFloatConstant;
// Assignment is allowed.
AudioFrameView<const float> other_const_view = non_const_view;
static_cast<void>(other_const_view);
// But not the other way.
// non_const_view = other_const_view;
AudioFrameView<int16_t> non_const_int16_view(
buffer.channels(), buffer.num_channels(), buffer.num_frames());
non_const_int16_view.channel(0)[0] = kIntConstant;
EXPECT_EQ(buffer.channels()[0][0], kIntConstant);
}

View File

@@ -27,6 +27,14 @@ class AudioFrameView {
num_channels_(num_channels),
channel_size_(channel_size) {}
// Implicit cast to allow converting Frame<float> to
// Frame<const float>.
template <class U>
AudioFrameView(AudioFrameView<U> other)
: audio_samples_(other.data()),
num_channels_(other.num_channels()),
channel_size_(other.samples_per_channel()) {}
AudioFrameView() = delete;
size_t num_channels() const { return num_channels_; }
@@ -45,6 +53,8 @@ class AudioFrameView {
return rtc::ArrayView<const T>(audio_samples_[idx], channel_size_);
}
T* const* data() { return audio_samples_; }
private:
T* const* audio_samples_;
size_t num_channels_;