Adaptive digital gain applier
AGC2 component that computes and applies the digital gain. The gain is computed from an estimated speech and noise level. This component decides how fast the gain can change and what it should be. Bug: webrtc:7494 Change-Id: If55b6e5c765f958e433730cd9e3b2b93c14a7910 Reviewed-on: https://webrtc-review.googlesource.com/64985 Commit-Queue: Alex Loiko <aleloi@webrtc.org> Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/master@{#22741}
This commit is contained in:
@ -159,6 +159,7 @@ rtc_source_set("adaptive_digital_unittests") {
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
|
||||
sources = [
|
||||
"adaptive_digital_gain_applier_unittest.cc",
|
||||
"adaptive_mode_level_estimator_unittest.cc",
|
||||
"saturation_protector_unittest.cc",
|
||||
]
|
||||
|
||||
@ -30,6 +30,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
|
||||
AdaptiveAgc::~AdaptiveAgc() = default;
|
||||
|
||||
void AdaptiveAgc::Process(AudioFrameView<float> float_frame) {
|
||||
// TODO(webrtc:7494): Remove this loop. Remove the vectors from
|
||||
// VadWithData after we move to a VAD that outputs an estimate every
|
||||
// kFrameDurationMs ms.
|
||||
//
|
||||
// Some VADs are 'bursty'. They return several estimates for some
|
||||
// frames, and no estimates for other frames. We want to feed all to
|
||||
// the level estimator, but only care about the last level it
|
||||
|
||||
@ -15,8 +15,97 @@
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// This function maps input level to desired applied gain. We want to
|
||||
// boost the signal so that peaks are at -kHeadroomDbfs. We can't
|
||||
// apply more than kMaxGainDb gain.
|
||||
float ComputeGainDb(float input_level_dbfs) {
|
||||
// If the level is very low, boost it as much as we can.
|
||||
if (input_level_dbfs < -(kHeadroomDbfs + kMaxGainDb)) {
|
||||
return kMaxGainDb;
|
||||
}
|
||||
|
||||
// We expect to end up here most of the time: the level is below
|
||||
// -headroom, but we can boost it to -headroom.
|
||||
if (input_level_dbfs < -kHeadroomDbfs) {
|
||||
return -kHeadroomDbfs - input_level_dbfs;
|
||||
}
|
||||
|
||||
// Otherwise, the level is too high and we can't boost. The
|
||||
// LevelEstimator is responsible for not reporting bogus gain
|
||||
// values.
|
||||
RTC_DCHECK_LE(input_level_dbfs, 0.f);
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
// We require 'gain + noise_level <= kMaxNoiseLevelDbfs'.
|
||||
float LimitGainByNoise(float target_gain,
|
||||
float input_noise_level_dbfs,
|
||||
ApmDataDumper* apm_data_dumper) {
|
||||
const float noise_headroom_db = kMaxNoiseLevelDbfs - input_noise_level_dbfs;
|
||||
apm_data_dumper->DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
|
||||
return std::min(target_gain, std::max(noise_headroom_db, 0.f));
|
||||
}
|
||||
|
||||
// Computes how the gain should change during this frame.
|
||||
// Return the gain difference in db to 'last_gain_db'.
|
||||
float ComputeGainChangeThisFrameDb(float target_gain_db,
|
||||
float last_gain_db,
|
||||
bool gain_increase_allowed) {
|
||||
float target_gain_difference_db = target_gain_db - last_gain_db;
|
||||
if (!gain_increase_allowed) {
|
||||
target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
|
||||
}
|
||||
|
||||
return rtc::SafeClamp(target_gain_difference_db, -kMaxGainChangePerFrameDb,
|
||||
kMaxGainChangePerFrameDb);
|
||||
}
|
||||
|
||||
// Returns true when the gain factor is so close to 1 that it would
|
||||
// not affect int16 samples.
|
||||
bool GainCloseToOne(float gain_factor) {
|
||||
return 1.f - 1.f / kMaxFloatS16Value <= gain_factor &&
|
||||
gain_factor <= 1.f + 1.f / kMaxFloatS16Value;
|
||||
}
|
||||
|
||||
void ApplyGainWithRamping(float last_gain_linear,
|
||||
float gain_at_end_of_frame_linear,
|
||||
AudioFrameView<float> float_frame) {
|
||||
// Do not modify the signal when input is loud.
|
||||
if (last_gain_linear == gain_at_end_of_frame_linear &&
|
||||
GainCloseToOne(gain_at_end_of_frame_linear)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// A typical case: gain is constant and different from 1.
|
||||
if (last_gain_linear == gain_at_end_of_frame_linear) {
|
||||
for (size_t k = 0; k < float_frame.num_channels(); ++k) {
|
||||
rtc::ArrayView<float> channel_view = float_frame.channel(k);
|
||||
for (auto& sample : channel_view) {
|
||||
sample *= gain_at_end_of_frame_linear;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// The gain changes. We have to change slowly to avoid discontinuities.
|
||||
const size_t samples = float_frame.samples_per_channel();
|
||||
RTC_DCHECK_GT(samples, 0);
|
||||
const float increment =
|
||||
(gain_at_end_of_frame_linear - last_gain_linear) / samples;
|
||||
float gain = last_gain_linear;
|
||||
for (size_t i = 0; i < samples; ++i) {
|
||||
for (size_t ch = 0; ch < float_frame.num_channels(); ++ch) {
|
||||
float_frame.channel(ch)[i] *= gain;
|
||||
}
|
||||
gain += increment;
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
|
||||
ApmDataDumper* apm_data_dumper)
|
||||
@ -32,9 +121,46 @@ void AdaptiveDigitalGainApplier::Process(
|
||||
RTC_DCHECK_GE(float_frame.num_channels(), 1);
|
||||
RTC_DCHECK_GE(float_frame.samples_per_channel(), 1);
|
||||
|
||||
// TODO(webrtc:8925): compute and apply the gain.
|
||||
const float target_gain_db =
|
||||
LimitGainByNoise(ComputeGainDb(input_level_dbfs), input_noise_level_dbfs,
|
||||
apm_data_dumper_);
|
||||
|
||||
last_gain_db_ = 1.f;
|
||||
// TODO(webrtc:7494): Remove this construct. Remove the vectors from
|
||||
// VadWithData after we move to a VAD that outputs an estimate every
|
||||
// kFrameDurationMs ms.
|
||||
//
|
||||
// Forbid increasing the gain when there is no speech. For some
|
||||
// VADs, 'vad_results' has either many or 0 results. If there are 0
|
||||
// results, keep the old flag. If there are many results, and at
|
||||
// least one is confident speech, we allow attenuation.
|
||||
if (!vad_results.empty()) {
|
||||
gain_increase_allowed_ = std::all_of(
|
||||
vad_results.begin(), vad_results.end(),
|
||||
[](const VadWithLevel::LevelAndProbability& vad_result) {
|
||||
return vad_result.speech_probability > kVadConfidenceThreshold;
|
||||
});
|
||||
}
|
||||
|
||||
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
|
||||
target_gain_db, last_gain_db_, gain_increase_allowed_);
|
||||
|
||||
apm_data_dumper_->DumpRaw("agc2_want_to_change_by_db",
|
||||
target_gain_db - last_gain_db_);
|
||||
apm_data_dumper_->DumpRaw("agc2_will_change_by_db",
|
||||
gain_change_this_frame_db);
|
||||
|
||||
// Optimization: avoid calling math functions if gain does not
|
||||
// change.
|
||||
const float gain_at_end_of_frame =
|
||||
gain_change_this_frame_db == 0.f
|
||||
? last_gain_linear_
|
||||
: DbToRatio(last_gain_db_ + gain_change_this_frame_db);
|
||||
|
||||
ApplyGainWithRamping(last_gain_linear_, gain_at_end_of_frame, float_frame);
|
||||
|
||||
// Remember that the gain has changed for the next iteration.
|
||||
last_gain_linear_ = gain_at_end_of_frame;
|
||||
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
|
||||
apm_data_dumper_->DumpRaw("agc2_applied_gain_db", last_gain_db_);
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
||||
@ -29,7 +29,17 @@ class AdaptiveDigitalGainApplier {
|
||||
AudioFrameView<float> float_frame);
|
||||
|
||||
private:
|
||||
// Keep track of current gain for ramping up and down and
|
||||
// logging. This member variable is redundant together with
|
||||
// last_gain_db_. Both are kept as an optimization.
|
||||
float last_gain_linear_ = 1.f;
|
||||
float last_gain_db_ = 0.f;
|
||||
|
||||
// For some combinations of noise and speech probability, increasing
|
||||
// the level is not allowed. Since we may get VAD results in bursts,
|
||||
// we keep track of this variable until the next VAD results come
|
||||
// in.
|
||||
bool gain_increase_allowed_ = true;
|
||||
ApmDataDumper* apm_data_dumper_ = nullptr;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
@ -0,0 +1,185 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/agc2/vector_float_frame.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/gunit.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
// Constants used in place of estimated noise levels.
|
||||
constexpr float kNoNoiseDbfs = -90.f;
|
||||
constexpr float kWithNoiseDbfs = -20.f;
|
||||
|
||||
// Runs gain applier and returns the applied gain in linear scale.
|
||||
float RunOnConstantLevel(int num_iterations,
|
||||
VadWithLevel::LevelAndProbability vad_data,
|
||||
float input_level_dbfs,
|
||||
AdaptiveDigitalGainApplier* gain_applier) {
|
||||
float gain_linear = 0.f;
|
||||
|
||||
for (int i = 0; i < num_iterations; ++i) {
|
||||
VectorFloatFrame fake_audio(1, 1, 1.f);
|
||||
gain_applier->Process(
|
||||
input_level_dbfs, kNoNoiseDbfs,
|
||||
rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&vad_data, 1),
|
||||
fake_audio.float_frame_view());
|
||||
gain_linear = fake_audio.float_frame_view().channel(0)[0];
|
||||
}
|
||||
return gain_linear;
|
||||
}
|
||||
|
||||
// Fake VAD result passed to the gain applier by the tests below.
// NOTE(review): the constructor's argument order is not visible here;
// presumably the first value (1.f) is the speech probability, i.e. confident
// speech - confirm against VadWithLevel::LevelAndProbability.
constexpr VadWithLevel::LevelAndProbability kVadSpeech(1.f, -20.f, 0.f);
|
||||
} // namespace
|
||||
|
||||
// Smoke test: a single Process() call on a stereo frame with plausible
// level values must complete.
TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
  static_assert(
      std::is_trivially_destructible<VadWithLevel::LevelAndProbability>::value,
      "");

  constexpr float kInputLevelDbfs = -5.f;
  const rtc::ArrayView<const VadWithLevel::LevelAndProbability> vad_view(
      &kVadSpeech, 1);

  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);

  // Two channels, 480 samples each, all samples equal to 10000.
  VectorFloatFrame fake_audio(2, 480, 10000.f);
  gain_applier.Process(kInputLevelDbfs, kNoNoiseDbfs, vad_view,
                       fake_audio.float_frame_view());
}
|
||||
|
||||
// Check that the output is -kHeadroom dBFS.
|
||||
TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
|
||||
|
||||
constexpr float initial_level_dbfs = -5.f;
|
||||
|
||||
const float applied_gain =
|
||||
RunOnConstantLevel(200, kVadSpeech, initial_level_dbfs, &gain_applier);
|
||||
|
||||
EXPECT_NEAR(applied_gain, DbToRatio(-kHeadroomDbfs - initial_level_dbfs),
|
||||
0.1f);
|
||||
}
|
||||
|
||||
// Check that the output is -kHeadroom dBFS
|
||||
TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
|
||||
|
||||
constexpr float initial_level_dbfs = -kHeadroomDbfs - kMaxGainDb - 10.f;
|
||||
// A few extra frames for safety.
|
||||
constexpr int kNumFramesToAdapt =
|
||||
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
|
||||
|
||||
const float applied_gain = RunOnConstantLevel(
|
||||
kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &gain_applier);
|
||||
EXPECT_NEAR(applied_gain, DbToRatio(kMaxGainDb), 0.1f);
|
||||
|
||||
const float applied_gain_db = 20.f * std::log10(applied_gain);
|
||||
EXPECT_NEAR(applied_gain_db, kMaxGainDb, 0.1f);
|
||||
}
|
||||
|
||||
// The applied gain must not move by more than the per-frame limit from one
// frame to the next, neither while it ramps up on a quiet input nor while it
// ramps back down on a loud one.
TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);

  constexpr float initial_level_dbfs = -25.f;
  // Frames needed to cover the distance from the initial level to 0 dBFS at
  // the maximal rate, plus a few extra frames for safety. The level is
  // negative, so it is negated to get a positive frame count; without the
  // negation the count is negative and the loops below never execute.
  constexpr int kNumFramesToAdapt =
      static_cast<int>(-initial_level_dbfs / kMaxGainChangePerFrameDb) + 10;
  static_assert(kNumFramesToAdapt > 0,
                "The test must process at least one frame.");

  const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);

  float last_gain_linear = 1.f;

  // Processes 'kNumFramesToAdapt' single-sample frames at 'level_dbfs' and
  // checks the frame-to-frame gain difference after each of them.
  const auto process_and_check = [&](float level_dbfs) {
    for (int i = 0; i < kNumFramesToAdapt; ++i) {
      SCOPED_TRACE(i);
      // A unit sample, so that its value after Process() is the gain.
      VectorFloatFrame fake_audio(1, 1, 1.f);
      gain_applier.Process(
          level_dbfs, kNoNoiseDbfs,
          rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&kVadSpeech,
                                                                  1),
          fake_audio.float_frame_view());
      const float current_gain_linear =
          fake_audio.float_frame_view().channel(0)[0];
      EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
                kMaxChangePerFrameLinear);
      last_gain_linear = current_gain_linear;
    }
  };

  // Quiet input: the gain increases.
  process_and_check(initial_level_dbfs);

  // Check that the same is true when gain decreases as well.
  process_and_check(0.f);
}
|
||||
|
||||
// Within a single frame the gain must change gradually: the step between
// neighbouring samples stays within the per-frame limit divided by the
// number of samples.
TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
  constexpr float initial_level_dbfs = -25.f;
  constexpr int num_samples = 480;

  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);

  // All samples are 1, so after processing each sample equals its gain.
  VectorFloatFrame fake_audio(1, num_samples, 1.f);
  gain_applier.Process(
      initial_level_dbfs, kNoNoiseDbfs,
      rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&kVadSpeech, 1),
      fake_audio.float_frame_view());

  // Largest step between consecutive samples; the first sample is compared
  // against the unity gain the applier starts from.
  const auto processed_samples = fake_audio.float_frame_view().channel(0);
  float largest_step = 0.f;
  float previous_sample = 1.f;
  for (const float sample : processed_samples) {
    largest_step = std::max(largest_step, std::abs(sample - previous_sample));
    previous_sample = sample;
  }

  const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);
  const float kMaxChangePerSample = kMaxChangePerFrameLinear / num_samples;

  EXPECT_LE(largest_step, kMaxChangePerSample);
}
|
||||
|
||||
// With a noise level above kMaxNoiseLevelDbfs there is no noise headroom, so
// the gain must stay at one even though the speech level is low.
TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
  constexpr float initial_level_dbfs = -25.f;
  constexpr int num_samples = 480;
  constexpr int num_frames = 100;

  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);

  ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";

  const rtc::ArrayView<const VadWithLevel::LevelAndProbability> vad_view(
      &kVadSpeech, 1);

  for (int frame = 0; frame < num_frames; ++frame) {
    // All samples are 1, so the largest output sample is the largest gain.
    VectorFloatFrame fake_audio(1, num_samples, 1.f);
    gain_applier.Process(initial_level_dbfs, kWithNoiseDbfs, vad_view,
                         fake_audio.float_frame_view());

    const auto processed_samples = fake_audio.float_frame_view().channel(0);
    const float maximal_ratio =
        *std::max_element(processed_samples.begin(), processed_samples.end());

    EXPECT_NEAR(maximal_ratio, 1.f, 0.001f);
  }
}
|
||||
} // namespace webrtc
|
||||
@ -27,8 +27,19 @@ constexpr size_t kMaximalNumberOfSamplesPerChannel = 480;
|
||||
|
||||
constexpr float kAttackFilterConstant = 0.f;
|
||||
|
||||
// Adaptive digital gain applier settings below.
|
||||
// Upper bound on how fast the applied gain may change, in dB per second.
constexpr float kMaxGainChangePerSecondDb = 3.f;
|
||||
// The same bound expressed per frame: the per-second rate scaled by the
// frame duration in milliseconds.
constexpr float kMaxGainChangePerFrameDb =
|
||||
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
|
||||
// The gain applier aims to bring the estimated input level to -kHeadroomDbfs.
constexpr float kHeadroomDbfs = 1.f;
|
||||
// Largest gain, in dB, that the gain applier will target.
constexpr float kMaxGainDb = 30.f;
|
||||
|
||||
// This parameter must be tuned together with the noise estimator.
// The gain applier limits the gain so that gain plus noise level does not
// exceed this value.
|
||||
constexpr float kMaxNoiseLevelDbfs = -50.f;
|
||||
|
||||
// Used in the Level Estimator for deciding when to update the speech
// level estimate. Also used in the adaptive digital gain applier to
// decide when to allow the gain to increase.
|
||||
constexpr float kVadConfidenceThreshold = 0.9f;
|
||||
|
||||
// The amount of 'memory' of the Level Estimator. Decides leak factors.
|
||||
|
||||
Reference in New Issue
Block a user