AGC2 AdaptiveDigitalGainApplier and AdaptiveAgc code improvements

This CL was written in preparation for the next CL in the chain and
it contains the following changes:
- SignalWithLevels -> AdaptiveDigitalGainApplier::FrameInfo
- Frame view removed from AdaptiveDigitalGainApplier::FrameInfo
- AdaptiveDigitalGainApplier::Process now gets side info as const& to
  avoid unnecessary copies
- AdaptiveAgc::Process: `last_audio_level` renamed to `limiter_envelope`
  to better reflect what that actually is
- Missing class/method docstrings added

Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ie25dcd389d6eed74ea9a65f0720eeb8f20f0096b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186040
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32251}
This commit is contained in:
Alessio Bazzica
2020-09-30 13:07:57 +02:00
committed by Commit Bot
parent 8845f7e32b
commit d5e6f413ab
5 changed files with 115 additions and 123 deletions

View File

@ -16,6 +16,18 @@
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Writes the per-frame side information held in `info` to `dumper`: the three
// VAD result fields, the noise level estimate and the limiter envelope level.
void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,
                   ApmDataDumper& dumper) {
  // VAD result fields.
  const auto& vad = info.vad_result;
  dumper.DumpRaw("agc2_vad_probability", vad.speech_probability);
  dumper.DumpRaw("agc2_vad_rms_dbfs", vad.rms_dbfs);
  dumper.DumpRaw("agc2_vad_peak_dbfs", vad.peak_dbfs);

  // Level estimates.
  dumper.DumpRaw("agc2_noise_estimate_dbfs", info.input_noise_level_dbfs);
  dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs);
}
} // namespace
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
: speech_level_estimator_(apm_data_dumper),
@ -40,37 +52,17 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
AdaptiveAgc::~AdaptiveAgc() = default;
void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
float last_audio_level) {
auto signal_with_levels = SignalWithLevels(float_frame);
signal_with_levels.vad_result = vad_.AnalyzeFrame(float_frame);
apm_data_dumper_->DumpRaw("agc2_vad_probability",
signal_with_levels.vad_result.speech_probability);
apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs",
signal_with_levels.vad_result.rms_dbfs);
apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs",
signal_with_levels.vad_result.peak_dbfs);
speech_level_estimator_.Update(signal_with_levels.vad_result);
signal_with_levels.input_level_dbfs = speech_level_estimator_.level_dbfs();
signal_with_levels.input_noise_level_dbfs =
noise_level_estimator_.Analyze(float_frame);
apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs",
signal_with_levels.input_noise_level_dbfs);
signal_with_levels.limiter_audio_level_dbfs =
last_audio_level > 0 ? FloatS16ToDbfs(last_audio_level) : -90.f;
apm_data_dumper_->DumpRaw("agc2_last_limiter_audio_level",
signal_with_levels.limiter_audio_level_dbfs);
signal_with_levels.estimate_is_confident =
speech_level_estimator_.IsConfident();
// The gain applier applies the gain.
gain_applier_.Process(signal_with_levels);
// Collects the side information for `frame` (VAD result, speech level, noise
// level, limiter envelope level, estimate confidence), dumps it and hands it
// together with `frame` to the gain applier.
void AdaptiveAgc::Process(AudioFrameView<float> frame, float limiter_envelope) {
  AdaptiveDigitalGainApplier::FrameInfo info;

  // The VAD runs first because the speech level estimator is updated with its
  // result before the level is read back.
  info.vad_result = vad_.AnalyzeFrame(frame);
  speech_level_estimator_.Update(info.vad_result);
  info.input_level_dbfs = speech_level_estimator_.level_dbfs();

  info.input_noise_level_dbfs = noise_level_estimator_.Analyze(frame);

  // A non-positive envelope is not converted; -90 dBFS is used instead.
  if (limiter_envelope > 0) {
    info.limiter_envelope_dbfs = FloatS16ToDbfs(limiter_envelope);
  } else {
    info.limiter_envelope_dbfs = -90.f;
  }

  info.estimate_is_confident = speech_level_estimator_.IsConfident();

  DumpDebugData(info, *apm_data_dumper_);
  gain_applier_.Process(info, frame);
}
void AdaptiveAgc::Reset() {

View File

@ -21,6 +21,8 @@
namespace webrtc {
class ApmDataDumper;
// Adaptive digital gain controller.
// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`.
class AdaptiveAgc {
public:
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
@ -28,7 +30,10 @@ class AdaptiveAgc {
const AudioProcessing::Config::GainController2& config);
~AdaptiveAgc();
void Process(AudioFrameView<float> float_frame, float last_audio_level);
// Analyzes `frame` and applies a digital adaptive gain to it. Takes into
// account the envelope measured by the limiter.
// TODO(crbug.com/webrtc/7494): Make the class depend on the limiter.
void Process(AudioFrameView<float> frame, float limiter_envelope);
void Reset();
private:

View File

@ -85,43 +85,40 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
}
} // namespace
SignalWithLevels::SignalWithLevels(AudioFrameView<float> float_frame)
: float_frame(float_frame) {}
SignalWithLevels::SignalWithLevels(const SignalWithLevels&) = default;
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper)
: gain_applier_(false, DbToRatio(last_gain_db_)),
apm_data_dumper_(apm_data_dumper) {}
: apm_data_dumper_(apm_data_dumper),
gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
calls_since_last_gain_log_(0),
gain_increase_allowed_(true),
last_gain_db_(kInitialAdaptiveDigitalGainDb) {}
void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) {
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
AudioFrameView<float> frame) {
RTC_DCHECK_GE(info.input_level_dbfs, -150.f);
RTC_DCHECK_GE(frame.num_channels(), 1);
RTC_DCHECK_GE(frame.samples_per_channel(), 1);
// Log every second.
calls_since_last_gain_log_++;
if (calls_since_last_gain_log_ == 100) {
calls_since_last_gain_log_ = 0;
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
-signal_with_levels.input_noise_level_dbfs, 0,
100, 101);
-info.input_noise_level_dbfs, 0, 100, 101);
}
signal_with_levels.input_level_dbfs =
std::min(signal_with_levels.input_level_dbfs, 0.f);
RTC_DCHECK_GE(signal_with_levels.input_level_dbfs, -150.f);
RTC_DCHECK_GE(signal_with_levels.float_frame.num_channels(), 1);
RTC_DCHECK_GE(signal_with_levels.float_frame.samples_per_channel(), 1);
const float target_gain_db = LimitGainByLowConfidence(
LimitGainByNoise(ComputeGainDb(signal_with_levels.input_level_dbfs),
signal_with_levels.input_noise_level_dbfs,
apm_data_dumper_),
last_gain_db_, signal_with_levels.limiter_audio_level_dbfs,
signal_with_levels.estimate_is_confident);
LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
info.input_noise_level_dbfs, apm_data_dumper_),
last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);
// Forbid increasing the gain when there is no speech.
gain_increase_allowed_ = signal_with_levels.vad_result.speech_probability >
kVadConfidenceThreshold;
gain_increase_allowed_ =
info.vad_result.speech_probability > kVadConfidenceThreshold;
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
target_gain_db, last_gain_db_, gain_increase_allowed_);
@ -137,7 +134,7 @@ void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) {
gain_applier_.SetGainFactor(
DbToRatio(last_gain_db_ + gain_change_this_frame_db));
}
gain_applier_.ApplyGain(signal_with_levels.float_frame);
gain_applier_.ApplyGain(frame);
// Remember that the gain has changed for the next iteration.
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;

View File

@ -11,7 +11,6 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/gain_applier.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_frame_view.h"
@ -20,36 +19,38 @@ namespace webrtc {
class ApmDataDumper;
struct SignalWithLevels {
SignalWithLevels(AudioFrameView<float> float_frame);
SignalWithLevels(const SignalWithLevels&);
float input_level_dbfs = -1.f;
float input_noise_level_dbfs = -1.f;
VadLevelAnalyzer::Result vad_result;
float limiter_audio_level_dbfs = -1.f;
bool estimate_is_confident = false;
AudioFrameView<float> float_frame;
};
// Part of the adaptive digital controller that applies a digital adaptive gain.
// The gain is updated towards a target. The logic decides when gain updates are
// allowed, it controls the adaptation speed and caps the target based on the
// estimated noise level and the speech level estimate confidence.
class AdaptiveDigitalGainApplier {
public:
// Information about a frame to process.
// None of the members declares a default initializer here, so the `float` and
// `bool` fields of a default-initialized `FrameInfo` hold no defined value:
// assign every field before passing the object to `Process()`.
// NOTE(review): the declaration order below is relied upon wherever the struct
// is brace-initialized positionally - keep the two in sync when adding fields.
struct FrameInfo {
float input_level_dbfs; // Estimated speech plus noise level.
float input_noise_level_dbfs; // Estimated noise level.
VadLevelAnalyzer::Result vad_result;
float limiter_envelope_dbfs; // Envelope level from the limiter.
bool estimate_is_confident;
};
explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
// Decide what gain to apply.
void Process(SignalWithLevels signal_with_levels);
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
delete;
// Analyzes `info`, updates the digital gain and applies it to `frame`.
void Process(const FrameInfo& info, AudioFrameView<float> frame);
private:
float last_gain_db_ = kInitialAdaptiveDigitalGainDb;
ApmDataDumper* const apm_data_dumper_;
GainApplier gain_applier_;
int calls_since_last_gain_log_ = 0;
// For some combinations of noise and speech probability, increasing
// the level is not allowed. Since we may get VAD results in bursts,
// we keep track of this variable until the next VAD results come
// in.
bool gain_increase_allowed_ = true;
ApmDataDumper* apm_data_dumper_ = nullptr;
int calls_since_last_gain_log_;
bool gain_increase_allowed_;
float last_gain_db_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_

View File

@ -20,6 +20,7 @@
namespace webrtc {
namespace {
// Constants used in place of estimated noise levels.
constexpr float kNoNoiseDbfs = -90.f;
constexpr float kWithNoiseDbfs = -20.f;
@ -36,31 +37,25 @@ float RunOnConstantLevel(int num_iterations,
for (int i = 0; i < num_iterations; ++i) {
VectorFloatFrame fake_audio(1, 1, 1.f);
SignalWithLevels signal_with_levels(fake_audio.float_frame_view());
signal_with_levels.input_level_dbfs = input_level_dbfs;
signal_with_levels.input_noise_level_dbfs = kNoNoiseDbfs;
signal_with_levels.vad_result = vad_level;
signal_with_levels.limiter_audio_level_dbfs = -2.f;
signal_with_levels.estimate_is_confident = true;
gain_applier->Process(signal_with_levels);
AdaptiveDigitalGainApplier::FrameInfo info;
info.input_level_dbfs = input_level_dbfs;
info.input_noise_level_dbfs = kNoNoiseDbfs;
info.vad_result = vad_level;
info.limiter_envelope_dbfs = -2.f;
info.estimate_is_confident = true;
gain_applier->Process(info, fake_audio.float_frame_view());
gain_linear = fake_audio.float_frame_view().channel(0)[0];
}
return gain_linear;
}
// Returns 'SignalWithLevels' for typical GainApplier behavior. Voice on, no
// noise, low limiter, confident level.
SignalWithLevels TestSignalWithLevel(AudioFrameView<float> float_frame) {
SignalWithLevels result(float_frame);
result.input_level_dbfs = -1;
result.input_noise_level_dbfs = kNoNoiseDbfs;
result.vad_result = kVadSpeech;
result.estimate_is_confident = true;
result.limiter_audio_level_dbfs = -2.f;
return result;
}
} // namespace
// Baseline frame info shared by the tests: voice on, no noise, low limiter,
// confident level. Tests copy it and override only the fields they exercise.
// The initializers are positional; the inline names mirror the member order of
// `AdaptiveDigitalGainApplier::FrameInfo`.
constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
/*input_level_dbfs=*/-1.f,
/*input_noise_level_dbfs=*/kNoNoiseDbfs,
/*vad_result=*/kVadSpeech,
/*limiter_envelope_dbfs=*/-2.f,
/*estimate_is_confident=*/true};
TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
ApmDataDumper apm_data_dumper(0);
@ -68,9 +63,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
// Make one call with reasonable audio level values and settings.
VectorFloatFrame fake_audio(2, 480, 10000.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
signal_with_level.input_level_dbfs = -5.0;
gain_applier.Process(signal_with_level);
// Start from the baseline frame info and override the input level for this
// call. The adjusted copy `info` (not the unmodified `kFrameInfo`) is what must
// be passed on, otherwise the override above is dead code.
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -5.f;
gain_applier.Process(info, fake_audio.float_frame_view());
}
// Check that the output is -kHeadroom dBFS.
@ -120,9 +115,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame fake_audio(1, 1, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
signal_with_level.input_level_dbfs = initial_level_dbfs;
gain_applier.Process(signal_with_level);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
gain_applier.Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear);
@ -133,9 +128,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame fake_audio(1, 1, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
signal_with_level.input_level_dbfs = 0.f;
gain_applier.Process(signal_with_level);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = 0.f;
gain_applier.Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear);
@ -151,9 +146,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
constexpr int num_samples = 480;
VectorFloatFrame fake_audio(1, num_samples, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
signal_with_level.input_level_dbfs = initial_level_dbfs;
gain_applier.Process(signal_with_level);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
gain_applier.Process(info, fake_audio.float_frame_view());
float maximal_difference = 0.f;
float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
for (const auto& x : fake_audio.float_frame_view().channel(0)) {
@ -182,10 +177,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(1, num_samples, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
signal_with_level.input_level_dbfs = initial_level_dbfs;
signal_with_level.input_noise_level_dbfs = kWithNoiseDbfs;
gain_applier.Process(signal_with_level);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
info.input_noise_level_dbfs = kWithNoiseDbfs;
gain_applier.Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) {
@ -204,9 +199,9 @@ TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {
// Make one call with positive audio level values and settings.
VectorFloatFrame fake_audio(2, 480, 10000.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
signal_with_level.input_level_dbfs = 5.0f;
gain_applier.Process(signal_with_level);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = 5.f;
gain_applier.Process(info, fake_audio.float_frame_view());
}
TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
@ -223,11 +218,11 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(1, num_samples, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
signal_with_level.input_level_dbfs = initial_level_dbfs;
signal_with_level.limiter_audio_level_dbfs = 1.f;
signal_with_level.estimate_is_confident = false;
gain_applier.Process(signal_with_level);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
info.limiter_envelope_dbfs = 1.f;
info.estimate_is_confident = false;
gain_applier.Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) {
@ -239,4 +234,6 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
}
}
}
} // namespace
} // namespace webrtc