AGC2 AdaptiveDigitalGainApplier and AdaptiveAgc code improvements

This CL was written in preparation for the next CL in the chain and
it contains the following changes:
- SignalWithLevels -> AdaptiveDigitalGainApplier::FrameInfo
- Frame view removed from AdaptiveDigitalGainApplier::FrameInfo
- AdaptiveDigitalGainApplier::Process now gets side info as const& to
  avoid unnecessary copies
- AdaptiveAgc::Process: `last_audio_level` renamed to `limiter_envelope`
  to better reflect what the value actually is
- Missing class/method docstrings added

Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ie25dcd389d6eed74ea9a65f0720eeb8f20f0096b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186040
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32251}
This commit is contained in:
Alessio Bazzica
2020-09-30 13:07:57 +02:00
committed by Commit Bot
parent 8845f7e32b
commit d5e6f413ab
5 changed files with 115 additions and 123 deletions

View File

@ -16,6 +16,18 @@
#include "rtc_base/checks.h" #include "rtc_base/checks.h"
namespace webrtc { namespace webrtc {
namespace {
// Writes the per-frame side information held in `info` to `dumper`: first the
// three VAD values (speech probability, RMS level, peak level), then the noise
// level estimate and finally the limiter envelope level.
void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,
                   ApmDataDumper& dumper) {
  // Short alias for the VAD part of the frame info.
  const auto& vad = info.vad_result;
  dumper.DumpRaw("agc2_vad_probability", vad.speech_probability);
  dumper.DumpRaw("agc2_vad_rms_dbfs", vad.rms_dbfs);
  dumper.DumpRaw("agc2_vad_peak_dbfs", vad.peak_dbfs);

  dumper.DumpRaw("agc2_noise_estimate_dbfs", info.input_noise_level_dbfs);
  dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs);
}
} // namespace
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
: speech_level_estimator_(apm_data_dumper), : speech_level_estimator_(apm_data_dumper),
@ -40,37 +52,17 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
AdaptiveAgc::~AdaptiveAgc() = default; AdaptiveAgc::~AdaptiveAgc() = default;
void AdaptiveAgc::Process(AudioFrameView<float> float_frame, void AdaptiveAgc::Process(AudioFrameView<float> frame, float limiter_envelope) {
float last_audio_level) { AdaptiveDigitalGainApplier::FrameInfo info;
auto signal_with_levels = SignalWithLevels(float_frame); info.vad_result = vad_.AnalyzeFrame(frame);
signal_with_levels.vad_result = vad_.AnalyzeFrame(float_frame); speech_level_estimator_.Update(info.vad_result);
apm_data_dumper_->DumpRaw("agc2_vad_probability", info.input_level_dbfs = speech_level_estimator_.level_dbfs();
signal_with_levels.vad_result.speech_probability); info.input_noise_level_dbfs = noise_level_estimator_.Analyze(frame);
apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", info.limiter_envelope_dbfs =
signal_with_levels.vad_result.rms_dbfs); limiter_envelope > 0 ? FloatS16ToDbfs(limiter_envelope) : -90.f;
apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", info.estimate_is_confident = speech_level_estimator_.IsConfident();
signal_with_levels.vad_result.peak_dbfs); DumpDebugData(info, *apm_data_dumper_);
gain_applier_.Process(info, frame);
speech_level_estimator_.Update(signal_with_levels.vad_result);
signal_with_levels.input_level_dbfs = speech_level_estimator_.level_dbfs();
signal_with_levels.input_noise_level_dbfs =
noise_level_estimator_.Analyze(float_frame);
apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs",
signal_with_levels.input_noise_level_dbfs);
signal_with_levels.limiter_audio_level_dbfs =
last_audio_level > 0 ? FloatS16ToDbfs(last_audio_level) : -90.f;
apm_data_dumper_->DumpRaw("agc2_last_limiter_audio_level",
signal_with_levels.limiter_audio_level_dbfs);
signal_with_levels.estimate_is_confident =
speech_level_estimator_.IsConfident();
// The gain applier applies the gain.
gain_applier_.Process(signal_with_levels);
} }
void AdaptiveAgc::Reset() { void AdaptiveAgc::Reset() {

View File

@ -21,6 +21,8 @@
namespace webrtc { namespace webrtc {
class ApmDataDumper; class ApmDataDumper;
// Adaptive digital gain controller.
// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`.
class AdaptiveAgc { class AdaptiveAgc {
public: public:
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
@ -28,7 +30,10 @@ class AdaptiveAgc {
const AudioProcessing::Config::GainController2& config); const AudioProcessing::Config::GainController2& config);
~AdaptiveAgc(); ~AdaptiveAgc();
void Process(AudioFrameView<float> float_frame, float last_audio_level); // Analyzes `frame` and applies a digital adaptive gain to it. Takes into
// account the envelope measured by the limiter.
// TODO(crbug.com/webrtc/7494): Make the class depend on the limiter.
void Process(AudioFrameView<float> frame, float limiter_envelope);
void Reset(); void Reset();
private: private:

View File

@ -85,43 +85,40 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
} }
} // namespace } // namespace
SignalWithLevels::SignalWithLevels(AudioFrameView<float> float_frame)
: float_frame(float_frame) {}
SignalWithLevels::SignalWithLevels(const SignalWithLevels&) = default;
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper) ApmDataDumper* apm_data_dumper)
: gain_applier_(false, DbToRatio(last_gain_db_)), : apm_data_dumper_(apm_data_dumper),
apm_data_dumper_(apm_data_dumper) {} gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
calls_since_last_gain_log_(0),
gain_increase_allowed_(true),
last_gain_db_(kInitialAdaptiveDigitalGainDb) {}
void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) { void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
AudioFrameView<float> frame) {
RTC_DCHECK_GE(info.input_level_dbfs, -150.f);
RTC_DCHECK_GE(frame.num_channels(), 1);
RTC_DCHECK_GE(frame.samples_per_channel(), 1);
// Log every second.
calls_since_last_gain_log_++; calls_since_last_gain_log_++;
if (calls_since_last_gain_log_ == 100) { if (calls_since_last_gain_log_ == 100) {
calls_since_last_gain_log_ = 0; calls_since_last_gain_log_ = 0;
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied", RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1); last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel", RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
-signal_with_levels.input_noise_level_dbfs, 0, -info.input_noise_level_dbfs, 0, 100, 101);
100, 101);
} }
signal_with_levels.input_level_dbfs =
std::min(signal_with_levels.input_level_dbfs, 0.f);
RTC_DCHECK_GE(signal_with_levels.input_level_dbfs, -150.f);
RTC_DCHECK_GE(signal_with_levels.float_frame.num_channels(), 1);
RTC_DCHECK_GE(signal_with_levels.float_frame.samples_per_channel(), 1);
const float target_gain_db = LimitGainByLowConfidence( const float target_gain_db = LimitGainByLowConfidence(
LimitGainByNoise(ComputeGainDb(signal_with_levels.input_level_dbfs), LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
signal_with_levels.input_noise_level_dbfs, info.input_noise_level_dbfs, apm_data_dumper_),
apm_data_dumper_), last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);
last_gain_db_, signal_with_levels.limiter_audio_level_dbfs,
signal_with_levels.estimate_is_confident);
// Forbid increasing the gain when there is no speech. // Forbid increasing the gain when there is no speech.
gain_increase_allowed_ = signal_with_levels.vad_result.speech_probability > gain_increase_allowed_ =
kVadConfidenceThreshold; info.vad_result.speech_probability > kVadConfidenceThreshold;
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
target_gain_db, last_gain_db_, gain_increase_allowed_); target_gain_db, last_gain_db_, gain_increase_allowed_);
@ -137,7 +134,7 @@ void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) {
gain_applier_.SetGainFactor( gain_applier_.SetGainFactor(
DbToRatio(last_gain_db_ + gain_change_this_frame_db)); DbToRatio(last_gain_db_ + gain_change_this_frame_db));
} }
gain_applier_.ApplyGain(signal_with_levels.float_frame); gain_applier_.ApplyGain(frame);
// Remember that the gain has changed for the next iteration. // Remember that the gain has changed for the next iteration.
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;

View File

@ -11,7 +11,6 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ #ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ #define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/gain_applier.h" #include "modules/audio_processing/agc2/gain_applier.h"
#include "modules/audio_processing/agc2/vad_with_level.h" #include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_frame_view.h" #include "modules/audio_processing/include/audio_frame_view.h"
@ -20,36 +19,38 @@ namespace webrtc {
class ApmDataDumper; class ApmDataDumper;
struct SignalWithLevels { // Part of the adaptive digital controller that applies a digital adaptive gain.
SignalWithLevels(AudioFrameView<float> float_frame); // The gain is updated towards a target. The logic decides when gain updates are
SignalWithLevels(const SignalWithLevels&); // allowed, it controls the adaptation speed and caps the target based on the
// estimated noise level and the speech level estimate confidence.
float input_level_dbfs = -1.f;
float input_noise_level_dbfs = -1.f;
VadLevelAnalyzer::Result vad_result;
float limiter_audio_level_dbfs = -1.f;
bool estimate_is_confident = false;
AudioFrameView<float> float_frame;
};
class AdaptiveDigitalGainApplier { class AdaptiveDigitalGainApplier {
public: public:
// Information about a frame to process.
// The members have no default initializers, so a default-constructed
// `FrameInfo` must have every field assigned before it is passed to
// `Process()`.
struct FrameInfo {
float input_level_dbfs; // Estimated speech plus noise level.
float input_noise_level_dbfs; // Estimated noise level.
VadLevelAnalyzer::Result vad_result; // VAD output: speech probability, RMS and peak levels.
float limiter_envelope_dbfs; // Envelope level from the limiter.
bool estimate_is_confident; // Whether the speech level estimate is confident.
};
explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper); explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
// Decide what gain to apply. AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
void Process(SignalWithLevels signal_with_levels); AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
delete;
// Analyzes `info`, updates the digital gain and applies it to `frame`.
void Process(const FrameInfo& info, AudioFrameView<float> frame);
private: private:
float last_gain_db_ = kInitialAdaptiveDigitalGainDb; ApmDataDumper* const apm_data_dumper_;
GainApplier gain_applier_; GainApplier gain_applier_;
int calls_since_last_gain_log_ = 0;
// For some combinations of noise and speech probability, increasing int calls_since_last_gain_log_;
// the level is not allowed. Since we may get VAD results in bursts, bool gain_increase_allowed_;
// we keep track of this variable until the next VAD results come float last_gain_db_;
// in.
bool gain_increase_allowed_ = true;
ApmDataDumper* apm_data_dumper_ = nullptr;
}; };
} // namespace webrtc } // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ #endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_

View File

@ -20,6 +20,7 @@
namespace webrtc { namespace webrtc {
namespace { namespace {
// Constants used in place of estimated noise levels. // Constants used in place of estimated noise levels.
constexpr float kNoNoiseDbfs = -90.f; constexpr float kNoNoiseDbfs = -90.f;
constexpr float kWithNoiseDbfs = -20.f; constexpr float kWithNoiseDbfs = -20.f;
@ -36,31 +37,25 @@ float RunOnConstantLevel(int num_iterations,
for (int i = 0; i < num_iterations; ++i) { for (int i = 0; i < num_iterations; ++i) {
VectorFloatFrame fake_audio(1, 1, 1.f); VectorFloatFrame fake_audio(1, 1, 1.f);
SignalWithLevels signal_with_levels(fake_audio.float_frame_view()); AdaptiveDigitalGainApplier::FrameInfo info;
signal_with_levels.input_level_dbfs = input_level_dbfs; info.input_level_dbfs = input_level_dbfs;
signal_with_levels.input_noise_level_dbfs = kNoNoiseDbfs; info.input_noise_level_dbfs = kNoNoiseDbfs;
signal_with_levels.vad_result = vad_level; info.vad_result = vad_level;
signal_with_levels.limiter_audio_level_dbfs = -2.f; info.limiter_envelope_dbfs = -2.f;
signal_with_levels.estimate_is_confident = true; info.estimate_is_confident = true;
gain_applier->Process(signal_with_levels); gain_applier->Process(info, fake_audio.float_frame_view());
gain_linear = fake_audio.float_frame_view().channel(0)[0]; gain_linear = fake_audio.float_frame_view().channel(0)[0];
} }
return gain_linear; return gain_linear;
} }
// Returns 'SignalWithLevels' for typical GainApplier behavior. Voice on, no // Voice on, no noise, low limiter, confident level.
// noise, low limiter, confident level. constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
SignalWithLevels TestSignalWithLevel(AudioFrameView<float> float_frame) { /*input_level_dbfs=*/-1.f,
SignalWithLevels result(float_frame); /*input_noise_level_dbfs=*/kNoNoiseDbfs,
result.input_level_dbfs = -1; /*vad_result=*/kVadSpeech,
result.input_noise_level_dbfs = kNoNoiseDbfs; /*limiter_envelope_dbfs=*/-2.f,
result.vad_result = kVadSpeech; /*estimate_is_confident=*/true};
result.estimate_is_confident = true;
result.limiter_audio_level_dbfs = -2.f;
return result;
}
} // namespace
TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
ApmDataDumper apm_data_dumper(0); ApmDataDumper apm_data_dumper(0);
@ -68,9 +63,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
// Make one call with reasonable audio level values and settings. // Make one call with reasonable audio level values and settings.
VectorFloatFrame fake_audio(2, 480, 10000.f); VectorFloatFrame fake_audio(2, 480, 10000.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
signal_with_level.input_level_dbfs = -5.0; info.input_level_dbfs = -5.f;
// Pass the adjusted `info` so that the -5 dBFS input level set above is actually exercised.
gain_applier.Process(signal_with_level); gain_applier.Process(info, fake_audio.float_frame_view());
} }
// Check that the output is -kHeadroom dBFS. // Check that the output is -kHeadroom dBFS.
@ -120,9 +115,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
for (int i = 0; i < kNumFramesToAdapt; ++i) { for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i); SCOPED_TRACE(i);
VectorFloatFrame fake_audio(1, 1, 1.f); VectorFloatFrame fake_audio(1, 1, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
signal_with_level.input_level_dbfs = initial_level_dbfs; info.input_level_dbfs = initial_level_dbfs;
gain_applier.Process(signal_with_level); gain_applier.Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear); kMaxChangePerFrameLinear);
@ -133,9 +128,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
for (int i = 0; i < kNumFramesToAdapt; ++i) { for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i); SCOPED_TRACE(i);
VectorFloatFrame fake_audio(1, 1, 1.f); VectorFloatFrame fake_audio(1, 1, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
signal_with_level.input_level_dbfs = 0.f; info.input_level_dbfs = 0.f;
gain_applier.Process(signal_with_level); gain_applier.Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear); kMaxChangePerFrameLinear);
@ -151,9 +146,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
constexpr int num_samples = 480; constexpr int num_samples = 480;
VectorFloatFrame fake_audio(1, num_samples, 1.f); VectorFloatFrame fake_audio(1, num_samples, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
signal_with_level.input_level_dbfs = initial_level_dbfs; info.input_level_dbfs = initial_level_dbfs;
gain_applier.Process(signal_with_level); gain_applier.Process(info, fake_audio.float_frame_view());
float maximal_difference = 0.f; float maximal_difference = 0.f;
float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb); float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
for (const auto& x : fake_audio.float_frame_view().channel(0)) { for (const auto& x : fake_audio.float_frame_view().channel(0)) {
@ -182,10 +177,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
for (int i = 0; i < num_initial_frames + num_frames; ++i) { for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(1, num_samples, 1.f); VectorFloatFrame fake_audio(1, num_samples, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
signal_with_level.input_level_dbfs = initial_level_dbfs; info.input_level_dbfs = initial_level_dbfs;
signal_with_level.input_noise_level_dbfs = kWithNoiseDbfs; info.input_noise_level_dbfs = kWithNoiseDbfs;
gain_applier.Process(signal_with_level); gain_applier.Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain. // Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) { if (i > num_initial_frames) {
@ -204,9 +199,9 @@ TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {
// Make one call with positive audio level values and settings. // Make one call with positive audio level values and settings.
VectorFloatFrame fake_audio(2, 480, 10000.f); VectorFloatFrame fake_audio(2, 480, 10000.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
signal_with_level.input_level_dbfs = 5.0f; info.input_level_dbfs = 5.f;
gain_applier.Process(signal_with_level); gain_applier.Process(info, fake_audio.float_frame_view());
} }
TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
@ -223,11 +218,11 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
for (int i = 0; i < num_initial_frames + num_frames; ++i) { for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(1, num_samples, 1.f); VectorFloatFrame fake_audio(1, num_samples, 1.f);
auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
signal_with_level.input_level_dbfs = initial_level_dbfs; info.input_level_dbfs = initial_level_dbfs;
signal_with_level.limiter_audio_level_dbfs = 1.f; info.limiter_envelope_dbfs = 1.f;
signal_with_level.estimate_is_confident = false; info.estimate_is_confident = false;
gain_applier.Process(signal_with_level); gain_applier.Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain. // Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) { if (i > num_initial_frames) {
@ -239,4 +234,6 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
} }
} }
} }
} // namespace
} // namespace webrtc } // namespace webrtc