diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 21fe412aa6..3a0056cef9 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -16,6 +16,18 @@ #include "rtc_base/checks.h" namespace webrtc { +namespace { + +void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info, + ApmDataDumper& dumper) { + dumper.DumpRaw("agc2_vad_probability", info.vad_result.speech_probability); + dumper.DumpRaw("agc2_vad_rms_dbfs", info.vad_result.rms_dbfs); + dumper.DumpRaw("agc2_vad_peak_dbfs", info.vad_result.peak_dbfs); + dumper.DumpRaw("agc2_noise_estimate_dbfs", info.input_noise_level_dbfs); + dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs); +} + +} // namespace AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) : speech_level_estimator_(apm_data_dumper), @@ -40,37 +52,17 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, AdaptiveAgc::~AdaptiveAgc() = default; -void AdaptiveAgc::Process(AudioFrameView float_frame, - float last_audio_level) { - auto signal_with_levels = SignalWithLevels(float_frame); - signal_with_levels.vad_result = vad_.AnalyzeFrame(float_frame); - apm_data_dumper_->DumpRaw("agc2_vad_probability", - signal_with_levels.vad_result.speech_probability); - apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", - signal_with_levels.vad_result.rms_dbfs); - apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", - signal_with_levels.vad_result.peak_dbfs); - - speech_level_estimator_.Update(signal_with_levels.vad_result); - - signal_with_levels.input_level_dbfs = speech_level_estimator_.level_dbfs(); - - signal_with_levels.input_noise_level_dbfs = - noise_level_estimator_.Analyze(float_frame); - - apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", - signal_with_levels.input_noise_level_dbfs); - - signal_with_levels.limiter_audio_level_dbfs = - last_audio_level > 0 ? 
FloatS16ToDbfs(last_audio_level) : -90.f; - apm_data_dumper_->DumpRaw("agc2_last_limiter_audio_level", - signal_with_levels.limiter_audio_level_dbfs); - - signal_with_levels.estimate_is_confident = - speech_level_estimator_.IsConfident(); - - // The gain applier applies the gain. - gain_applier_.Process(signal_with_levels); +void AdaptiveAgc::Process(AudioFrameView frame, float limiter_envelope) { + AdaptiveDigitalGainApplier::FrameInfo info; + info.vad_result = vad_.AnalyzeFrame(frame); + speech_level_estimator_.Update(info.vad_result); + info.input_level_dbfs = speech_level_estimator_.level_dbfs(); + info.input_noise_level_dbfs = noise_level_estimator_.Analyze(frame); + info.limiter_envelope_dbfs = + limiter_envelope > 0 ? FloatS16ToDbfs(limiter_envelope) : -90.f; + info.estimate_is_confident = speech_level_estimator_.IsConfident(); + DumpDebugData(info, *apm_data_dumper_); + gain_applier_.Process(info, frame); } void AdaptiveAgc::Reset() { diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h index 82a3f0e633..e2decb8432 100644 --- a/modules/audio_processing/agc2/adaptive_agc.h +++ b/modules/audio_processing/agc2/adaptive_agc.h @@ -21,6 +21,8 @@ namespace webrtc { class ApmDataDumper; +// Adaptive digital gain controller. +// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`. class AdaptiveAgc { public: explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); @@ -28,7 +30,10 @@ class AdaptiveAgc { const AudioProcessing::Config::GainController2& config); ~AdaptiveAgc(); - void Process(AudioFrameView float_frame, float last_audio_level); + // Analyzes `frame` and applies a digital adaptive gain to it. Takes into + // account the envelope measured by the limiter. + // TODO(crbug.com/webrtc/7494): Make the class depend on the limiter. 
+ void Process(AudioFrameView frame, float limiter_envelope); void Reset(); private: diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc index 6ece83b239..14ca9853a8 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -85,43 +85,40 @@ float ComputeGainChangeThisFrameDb(float target_gain_db, } } // namespace -SignalWithLevels::SignalWithLevels(AudioFrameView float_frame) - : float_frame(float_frame) {} -SignalWithLevels::SignalWithLevels(const SignalWithLevels&) = default; - AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( ApmDataDumper* apm_data_dumper) - : gain_applier_(false, DbToRatio(last_gain_db_)), - apm_data_dumper_(apm_data_dumper) {} + : apm_data_dumper_(apm_data_dumper), + gain_applier_( + /*hard_clip_samples=*/false, + /*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)), + calls_since_last_gain_log_(0), + gain_increase_allowed_(true), + last_gain_db_(kInitialAdaptiveDigitalGainDb) {} -void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) { +void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, + AudioFrameView frame) { + RTC_DCHECK_GE(info.input_level_dbfs, -150.f); + RTC_DCHECK_GE(frame.num_channels(), 1); + RTC_DCHECK_GE(frame.samples_per_channel(), 1); + + // Log every second. 
calls_since_last_gain_log_++; if (calls_since_last_gain_log_ == 100) { calls_since_last_gain_log_ = 0; RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied", last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1); RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel", - -signal_with_levels.input_noise_level_dbfs, 0, - 100, 101); + -info.input_noise_level_dbfs, 0, 100, 101); } - signal_with_levels.input_level_dbfs = - std::min(signal_with_levels.input_level_dbfs, 0.f); - - RTC_DCHECK_GE(signal_with_levels.input_level_dbfs, -150.f); - RTC_DCHECK_GE(signal_with_levels.float_frame.num_channels(), 1); - RTC_DCHECK_GE(signal_with_levels.float_frame.samples_per_channel(), 1); - const float target_gain_db = LimitGainByLowConfidence( - LimitGainByNoise(ComputeGainDb(signal_with_levels.input_level_dbfs), - signal_with_levels.input_noise_level_dbfs, - apm_data_dumper_), - last_gain_db_, signal_with_levels.limiter_audio_level_dbfs, - signal_with_levels.estimate_is_confident); + LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)), + info.input_noise_level_dbfs, apm_data_dumper_), + last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident); // Forbid increasing the gain when there is no speech. - gain_increase_allowed_ = signal_with_levels.vad_result.speech_probability > - kVadConfidenceThreshold; + gain_increase_allowed_ = + info.vad_result.speech_probability > kVadConfidenceThreshold; const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( target_gain_db, last_gain_db_, gain_increase_allowed_); @@ -137,7 +134,7 @@ void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) { gain_applier_.SetGainFactor( DbToRatio(last_gain_db_ + gain_change_this_frame_db)); } - gain_applier_.ApplyGain(signal_with_levels.float_frame); + gain_applier_.ApplyGain(frame); // Remember that the gain has changed for the next iteration. 
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h index bef7017e01..4dc4863b56 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -11,7 +11,6 @@ #ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ #define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ -#include "modules/audio_processing/agc2/agc2_common.h" #include "modules/audio_processing/agc2/gain_applier.h" #include "modules/audio_processing/agc2/vad_with_level.h" #include "modules/audio_processing/include/audio_frame_view.h" @@ -20,36 +19,38 @@ namespace webrtc { class ApmDataDumper; -struct SignalWithLevels { - SignalWithLevels(AudioFrameView float_frame); - SignalWithLevels(const SignalWithLevels&); - - float input_level_dbfs = -1.f; - float input_noise_level_dbfs = -1.f; - VadLevelAnalyzer::Result vad_result; - float limiter_audio_level_dbfs = -1.f; - bool estimate_is_confident = false; - AudioFrameView float_frame; -}; - +// Part of the adaptive digital controller that applies a digital adaptive gain. +// The gain is updated towards a target. The logic decides when gain updates are +// allowed, it controls the adaptation speed and caps the target based on the +// estimated noise level and the speech level estimate confidence. class AdaptiveDigitalGainApplier { public: + // Information about a frame to process. + struct FrameInfo { + float input_level_dbfs; // Estimated speech plus noise level. + float input_noise_level_dbfs; // Estimated noise level. + VadLevelAnalyzer::Result vad_result; + float limiter_envelope_dbfs; // Envelope level from the limiter. + bool estimate_is_confident; + }; + explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper); - // Decide what gain to apply. 
- void Process(SignalWithLevels signal_with_levels); + AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete; + AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) = + delete; + + // Analyzes `info`, updates the digital gain and applies it to `frame`. + void Process(const FrameInfo& info, AudioFrameView frame); private: - float last_gain_db_ = kInitialAdaptiveDigitalGainDb; + ApmDataDumper* const apm_data_dumper_; GainApplier gain_applier_; - int calls_since_last_gain_log_ = 0; - // For some combinations of noise and speech probability, increasing - // the level is not allowed. Since we may get VAD results in bursts, - // we keep track of this variable until the next VAD results come - // in. - bool gain_increase_allowed_ = true; - ApmDataDumper* apm_data_dumper_ = nullptr; + int calls_since_last_gain_log_; + bool gain_increase_allowed_; + float last_gain_db_; }; + } // namespace webrtc #endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc index 9e31655d3d..a607e7527f 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc @@ -20,6 +20,7 @@ namespace webrtc { namespace { + // Constants used in place of estimated noise levels. 
constexpr float kNoNoiseDbfs = -90.f; constexpr float kWithNoiseDbfs = -20.f; @@ -36,31 +37,25 @@ float RunOnConstantLevel(int num_iterations, for (int i = 0; i < num_iterations; ++i) { VectorFloatFrame fake_audio(1, 1, 1.f); - SignalWithLevels signal_with_levels(fake_audio.float_frame_view()); - signal_with_levels.input_level_dbfs = input_level_dbfs; - signal_with_levels.input_noise_level_dbfs = kNoNoiseDbfs; - signal_with_levels.vad_result = vad_level; - signal_with_levels.limiter_audio_level_dbfs = -2.f; - signal_with_levels.estimate_is_confident = true; - gain_applier->Process(signal_with_levels); + AdaptiveDigitalGainApplier::FrameInfo info; + info.input_level_dbfs = input_level_dbfs; + info.input_noise_level_dbfs = kNoNoiseDbfs; + info.vad_result = vad_level; + info.limiter_envelope_dbfs = -2.f; + info.estimate_is_confident = true; + gain_applier->Process(info, fake_audio.float_frame_view()); gain_linear = fake_audio.float_frame_view().channel(0)[0]; } return gain_linear; } -// Returns 'SignalWithLevels' for typical GainApplier behavior. Voice on, no -// noise, low limiter, confident level. -SignalWithLevels TestSignalWithLevel(AudioFrameView float_frame) { - SignalWithLevels result(float_frame); - result.input_level_dbfs = -1; - result.input_noise_level_dbfs = kNoNoiseDbfs; - result.vad_result = kVadSpeech; - result.estimate_is_confident = true; - result.limiter_audio_level_dbfs = -2.f; - return result; -} - -} // namespace +// Voice on, no noise, low limiter, confident level. 
+constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{ + /*input_level_dbfs=*/-1.f, + /*input_noise_level_dbfs=*/kNoNoiseDbfs, + /*vad_result=*/kVadSpeech, + /*limiter_envelope_dbfs=*/-2.f, + /*estimate_is_confident=*/true}; TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { ApmDataDumper apm_data_dumper(0); @@ -68,9 +63,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { // Make one call with reasonable audio level values and settings. VectorFloatFrame fake_audio(2, 480, 10000.f); - auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); - signal_with_level.input_level_dbfs = -5.0; - gain_applier.Process(signal_with_level); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.input_level_dbfs = -5.0; + gain_applier.Process(info, fake_audio.float_frame_view()); } // Check that the output is -kHeadroom dBFS. @@ -120,9 +115,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { for (int i = 0; i < kNumFramesToAdapt; ++i) { SCOPED_TRACE(i); VectorFloatFrame fake_audio(1, 1, 1.f); - auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); - signal_with_level.input_level_dbfs = initial_level_dbfs; - gain_applier.Process(signal_with_level); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.input_level_dbfs = initial_level_dbfs; + gain_applier.Process(info, fake_audio.float_frame_view()); float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), kMaxChangePerFrameLinear); @@ -133,9 +128,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { for (int i = 0; i < kNumFramesToAdapt; ++i) { SCOPED_TRACE(i); VectorFloatFrame fake_audio(1, 1, 1.f); - auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); - signal_with_level.input_level_dbfs = 0.f; - gain_applier.Process(signal_with_level); + 
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.input_level_dbfs = 0.f; + gain_applier.Process(info, fake_audio.float_frame_view()); float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), kMaxChangePerFrameLinear); @@ -151,9 +146,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) { constexpr int num_samples = 480; VectorFloatFrame fake_audio(1, num_samples, 1.f); - auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); - signal_with_level.input_level_dbfs = initial_level_dbfs; - gain_applier.Process(signal_with_level); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.input_level_dbfs = initial_level_dbfs; + gain_applier.Process(info, fake_audio.float_frame_view()); float maximal_difference = 0.f; float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb); for (const auto& x : fake_audio.float_frame_view().channel(0)) { @@ -182,10 +177,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) { for (int i = 0; i < num_initial_frames + num_frames; ++i) { VectorFloatFrame fake_audio(1, num_samples, 1.f); - auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); - signal_with_level.input_level_dbfs = initial_level_dbfs; - signal_with_level.input_noise_level_dbfs = kWithNoiseDbfs; - gain_applier.Process(signal_with_level); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.input_level_dbfs = initial_level_dbfs; + info.input_noise_level_dbfs = kWithNoiseDbfs; + gain_applier.Process(info, fake_audio.float_frame_view()); // Wait so that the adaptive gain applier has time to lower the gain. if (i > num_initial_frames) { @@ -204,9 +199,9 @@ TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) { // Make one call with positive audio level values and settings. 
VectorFloatFrame fake_audio(2, 480, 10000.f); - auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); - signal_with_level.input_level_dbfs = 5.0f; - gain_applier.Process(signal_with_level); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.input_level_dbfs = 5.f; + gain_applier.Process(info, fake_audio.float_frame_view()); } TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { @@ -223,11 +218,11 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { for (int i = 0; i < num_initial_frames + num_frames; ++i) { VectorFloatFrame fake_audio(1, num_samples, 1.f); - auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); - signal_with_level.input_level_dbfs = initial_level_dbfs; - signal_with_level.limiter_audio_level_dbfs = 1.f; - signal_with_level.estimate_is_confident = false; - gain_applier.Process(signal_with_level); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.input_level_dbfs = initial_level_dbfs; + info.limiter_envelope_dbfs = 1.f; + info.estimate_is_confident = false; + gain_applier.Process(info, fake_audio.float_frame_view()); // Wait so that the adaptive gain applier has time to lower the gain. if (i > num_initial_frames) { @@ -239,4 +234,6 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { } } } + +} // namespace } // namespace webrtc