diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 380c39c4f2..0372ccf38a 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -30,6 +30,7 @@ void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info, constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1; constexpr float kMaxGainChangePerSecondDb = 3.f; +constexpr float kMaxOutputNoiseLevelDbfs = -50.f; } // namespace @@ -37,7 +38,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) : speech_level_estimator_(apm_data_dumper), gain_applier_(apm_data_dumper, kGainApplierAdjacentSpeechFramesThreshold, - kMaxGainChangePerSecondDb), + kMaxGainChangePerSecondDb, + kMaxOutputNoiseLevelDbfs), apm_data_dumper_(apm_data_dumper), noise_level_estimator_(apm_data_dumper) { RTC_DCHECK(apm_data_dumper); @@ -56,7 +58,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, gain_applier_( apm_data_dumper, config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold, - config.adaptive_digital.max_gain_change_db_per_second), + config.adaptive_digital.max_gain_change_db_per_second, + config.adaptive_digital.max_output_noise_level_dbfs), apm_data_dumper_(apm_data_dumper), noise_level_estimator_(apm_data_dumper) { RTC_DCHECK(apm_data_dumper); diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc index ef048e614b..e7999c0d5c 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -44,12 +44,16 @@ float ComputeGainDb(float input_level_dbfs) { return 0.f; } -// We require 'gain + noise_level <= kMaxNoiseLevelDbfs'. 
+// Returns `target_gain` if the output noise level is below +// `max_output_noise_level_dbfs`; otherwise caps the gain at the noise headroom, +// which is floored at 0 dB: the noise limit never forces the gain below 0 dB. float LimitGainByNoise(float target_gain, float input_noise_level_dbfs, - ApmDataDumper* apm_data_dumper) { - const float noise_headroom_db = kMaxNoiseLevelDbfs - input_noise_level_dbfs; - apm_data_dumper->DumpRaw("agc2_noise_headroom_db", noise_headroom_db); + float max_output_noise_level_dbfs, + ApmDataDumper& apm_data_dumper) { + const float noise_headroom_db = + max_output_noise_level_dbfs - input_noise_level_dbfs; + apm_data_dumper.DumpRaw("agc2_noise_headroom_db", noise_headroom_db); return std::min(target_gain, std::max(noise_headroom_db, 0.f)); } @@ -89,7 +93,8 @@ float ComputeGainChangeThisFrameDb(float target_gain_db, AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( ApmDataDumper* apm_data_dumper, int adjacent_speech_frames_threshold, - float max_gain_change_db_per_second) + float max_gain_change_db_per_second, + float max_output_noise_level_dbfs) : apm_data_dumper_(apm_data_dumper), gain_applier_( /*hard_clip_samples=*/false, @@ -97,11 +102,14 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), max_gain_change_db_per_10ms_(max_gain_change_db_per_second * kFrameDurationMs / 1000.f), + max_output_noise_level_dbfs_(max_output_noise_level_dbfs), calls_since_last_gain_log_(0), frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_), last_gain_db_(kInitialAdaptiveDigitalGainDb) { RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f); RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); + RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f); + RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f); } void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, @@ -126,7 +134,8 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, const float target_gain_db = 
LimitGainByLowConfidence( LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)), - info.input_noise_level_dbfs, apm_data_dumper_), + info.input_noise_level_dbfs, + max_output_noise_level_dbfs_, *apm_data_dumper_), last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident); // Forbid increasing the gain until enough adjacent speech frames are diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h index ca36abcdb8..a65379f5be 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -34,12 +34,15 @@ class AdaptiveDigitalGainApplier { bool estimate_is_confident; }; + // Ctor. // `adjacent_speech_frames_threshold` indicates how many speech frames are // required before a gain increase is allowed. `max_gain_change_db_per_second` // limits the adaptation speed (uniformly operated across frames). + // `max_output_noise_level_dbfs` limits the output noise level. 
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper, int adjacent_speech_frames_threshold, - float max_gain_change_db_per_second); + float max_gain_change_db_per_second, + float max_output_noise_level_dbfs); AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete; AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) = delete; @@ -54,6 +57,7 @@ class AdaptiveDigitalGainApplier { const int adjacent_speech_frames_threshold_; const float max_gain_change_db_per_10ms_; + const float max_output_noise_level_dbfs_; int calls_since_last_gain_log_; int frames_to_gain_increase_allowed_; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc index 63763c8495..e2df700422 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc @@ -36,14 +36,18 @@ constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f}; constexpr float kMaxGainChangePerSecondDb = 3.f; constexpr float kMaxGainChangePerFrameDb = kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f; +constexpr float kMaxOutputNoiseLevelDbfs = -50.f; // Helper to instance `AdaptiveDigitalGainApplier`. 
struct GainApplierHelper { GainApplierHelper() + : GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {} + explicit GainApplierHelper(int adjacent_speech_frames_threshold) : apm_data_dumper(0), gain_applier(&apm_data_dumper, - /*adjacent_speech_frames_threshold=*/1, - kMaxGainChangePerSecondDb) {} + adjacent_speech_frames_threshold, + kMaxGainChangePerSecondDb, + kMaxOutputNoiseLevelDbfs) {} ApmDataDumper apm_data_dumper; AdaptiveDigitalGainApplier gain_applier; }; @@ -185,7 +189,8 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) { kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb; constexpr int num_frames = 50; - ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low"; + ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs) + << "kWithNoiseDbfs is too low"; for (int i = 0; i < num_initial_frames + num_frames; ++i) { VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f); @@ -223,7 +228,8 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb; constexpr int num_frames = 50; - ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low"; + ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs) + << "kWithNoiseDbfs is too low"; for (int i = 0; i < num_initial_frames + num_frames; ++i) { VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f); @@ -252,10 +258,8 @@ class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam { TEST_P(AdaptiveDigitalGainApplierTest, DoNotIncreaseGainWithTooFewSpeechFrames) { const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper, - adjacent_speech_frames_threshold, - kMaxGainChangePerFrameDb); + GainApplierHelper helper(adjacent_speech_frames_threshold); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = -25.0; @@ -263,7 +267,7 @@ 
TEST_P(AdaptiveDigitalGainApplierTest, for (int i = 0; i < adjacent_speech_frames_threshold; ++i) { SCOPED_TRACE(i); VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f); - gain_applier.Process(info, audio.float_frame_view()); + helper.gain_applier.Process(info, audio.float_frame_view()); const float gain = audio.float_frame_view().channel(0)[0]; if (i > 0) { EXPECT_EQ(prev_gain, gain); // No gain increase. @@ -274,23 +278,21 @@ TEST_P(AdaptiveDigitalGainApplierTest, TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) { const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper, - adjacent_speech_frames_threshold, - kMaxGainChangePerFrameDb); + GainApplierHelper helper(adjacent_speech_frames_threshold); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = -25.0; float prev_gain = 0.f; for (int i = 0; i < adjacent_speech_frames_threshold; ++i) { VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f); - gain_applier.Process(info, audio.float_frame_view()); + helper.gain_applier.Process(info, audio.float_frame_view()); prev_gain = audio.float_frame_view().channel(0)[0]; } // Process one more speech frame. VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f); - gain_applier.Process(info, audio.float_frame_view()); + helper.gain_applier.Process(info, audio.float_frame_view()); // The gain has increased. EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain); diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h index 5149ed6b32..5d01100eb7 100644 --- a/modules/audio_processing/agc2/agc2_common.h +++ b/modules/audio_processing/agc2/agc2_common.h @@ -32,9 +32,6 @@ constexpr float kInitialAdaptiveDigitalGainDb = 8.f; // At what limiter levels should we start decreasing the adaptive digital gain. 
constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs; -// This parameter must be tuned together with the noise estimator. -constexpr float kMaxNoiseLevelDbfs = -50.f; - // This is the threshold for speech. Speech frames are used for updating the // speech level, measuring the amount of speech, and decide when to allow target // gain reduction. diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index a56b315af9..d09e2ba965 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -354,6 +354,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { float extra_saturation_margin_db = 2.f; int gain_applier_adjacent_speech_frames_threshold = 1; float max_gain_change_db_per_second = 3.f; + float max_output_noise_level_dbfs = -50.f; } adaptive_digital; } gain_controller2;