diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc index dd27688abf..9d2c232ed1 100644 --- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc +++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc @@ -32,7 +32,9 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( float extra_saturation_margin_db) : level_estimator_(level_estimator), use_saturation_protector_(use_saturation_protector), - saturation_protector_(apm_data_dumper, extra_saturation_margin_db), + saturation_protector_(apm_data_dumper, + GetInitialSaturationMarginDb(), + extra_saturation_margin_db), apm_data_dumper_(apm_data_dumper) {} void AdaptiveModeLevelEstimator::UpdateEstimation( @@ -77,7 +79,7 @@ void AdaptiveModeLevelEstimator::UpdateEstimation( last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_; if (use_saturation_protector_) { - saturation_protector_.UpdateMargin(vad_data, + saturation_protector_.UpdateMargin(vad_data.speech_peak_dbfs, last_estimate_with_offset_dbfs_); DebugDumpEstimate(); } @@ -86,7 +88,7 @@ void AdaptiveModeLevelEstimator::UpdateEstimation( float AdaptiveModeLevelEstimator::LatestLevelEstimate() const { return rtc::SafeClamp( last_estimate_with_offset_dbfs_ + - (use_saturation_protector_ ? saturation_protector_.LastMargin() + (use_saturation_protector_ ? saturation_protector_.GetMarginDb() : 0.f), -90.f, 30.f); } diff --git a/modules/audio_processing/agc2/saturation_protector.cc b/modules/audio_processing/agc2/saturation_protector.cc index c37ef0535c..d43a0881ab 100644 --- a/modules/audio_processing/agc2/saturation_protector.cc +++ b/modules/audio_processing/agc2/saturation_protector.cc @@ -52,77 +52,70 @@ absl::optional SaturationProtector::RingBuffer::Front() const { return buffer_[rtc::SafeEq(size_, buffer_.size()) ? next_ : 0]; } -SaturationProtector::PeakEnveloper::PeakEnveloper() - : speech_time_in_estimate_ms_(0), - current_superframe_peak_dbfs_(kMinLevelDbfs) {} - -void SaturationProtector::PeakEnveloper::Reset() { - speech_time_in_estimate_ms_ = 0; - current_superframe_peak_dbfs_ = kMinLevelDbfs; - peak_delay_buffer_.Reset(); -} - -void SaturationProtector::PeakEnveloper::Process(float frame_peak_dbfs) { - // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms. - current_superframe_peak_dbfs_ = - std::max(current_superframe_peak_dbfs_, frame_peak_dbfs); - speech_time_in_estimate_ms_ += kFrameDurationMs; - if (speech_time_in_estimate_ms_ > kPeakEnveloperSuperFrameLengthMs) { - peak_delay_buffer_.PushBack(current_superframe_peak_dbfs_); - // Reset. - speech_time_in_estimate_ms_ = 0; - current_superframe_peak_dbfs_ = kMinLevelDbfs; - } -} - -float SaturationProtector::PeakEnveloper::Query() const { - return peak_delay_buffer_.Front().value_or(current_superframe_peak_dbfs_); -} - SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) - : SaturationProtector(apm_data_dumper, GetExtraSaturationMarginOffsetDb()) { -} + : SaturationProtector(apm_data_dumper, + GetInitialSaturationMarginDb(), + GetExtraSaturationMarginOffsetDb()) {} SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper, + float initial_saturation_margin_db, float extra_saturation_margin_db) : apm_data_dumper_(apm_data_dumper), - extra_saturation_margin_db_(extra_saturation_margin_db), - last_margin_(GetInitialSaturationMarginDb()) {} - -void SaturationProtector::UpdateMargin( - const VadWithLevel::LevelAndProbability& vad_data, - float last_speech_level_estimate) { - peak_enveloper_.Process(vad_data.speech_peak_dbfs); - const float delayed_peak_dbfs = peak_enveloper_.Query(); - const float difference_db = delayed_peak_dbfs - last_speech_level_estimate; - - if (last_margin_ < difference_db) { - last_margin_ = last_margin_ * kSaturationProtectorAttackConstant + - difference_db * (1.f - kSaturationProtectorAttackConstant); - } else { - last_margin_ = last_margin_ * kSaturationProtectorDecayConstant + - difference_db * (1.f - kSaturationProtectorDecayConstant); - } - - last_margin_ = - rtc::SafeClamp(last_margin_, kMinMarginDb, kMaxMarginDb); -} - -float SaturationProtector::LastMargin() const { - return last_margin_ + extra_saturation_margin_db_; + initial_saturation_margin_db_(initial_saturation_margin_db), + extra_saturation_margin_db_(extra_saturation_margin_db) { + Reset(); } void SaturationProtector::Reset() { - peak_enveloper_.Reset(); + margin_db_ = initial_saturation_margin_db_; + peak_delay_buffer_.Reset(); + max_peaks_dbfs_ = kMinLevelDbfs; + time_since_push_ms_ = 0; +} + +void SaturationProtector::UpdateMargin(float speech_peak_dbfs, + float speech_level_dbfs) { + // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms. + max_peaks_dbfs_ = std::max(max_peaks_dbfs_, speech_peak_dbfs); + time_since_push_ms_ += kFrameDurationMs; + if (time_since_push_ms_ > + static_cast(kPeakEnveloperSuperFrameLengthMs)) { + // Push `max_peaks_dbfs_` back into the ring buffer. + peak_delay_buffer_.PushBack(max_peaks_dbfs_); + // Reset. + max_peaks_dbfs_ = kMinLevelDbfs; + time_since_push_ms_ = 0; + } + + // Update margin by comparing the estimated speech level and the delayed max + // speech peak power. + // TODO(alessiob): Check with aleloi@ why we use a delay and how to tune it. + const float difference_db = GetDelayedPeakDbfs() - speech_level_dbfs; + if (margin_db_ < difference_db) { + margin_db_ = margin_db_ * kSaturationProtectorAttackConstant + + difference_db * (1.f - kSaturationProtectorAttackConstant); + } else { + margin_db_ = margin_db_ * kSaturationProtectorDecayConstant + + difference_db * (1.f - kSaturationProtectorDecayConstant); + } + + margin_db_ = rtc::SafeClamp(margin_db_, kMinMarginDb, kMaxMarginDb); +} + +float SaturationProtector::GetDelayedPeakDbfs() const { + return peak_delay_buffer_.Front().value_or(max_peaks_dbfs_); +} + +float SaturationProtector::GetMarginDb() const { + return margin_db_ + extra_saturation_margin_db_; } void SaturationProtector::DebugDumpEstimate() const { if (apm_data_dumper_) { apm_data_dumper_->DumpRaw( "agc2_adaptive_saturation_protector_delayed_peak_dbfs", - peak_enveloper_.Query()); - apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db", - last_margin_); + GetDelayedPeakDbfs()); + apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db", margin_db_); } } diff --git a/modules/audio_processing/agc2/saturation_protector.h b/modules/audio_processing/agc2/saturation_protector.h index 279593b1bd..a1d5258263 100644 --- a/modules/audio_processing/agc2/saturation_protector.h +++ b/modules/audio_processing/agc2/saturation_protector.h @@ -15,7 +15,6 @@ #include "absl/types/optional.h" #include "modules/audio_processing/agc2/agc2_common.h" -#include "modules/audio_processing/agc2/vad_with_level.h" namespace webrtc { @@ -24,19 +23,19 @@ class ApmDataDumper; class SaturationProtector { public: explicit SaturationProtector(ApmDataDumper* apm_data_dumper); - SaturationProtector(ApmDataDumper* apm_data_dumper, + float initial_saturation_margin_db, float extra_saturation_margin_db); - // Updates the margin estimate. This method should be called whenever a frame - // is reliably classified as 'speech'. - void UpdateMargin(const VadWithLevel::LevelAndProbability& vad_data, - float last_speech_level_estimate); + void Reset(); + + // Updates the margin by analyzing the estimated speech level + // `speech_level_dbfs` and the peak power `speech_peak_dbfs` for an observed + // frame which is reliably classified as "speech". + void UpdateMargin(float speech_peak_dbfs, float speech_level_dbfs); // Returns latest computed margin. - float LastMargin() const; - - void Reset(); + float GetMarginDb() const; void DebugDumpEstimate() const; @@ -57,25 +56,17 @@ class SaturationProtector { int size_ = 0; }; - // Computes a delayed envelope of peaks. - class PeakEnveloper { - public: - PeakEnveloper(); - void Reset(); - void Process(float frame_peak_dbfs); - float Query() const; - - private: - size_t speech_time_in_estimate_ms_; - float current_superframe_peak_dbfs_; - RingBuffer peak_delay_buffer_; - }; + float GetDelayedPeakDbfs() const; ApmDataDumper* apm_data_dumper_; - PeakEnveloper peak_enveloper_; - + // Parameters. + const float initial_saturation_margin_db_; const float extra_saturation_margin_db_; - float last_margin_; + // State. + float margin_db_; + RingBuffer peak_delay_buffer_; + float max_peaks_dbfs_; + int time_since_push_ms_; }; } // namespace webrtc diff --git a/modules/audio_processing/agc2/saturation_protector_unittest.cc b/modules/audio_processing/agc2/saturation_protector_unittest.cc index e767644ca9..2feb3256be 100644 --- a/modules/audio_processing/agc2/saturation_protector_unittest.cc +++ b/modules/audio_processing/agc2/saturation_protector_unittest.cc @@ -19,14 +19,14 @@ namespace webrtc { namespace { float RunOnConstantLevel(int num_iterations, - VadWithLevel::LevelAndProbability vad_data, - float estimated_level_dbfs, + float speech_peak_dbfs, + float speech_level_dbfs, SaturationProtector* saturation_protector) { - float last_margin = saturation_protector->LastMargin(); + float last_margin = saturation_protector->GetMarginDb(); float max_difference = 0.f; for (int i = 0; i < num_iterations; ++i) { - saturation_protector->UpdateMargin(vad_data, estimated_level_dbfs); - const float new_margin = saturation_protector->LastMargin(); + saturation_protector->UpdateMargin(speech_peak_dbfs, speech_level_dbfs); + const float new_margin = saturation_protector->GetMarginDb(); max_difference = std::max(max_difference, std::abs(new_margin - last_margin)); last_margin = new_margin; @@ -39,10 +39,9 @@ float RunOnConstantLevel(int num_iterations, TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) { ApmDataDumper apm_data_dumper(0); SaturationProtector saturation_protector(&apm_data_dumper); - VadWithLevel::LevelAndProbability vad_data(1.f, -20.f, -10.f); - - saturation_protector.UpdateMargin(vad_data, -20.f); - static_cast(saturation_protector.LastMargin()); + saturation_protector.UpdateMargin(/*speech_peak_dbfs=*/-10.f, + /*speech_level_dbfs=*/-20.f); + static_cast(saturation_protector.GetMarginDb()); saturation_protector.DebugDumpEstimate(); } @@ -59,12 +58,11 @@ TEST(AutomaticGainController2SaturationProtector, const float kMaxDifference = 0.5 * std::abs(GetInitialSaturationMarginDb() - kCrestFactor); - static_cast(RunOnConstantLevel( - 2000, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel), - kSpeechLevel, &saturation_protector)); + static_cast(RunOnConstantLevel(2000, kPeakLevel, kSpeechLevel, + &saturation_protector)); EXPECT_NEAR( - saturation_protector.LastMargin() - GetExtraSaturationMarginOffsetDb(), + saturation_protector.GetMarginDb() - GetExtraSaturationMarginOffsetDb(), kCrestFactor, kMaxDifference); } @@ -80,14 +78,11 @@ TEST(AutomaticGainController2SaturationProtector, ProtectorChangesSlowly) { constexpr int kNumIterations = 1000; float max_difference = RunOnConstantLevel( - kNumIterations, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel), - kSpeechLevel, &saturation_protector); + kNumIterations, kPeakLevel, kSpeechLevel, &saturation_protector); max_difference = - std::max(RunOnConstantLevel( - kNumIterations, - VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel), - kOtherSpeechLevel, &saturation_protector), + std::max(RunOnConstantLevel(kNumIterations, kPeakLevel, kOtherSpeechLevel, + &saturation_protector), max_difference); constexpr float kMaxChangeSpeedDbPerSecond = 0.5; // 1 db / 2 seconds. @@ -108,29 +103,22 @@ TEST(AutomaticGainController2SaturationProtector, // First run on initial level. float max_difference = RunOnConstantLevel( kDelayIterations, - VadWithLevel::LevelAndProbability( - 1.f, -90.f, kInitialSpeechLevelDbfs + GetInitialSaturationMarginDb()), + kInitialSpeechLevelDbfs + GetInitialSaturationMarginDb(), kInitialSpeechLevelDbfs, &saturation_protector); // Then peak changes, but not RMS. - max_difference = - std::max(RunOnConstantLevel( - kDelayIterations, - VadWithLevel::LevelAndProbability( - 1.f, -90.f, - kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb()), - kInitialSpeechLevelDbfs, &saturation_protector), - max_difference); + max_difference = std::max( + RunOnConstantLevel(kDelayIterations, + kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb(), + kInitialSpeechLevelDbfs, &saturation_protector), + max_difference); // Then both change. - max_difference = - std::max(RunOnConstantLevel( - kDelayIterations, - VadWithLevel::LevelAndProbability( - 1.f, -90.f, - kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb()), - kLaterSpeechLevelDbfs, &saturation_protector), - max_difference); + max_difference = std::max( + RunOnConstantLevel(kDelayIterations, + kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb(), + kLaterSpeechLevelDbfs, &saturation_protector), + max_difference); // The saturation protector expects that the RMS changes roughly // 'kFullBufferSizeMs' after peaks change. This is to account for @@ -138,7 +126,7 @@ TEST(AutomaticGainController2SaturationProtector, // above is 'normal' and 'expected', and shouldn't influence the // margin by much. - const float total_difference = std::abs(saturation_protector.LastMargin() - + const float total_difference = std::abs(saturation_protector.GetMarginDb() - GetExtraSaturationMarginOffsetDb() - GetInitialSaturationMarginDb());