AGC2 saturation protector: simplify interface and impl

- `UpdateMargin()` now takes the speech peak power instead of the VAD data
- The private class SaturationProtector::PeakEnveloper has been removed
- Added the `initial_saturation_margin_db` constructor parameter (stored
  in `initial_saturation_margin_db_`) to correctly initialize
  `last_margin_` (renamed to `margin_db_`)
- Member names have been fixed and/or shortened for better readability

Tested: Bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: I6cad2974397319737c8ac201d44311bf16275f28
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/184925
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32168}
This commit is contained in:
Alessio Bazzica
2020-09-23 09:04:48 +02:00
committed by Commit Bot
parent e8e29845fa
commit 736ff83e69
4 changed files with 97 additions and 123 deletions

View File

@ -32,7 +32,9 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
float extra_saturation_margin_db) float extra_saturation_margin_db)
: level_estimator_(level_estimator), : level_estimator_(level_estimator),
use_saturation_protector_(use_saturation_protector), use_saturation_protector_(use_saturation_protector),
saturation_protector_(apm_data_dumper, extra_saturation_margin_db), saturation_protector_(apm_data_dumper,
GetInitialSaturationMarginDb(),
extra_saturation_margin_db),
apm_data_dumper_(apm_data_dumper) {} apm_data_dumper_(apm_data_dumper) {}
void AdaptiveModeLevelEstimator::UpdateEstimation( void AdaptiveModeLevelEstimator::UpdateEstimation(
@ -77,7 +79,7 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_; last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_;
if (use_saturation_protector_) { if (use_saturation_protector_) {
saturation_protector_.UpdateMargin(vad_data, saturation_protector_.UpdateMargin(vad_data.speech_peak_dbfs,
last_estimate_with_offset_dbfs_); last_estimate_with_offset_dbfs_);
DebugDumpEstimate(); DebugDumpEstimate();
} }
@ -86,7 +88,7 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const { float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
return rtc::SafeClamp<float>( return rtc::SafeClamp<float>(
last_estimate_with_offset_dbfs_ + last_estimate_with_offset_dbfs_ +
(use_saturation_protector_ ? saturation_protector_.LastMargin() (use_saturation_protector_ ? saturation_protector_.GetMarginDb()
: 0.f), : 0.f),
-90.f, 30.f); -90.f, 30.f);
} }

View File

@ -52,77 +52,70 @@ absl::optional<float> SaturationProtector::RingBuffer::Front() const {
return buffer_[rtc::SafeEq(size_, buffer_.size()) ? next_ : 0]; return buffer_[rtc::SafeEq(size_, buffer_.size()) ? next_ : 0];
} }
SaturationProtector::PeakEnveloper::PeakEnveloper()
: speech_time_in_estimate_ms_(0),
current_superframe_peak_dbfs_(kMinLevelDbfs) {}
void SaturationProtector::PeakEnveloper::Reset() {
speech_time_in_estimate_ms_ = 0;
current_superframe_peak_dbfs_ = kMinLevelDbfs;
peak_delay_buffer_.Reset();
}
void SaturationProtector::PeakEnveloper::Process(float frame_peak_dbfs) {
// Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
current_superframe_peak_dbfs_ =
std::max(current_superframe_peak_dbfs_, frame_peak_dbfs);
speech_time_in_estimate_ms_ += kFrameDurationMs;
if (speech_time_in_estimate_ms_ > kPeakEnveloperSuperFrameLengthMs) {
peak_delay_buffer_.PushBack(current_superframe_peak_dbfs_);
// Reset.
speech_time_in_estimate_ms_ = 0;
current_superframe_peak_dbfs_ = kMinLevelDbfs;
}
}
float SaturationProtector::PeakEnveloper::Query() const {
return peak_delay_buffer_.Front().value_or(current_superframe_peak_dbfs_);
}
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper)
: SaturationProtector(apm_data_dumper, GetExtraSaturationMarginOffsetDb()) { : SaturationProtector(apm_data_dumper,
} GetInitialSaturationMarginDb(),
GetExtraSaturationMarginOffsetDb()) {}
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper, SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper,
float initial_saturation_margin_db,
float extra_saturation_margin_db) float extra_saturation_margin_db)
: apm_data_dumper_(apm_data_dumper), : apm_data_dumper_(apm_data_dumper),
extra_saturation_margin_db_(extra_saturation_margin_db), initial_saturation_margin_db_(initial_saturation_margin_db),
last_margin_(GetInitialSaturationMarginDb()) {} extra_saturation_margin_db_(extra_saturation_margin_db) {
Reset();
void SaturationProtector::UpdateMargin(
const VadWithLevel::LevelAndProbability& vad_data,
float last_speech_level_estimate) {
peak_enveloper_.Process(vad_data.speech_peak_dbfs);
const float delayed_peak_dbfs = peak_enveloper_.Query();
const float difference_db = delayed_peak_dbfs - last_speech_level_estimate;
if (last_margin_ < difference_db) {
last_margin_ = last_margin_ * kSaturationProtectorAttackConstant +
difference_db * (1.f - kSaturationProtectorAttackConstant);
} else {
last_margin_ = last_margin_ * kSaturationProtectorDecayConstant +
difference_db * (1.f - kSaturationProtectorDecayConstant);
}
last_margin_ =
rtc::SafeClamp<float>(last_margin_, kMinMarginDb, kMaxMarginDb);
}
float SaturationProtector::LastMargin() const {
return last_margin_ + extra_saturation_margin_db_;
} }
void SaturationProtector::Reset() { void SaturationProtector::Reset() {
peak_enveloper_.Reset(); margin_db_ = initial_saturation_margin_db_;
peak_delay_buffer_.Reset();
max_peaks_dbfs_ = kMinLevelDbfs;
time_since_push_ms_ = 0;
}
void SaturationProtector::UpdateMargin(float speech_peak_dbfs,
float speech_level_dbfs) {
// Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
max_peaks_dbfs_ = std::max(max_peaks_dbfs_, speech_peak_dbfs);
time_since_push_ms_ += kFrameDurationMs;
if (time_since_push_ms_ >
static_cast<int>(kPeakEnveloperSuperFrameLengthMs)) {
// Push `max_peaks_dbfs_` back into the ring buffer.
peak_delay_buffer_.PushBack(max_peaks_dbfs_);
// Reset.
max_peaks_dbfs_ = kMinLevelDbfs;
time_since_push_ms_ = 0;
}
// Update margin by comparing the estimated speech level and the delayed max
// speech peak power.
// TODO(alessiob): Check with aleloi@ why we use a delay and how to tune it.
const float difference_db = GetDelayedPeakDbfs() - speech_level_dbfs;
if (margin_db_ < difference_db) {
margin_db_ = margin_db_ * kSaturationProtectorAttackConstant +
difference_db * (1.f - kSaturationProtectorAttackConstant);
} else {
margin_db_ = margin_db_ * kSaturationProtectorDecayConstant +
difference_db * (1.f - kSaturationProtectorDecayConstant);
}
margin_db_ = rtc::SafeClamp<float>(margin_db_, kMinMarginDb, kMaxMarginDb);
}
float SaturationProtector::GetDelayedPeakDbfs() const {
return peak_delay_buffer_.Front().value_or(max_peaks_dbfs_);
}
float SaturationProtector::GetMarginDb() const {
return margin_db_ + extra_saturation_margin_db_;
} }
void SaturationProtector::DebugDumpEstimate() const { void SaturationProtector::DebugDumpEstimate() const {
if (apm_data_dumper_) { if (apm_data_dumper_) {
apm_data_dumper_->DumpRaw( apm_data_dumper_->DumpRaw(
"agc2_adaptive_saturation_protector_delayed_peak_dbfs", "agc2_adaptive_saturation_protector_delayed_peak_dbfs",
peak_enveloper_.Query()); GetDelayedPeakDbfs());
apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db", apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db", margin_db_);
last_margin_);
} }
} }

View File

@ -15,7 +15,6 @@
#include "absl/types/optional.h" #include "absl/types/optional.h"
#include "modules/audio_processing/agc2/agc2_common.h" #include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
namespace webrtc { namespace webrtc {
@ -24,19 +23,19 @@ class ApmDataDumper;
class SaturationProtector { class SaturationProtector {
public: public:
explicit SaturationProtector(ApmDataDumper* apm_data_dumper); explicit SaturationProtector(ApmDataDumper* apm_data_dumper);
SaturationProtector(ApmDataDumper* apm_data_dumper, SaturationProtector(ApmDataDumper* apm_data_dumper,
float initial_saturation_margin_db,
float extra_saturation_margin_db); float extra_saturation_margin_db);
// Updates the margin estimate. This method should be called whenever a frame void Reset();
// is reliably classified as 'speech'.
void UpdateMargin(const VadWithLevel::LevelAndProbability& vad_data, // Updates the margin by analyzing the estimated speech level
float last_speech_level_estimate); // `speech_level_dbfs` and the peak power `speech_peak_dbfs` for an observed
// frame which is reliably classified as "speech".
void UpdateMargin(float speech_peak_dbfs, float speech_level_dbfs);
// Returns latest computed margin. // Returns latest computed margin.
float LastMargin() const; float GetMarginDb() const;
void Reset();
void DebugDumpEstimate() const; void DebugDumpEstimate() const;
@ -57,25 +56,17 @@ class SaturationProtector {
int size_ = 0; int size_ = 0;
}; };
// Computes a delayed envelope of peaks. float GetDelayedPeakDbfs() const;
class PeakEnveloper {
public:
PeakEnveloper();
void Reset();
void Process(float frame_peak_dbfs);
float Query() const;
private:
size_t speech_time_in_estimate_ms_;
float current_superframe_peak_dbfs_;
RingBuffer peak_delay_buffer_;
};
ApmDataDumper* apm_data_dumper_; ApmDataDumper* apm_data_dumper_;
PeakEnveloper peak_enveloper_; // Parameters.
const float initial_saturation_margin_db_;
const float extra_saturation_margin_db_; const float extra_saturation_margin_db_;
float last_margin_; // State.
float margin_db_;
RingBuffer peak_delay_buffer_;
float max_peaks_dbfs_;
int time_since_push_ms_;
}; };
} // namespace webrtc } // namespace webrtc

View File

@ -19,14 +19,14 @@
namespace webrtc { namespace webrtc {
namespace { namespace {
float RunOnConstantLevel(int num_iterations, float RunOnConstantLevel(int num_iterations,
VadWithLevel::LevelAndProbability vad_data, float speech_peak_dbfs,
float estimated_level_dbfs, float speech_level_dbfs,
SaturationProtector* saturation_protector) { SaturationProtector* saturation_protector) {
float last_margin = saturation_protector->LastMargin(); float last_margin = saturation_protector->GetMarginDb();
float max_difference = 0.f; float max_difference = 0.f;
for (int i = 0; i < num_iterations; ++i) { for (int i = 0; i < num_iterations; ++i) {
saturation_protector->UpdateMargin(vad_data, estimated_level_dbfs); saturation_protector->UpdateMargin(speech_peak_dbfs, speech_level_dbfs);
const float new_margin = saturation_protector->LastMargin(); const float new_margin = saturation_protector->GetMarginDb();
max_difference = max_difference =
std::max(max_difference, std::abs(new_margin - last_margin)); std::max(max_difference, std::abs(new_margin - last_margin));
last_margin = new_margin; last_margin = new_margin;
@ -39,10 +39,9 @@ float RunOnConstantLevel(int num_iterations,
TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) { TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) {
ApmDataDumper apm_data_dumper(0); ApmDataDumper apm_data_dumper(0);
SaturationProtector saturation_protector(&apm_data_dumper); SaturationProtector saturation_protector(&apm_data_dumper);
VadWithLevel::LevelAndProbability vad_data(1.f, -20.f, -10.f); saturation_protector.UpdateMargin(/*speech_peak_dbfs=*/-10.f,
/*speech_level_dbfs=*/-20.f);
saturation_protector.UpdateMargin(vad_data, -20.f); static_cast<void>(saturation_protector.GetMarginDb());
static_cast<void>(saturation_protector.LastMargin());
saturation_protector.DebugDumpEstimate(); saturation_protector.DebugDumpEstimate();
} }
@ -59,12 +58,11 @@ TEST(AutomaticGainController2SaturationProtector,
const float kMaxDifference = const float kMaxDifference =
0.5 * std::abs(GetInitialSaturationMarginDb() - kCrestFactor); 0.5 * std::abs(GetInitialSaturationMarginDb() - kCrestFactor);
static_cast<void>(RunOnConstantLevel( static_cast<void>(RunOnConstantLevel(2000, kPeakLevel, kSpeechLevel,
2000, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel), &saturation_protector));
kSpeechLevel, &saturation_protector));
EXPECT_NEAR( EXPECT_NEAR(
saturation_protector.LastMargin() - GetExtraSaturationMarginOffsetDb(), saturation_protector.GetMarginDb() - GetExtraSaturationMarginOffsetDb(),
kCrestFactor, kMaxDifference); kCrestFactor, kMaxDifference);
} }
@ -80,14 +78,11 @@ TEST(AutomaticGainController2SaturationProtector, ProtectorChangesSlowly) {
constexpr int kNumIterations = 1000; constexpr int kNumIterations = 1000;
float max_difference = RunOnConstantLevel( float max_difference = RunOnConstantLevel(
kNumIterations, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel), kNumIterations, kPeakLevel, kSpeechLevel, &saturation_protector);
kSpeechLevel, &saturation_protector);
max_difference = max_difference =
std::max(RunOnConstantLevel( std::max(RunOnConstantLevel(kNumIterations, kPeakLevel, kOtherSpeechLevel,
kNumIterations, &saturation_protector),
VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
kOtherSpeechLevel, &saturation_protector),
max_difference); max_difference);
constexpr float kMaxChangeSpeedDbPerSecond = 0.5; // 1 db / 2 seconds. constexpr float kMaxChangeSpeedDbPerSecond = 0.5; // 1 db / 2 seconds.
@ -108,29 +103,22 @@ TEST(AutomaticGainController2SaturationProtector,
// First run on initial level. // First run on initial level.
float max_difference = RunOnConstantLevel( float max_difference = RunOnConstantLevel(
kDelayIterations, kDelayIterations,
VadWithLevel::LevelAndProbability( kInitialSpeechLevelDbfs + GetInitialSaturationMarginDb(),
1.f, -90.f, kInitialSpeechLevelDbfs + GetInitialSaturationMarginDb()),
kInitialSpeechLevelDbfs, &saturation_protector); kInitialSpeechLevelDbfs, &saturation_protector);
// Then peak changes, but not RMS. // Then peak changes, but not RMS.
max_difference = max_difference = std::max(
std::max(RunOnConstantLevel( RunOnConstantLevel(kDelayIterations,
kDelayIterations, kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb(),
VadWithLevel::LevelAndProbability( kInitialSpeechLevelDbfs, &saturation_protector),
1.f, -90.f, max_difference);
kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb()),
kInitialSpeechLevelDbfs, &saturation_protector),
max_difference);
// Then both change. // Then both change.
max_difference = max_difference = std::max(
std::max(RunOnConstantLevel( RunOnConstantLevel(kDelayIterations,
kDelayIterations, kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb(),
VadWithLevel::LevelAndProbability( kLaterSpeechLevelDbfs, &saturation_protector),
1.f, -90.f, max_difference);
kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb()),
kLaterSpeechLevelDbfs, &saturation_protector),
max_difference);
// The saturation protector expects that the RMS changes roughly // The saturation protector expects that the RMS changes roughly
// 'kFullBufferSizeMs' after peaks change. This is to account for // 'kFullBufferSizeMs' after peaks change. This is to account for
@ -138,7 +126,7 @@ TEST(AutomaticGainController2SaturationProtector,
// above is 'normal' and 'expected', and shouldn't influence the // above is 'normal' and 'expected', and shouldn't influence the
// margin by much. // margin by much.
const float total_difference = std::abs(saturation_protector.LastMargin() - const float total_difference = std::abs(saturation_protector.GetMarginDb() -
GetExtraSaturationMarginOffsetDb() - GetExtraSaturationMarginOffsetDb() -
GetInitialSaturationMarginDb()); GetInitialSaturationMarginDb());