AGC2 saturation protector: simplify interface and impl
- Passing the speech peak power instead of VAD data
- The private class SaturationProtector::PeakEnveloper has been removed
- Added `initial_saturation_margin_db_` parameter to correctly initialize `last_margin_` (renamed to `margin_db_`)
- Member names have been fixed and/or shortened for better readability

Tested: Bit-exactness verified with audioproc_f
Bug: webrtc:7494
Change-Id: I6cad2974397319737c8ac201d44311bf16275f28
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/184925
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32168}
This commit is contained in:

committed by
Commit Bot

parent
e8e29845fa
commit
736ff83e69
@ -32,7 +32,9 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
|
||||
float extra_saturation_margin_db)
|
||||
: level_estimator_(level_estimator),
|
||||
use_saturation_protector_(use_saturation_protector),
|
||||
saturation_protector_(apm_data_dumper, extra_saturation_margin_db),
|
||||
saturation_protector_(apm_data_dumper,
|
||||
GetInitialSaturationMarginDb(),
|
||||
extra_saturation_margin_db),
|
||||
apm_data_dumper_(apm_data_dumper) {}
|
||||
|
||||
void AdaptiveModeLevelEstimator::UpdateEstimation(
|
||||
@ -77,7 +79,7 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
|
||||
last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_;
|
||||
|
||||
if (use_saturation_protector_) {
|
||||
saturation_protector_.UpdateMargin(vad_data,
|
||||
saturation_protector_.UpdateMargin(vad_data.speech_peak_dbfs,
|
||||
last_estimate_with_offset_dbfs_);
|
||||
DebugDumpEstimate();
|
||||
}
|
||||
@ -86,7 +88,7 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
|
||||
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
|
||||
return rtc::SafeClamp<float>(
|
||||
last_estimate_with_offset_dbfs_ +
|
||||
(use_saturation_protector_ ? saturation_protector_.LastMargin()
|
||||
(use_saturation_protector_ ? saturation_protector_.GetMarginDb()
|
||||
: 0.f),
|
||||
-90.f, 30.f);
|
||||
}
|
||||
|
@ -52,77 +52,70 @@ absl::optional<float> SaturationProtector::RingBuffer::Front() const {
|
||||
return buffer_[rtc::SafeEq(size_, buffer_.size()) ? next_ : 0];
|
||||
}
|
||||
|
||||
// Constructs the enveloper with zero accumulated speech time and the running
// super-frame peak set to `kMinLevelDbfs`.
SaturationProtector::PeakEnveloper::PeakEnveloper()
|
||||
: speech_time_in_estimate_ms_(0),
|
||||
current_superframe_peak_dbfs_(kMinLevelDbfs) {}
|
||||
|
||||
// Zeroes the accumulated speech time, sets the running super-frame peak back
// to `kMinLevelDbfs` and resets `peak_delay_buffer_`.
void SaturationProtector::PeakEnveloper::Reset() {
|
||||
speech_time_in_estimate_ms_ = 0;
|
||||
current_superframe_peak_dbfs_ = kMinLevelDbfs;
|
||||
peak_delay_buffer_.Reset();
|
||||
}
|
||||
|
||||
// Folds `frame_peak_dbfs` into the running maximum and adds `kFrameDurationMs`
// to the accumulated time. Once the accumulated time is strictly greater than
// `kPeakEnveloperSuperFrameLengthMs`, the running maximum is pushed into
// `peak_delay_buffer_` and both accumulators start over.
void SaturationProtector::PeakEnveloper::Process(float frame_peak_dbfs) {
|
||||
// Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
|
||||
current_superframe_peak_dbfs_ =
|
||||
std::max(current_superframe_peak_dbfs_, frame_peak_dbfs);
|
||||
speech_time_in_estimate_ms_ += kFrameDurationMs;
|
||||
if (speech_time_in_estimate_ms_ > kPeakEnveloperSuperFrameLengthMs) {
|
||||
peak_delay_buffer_.PushBack(current_superframe_peak_dbfs_);
|
||||
// Reset.
|
||||
speech_time_in_estimate_ms_ = 0;
|
||||
|
||||
current_superframe_peak_dbfs_ = kMinLevelDbfs;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the value at the front of `peak_delay_buffer_`. When `Front()`
// holds no value, the running super-frame peak is returned instead.
float SaturationProtector::PeakEnveloper::Query() const {
|
||||
return peak_delay_buffer_.Front().value_or(current_superframe_peak_dbfs_);
|
||||
}
|
||||
|
||||
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper)
|
||||
: SaturationProtector(apm_data_dumper, GetExtraSaturationMarginOffsetDb()) {
|
||||
}
|
||||
: SaturationProtector(apm_data_dumper,
|
||||
GetInitialSaturationMarginDb(),
|
||||
GetExtraSaturationMarginOffsetDb()) {}
|
||||
|
||||
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper,
|
||||
float initial_saturation_margin_db,
|
||||
float extra_saturation_margin_db)
|
||||
: apm_data_dumper_(apm_data_dumper),
|
||||
extra_saturation_margin_db_(extra_saturation_margin_db),
|
||||
last_margin_(GetInitialSaturationMarginDb()) {}
|
||||
|
||||
// Updates `last_margin_` from one frame of VAD data.
//
// `vad_data.speech_peak_dbfs` is fed to the peak enveloper, and the peak it
// reports back is compared with `last_speech_level_estimate`. The margin is
// then blended with that difference: the attack constant is the weight when
// the margin is below the difference, the decay constant otherwise. The result
// is clamped to [`kMinMarginDb`, `kMaxMarginDb`].
void SaturationProtector::UpdateMargin(
|
||||
const VadWithLevel::LevelAndProbability& vad_data,
|
||||
float last_speech_level_estimate) {
|
||||
peak_enveloper_.Process(vad_data.speech_peak_dbfs);
|
||||
const float delayed_peak_dbfs = peak_enveloper_.Query();
|
||||
const float difference_db = delayed_peak_dbfs - last_speech_level_estimate;
|
||||
|
||||
if (last_margin_ < difference_db) {
|
||||
last_margin_ = last_margin_ * kSaturationProtectorAttackConstant +
|
||||
difference_db * (1.f - kSaturationProtectorAttackConstant);
|
||||
} else {
|
||||
last_margin_ = last_margin_ * kSaturationProtectorDecayConstant +
|
||||
difference_db * (1.f - kSaturationProtectorDecayConstant);
|
||||
}
|
||||
|
||||
last_margin_ =
|
||||
rtc::SafeClamp<float>(last_margin_, kMinMarginDb, kMaxMarginDb);
|
||||
}
|
||||
|
||||
float SaturationProtector::LastMargin() const {
|
||||
return last_margin_ + extra_saturation_margin_db_;
|
||||
initial_saturation_margin_db_(initial_saturation_margin_db),
|
||||
extra_saturation_margin_db_(extra_saturation_margin_db) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
void SaturationProtector::Reset() {
|
||||
peak_enveloper_.Reset();
|
||||
margin_db_ = initial_saturation_margin_db_;
|
||||
peak_delay_buffer_.Reset();
|
||||
max_peaks_dbfs_ = kMinLevelDbfs;
|
||||
time_since_push_ms_ = 0;
|
||||
}
|
||||
|
||||
// Updates `margin_db_` from one frame's peak power `speech_peak_dbfs` and the
// estimated speech level `speech_level_dbfs`.
//
// Step 1: `speech_peak_dbfs` is folded into the running maximum and
// `kFrameDurationMs` is added to the time since the last push. When that time
// is strictly greater than `kPeakEnveloperSuperFrameLengthMs`, the maximum is
// pushed into `peak_delay_buffer_` and both accumulators start over.
//
// Step 2: the margin is blended with the difference between the delayed peak
// and `speech_level_dbfs`. The attack constant is the weight when the margin is
// below that difference, the decay constant otherwise. The result is clamped to
// [`kMinMarginDb`, `kMaxMarginDb`].
void SaturationProtector::UpdateMargin(float speech_peak_dbfs,
|
||||
float speech_level_dbfs) {
|
||||
// Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
|
||||
max_peaks_dbfs_ = std::max(max_peaks_dbfs_, speech_peak_dbfs);
|
||||
time_since_push_ms_ += kFrameDurationMs;
|
||||
if (time_since_push_ms_ >
|
||||
static_cast<int>(kPeakEnveloperSuperFrameLengthMs)) {
|
||||
// Push `max_peaks_dbfs_` back into the ring buffer.
|
||||
peak_delay_buffer_.PushBack(max_peaks_dbfs_);
|
||||
// Reset.
|
||||
max_peaks_dbfs_ = kMinLevelDbfs;
|
||||
time_since_push_ms_ = 0;
|
||||
}
|
||||
|
||||
// Update margin by comparing the estimated speech level and the delayed max
|
||||
// speech peak power.
|
||||
// TODO(alessiob): Check with aleloi@ why we use a delay and how to tune it.
|
||||
const float difference_db = GetDelayedPeakDbfs() - speech_level_dbfs;
|
||||
if (margin_db_ < difference_db) {
|
||||
margin_db_ = margin_db_ * kSaturationProtectorAttackConstant +
|
||||
difference_db * (1.f - kSaturationProtectorAttackConstant);
|
||||
} else {
|
||||
margin_db_ = margin_db_ * kSaturationProtectorDecayConstant +
|
||||
difference_db * (1.f - kSaturationProtectorDecayConstant);
|
||||
}
|
||||
|
||||
margin_db_ = rtc::SafeClamp<float>(margin_db_, kMinMarginDb, kMaxMarginDb);
|
||||
}
|
||||
|
||||
// Returns the value at the front of `peak_delay_buffer_`. When `Front()`
// holds no value, the running maximum `max_peaks_dbfs_` is returned instead.
float SaturationProtector::GetDelayedPeakDbfs() const {
|
||||
return peak_delay_buffer_.Front().value_or(max_peaks_dbfs_);
|
||||
}
|
||||
|
||||
// Returns the current margin `margin_db_` plus the fixed
// `extra_saturation_margin_db_` supplied at construction.
float SaturationProtector::GetMarginDb() const {
|
||||
return margin_db_ + extra_saturation_margin_db_;
|
||||
}
|
||||
|
||||
void SaturationProtector::DebugDumpEstimate() const {
|
||||
if (apm_data_dumper_) {
|
||||
apm_data_dumper_->DumpRaw(
|
||||
"agc2_adaptive_saturation_protector_delayed_peak_dbfs",
|
||||
peak_enveloper_.Query());
|
||||
apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db",
|
||||
last_margin_);
|
||||
GetDelayedPeakDbfs());
|
||||
apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db", margin_db_);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/agc2/vad_with_level.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -24,19 +23,19 @@ class ApmDataDumper;
|
||||
class SaturationProtector {
|
||||
public:
|
||||
explicit SaturationProtector(ApmDataDumper* apm_data_dumper);
|
||||
|
||||
SaturationProtector(ApmDataDumper* apm_data_dumper,
|
||||
float initial_saturation_margin_db,
|
||||
float extra_saturation_margin_db);
|
||||
|
||||
// Updates the margin estimate. This method should be called whenever a frame
|
||||
// is reliably classified as 'speech'.
|
||||
void UpdateMargin(const VadWithLevel::LevelAndProbability& vad_data,
|
||||
float last_speech_level_estimate);
|
||||
void Reset();
|
||||
|
||||
// Updates the margin by analyzing the estimated speech level
|
||||
// `speech_level_dbfs` and the peak power `speech_peak_dbfs` for an observed
|
||||
// frame which is reliably classified as "speech".
|
||||
void UpdateMargin(float speech_peak_dbfs, float speech_level_dbfs);
|
||||
|
||||
// Returns latest computed margin.
|
||||
float LastMargin() const;
|
||||
|
||||
void Reset();
|
||||
float GetMarginDb() const;
|
||||
|
||||
void DebugDumpEstimate() const;
|
||||
|
||||
@ -57,25 +56,17 @@ class SaturationProtector {
|
||||
int size_ = 0;
|
||||
};
|
||||
|
||||
// Computes a delayed envelope of peaks.
|
||||
class PeakEnveloper {
|
||||
public:
|
||||
PeakEnveloper();
|
||||
void Reset();
|
||||
void Process(float frame_peak_dbfs);
|
||||
float Query() const;
|
||||
|
||||
private:
|
||||
size_t speech_time_in_estimate_ms_;
|
||||
float current_superframe_peak_dbfs_;
|
||||
RingBuffer peak_delay_buffer_;
|
||||
};
|
||||
float GetDelayedPeakDbfs() const;
|
||||
|
||||
ApmDataDumper* apm_data_dumper_;
|
||||
PeakEnveloper peak_enveloper_;
|
||||
|
||||
// Parameters.
|
||||
const float initial_saturation_margin_db_;
|
||||
const float extra_saturation_margin_db_;
|
||||
float last_margin_;
|
||||
// State.
|
||||
float margin_db_;
|
||||
RingBuffer peak_delay_buffer_;
|
||||
float max_peaks_dbfs_;
|
||||
int time_since_push_ms_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -19,14 +19,14 @@
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
float RunOnConstantLevel(int num_iterations,
|
||||
VadWithLevel::LevelAndProbability vad_data,
|
||||
float estimated_level_dbfs,
|
||||
float speech_peak_dbfs,
|
||||
float speech_level_dbfs,
|
||||
SaturationProtector* saturation_protector) {
|
||||
float last_margin = saturation_protector->LastMargin();
|
||||
float last_margin = saturation_protector->GetMarginDb();
|
||||
float max_difference = 0.f;
|
||||
for (int i = 0; i < num_iterations; ++i) {
|
||||
saturation_protector->UpdateMargin(vad_data, estimated_level_dbfs);
|
||||
const float new_margin = saturation_protector->LastMargin();
|
||||
saturation_protector->UpdateMargin(speech_peak_dbfs, speech_level_dbfs);
|
||||
const float new_margin = saturation_protector->GetMarginDb();
|
||||
max_difference =
|
||||
std::max(max_difference, std::abs(new_margin - last_margin));
|
||||
last_margin = new_margin;
|
||||
@ -39,10 +39,9 @@ float RunOnConstantLevel(int num_iterations,
|
||||
TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
SaturationProtector saturation_protector(&apm_data_dumper);
|
||||
VadWithLevel::LevelAndProbability vad_data(1.f, -20.f, -10.f);
|
||||
|
||||
saturation_protector.UpdateMargin(vad_data, -20.f);
|
||||
static_cast<void>(saturation_protector.LastMargin());
|
||||
saturation_protector.UpdateMargin(/*speech_peak_dbfs=*/-10.f,
|
||||
/*speech_level_dbfs=*/-20.f);
|
||||
static_cast<void>(saturation_protector.GetMarginDb());
|
||||
saturation_protector.DebugDumpEstimate();
|
||||
}
|
||||
|
||||
@ -59,12 +58,11 @@ TEST(AutomaticGainController2SaturationProtector,
|
||||
const float kMaxDifference =
|
||||
0.5 * std::abs(GetInitialSaturationMarginDb() - kCrestFactor);
|
||||
|
||||
static_cast<void>(RunOnConstantLevel(
|
||||
2000, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
|
||||
kSpeechLevel, &saturation_protector));
|
||||
static_cast<void>(RunOnConstantLevel(2000, kPeakLevel, kSpeechLevel,
|
||||
&saturation_protector));
|
||||
|
||||
EXPECT_NEAR(
|
||||
saturation_protector.LastMargin() - GetExtraSaturationMarginOffsetDb(),
|
||||
saturation_protector.GetMarginDb() - GetExtraSaturationMarginOffsetDb(),
|
||||
kCrestFactor, kMaxDifference);
|
||||
}
|
||||
|
||||
@ -80,14 +78,11 @@ TEST(AutomaticGainController2SaturationProtector, ProtectorChangesSlowly) {
|
||||
|
||||
constexpr int kNumIterations = 1000;
|
||||
float max_difference = RunOnConstantLevel(
|
||||
kNumIterations, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
|
||||
kSpeechLevel, &saturation_protector);
|
||||
kNumIterations, kPeakLevel, kSpeechLevel, &saturation_protector);
|
||||
|
||||
max_difference =
|
||||
std::max(RunOnConstantLevel(
|
||||
kNumIterations,
|
||||
VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
|
||||
kOtherSpeechLevel, &saturation_protector),
|
||||
std::max(RunOnConstantLevel(kNumIterations, kPeakLevel, kOtherSpeechLevel,
|
||||
&saturation_protector),
|
||||
max_difference);
|
||||
|
||||
constexpr float kMaxChangeSpeedDbPerSecond = 0.5; // 1 db / 2 seconds.
|
||||
@ -108,27 +103,20 @@ TEST(AutomaticGainController2SaturationProtector,
|
||||
// First run on initial level.
|
||||
float max_difference = RunOnConstantLevel(
|
||||
kDelayIterations,
|
||||
VadWithLevel::LevelAndProbability(
|
||||
1.f, -90.f, kInitialSpeechLevelDbfs + GetInitialSaturationMarginDb()),
|
||||
kInitialSpeechLevelDbfs + GetInitialSaturationMarginDb(),
|
||||
kInitialSpeechLevelDbfs, &saturation_protector);
|
||||
|
||||
// Then peak changes, but not RMS.
|
||||
max_difference =
|
||||
std::max(RunOnConstantLevel(
|
||||
kDelayIterations,
|
||||
VadWithLevel::LevelAndProbability(
|
||||
1.f, -90.f,
|
||||
kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb()),
|
||||
max_difference = std::max(
|
||||
RunOnConstantLevel(kDelayIterations,
|
||||
kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb(),
|
||||
kInitialSpeechLevelDbfs, &saturation_protector),
|
||||
max_difference);
|
||||
|
||||
// Then both change.
|
||||
max_difference =
|
||||
std::max(RunOnConstantLevel(
|
||||
kDelayIterations,
|
||||
VadWithLevel::LevelAndProbability(
|
||||
1.f, -90.f,
|
||||
kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb()),
|
||||
max_difference = std::max(
|
||||
RunOnConstantLevel(kDelayIterations,
|
||||
kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb(),
|
||||
kLaterSpeechLevelDbfs, &saturation_protector),
|
||||
max_difference);
|
||||
|
||||
@ -138,7 +126,7 @@ TEST(AutomaticGainController2SaturationProtector,
|
||||
// above is 'normal' and 'expected', and shouldn't influence the
|
||||
// margin by much.
|
||||
|
||||
const float total_difference = std::abs(saturation_protector.LastMargin() -
|
||||
const float total_difference = std::abs(saturation_protector.GetMarginDb() -
|
||||
GetExtraSaturationMarginOffsetDb() -
|
||||
GetInitialSaturationMarginDb());
|
||||
|
||||
|
Reference in New Issue
Block a user