AGC2 Saturation Protector: switch to ring buffer

Even though it is small, the peak delay buffer copies N-1 elements for each
frame, whereas a ring buffer is copy-free and scales better if the buffer
size increases.

Tested: Bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: If8c33877b7ab1d881a0606e222b26857a82fff69
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/184920
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32165}
This commit is contained in:
Alessio Bazzica
2020-09-22 14:44:06 +02:00
committed by Commit Bot
parent a945cdadff
commit 10f6eadd48
3 changed files with 84 additions and 45 deletions

View File

@ -66,9 +66,12 @@ rtc_library("adaptive_digital") {
"../../../common_audio",
"../../../rtc_base:checks",
"../../../rtc_base:rtc_base_approved",
"../../../rtc_base:safe_compare",
"../../../rtc_base:safe_minmax",
"../../../system_wrappers:metrics",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
rtc_library("biquad_filter") {

View File

@ -10,50 +10,73 @@
#include "modules/audio_processing/agc2/saturation_protector.h"
#include <algorithm>
#include <iterator>
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/numerics/safe_compare.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
void ShiftBuffer(std::array<float, kPeakEnveloperBufferSize>* buffer_) {
// Move everything one element back.
std::copy(buffer_->begin() + 1, buffer_->end(), buffer_->begin());
}
// Level that the superframe peak is initialized and reset to.
constexpr float kMinLevelDbfs = -90.f;
// Min/max margins are based on speech crest-factor.
constexpr float kMinMarginDb = 12.f;
constexpr float kMaxMarginDb = 25.f;
} // namespace
// NOTE(review): this defaulted constructor coexists with an explicit
// definition of the same constructor further down; presumably this is the
// pre-change version - confirm which one belongs to the final file.
SaturationProtector::PeakEnveloper::PeakEnveloper() = default;
// Empties the ring buffer. The stored samples are left untouched; only the
// item count and the write position go back to zero.
void SaturationProtector::RingBuffer::Reset() {
  size_ = 0;
  next_ = 0;
}
// Appends `v` to the ring buffer. Once the buffer is full, each new value
// overwrites the oldest stored one.
void SaturationProtector::RingBuffer::PushBack(float v) {
  RTC_DCHECK_GE(next_, 0);
  RTC_DCHECK_GE(size_, 0);
  RTC_DCHECK_LT(next_, buffer_.size());
  RTC_DCHECK_LE(size_, buffer_.size());
  // Write at the current position, then advance it with wrap-around.
  buffer_[next_] = v;
  const int advanced = next_ + 1;
  next_ = rtc::SafeEq(advanced, buffer_.size()) ? 0 : advanced;
  // The item count saturates at the buffer capacity.
  const bool has_room = rtc::SafeLt(size_, buffer_.size());
  if (has_room) {
    ++size_;
  }
}
// Returns the oldest stored value, or `absl::nullopt` when nothing has been
// pushed since construction or the last `Reset()`.
absl::optional<float> SaturationProtector::RingBuffer::Front() const {
  const bool is_empty = (size_ == 0);
  if (is_empty) {
    return absl::nullopt;
  }
  RTC_DCHECK_LT(next_, buffer_.size());
  // When the buffer is full, the slot about to be overwritten holds the oldest
  // value; before that, the oldest value sits in the first slot.
  const bool is_full = rtc::SafeEq(size_, buffer_.size());
  const int oldest_index = is_full ? next_ : 0;
  return buffer_[oldest_index];
}
SaturationProtector::PeakEnveloper::PeakEnveloper()
: speech_time_in_estimate_ms_(0),
current_superframe_peak_dbfs_(kMinLevelDbfs) {}
// Restores the initial state: empty delay buffer, superframe peak at
// `kMinLevelDbfs` and no accumulated speech time.
void SaturationProtector::PeakEnveloper::Reset() {
  peak_delay_buffer_.Reset();
  current_superframe_peak_dbfs_ = kMinLevelDbfs;
  speech_time_in_estimate_ms_ = 0;
}
// Feeds one frame peak (`frame_peak_dbfs`) into the envelope. The running
// maximum of the current superframe is updated and `kFrameDurationMs` is added
// to the accumulated time; once that time exceeds
// `kPeakEnveloperSuperFrameLengthMs`, the superframe peak is stored in the
// delay buffer and the accumulators are reset.
void SaturationProtector::PeakEnveloper::Process(float frame_peak_dbfs) {
// Update the delayed buffer and the current superframe peak.
// Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
current_superframe_peak_dbfs_ =
std::max(current_superframe_peak_dbfs_, frame_peak_dbfs);
speech_time_in_estimate_ms_ += kFrameDurationMs;
if (speech_time_in_estimate_ms_ > kPeakEnveloperSuperFrameLengthMs) {
// Store the peak of the superframe that just ended.
peak_delay_buffer_.PushBack(current_superframe_peak_dbfs_);
// Reset.
speech_time_in_estimate_ms_ = 0;
// NOTE(review): the array-style handling below (`elements_in_buffer_`,
// `ShiftBuffer`, `rbegin()`) looks like the pre-ring-buffer code shown next
// to its replacement - confirm which lines belong to the final version.
const bool buffer_full = elements_in_buffer_ == kPeakEnveloperBufferSize;
if (buffer_full) {
ShiftBuffer(&peak_delay_buffer_);
*peak_delay_buffer_.rbegin() = current_superframe_peak_dbfs_;
} else {
peak_delay_buffer_[elements_in_buffer_] = current_superframe_peak_dbfs_;
elements_in_buffer_++;
}
// NOTE(review): the two assignments below appear to be the old (literal) and
// new (named constant) form of the same statement - confirm.
current_superframe_peak_dbfs_ = -90.f;
current_superframe_peak_dbfs_ = kMinLevelDbfs;
}
}
// Returns the oldest peak held in the delay buffer, falling back to the
// current superframe peak while the delay buffer holds no value.
float SaturationProtector::PeakEnveloper::Query() const {
// NOTE(review): the statements up to `return result;` look like the
// pre-change array-based implementation; as written, the `return` that
// follows them is unreachable - confirm which version is the final one.
float result;
if (elements_in_buffer_ > 0) {
result = peak_delay_buffer_[0];
} else {
result = current_superframe_peak_dbfs_;
}
return result;
return peak_delay_buffer_.Front().value_or(current_superframe_peak_dbfs_);
}
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper)
@ -63,8 +86,8 @@ SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper)
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper,
float extra_saturation_margin_db)
: apm_data_dumper_(apm_data_dumper),
last_margin_(GetInitialSaturationMarginDb()),
extra_saturation_margin_db_(extra_saturation_margin_db) {}
extra_saturation_margin_db_(extra_saturation_margin_db),
last_margin_(GetInitialSaturationMarginDb()) {}
void SaturationProtector::UpdateMargin(
const VadWithLevel::LevelAndProbability& vad_data,
@ -81,7 +104,8 @@ void SaturationProtector::UpdateMargin(
difference_db * (1.f - kSaturationProtectorDecayConstant);
}
last_margin_ = rtc::SafeClamp<float>(last_margin_, 12.f, 25.f);
last_margin_ =
rtc::SafeClamp<float>(last_margin_, kMinMarginDb, kMaxMarginDb);
}
float SaturationProtector::LastMargin() const {
@ -89,7 +113,7 @@ float SaturationProtector::LastMargin() const {
}
// Resets the state of the peak enveloper.
void SaturationProtector::Reset() {
// NOTE(review): both re-construction and `Reset()` appear here; presumably
// the assignment is the pre-change form of the call below - confirm.
peak_enveloper_ = PeakEnveloper();
peak_enveloper_.Reset();
}
void SaturationProtector::DebugDumpEstimate() const {

View File

@ -13,6 +13,7 @@
#include <array>
#include "absl/types/optional.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
@ -27,43 +28,54 @@ class SaturationProtector {
SaturationProtector(ApmDataDumper* apm_data_dumper,
float extra_saturation_margin_db);
// Update and return margin estimate. This method should be called
// whenever a frame is reliably classified as 'speech'.
//
// Returned value is in DB scale.
// Updates the margin estimate. This method should be called whenever a frame
// is reliably classified as 'speech'.
void UpdateMargin(const VadWithLevel::LevelAndProbability& vad_data,
float last_speech_level_estimate_dbfs);
float last_speech_level_estimate);
// Returns latest computed margin. Used in cases when speech is not
// detected.
// Returns latest computed margin.
float LastMargin() const;
// Resets the internal memory.
void Reset();
void DebugDumpEstimate() const;
private:
// Fixed-capacity FIFO of floats. Only two operations are supported: appending
// a value and reading the oldest stored value.
class RingBuffer {
 public:
  // Marks the buffer as empty.
  void Reset();
  // Appends `v`; when the buffer is already full, `v` takes the place of the
  // oldest item.
  void PushBack(float v);
  // Returns the oldest stored item, or an empty optional when nothing has
  // been stored.
  absl::optional<float> Front() const;

 private:
  std::array<float, kPeakEnveloperBufferSize> buffer_;
  // Position at which the next pushed value is written.
  int next_{0};
  // Number of items currently stored.
  int size_{0};
};
// Computes a delayed envelope of peaks.
class PeakEnveloper {
public:
PeakEnveloper();
// Restores the initial state.
void Reset();
// Accounts for the peak of one frame.
void Process(float frame_peak_dbfs);
// Returns the oldest stored superframe peak, or the current superframe peak
// while none has been stored.
float Query() const;
private:
// NOTE(review): the members below are declared twice - once with in-class
// initializers and an `std::array` delay buffer, once without initializers
// and with a `RingBuffer`. Presumably the first group is the pre-change
// version - confirm.
size_t speech_time_in_estimate_ms_ = 0;
float current_superframe_peak_dbfs_ = -90.f;
size_t elements_in_buffer_ = 0;
std::array<float, kPeakEnveloperBufferSize> peak_delay_buffer_ = {};
// Time accumulated in the current superframe.
size_t speech_time_in_estimate_ms_;
// Highest frame peak seen in the current superframe.
float current_superframe_peak_dbfs_;
// Superframe peaks, oldest first.
RingBuffer peak_delay_buffer_;
};
ApmDataDumper* apm_data_dumper_;
float last_margin_;
PeakEnveloper peak_enveloper_;
const float extra_saturation_margin_db_;
float last_margin_;
};
} // namespace webrtc