AGC2: retuning and large refactoring

- Bug fix: the desired initial gain quickly dropped to 0 dB, so calls
  started with too low a level
- New tuning to make AGC2 more robust to VAD mistakes
- Smarter max gain increase speed: when the adjacent-speech-frames
  threshold is raised, more time is spent waiting for enough speech
  frames in a row to be observed, so the gain applier temporarily allows
  a faster gain increase once that wait ends (see the sketch after this
  list)
- Saturation protector isolated from `AdaptiveModeLevelEstimator` to
  simplify the unit tests for the latter (non-bit-exact change)
- AGC2 adaptive digital config: unnecessary parameters deprecated
- Code readability improvements
- Data dumps clean-up and better naming
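
A condensed sketch of the faster gain increase (distilled from the new
adaptive_digital_gain_applier.cc logic in this change; the surrounding class
and debug dumps are omitted, and `MaxGainIncreaseDb` is a name made up for
illustration):

    // Counts down the confident speech frames still needed before the gain may
    // grow again; on the first frame after the wait, the per-frame increase
    // cap is widened so that the overall adaptation speed is preserved.
    float MaxGainIncreaseDb(float speech_probability) {
      bool first_confident_speech_frame = false;
      if (speech_probability < kVadConfidenceThreshold) {
        frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
      } else if (frames_to_gain_increase_allowed_ > 0) {
        frames_to_gain_increase_allowed_--;
        first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
      }
      if (frames_to_gain_increase_allowed_ > 0) {
        return 0.0f;  // Increasing the gain is still forbidden.
      }
      return first_confident_speech_frame
                 ? max_gain_change_db_per_10ms_ * adjacent_speech_frames_threshold_
                 : max_gain_change_db_per_10ms_;
    }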

Bug: webrtc:7494
Change-Id: I4e36059bdf2566cc2a7e1a7e95b7430ba9ae9844
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/215140
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33736}
Author: Alessio Bazzica
Date: 2021-04-14 19:09:17 +02:00
Committed by: Commit Bot
Parent: d28434bd3f
Commit: 980c4601e1
29 changed files with 990 additions and 941 deletions

View File

@ -25,6 +25,8 @@ rtc_library("adaptive_digital") {
"adaptive_mode_level_estimator.h",
"saturation_protector.cc",
"saturation_protector.h",
"saturation_protector_buffer.cc",
"saturation_protector_buffer.h",
]
configs += [ "..:apm_debug_dump" ]
@ -177,6 +179,7 @@ rtc_library("adaptive_digital_unittests") {
"adaptive_digital_gain_applier_unittest.cc",
"adaptive_mode_level_estimator_unittest.cc",
"gain_applier_unittest.cc",
"saturation_protector_buffer_unittest.cc",
"saturation_protector_unittest.cc",
]
deps = [

View File

@ -25,15 +25,6 @@ using AdaptiveDigitalConfig =
using NoiseEstimatorType =
AudioProcessing::Config::GainController2::NoiseEstimator;
void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,
ApmDataDumper& dumper) {
dumper.DumpRaw("agc2_vad_probability", info.vad_result.speech_probability);
dumper.DumpRaw("agc2_vad_rms_dbfs", info.vad_result.rms_dbfs);
dumper.DumpRaw("agc2_vad_peak_dbfs", info.vad_result.peak_dbfs);
dumper.DumpRaw("agc2_noise_estimate_dbfs", info.input_noise_level_dbfs);
dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs);
}
constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
constexpr float kMaxGainChangePerSecondDb = 3.0f;
constexpr float kMaxOutputNoiseLevelDbfs = -50.0f;
@ -72,36 +63,42 @@ constexpr NoiseEstimatorType kDefaultNoiseLevelEstimatorType =
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
: speech_level_estimator_(apm_data_dumper),
gain_applier_(apm_data_dumper,
kGainApplierAdjacentSpeechFramesThreshold,
kMaxGainChangePerSecondDb,
kMaxOutputNoiseLevelDbfs),
gain_controller_(apm_data_dumper,
kGainApplierAdjacentSpeechFramesThreshold,
kMaxGainChangePerSecondDb,
kMaxOutputNoiseLevelDbfs),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(
CreateNoiseLevelEstimator(kDefaultNoiseLevelEstimatorType,
apm_data_dumper)),
saturation_protector_(
CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
kSaturationProtectorExtraHeadroomDb,
kGainApplierAdjacentSpeechFramesThreshold,
apm_data_dumper)) {
RTC_DCHECK(apm_data_dumper);
}
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
const AdaptiveDigitalConfig& config)
: speech_level_estimator_(
apm_data_dumper,
config.level_estimator,
config.level_estimator_adjacent_speech_frames_threshold,
config.initial_saturation_margin_db,
config.extra_saturation_margin_db),
vad_(config.vad_reset_period_ms,
config.vad_probability_attack,
GetAllowedCpuFeatures(config)),
gain_applier_(apm_data_dumper,
config.gain_applier_adjacent_speech_frames_threshold,
config.max_gain_change_db_per_second,
config.max_output_noise_level_dbfs),
: speech_level_estimator_(apm_data_dumper,
config.adjacent_speech_frames_threshold),
vad_(config.vad_reset_period_ms, GetAllowedCpuFeatures(config)),
gain_controller_(apm_data_dumper,
config.adjacent_speech_frames_threshold,
config.max_gain_change_db_per_second,
config.max_output_noise_level_dbfs),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(
CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)) {
CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)),
saturation_protector_(
CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
kSaturationProtectorExtraHeadroomDb,
config.adjacent_speech_frames_threshold,
apm_data_dumper)) {
RTC_DCHECK(apm_data_dumper);
RTC_DCHECK(noise_level_estimator_);
RTC_DCHECK(saturation_protector_);
if (!config.use_saturation_protector) {
RTC_LOG(LS_WARNING) << "The saturation protector cannot be disabled.";
}
@ -111,19 +108,39 @@ AdaptiveAgc::~AdaptiveAgc() = default;
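// Per 10 ms frame, `Process()` below runs VAD, speech level estimation, noise
// level estimation and saturation protection, then feeds the collected
// `FrameInfo` to the adaptive digital gain controller, dumping debug data at
// each stage.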
void AdaptiveAgc::Process(AudioFrameView<float> frame, float limiter_envelope) {
AdaptiveDigitalGainApplier::FrameInfo info;
info.vad_result = vad_.AnalyzeFrame(frame);
speech_level_estimator_.Update(info.vad_result);
info.input_level_dbfs = speech_level_estimator_.level_dbfs();
info.input_noise_level_dbfs = noise_level_estimator_->Analyze(frame);
info.limiter_envelope_dbfs =
limiter_envelope > 0 ? FloatS16ToDbfs(limiter_envelope) : -90.0f;
info.estimate_is_confident = speech_level_estimator_.IsConfident();
DumpDebugData(info, *apm_data_dumper_);
gain_applier_.Process(info, frame);
VadLevelAnalyzer::Result vad_result = vad_.AnalyzeFrame(frame);
info.speech_probability = vad_result.speech_probability;
apm_data_dumper_->DumpRaw("agc2_speech_probability",
vad_result.speech_probability);
apm_data_dumper_->DumpRaw("agc2_input_rms_dbfs", vad_result.rms_dbfs);
apm_data_dumper_->DumpRaw("agc2_input_peak_dbfs", vad_result.peak_dbfs);
speech_level_estimator_.Update(vad_result);
info.speech_level_dbfs = speech_level_estimator_.level_dbfs();
info.speech_level_reliable = speech_level_estimator_.IsConfident();
apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", info.speech_level_dbfs);
apm_data_dumper_->DumpRaw("agc2_speech_level_reliable",
info.speech_level_reliable);
info.noise_rms_dbfs = noise_level_estimator_->Analyze(frame);
apm_data_dumper_->DumpRaw("agc2_noise_rms_dbfs", info.noise_rms_dbfs);
saturation_protector_->Analyze(info.speech_probability, vad_result.peak_dbfs,
info.speech_level_dbfs);
info.headroom_db = saturation_protector_->HeadroomDb();
apm_data_dumper_->DumpRaw("agc2_headroom_db", info.headroom_db);
info.limiter_envelope_dbfs = FloatS16ToDbfs(limiter_envelope);
apm_data_dumper_->DumpRaw("agc2_limiter_envelope_dbfs",
info.limiter_envelope_dbfs);
gain_controller_.Process(info, frame);
}
void AdaptiveAgc::Reset() {
void AdaptiveAgc::HandleInputGainChange() {
speech_level_estimator_.Reset();
saturation_protector_->Reset();
}
} // namespace webrtc

View File

@ -16,6 +16,7 @@
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
#include "modules/audio_processing/agc2/noise_level_estimator.h"
#include "modules/audio_processing/agc2/saturation_protector.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/include/audio_processing.h"
@ -38,14 +39,17 @@ class AdaptiveAgc {
// account the envelope measured by the limiter.
// TODO(crbug.com/webrtc/7494): Make the class depend on the limiter.
void Process(AudioFrameView<float> frame, float limiter_envelope);
void Reset();
// Handles a gain change applied to the input signal (e.g., analog gain).
void HandleInputGainChange();
private:
AdaptiveModeLevelEstimator speech_level_estimator_;
VadLevelAnalyzer vad_;
AdaptiveDigitalGainApplier gain_applier_;
AdaptiveDigitalGainApplier gain_controller_;
ApmDataDumper* const apm_data_dumper_;
std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;
std::unique_ptr<SaturationProtector> saturation_protector_;
};
} // namespace webrtc

View File

@ -23,6 +23,9 @@
namespace webrtc {
namespace {
constexpr int kHeadroomHistogramMin = 0;
constexpr int kHeadroomHistogramMax = 50;
// This function maps input level to desired applied gain. We want to
// boost the signal so that peaks are at -kHeadroomDbfs. We can't
// apply more than kMaxGainDb gain.
@ -31,17 +34,13 @@ float ComputeGainDb(float input_level_dbfs) {
if (input_level_dbfs < -(kHeadroomDbfs + kMaxGainDb)) {
return kMaxGainDb;
}
// We expect to end up here most of the time: the level is below
// -headroom, but we can boost it to -headroom.
if (input_level_dbfs < -kHeadroomDbfs) {
return -kHeadroomDbfs - input_level_dbfs;
}
// Otherwise, the level is too high and we can't boost. The
// LevelEstimator is responsible for not reporting bogus gain
// values.
RTC_DCHECK_LE(input_level_dbfs, 0.f);
// Otherwise, the level is too high and we can't boost.
RTC_DCHECK_GE(input_level_dbfs, -kHeadroomDbfs);
return 0.f;
}
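// Worked example for ComputeGainDb() with kHeadroomDbfs = 1 dB and
// kMaxGainDb = 30 dB (values from agc2_common.h): -40 dBFS lies below
// -(1 + 30) dB, so the full 30 dB is requested; -20 dBFS maps to
// -1 - (-20) = 19 dB; -0.5 dBFS is already above -1 dBFS, so no gain is
// requested.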
@ -52,10 +51,11 @@ float LimitGainByNoise(float target_gain,
float input_noise_level_dbfs,
float max_output_noise_level_dbfs,
ApmDataDumper& apm_data_dumper) {
const float noise_headroom_db =
const float max_allowed_gain_db =
max_output_noise_level_dbfs - input_noise_level_dbfs;
apm_data_dumper.DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
return std::min(target_gain, std::max(noise_headroom_db, 0.f));
apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
max_allowed_gain_db);
return std::min(target_gain, std::max(max_allowed_gain_db, 0.f));
}
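// Example for LimitGainByNoise() above: with max_output_noise_level_dbfs set
// to -50 dBFS and an estimated noise floor of -70 dBFS, the gain is capped at
// 20 dB; if the noise floor is already above -50 dBFS, the cap is 0 dB.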
float LimitGainByLowConfidence(float target_gain,
@ -68,8 +68,8 @@ float LimitGainByLowConfidence(float target_gain,
}
const float limiter_level_before_gain = limiter_audio_level_dbfs - last_gain;
// Compute a new gain so that limiter_level_before_gain + new_gain <=
// kLimiterThreshold.
// Compute a new gain so that `limiter_level_before_gain` + `new_target_gain`
// is not greater than `kLimiterThresholdForAgcGainDbfs`.
const float new_target_gain = std::max(
kLimiterThresholdForAgcGainDbfs - limiter_level_before_gain, 0.f);
return std::min(new_target_gain, target_gain);
@ -80,13 +80,16 @@ float LimitGainByLowConfidence(float target_gain,
float ComputeGainChangeThisFrameDb(float target_gain_db,
float last_gain_db,
bool gain_increase_allowed,
float max_gain_change_db) {
float max_gain_decrease_db,
float max_gain_increase_db) {
RTC_DCHECK_GT(max_gain_decrease_db, 0);
RTC_DCHECK_GT(max_gain_increase_db, 0);
float target_gain_difference_db = target_gain_db - last_gain_db;
if (!gain_increase_allowed) {
target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
}
return rtc::SafeClamp(target_gain_difference_db, -max_gain_change_db,
max_gain_change_db);
return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
max_gain_increase_db);
}
} // namespace
@ -115,7 +118,7 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
AudioFrameView<float> frame) {
RTC_DCHECK_GE(info.input_level_dbfs, -150.f);
RTC_DCHECK_GE(info.speech_level_dbfs, -150.f);
RTC_DCHECK_GE(frame.num_channels(), 1);
RTC_DCHECK(
frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
@ -123,30 +126,46 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
<< "`frame` does not look like a 10 ms frame for an APM supported sample "
"rate";
// Compute the input level used to select the desired gain.
RTC_DCHECK_GT(info.headroom_db, 0.0f);
const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
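// Example (derived from ComputeGainDb() above): speech at -30 dBFS with 20 dB
// of headroom gives an input level of -10 dBFS, for which 9 dB of gain is
// requested, boosting peaks up to -kHeadroomDbfs.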
const float target_gain_db = LimitGainByLowConfidence(
LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
info.input_noise_level_dbfs,
LimitGainByNoise(ComputeGainDb(input_level_dbfs), info.noise_rms_dbfs,
max_output_noise_level_dbfs_, *apm_data_dumper_),
last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);
last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
// Forbid increasing the gain until enough adjacent speech frames are
// observed.
if (info.vad_result.speech_probability < kVadConfidenceThreshold) {
bool first_confident_speech_frame = false;
if (info.speech_probability < kVadConfidenceThreshold) {
frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
} else if (frames_to_gain_increase_allowed_ > 0) {
frames_to_gain_increase_allowed_--;
first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
}
apm_data_dumper_->DumpRaw(
"agc2_adaptive_gain_applier_frames_to_gain_increase_allowed",
frames_to_gain_increase_allowed_);
const bool gain_increase_allowed = frames_to_gain_increase_allowed_ == 0;
float max_gain_increase_db = max_gain_change_db_per_10ms_;
if (first_confident_speech_frame) {
// No gain increase happened while waiting for a long enough speech
// sequence. Therefore, temporarily allow a faster gain increase.
RTC_DCHECK(gain_increase_allowed);
max_gain_increase_db *= adjacent_speech_frames_threshold_;
}
apm_data_dumper_->DumpRaw("agc2_frames_to_gain_increase_allowed",
frames_to_gain_increase_allowed_);
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
target_gain_db, last_gain_db_,
/*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0,
max_gain_change_db_per_10ms_);
target_gain_db, last_gain_db_, gain_increase_allowed,
/*max_gain_decrease_db=*/max_gain_change_db_per_10ms_,
max_gain_increase_db);
apm_data_dumper_->DumpRaw("agc2_want_to_change_by_db",
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_want_to_change_by_db",
target_gain_db - last_gain_db_);
apm_data_dumper_->DumpRaw("agc2_will_change_by_db",
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_will_change_by_db",
gain_change_this_frame_db);
// Optimization: avoid calling math functions if gain does not
@ -159,23 +178,29 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
// Remember that the gain has changed for the next iteration.
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
apm_data_dumper_->DumpRaw("agc2_applied_gain_db", last_gain_db_);
apm_data_dumper_->DumpRaw("agc2_adaptive_gain_applier_applied_gain_db",
last_gain_db_);
// Log every 10 seconds.
calls_since_last_gain_log_++;
if (calls_since_last_gain_log_ == 1000) {
calls_since_last_gain_log_ = 0;
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedSpeechLevel",
-info.speech_level_dbfs, 0, 100, 101);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
-info.noise_rms_dbfs, 0, 100, 101);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.Agc2.Headroom", info.headroom_db, kHeadroomHistogramMin,
kHeadroomHistogramMax,
kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
RTC_HISTOGRAM_COUNTS_LINEAR(
"WebRTC.Audio.Agc2.EstimatedSpeechPlusNoiseLevel",
-info.input_level_dbfs, 0, 100, 101);
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
-info.input_noise_level_dbfs, 0, 100, 101);
RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
<< " | speech_plus_noise_dbfs: " << info.input_level_dbfs
<< " | noise_dbfs: " << info.input_noise_level_dbfs
<< " | speech_dbfs: " << info.speech_level_dbfs
<< " | noise_dbfs: " << info.noise_rms_dbfs
<< " | headroom_db: " << info.headroom_db
<< " | gain_db: " << last_gain_db_;
}
}
} // namespace webrtc

View File

@ -12,33 +12,32 @@
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
#include "modules/audio_processing/agc2/gain_applier.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_frame_view.h"
namespace webrtc {
class ApmDataDumper;
// Part of the adaptive digital controller that applies a digital adaptive gain.
// The gain is updated towards a target. The logic decides when gain updates are
// allowed, it controls the adaptation speed and caps the target based on the
// estimated noise level and the speech level estimate confidence.
// TODO(bugs.webrtc.org): Split into `GainAdaptor` and `GainApplier`.
// Selects the target digital gain, decides when and how quickly to adapt to the
// target and applies the current gain to 10 ms frames.
class AdaptiveDigitalGainApplier {
public:
// Information about a frame to process.
struct FrameInfo {
float input_level_dbfs; // Estimated speech plus noise level.
float input_noise_level_dbfs; // Estimated noise level.
VadLevelAnalyzer::Result vad_result;
float limiter_envelope_dbfs; // Envelope level from the limiter.
bool estimate_is_confident;
float speech_probability; // Probability of speech in the [0, 1] range.
float speech_level_dbfs; // Estimated speech level (dBFS).
bool speech_level_reliable; // True with reliable speech level estimation.
float noise_rms_dbfs; // Estimated noise RMS level (dBFS).
float headroom_db; // Headroom (dB).
float limiter_envelope_dbfs; // Envelope level from the limiter (dBFS).
};
// Ctor.
// `adjacent_speech_frames_threshold` indicates how many speech frames are
// required before a gain increase is allowed. `max_gain_change_db_per_second`
// limits the adaptation speed (uniformly operated across frames).
// `max_output_noise_level_dbfs` limits the output noise level.
// Ctor. `adjacent_speech_frames_threshold` indicates how many adjacent speech
// frames must be observed in order to consider the sequence as speech.
// `max_gain_change_db_per_second` limits the adaptation speed (uniformly
// operated across frames). `max_output_noise_level_dbfs` limits the output
// noise level.
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second,

View File

@ -11,6 +11,7 @@
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
#include <algorithm>
#include <memory>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
@ -26,104 +27,75 @@ constexpr int kStereo = 2;
constexpr int kFrameLen10ms8kHz = 80;
constexpr int kFrameLen10ms48kHz = 480;
constexpr float kMaxSpeechProbability = 1.0f;
// Constants used in place of estimated noise levels.
constexpr float kNoNoiseDbfs = -90.f;
constexpr float kNoNoiseDbfs = kMinLevelDbfs;
constexpr float kWithNoiseDbfs = -20.f;
static_assert(std::is_trivially_destructible<VadLevelAnalyzer::Result>::value,
"");
constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f};
constexpr float kMaxGainChangePerSecondDb = 3.f;
constexpr float kMaxGainChangePerSecondDb = 3.0f;
constexpr float kMaxGainChangePerFrameDb =
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
constexpr float kMaxOutputNoiseLevelDbfs = -50.f;
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.0f;
constexpr float kMaxOutputNoiseLevelDbfs = -50.0f;
// Helper to instance `AdaptiveDigitalGainApplier`.
// Helper to create initialized `AdaptiveDigitalGainApplier` objects.
struct GainApplierHelper {
GainApplierHelper()
: GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {}
explicit GainApplierHelper(int adjacent_speech_frames_threshold)
: apm_data_dumper(0),
gain_applier(&apm_data_dumper,
adjacent_speech_frames_threshold,
kMaxGainChangePerSecondDb,
kMaxOutputNoiseLevelDbfs) {}
gain_applier(std::make_unique<AdaptiveDigitalGainApplier>(
&apm_data_dumper,
adjacent_speech_frames_threshold,
kMaxGainChangePerSecondDb,
kMaxOutputNoiseLevelDbfs)) {}
ApmDataDumper apm_data_dumper;
AdaptiveDigitalGainApplier gain_applier;
std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
};
// Runs gain applier and returns the applied gain in linear scale.
float RunOnConstantLevel(int num_iterations,
VadLevelAnalyzer::Result vad_level,
float input_level_dbfs,
AdaptiveDigitalGainApplier* gain_applier) {
float gain_linear = 0.f;
for (int i = 0; i < num_iterations; ++i) {
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info;
info.input_level_dbfs = input_level_dbfs;
info.input_noise_level_dbfs = kNoNoiseDbfs;
info.vad_result = vad_level;
info.limiter_envelope_dbfs = -2.f;
info.estimate_is_confident = true;
gain_applier->Process(info, fake_audio.float_frame_view());
gain_linear = fake_audio.float_frame_view().channel(0)[0];
}
return gain_linear;
}
// Voice on, no noise, low limiter, confident level.
static_assert(std::is_trivially_destructible<
AdaptiveDigitalGainApplier::FrameInfo>::value,
"");
constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
/*input_level_dbfs=*/-1.f,
/*input_noise_level_dbfs=*/kNoNoiseDbfs,
/*vad_result=*/kVadSpeech,
/*limiter_envelope_dbfs=*/-2.f,
/*estimate_is_confident=*/true};
/*speech_probability=*/kMaxSpeechProbability,
/*speech_level_dbfs=*/kInitialSpeechLevelEstimateDbfs,
/*speech_level_reliable=*/true,
/*noise_rms_dbfs=*/kNoNoiseDbfs,
/*headroom_db=*/kSaturationProtectorInitialHeadroomDb,
/*limiter_envelope_dbfs=*/-2.0f};
TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
GainApplierHelper helper;
// Make one call with reasonable audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -5.0;
helper.gain_applier.Process(kFrameInfo, fake_audio.float_frame_view());
info.speech_level_dbfs = -5.0f;
helper.gain_applier->Process(kFrameInfo, fake_audio.float_frame_view());
}
// Check that the output is -kHeadroom dBFS.
TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {
GainApplierHelper helper;
constexpr float initial_level_dbfs = -5.f;
const float applied_gain = RunOnConstantLevel(
200, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
EXPECT_NEAR(applied_gain, DbToRatio(-kHeadroomDbfs - initial_level_dbfs),
0.1f);
}
// Check that the output is -kHeadroom dBFS
TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
GainApplierHelper helper;
constexpr float initial_level_dbfs = -kHeadroomDbfs - kMaxGainDb - 10.f;
// A few extra frames for safety.
// Checks that the maximum allowed gain is applied.
TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
constexpr int kNumFramesToAdapt =
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
const float applied_gain = RunOnConstantLevel(
kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
EXPECT_NEAR(applied_gain, DbToRatio(kMaxGainDb), 0.1f);
const float applied_gain_db = 20.f * std::log10(applied_gain);
GainApplierHelper helper;
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.speech_level_dbfs = -60.0f;
float applied_gain;
for (int i = 0; i < kNumFramesToAdapt; ++i) {
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
helper.gain_applier->Process(info, fake_audio.float_frame_view());
applied_gain = fake_audio.float_frame_view().channel(0)[0];
}
const float applied_gain_db = 20.0f * std::log10f(applied_gain);
EXPECT_NEAR(applied_gain_db, kMaxGainDb, 0.1f);
}
TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
constexpr float initial_level_dbfs = -25.0f;
// A few extra frames for safety.
constexpr int kNumFramesToAdapt =
static_cast<int>(initial_level_dbfs / kMaxGainChangePerFrameDb) + 10;
@ -133,10 +105,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
float last_gain_linear = 1.f;
for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
helper.gain_applier.Process(info, fake_audio.float_frame_view());
info.speech_level_dbfs = initial_level_dbfs;
helper.gain_applier->Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear);
@ -146,10 +118,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
// Check that the same is true when gain decreases as well.
for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = 0.f;
helper.gain_applier.Process(info, fake_audio.float_frame_view());
info.speech_level_dbfs = 0.f;
helper.gain_applier->Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear);
@ -157,17 +129,17 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
}
}
TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
constexpr float initial_level_dbfs = -25.0f;
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
helper.gain_applier.Process(info, fake_audio.float_frame_view());
float maximal_difference = 0.f;
float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
info.speech_level_dbfs = initial_level_dbfs;
helper.gain_applier->Process(info, fake_audio.float_frame_view());
float maximal_difference = 0.0f;
float current_value = 1.0f * DbToRatio(kInitialAdaptiveDigitalGainDb);
for (const auto& x : fake_audio.float_frame_view().channel(0)) {
const float difference = std::abs(x - current_value);
maximal_difference = std::max(maximal_difference, difference);
@ -181,10 +153,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
EXPECT_LE(maximal_difference, kMaxChangePerSample);
}
TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
constexpr float initial_level_dbfs = -25.0f;
constexpr int num_initial_frames =
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
constexpr int num_frames = 50;
@ -193,11 +165,11 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
<< "kWithNoiseDbfs is too low";
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
info.input_noise_level_dbfs = kWithNoiseDbfs;
helper.gain_applier.Process(info, fake_audio.float_frame_view());
info.speech_level_dbfs = initial_level_dbfs;
info.noise_rms_dbfs = kWithNoiseDbfs;
helper.gain_applier->Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) {
@ -205,25 +177,25 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
*std::max_element(fake_audio.float_frame_view().channel(0).begin(),
fake_audio.float_frame_view().channel(0).end());
EXPECT_NEAR(maximal_ratio, 1.f, 0.001f);
EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
}
}
}
TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {
TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
GainApplierHelper helper;
// Make one call with positive audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = 5.f;
helper.gain_applier.Process(info, fake_audio.float_frame_view());
info.speech_level_dbfs = 5.0f;
helper.gain_applier->Process(info, fake_audio.float_frame_view());
}
TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
TEST(GainController2GainApplier, AudioLevelLimitsGain) {
GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
constexpr float initial_level_dbfs = -25.0f;
constexpr int num_initial_frames =
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
constexpr int num_frames = 50;
@ -232,12 +204,12 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
<< "kWithNoiseDbfs is too low";
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
info.limiter_envelope_dbfs = 1.f;
info.estimate_is_confident = false;
helper.gain_applier.Process(info, fake_audio.float_frame_view());
info.speech_level_dbfs = initial_level_dbfs;
info.limiter_envelope_dbfs = 1.0f;
info.speech_level_reliable = false;
helper.gain_applier->Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) {
@ -245,7 +217,7 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
*std::max_element(fake_audio.float_frame_view().channel(0).begin(),
fake_audio.float_frame_view().channel(0).end());
EXPECT_NEAR(maximal_ratio, 1.f, 0.001f);
EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
}
}
}
@ -260,14 +232,11 @@ TEST_P(AdaptiveDigitalGainApplierTest,
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
GainApplierHelper helper(adjacent_speech_frames_threshold);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0;
float prev_gain = 0.f;
float prev_gain = 0.0f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
helper.gain_applier.Process(info, audio.float_frame_view());
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
const float gain = audio.float_frame_view().channel(0)[0];
if (i > 0) {
EXPECT_EQ(prev_gain, gain); // No gain increase.
@ -280,25 +249,23 @@ TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
GainApplierHelper helper(adjacent_speech_frames_threshold);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0;
float prev_gain = 0.f;
float prev_gain = 0.0f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
helper.gain_applier.Process(info, audio.float_frame_view());
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
prev_gain = audio.float_frame_view().channel(0)[0];
}
// Process one more speech frame.
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
helper.gain_applier.Process(info, audio.float_frame_view());
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
// The gain has increased.
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
}
INSTANTIATE_TEST_SUITE_P(AutomaticGainController2,
INSTANTIATE_TEST_SUITE_P(GainController2,
AdaptiveDigitalGainApplierTest,
::testing::Values(1, 7, 31));

View File

@ -22,37 +22,17 @@ namespace {
using LevelEstimatorType =
AudioProcessing::Config::GainController2::LevelEstimator;
// Combines a level estimation with the saturation protector margins.
float ComputeLevelEstimateDbfs(float level_estimate_dbfs,
float saturation_margin_db,
float extra_saturation_margin_db) {
return rtc::SafeClamp<float>(
level_estimate_dbfs + saturation_margin_db + extra_saturation_margin_db,
-90.f, 30.f);
}
// Returns the level of given type from `vad_level`.
float GetLevel(const VadLevelAnalyzer::Result& vad_level,
LevelEstimatorType type) {
switch (type) {
case LevelEstimatorType::kRms:
return vad_level.rms_dbfs;
break;
case LevelEstimatorType::kPeak:
return vad_level.peak_dbfs;
break;
}
RTC_CHECK_NOTREACHED();
float ClampLevelEstimateDbfs(float level_estimate_dbfs) {
return rtc::SafeClamp<float>(level_estimate_dbfs, -90.f, 30.f);
}
} // namespace
bool AdaptiveModeLevelEstimator::LevelEstimatorState::operator==(
const AdaptiveModeLevelEstimator::LevelEstimatorState& b) const {
return time_to_full_buffer_ms == b.time_to_full_buffer_ms &&
return time_to_confidence_ms == b.time_to_confidence_ms &&
level_dbfs.numerator == b.level_dbfs.numerator &&
level_dbfs.denominator == b.level_dbfs.denominator &&
saturation_protector == b.saturation_protector;
level_dbfs.denominator == b.level_dbfs.denominator;
}
float AdaptiveModeLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
@ -64,25 +44,14 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper)
: AdaptiveModeLevelEstimator(
apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator::kRms,
kDefaultLevelEstimatorAdjacentSpeechFramesThreshold,
kDefaultInitialSaturationMarginDb,
kDefaultExtraSaturationMarginDb) {}
kDefaultLevelEstimatorAdjacentSpeechFramesThreshold) {}
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
int adjacent_speech_frames_threshold,
float initial_saturation_margin_db,
float extra_saturation_margin_db)
int adjacent_speech_frames_threshold)
: apm_data_dumper_(apm_data_dumper),
level_estimator_type_(level_estimator),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
initial_saturation_margin_db_(initial_saturation_margin_db),
extra_saturation_margin_db_(extra_saturation_margin_db),
level_dbfs_(ComputeLevelEstimateDbfs(kInitialSpeechLevelEstimateDbfs,
initial_saturation_margin_db_,
extra_saturation_margin_db_)) {
level_dbfs_(ClampLevelEstimateDbfs(kInitialSpeechLevelEstimateDbfs)) {
RTC_DCHECK(apm_data_dumper_);
RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1);
Reset();
@ -96,8 +65,6 @@ void AdaptiveModeLevelEstimator::Update(
RTC_DCHECK_LT(vad_level.peak_dbfs, 50.f);
RTC_DCHECK_GE(vad_level.speech_probability, 0.f);
RTC_DCHECK_LE(vad_level.speech_probability, 1.f);
DumpDebugData();
if (vad_level.speech_probability < kVadConfidenceThreshold) {
// Not a speech frame.
if (adjacent_speech_frames_threshold_ > 1) {
@ -115,89 +82,82 @@ void AdaptiveModeLevelEstimator::Update(
}
}
num_adjacent_speech_frames_ = 0;
return;
}
// Speech frame observed.
num_adjacent_speech_frames_++;
// Update preliminary level estimate.
RTC_DCHECK_GE(preliminary_state_.time_to_full_buffer_ms, 0);
const bool buffer_is_full = preliminary_state_.time_to_full_buffer_ms == 0;
if (!buffer_is_full) {
preliminary_state_.time_to_full_buffer_ms -= kFrameDurationMs;
}
// Weighted average of levels with speech probability as weight.
RTC_DCHECK_GT(vad_level.speech_probability, 0.f);
const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f;
preliminary_state_.level_dbfs.numerator =
preliminary_state_.level_dbfs.numerator * leak_factor +
GetLevel(vad_level, level_estimator_type_) * vad_level.speech_probability;
preliminary_state_.level_dbfs.denominator =
preliminary_state_.level_dbfs.denominator * leak_factor +
vad_level.speech_probability;
const float level_dbfs = preliminary_state_.level_dbfs.GetRatio();
UpdateSaturationProtectorState(vad_level.peak_dbfs, level_dbfs,
preliminary_state_.saturation_protector);
if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
// `preliminary_state_` is now reliable. Update the last level estimation.
level_dbfs_ = ComputeLevelEstimateDbfs(
level_dbfs, preliminary_state_.saturation_protector.margin_db,
extra_saturation_margin_db_);
} else {
// Speech frame observed.
num_adjacent_speech_frames_++;
// Update preliminary level estimate.
RTC_DCHECK_GE(preliminary_state_.time_to_confidence_ms, 0);
const bool buffer_is_full = preliminary_state_.time_to_confidence_ms == 0;
if (!buffer_is_full) {
preliminary_state_.time_to_confidence_ms -= kFrameDurationMs;
}
// Weighted average of levels with speech probability as weight.
RTC_DCHECK_GT(vad_level.speech_probability, 0.f);
const float leak_factor = buffer_is_full ? kLevelEstimatorLeakFactor : 1.f;
preliminary_state_.level_dbfs.numerator =
preliminary_state_.level_dbfs.numerator * leak_factor +
vad_level.rms_dbfs * vad_level.speech_probability;
preliminary_state_.level_dbfs.denominator =
preliminary_state_.level_dbfs.denominator * leak_factor +
vad_level.speech_probability;
const float level_dbfs = preliminary_state_.level_dbfs.GetRatio();
if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
// `preliminary_state_` is now reliable. Update the last level estimation.
level_dbfs_ = ClampLevelEstimateDbfs(level_dbfs);
}
}
DumpDebugData();
}
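// Note on the update rule above: with speech probability p and frame RMS r
// (dBFS), the preliminary level is the ratio N / D updated as
//   N <- leak * N + p * r,  D <- leak * D + p,
// where leak = kLevelEstimatorLeakFactor once `time_to_confidence_ms` reaches
// zero and leak = 1 before that.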
bool AdaptiveModeLevelEstimator::IsConfident() const {
if (adjacent_speech_frames_threshold_ == 1) {
// Ignore `reliable_state_` when a single frame is enough to update the
// level estimate (because it is not used).
return preliminary_state_.time_to_full_buffer_ms == 0;
return preliminary_state_.time_to_confidence_ms == 0;
}
// Once confident, it remains confident.
RTC_DCHECK(reliable_state_.time_to_full_buffer_ms != 0 ||
preliminary_state_.time_to_full_buffer_ms == 0);
RTC_DCHECK(reliable_state_.time_to_confidence_ms != 0 ||
preliminary_state_.time_to_confidence_ms == 0);
// During the first long enough speech sequence, `reliable_state_` must be
// ignored since `preliminary_state_` is used.
return reliable_state_.time_to_full_buffer_ms == 0 ||
return reliable_state_.time_to_confidence_ms == 0 ||
(num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ &&
preliminary_state_.time_to_full_buffer_ms == 0);
preliminary_state_.time_to_confidence_ms == 0);
}
void AdaptiveModeLevelEstimator::Reset() {
ResetLevelEstimatorState(preliminary_state_);
ResetLevelEstimatorState(reliable_state_);
level_dbfs_ = ComputeLevelEstimateDbfs(kInitialSpeechLevelEstimateDbfs,
initial_saturation_margin_db_,
extra_saturation_margin_db_);
level_dbfs_ = ClampLevelEstimateDbfs(kInitialSpeechLevelEstimateDbfs);
num_adjacent_speech_frames_ = 0;
}
void AdaptiveModeLevelEstimator::ResetLevelEstimatorState(
LevelEstimatorState& state) const {
state.time_to_full_buffer_ms = kFullBufferSizeMs;
state.level_dbfs.numerator = 0.f;
state.level_dbfs.denominator = 0.f;
ResetSaturationProtectorState(initial_saturation_margin_db_,
state.saturation_protector);
state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs;
state.level_dbfs.numerator = kInitialSpeechLevelEstimateDbfs;
state.level_dbfs.denominator = 1.0f;
}
void AdaptiveModeLevelEstimator::DumpDebugData() const {
apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs", level_dbfs_);
apm_data_dumper_->DumpRaw("agc2_adaptive_num_adjacent_speech_frames",
num_adjacent_speech_frames_);
apm_data_dumper_->DumpRaw("agc2_adaptive_preliminary_level_estimate_num",
preliminary_state_.level_dbfs.numerator);
apm_data_dumper_->DumpRaw("agc2_adaptive_preliminary_level_estimate_den",
preliminary_state_.level_dbfs.denominator);
apm_data_dumper_->DumpRaw("agc2_adaptive_preliminary_saturation_margin_db",
preliminary_state_.saturation_protector.margin_db);
apm_data_dumper_->DumpRaw("agc2_adaptive_preliminary_time_to_full_buffer_ms",
preliminary_state_.time_to_full_buffer_ms);
apm_data_dumper_->DumpRaw("agc2_adaptive_reliable_time_to_full_buffer_ms",
reliable_state_.time_to_full_buffer_ms);
apm_data_dumper_->DumpRaw(
"agc2_adaptive_level_estimator_num_adjacent_speech_frames",
num_adjacent_speech_frames_);
apm_data_dumper_->DumpRaw(
"agc2_adaptive_level_estimator_preliminary_level_estimate_num",
preliminary_state_.level_dbfs.numerator);
apm_data_dumper_->DumpRaw(
"agc2_adaptive_level_estimator_preliminary_level_estimate_den",
preliminary_state_.level_dbfs.denominator);
apm_data_dumper_->DumpRaw(
"agc2_adaptive_level_estimator_preliminary_time_to_confidence_ms",
preliminary_state_.time_to_confidence_ms);
apm_data_dumper_->DumpRaw(
"agc2_adaptive_level_estimator_reliable_time_to_confidence_ms",
reliable_state_.time_to_confidence_ms);
}
} // namespace webrtc

View File

@ -15,7 +15,6 @@
#include <type_traits>
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/saturation_protector.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_processing.h"
@ -29,12 +28,8 @@ class AdaptiveModeLevelEstimator {
AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete;
AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) =
delete;
AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
int adjacent_speech_frames_threshold,
float initial_saturation_margin_db,
float extra_saturation_margin_db);
AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold);
// Updates the level estimation.
void Update(const VadLevelAnalyzer::Result& vad_data);
@ -57,10 +52,9 @@ class AdaptiveModeLevelEstimator {
float denominator;
float GetRatio() const;
};
// TODO(crbug.com/webrtc/7494): Remove time_to_full_buffer_ms if redundant.
int time_to_full_buffer_ms;
// TODO(crbug.com/webrtc/7494): Remove time_to_confidence_ms if redundant.
int time_to_confidence_ms;
Ratio level_dbfs;
SaturationProtectorState saturation_protector;
};
static_assert(std::is_trivially_copyable<LevelEstimatorState>::value, "");
@ -70,11 +64,7 @@ class AdaptiveModeLevelEstimator {
ApmDataDumper* const apm_data_dumper_;
const AudioProcessing::Config::GainController2::LevelEstimator
level_estimator_type_;
const int adjacent_speech_frames_threshold_;
const float initial_saturation_margin_db_;
const float extra_saturation_margin_db_;
LevelEstimatorState preliminary_state_;
LevelEstimatorState reliable_state_;
float level_dbfs_;

View File

@ -19,22 +19,34 @@
namespace webrtc {
namespace {
constexpr float kInitialSaturationMarginDb = 20.f;
constexpr float kExtraSaturationMarginDb = 2.f;
// Number of speech frames that the level estimator must observe in order to
// become confident about the estimated level.
constexpr int kNumFramesToConfidence =
kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs;
static_assert(kNumFramesToConfidence > 0, "");
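// With kLevelEstimatorTimeToConfidenceMs = 400 and 10 ms frames, this equals
// 40 frames.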
static_assert(kInitialSpeechLevelEstimateDbfs < 0.f, "");
constexpr float kVadLevelRms = kInitialSpeechLevelEstimateDbfs / 2.f;
constexpr float kVadLevelPeak = kInitialSpeechLevelEstimateDbfs / 3.f;
// Fake levels and speech probabilities used in the tests.
static_assert(kInitialSpeechLevelEstimateDbfs < 0.0f, "");
constexpr float kVadLevelRms = kInitialSpeechLevelEstimateDbfs / 2.0f;
constexpr float kVadLevelPeak = kInitialSpeechLevelEstimateDbfs / 3.0f;
static_assert(kVadLevelRms < kVadLevelPeak, "");
static_assert(kVadLevelRms > kInitialSpeechLevelEstimateDbfs, "");
static_assert(kVadLevelRms - kInitialSpeechLevelEstimateDbfs > 5.0f,
"Adjust `kVadLevelRms` so that the difference from the initial "
"level is wide enough for the tests.");
constexpr VadLevelAnalyzer::Result kVadDataSpeech{/*speech_probability=*/1.f,
constexpr VadLevelAnalyzer::Result kVadDataSpeech{/*speech_probability=*/1.0f,
kVadLevelRms, kVadLevelPeak};
constexpr VadLevelAnalyzer::Result kVadDataNonSpeech{
/*speech_probability=*/kVadConfidenceThreshold / 2.f, kVadLevelRms,
/*speech_probability=*/kVadConfidenceThreshold / 2.0f, kVadLevelRms,
kVadLevelPeak};
constexpr float kMinSpeechProbability = 0.f;
constexpr float kMaxSpeechProbability = 1.f;
constexpr float kMinSpeechProbability = 0.0f;
constexpr float kMaxSpeechProbability = 1.0f;
constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f;
// Provides the `vad_level` value `num_iterations` times to `level_estimator`.
void RunOnConstantLevel(int num_iterations,
const VadLevelAnalyzer::Result& vad_level,
AdaptiveModeLevelEstimator& level_estimator) {
@ -43,172 +55,125 @@ void RunOnConstantLevel(int num_iterations,
}
}
// Level estimator with data dumper.
struct TestLevelEstimator {
TestLevelEstimator()
: data_dumper(0),
estimator(std::make_unique<AdaptiveModeLevelEstimator>(
&data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator::kRms,
/*adjacent_speech_frames_threshold=*/1,
kInitialSaturationMarginDb,
kExtraSaturationMarginDb)) {}
/*adjacent_speech_frames_threshold=*/1)) {}
ApmDataDumper data_dumper;
std::unique_ptr<AdaptiveModeLevelEstimator> estimator;
};
TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
EstimatorShouldNotCrash) {
// Checks the initially estimated level.
TEST(GainController2AdaptiveModeLevelEstimator, CheckInitialEstimate) {
TestLevelEstimator level_estimator;
VadLevelAnalyzer::Result vad_level{kMaxSpeechProbability, /*rms_dbfs=*/-20.f,
/*peak_dbfs=*/-10.f};
level_estimator.estimator->Update(vad_level);
static_cast<void>(level_estimator.estimator->level_dbfs());
EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
kInitialSpeechLevelEstimateDbfs);
}
TEST(AutomaticGainController2AdaptiveModeLevelEstimator, LevelShouldStabilize) {
// Checks that the level estimator converges to a constant input speech level.
TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
TestLevelEstimator level_estimator;
constexpr float kSpeechPeakDbfs = -15.f;
RunOnConstantLevel(100,
VadLevelAnalyzer::Result{kMaxSpeechProbability,
/*rms_dbfs=*/kSpeechPeakDbfs -
kInitialSaturationMarginDb,
kSpeechPeakDbfs},
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
*level_estimator.estimator);
EXPECT_NEAR(
level_estimator.estimator->level_dbfs() - kExtraSaturationMarginDb,
kSpeechPeakDbfs, 0.1f);
const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
RunOnConstantLevel(/*num_iterations=*/1, kVadDataSpeech,
*level_estimator.estimator);
EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs,
0.1f);
}
TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
EstimatorIgnoresZeroProbabilityFrames) {
// Checks that the level estimator does not become confident when too few
// speech frames are observed.
TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {
TestLevelEstimator level_estimator;
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
kVadDataSpeech, *level_estimator.estimator);
EXPECT_FALSE(level_estimator.estimator->IsConfident());
}
// Run for one second of fake audio.
constexpr float kSpeechRmsDbfs = -25.f;
RunOnConstantLevel(100,
VadLevelAnalyzer::Result{kMaxSpeechProbability,
/*rms_dbfs=*/kSpeechRmsDbfs -
kInitialSaturationMarginDb,
/*peak_dbfs=*/kSpeechRmsDbfs},
// Checks that the level estimator becomes confident when enough speech frames
// are observed.
TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
TestLevelEstimator level_estimator;
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
*level_estimator.estimator);
EXPECT_TRUE(level_estimator.estimator->IsConfident());
}
// Run for one more second, but mark as not speech.
constexpr float kNoiseRmsDbfs = 0.f;
RunOnConstantLevel(100,
// Checks that the estimated level is not affected by the level of non-speech
// frames.
TEST(GainController2AdaptiveModeLevelEstimator,
EstimatorIgnoresNonSpeechFrames) {
TestLevelEstimator level_estimator;
// Simulate speech.
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
*level_estimator.estimator);
const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
// Simulate full-scale non-speech.
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
VadLevelAnalyzer::Result{kMinSpeechProbability,
/*rms_dbfs=*/kNoiseRmsDbfs,
/*peak_dbfs=*/kNoiseRmsDbfs},
/*rms_dbfs=*/0.0f,
/*peak_dbfs=*/0.0f},
*level_estimator.estimator);
// Level should not have changed.
EXPECT_NEAR(
level_estimator.estimator->level_dbfs() - kExtraSaturationMarginDb,
kSpeechRmsDbfs, 0.1f);
// No estimated level change is expected.
EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
estimated_level_dbfs);
}
TEST(AutomaticGainController2AdaptiveModeLevelEstimator, TimeToAdapt) {
// Checks the convergence speed of the estimator before it becomes confident.
TEST(GainController2AdaptiveModeLevelEstimator,
ConvergenceSpeedBeforeConfidence) {
TestLevelEstimator level_estimator;
// Run for one 'window size' interval.
constexpr float kInitialSpeechRmsDbfs = -30.f;
RunOnConstantLevel(
kFullBufferSizeMs / kFrameDurationMs,
VadLevelAnalyzer::Result{
kMaxSpeechProbability,
/*rms_dbfs=*/kInitialSpeechRmsDbfs - kInitialSaturationMarginDb,
/*peak_dbfs=*/kInitialSpeechRmsDbfs},
*level_estimator.estimator);
// Run for one half 'window size' interval. This should not be enough to
// adapt.
constexpr float kDifferentSpeechRmsDbfs = -10.f;
// It should at most differ by 25% after one half 'window size' interval.
// TODO(crbug.com/webrtc/7494): Add constexpr for repeated expressions.
const float kMaxDifferenceDb =
0.25f * std::abs(kDifferentSpeechRmsDbfs - kInitialSpeechRmsDbfs);
RunOnConstantLevel(
static_cast<int>(kFullBufferSizeMs / kFrameDurationMs / 2),
VadLevelAnalyzer::Result{
kMaxSpeechProbability,
/*rms_dbfs=*/kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
/*peak_dbfs=*/kDifferentSpeechRmsDbfs},
*level_estimator.estimator);
EXPECT_GT(std::abs(kDifferentSpeechRmsDbfs -
level_estimator.estimator->level_dbfs()),
kMaxDifferenceDb);
// Run for some more time. Afterwards, we should have adapted.
RunOnConstantLevel(
static_cast<int>(3 * kFullBufferSizeMs / kFrameDurationMs),
VadLevelAnalyzer::Result{
kMaxSpeechProbability,
/*rms_dbfs=*/kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
/*peak_dbfs=*/kDifferentSpeechRmsDbfs},
*level_estimator.estimator);
EXPECT_NEAR(
level_estimator.estimator->level_dbfs() - kExtraSaturationMarginDb,
kDifferentSpeechRmsDbfs, kMaxDifferenceDb * 0.5f);
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
*level_estimator.estimator);
EXPECT_NEAR(level_estimator.estimator->level_dbfs(), kVadDataSpeech.rms_dbfs,
kConvergenceSpeedTestsLevelTolerance);
}
TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
ResetGivesFastAdaptation) {
// Checks the convergence speed of the estimator after it becomes confident.
TEST(GainController2AdaptiveModeLevelEstimator,
ConvergenceSpeedAfterConfidence) {
TestLevelEstimator level_estimator;
// Run the level estimator for one window size interval. This gives time to
// adapt.
constexpr float kInitialSpeechRmsDbfs = -30.f;
// Reach confidence using the initial level estimate.
RunOnConstantLevel(
kFullBufferSizeMs / kFrameDurationMs,
/*num_iterations=*/kNumFramesToConfidence,
VadLevelAnalyzer::Result{
kMaxSpeechProbability,
/*rms_dbfs=*/kInitialSpeechRmsDbfs - kInitialSaturationMarginDb,
/*peak_dbfs=*/kInitialSpeechRmsDbfs},
/*rms_dbfs=*/kInitialSpeechLevelEstimateDbfs,
/*peak_dbfs=*/kInitialSpeechLevelEstimateDbfs + 6.0f},
*level_estimator.estimator);
constexpr float kDifferentSpeechRmsDbfs = -10.f;
// Reset and run one half window size interval.
level_estimator.estimator->Reset();
// No estimate change should occur, but confidence is achieved.
ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
kInitialSpeechLevelEstimateDbfs);
ASSERT_TRUE(level_estimator.estimator->IsConfident());
// After confidence.
constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds.
static_assert(
kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, "");
RunOnConstantLevel(
kFullBufferSizeMs / kFrameDurationMs / 2,
VadLevelAnalyzer::Result{
kMaxSpeechProbability,
/*rms_dbfs=*/kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
/*peak_dbfs=*/kDifferentSpeechRmsDbfs},
*level_estimator.estimator);
// The level should be close to 'kDifferentSpeechRmsDbfs'.
const float kMaxDifferenceDb =
0.1f * std::abs(kDifferentSpeechRmsDbfs - kInitialSpeechRmsDbfs);
EXPECT_LT(std::abs(kDifferentSpeechRmsDbfs -
(level_estimator.estimator->level_dbfs() -
kExtraSaturationMarginDb)),
kMaxDifferenceDb);
/*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames,
kVadDataSpeech, *level_estimator.estimator);
EXPECT_NEAR(level_estimator.estimator->level_dbfs(), kVadDataSpeech.rms_dbfs,
kConvergenceSpeedTestsLevelTolerance);
}
struct TestConfig {
int min_consecutive_speech_frames;
float initial_saturation_margin_db;
float extra_saturation_margin_db;
class AdaptiveModeLevelEstimatorParametrization
: public ::testing::TestWithParam<int> {
protected:
int adjacent_speech_frames_threshold() const { return GetParam(); }
};
class AdaptiveModeLevelEstimatorTest
: public ::testing::TestWithParam<TestConfig> {};
TEST_P(AdaptiveModeLevelEstimatorTest, DoNotAdaptToShortSpeechSegments) {
const auto params = GetParam();
TEST_P(AdaptiveModeLevelEstimatorParametrization,
DoNotAdaptToShortSpeechSegments) {
ApmDataDumper apm_data_dumper(0);
AdaptiveModeLevelEstimator level_estimator(
&apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator::kRms,
params.min_consecutive_speech_frames, params.initial_saturation_margin_db,
params.extra_saturation_margin_db);
&apm_data_dumper, adjacent_speech_frames_threshold());
const float initial_level = level_estimator.level_dbfs();
ASSERT_LT(initial_level, kVadDataSpeech.rms_dbfs);
for (int i = 0; i < params.min_consecutive_speech_frames - 1; ++i) {
ASSERT_LT(initial_level, kVadDataSpeech.peak_dbfs);
for (int i = 0; i < adjacent_speech_frames_threshold() - 1; ++i) {
SCOPED_TRACE(i);
level_estimator.Update(kVadDataSpeech);
EXPECT_EQ(initial_level, level_estimator.level_dbfs());
@ -217,26 +182,21 @@ TEST_P(AdaptiveModeLevelEstimatorTest, DoNotAdaptToShortSpeechSegments) {
EXPECT_EQ(initial_level, level_estimator.level_dbfs());
}
TEST_P(AdaptiveModeLevelEstimatorTest, AdaptToEnoughSpeechSegments) {
const auto params = GetParam();
TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
ApmDataDumper apm_data_dumper(0);
AdaptiveModeLevelEstimator level_estimator(
&apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator::kRms,
params.min_consecutive_speech_frames, params.initial_saturation_margin_db,
params.extra_saturation_margin_db);
&apm_data_dumper, adjacent_speech_frames_threshold());
const float initial_level = level_estimator.level_dbfs();
ASSERT_LT(initial_level, kVadDataSpeech.rms_dbfs);
for (int i = 0; i < params.min_consecutive_speech_frames; ++i) {
ASSERT_LT(initial_level, kVadDataSpeech.peak_dbfs);
for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
level_estimator.Update(kVadDataSpeech);
}
EXPECT_LT(initial_level, level_estimator.level_dbfs());
}
INSTANTIATE_TEST_SUITE_P(AutomaticGainController2,
AdaptiveModeLevelEstimatorTest,
::testing::Values(TestConfig{1, 0.f, 0.f},
TestConfig{9, 0.f, 0.f}));
INSTANTIATE_TEST_SUITE_P(GainController2,
AdaptiveModeLevelEstimatorParametrization,
::testing::Values(1, 9, 17));
} // namespace
} // namespace webrtc

View File

@ -11,20 +11,19 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AGC2_AGC2_COMMON_H_
#include <stddef.h>
namespace webrtc {
constexpr float kMinFloatS16Value = -32768.0f;
constexpr float kMaxFloatS16Value = 32767.0f;
constexpr float kMaxAbsFloatS16Value = 32768.0f;
// Minimum audio level in dBFS scale for S16 samples.
constexpr float kMinLevelDbfs = -90.31f;
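// (-90.31 dB is the level of the smallest non-zero S16 amplitude:
//  20 * log10(1 / 32768) ~= -90.309 dB.)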
constexpr int kFrameDurationMs = 10;
constexpr int kSubFramesInFrame = 20;
constexpr int kMaximalNumberOfSamplesPerChannel = 480;
constexpr float kAttackFilterConstant = 0.0f;
// Adaptive digital gain applier settings below.
constexpr float kHeadroomDbfs = 1.0f;
constexpr float kMaxGainDb = 30.0f;
@ -37,43 +36,29 @@ constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
// gain reduction.
constexpr float kVadConfidenceThreshold = 0.95f;
// The amount of 'memory' of the Level Estimator. Decides leak factors.
constexpr int kFullBufferSizeMs = 1200;
constexpr float kFullBufferLeakFactor = 1.0f - 1.0f / kFullBufferSizeMs;
constexpr float kInitialSpeechLevelEstimateDbfs = -30.0f;
// Adaptive digital level estimator parameters.
// Number of milliseconds of speech frames to observe to make the estimator
// confident.
constexpr float kLevelEstimatorTimeToConfidenceMs = 400;
constexpr float kLevelEstimatorLeakFactor =
1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs;
// Robust VAD probability and speech decisions.
constexpr int kDefaultVadRnnResetPeriodMs = 1500;
static_assert(kDefaultVadRnnResetPeriodMs % kFrameDurationMs == 0, "");
constexpr float kDefaultSmoothedVadProbabilityAttack = 1.0f;
constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 1;
constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;
// Saturation Protector settings.
constexpr float kDefaultInitialSaturationMarginDb = 20.0f;
constexpr float kDefaultExtraSaturationMarginDb = 2.0f;
constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f;
constexpr float kSaturationProtectorExtraHeadroomDb = 5.0f;
constexpr int kSaturationProtectorBufferSize = 4;
constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
static_assert(kFullBufferSizeMs % kPeakEnveloperSuperFrameLengthMs == 0,
"Full buffer size should be a multiple of super frame length for "
"optimal Saturation Protector performance.");
constexpr int kPeakEnveloperBufferSize =
kFullBufferSizeMs / kPeakEnveloperSuperFrameLengthMs + 1;
// This value is 10 ** (-1/20 * frame_size_ms / satproc_attack_ms),
// where satproc_attack_ms is 5000.
constexpr float kSaturationProtectorAttackConstant = 0.9988493699365052f;
// This value is 10 ** (-1/20 * frame_size_ms / satproc_decay_ms),
// where satproc_decay_ms is 1000.
constexpr float kSaturationProtectorDecayConstant = 0.9997697679981565f;
// This is computed from kDecayMs by
// 10 ** (-1/20 * subframe_duration / kDecayMs).
// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
// kDecayMs is defined in agc2_testing_common.h
constexpr float kDecayFilterConstant = 0.9998848773724686f;
// Set the initial speech level estimate so that `kInitialAdaptiveDigitalGainDb`
// is applied at the beginning of the call.
constexpr float kInitialSpeechLevelEstimateDbfs =
-kSaturationProtectorExtraHeadroomDb -
kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
kHeadroomDbfs;
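// Equivalently, the definition above can be read as
//   kInitialAdaptiveDigitalGainDb = -kHeadroomDbfs -
//       kSaturationProtectorInitialHeadroomDb -
//       kSaturationProtectorExtraHeadroomDb - kInitialSpeechLevelEstimateDbfs,
// i.e., the estimate starts just low enough that (presumably, after reserving
// the saturation headroom) lifting speech towards -kHeadroomDbfs requires the
// configured initial gain.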
// Number of interpolation points for each region of the limiter.
// These values have been tuned to limit the interpolated gain curve error given

View File

@ -14,7 +14,7 @@
namespace webrtc {
TEST(AutomaticGainController2Common, TestLinSpace) {
TEST(GainController2TestingCommon, LinSpace) {
std::vector<double> points1 = test::LinSpace(-1.0, 2.0, 4);
const std::vector<double> expected_points1{{-1.0, 0.0, 1.0, 2.0}};
EXPECT_EQ(expected_points1, points1);

View File

@ -22,6 +22,14 @@ namespace {
constexpr float kInitialFilterStateLevel = 0.f;
// Instant attack.
constexpr float kAttackFilterConstant = 0.f;
// This is computed from kDecayMs by
// 10 ** (-1/20 * subframe_duration / kDecayMs).
// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
// kDecayMs is defined in agc2_testing_common.h
constexpr float kDecayFilterConstant = 0.9998848773724686f;
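// For reference, with kFrameDurationMs = 10 and kSubFramesInFrame = 20 the
// sub-frame duration is 0.5 ms, so the value above corresponds to
// std::pow(10.0f, -0.5f / (20.0f * kDecayMs)); kDecayMs itself is not shown in
// this change.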
} // namespace
FixedDigitalLevelEstimator::FixedDigitalLevelEstimator(

View File

@ -101,25 +101,25 @@ float TimeMsToDecreaseLevel(int sample_rate_hz,
}
} // namespace
TEST(AutomaticGainController2LevelEstimator, EstimatorShouldNotCrash) {
TEST(GainController2FixedDigitalLevelEstimator, EstimatorShouldNotCrash) {
TestLevelEstimator(8000, 1, 0, std::numeric_limits<float>::lowest(),
std::numeric_limits<float>::max());
}
TEST(AutomaticGainController2LevelEstimator,
TEST(GainController2FixedDigitalLevelEstimator,
EstimatorShouldEstimateConstantLevel) {
TestLevelEstimator(10000, 1, kInputLevel, kInputLevel * 0.99,
kInputLevel * 1.01);
}
TEST(AutomaticGainController2LevelEstimator,
TEST(GainController2FixedDigitalLevelEstimator,
EstimatorShouldEstimateConstantLevelForManyChannels) {
constexpr size_t num_channels = 10;
TestLevelEstimator(20000, num_channels, kInputLevel, kInputLevel * 0.99,
kInputLevel * 1.01);
}
TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForLowLevel) {
TEST(GainController2FixedDigitalLevelEstimator, TimeToDecreaseForLowLevel) {
constexpr float kLevelReductionDb = 25;
constexpr float kInitialLowLevel = -40;
constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;
@ -131,7 +131,8 @@ TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForLowLevel) {
EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
}
TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForFullScaleLevel) {
TEST(GainController2FixedDigitalLevelEstimator,
TimeToDecreaseForFullScaleLevel) {
constexpr float kLevelReductionDb = 25;
constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;
@ -142,7 +143,7 @@ TEST(AutomaticGainController2LevelEstimator, TimeToDecreaseForFullScaleLevel) {
EXPECT_LE(time_to_decrease, kExpectedTime * 1.1);
}
TEST(AutomaticGainController2LevelEstimator,
TEST(GainController2FixedDigitalLevelEstimator,
TimeToDecreaseForMultipleChannels) {
constexpr float kLevelReductionDb = 25;
constexpr float kExpectedTime = kLevelReductionDb * test::kDecayMs;

View File

@ -75,7 +75,7 @@ class InterpolatedGainCurve {
private:
// For comparing 'approximation_params_*_' with ones computed by
// ComputeInterpolatedGainCurve.
FRIEND_TEST_ALL_PREFIXES(AutomaticGainController2InterpolatedGainCurve,
FRIEND_TEST_ALL_PREFIXES(GainController2InterpolatedGainCurve,
CheckApproximationParams);
struct RegionLogger {

View File

@ -34,7 +34,7 @@ const LimiterDbGainCurve limiter;
} // namespace
TEST(AutomaticGainController2InterpolatedGainCurve, CreateUse) {
TEST(GainController2InterpolatedGainCurve, CreateUse) {
InterpolatedGainCurve igc(&apm_data_dumper, "");
const auto levels = test::LinSpace(
@ -44,7 +44,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve, CreateUse) {
}
}
TEST(AutomaticGainController2InterpolatedGainCurve, CheckValidOutput) {
TEST(GainController2InterpolatedGainCurve, CheckValidOutput) {
InterpolatedGainCurve igc(&apm_data_dumper, "");
const auto levels = test::LinSpace(
@ -57,7 +57,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve, CheckValidOutput) {
}
}
TEST(AutomaticGainController2InterpolatedGainCurve, CheckMonotonicity) {
TEST(GainController2InterpolatedGainCurve, CheckMonotonicity) {
InterpolatedGainCurve igc(&apm_data_dumper, "");
const auto levels = test::LinSpace(
@ -71,7 +71,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve, CheckMonotonicity) {
}
}
TEST(AutomaticGainController2InterpolatedGainCurve, CheckApproximation) {
TEST(GainController2InterpolatedGainCurve, CheckApproximation) {
InterpolatedGainCurve igc(&apm_data_dumper, "");
const auto levels = test::LinSpace(
@ -84,7 +84,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve, CheckApproximation) {
}
}
TEST(AutomaticGainController2InterpolatedGainCurve, CheckRegionBoundaries) {
TEST(GainController2InterpolatedGainCurve, CheckRegionBoundaries) {
InterpolatedGainCurve igc(&apm_data_dumper, "");
const std::vector<double> levels{
@ -102,7 +102,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve, CheckRegionBoundaries) {
EXPECT_EQ(1ul, stats.look_ups_saturation_region);
}
TEST(AutomaticGainController2InterpolatedGainCurve, CheckIdentityRegion) {
TEST(GainController2InterpolatedGainCurve, CheckIdentityRegion) {
constexpr size_t kNumSteps = 10;
InterpolatedGainCurve igc(&apm_data_dumper, "");
@ -120,8 +120,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve, CheckIdentityRegion) {
EXPECT_EQ(0ul, stats.look_ups_saturation_region);
}
TEST(AutomaticGainController2InterpolatedGainCurve,
CheckNoOverApproximationKnee) {
TEST(GainController2InterpolatedGainCurve, CheckNoOverApproximationKnee) {
constexpr size_t kNumSteps = 10;
InterpolatedGainCurve igc(&apm_data_dumper, "");
@ -142,8 +141,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve,
EXPECT_EQ(0ul, stats.look_ups_saturation_region);
}
TEST(AutomaticGainController2InterpolatedGainCurve,
CheckNoOverApproximationBeyondKnee) {
TEST(GainController2InterpolatedGainCurve, CheckNoOverApproximationBeyondKnee) {
constexpr size_t kNumSteps = 10;
InterpolatedGainCurve igc(&apm_data_dumper, "");
@ -164,7 +162,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve,
EXPECT_EQ(0ul, stats.look_ups_saturation_region);
}
TEST(AutomaticGainController2InterpolatedGainCurve,
TEST(GainController2InterpolatedGainCurve,
CheckNoOverApproximationWithSaturation) {
constexpr size_t kNumSteps = 3;
InterpolatedGainCurve igc(&apm_data_dumper, "");
@ -184,7 +182,7 @@ TEST(AutomaticGainController2InterpolatedGainCurve,
EXPECT_EQ(kNumSteps, stats.look_ups_saturation_region);
}
TEST(AutomaticGainController2InterpolatedGainCurve, CheckApproximationParams) {
TEST(GainController2InterpolatedGainCurve, CheckApproximationParams) {
test::InterpolatedParameters parameters =
test::ComputeInterpolatedGainCurveApproximationParams();

View File

@ -184,7 +184,7 @@ class NoiseFloorEstimator : public NoiseLevelEstimator {
const float frame_energy = FrameEnergy(frame);
if (frame_energy <= min_noise_energy_) {
// Ignore frames when muted or below the minimum measurable energy.
data_dumper_->DumpRaw("agc2_noise_floor_preliminary_level",
data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level",
noise_energy_);
return EnergyToDbfs(noise_energy_, frame.samples_per_channel());
}
@ -196,7 +196,7 @@ class NoiseFloorEstimator : public NoiseLevelEstimator {
preliminary_noise_energy_ = frame_energy;
preliminary_noise_energy_set_ = true;
}
data_dumper_->DumpRaw("agc2_noise_floor_preliminary_level",
data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level",
preliminary_noise_energy_);
if (counter_ == 0) {

View File

@ -10,84 +10,59 @@
#include "modules/audio_processing/agc2/saturation_protector.h"
#include <memory>
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
constexpr float kMinLevelDbfs = -90.f;
constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
constexpr float kMinMarginDb = 12.0f;
constexpr float kMaxMarginDb = 25.0f;
constexpr float kAttack = 0.9988493699365052f;
constexpr float kDecay = 0.9997697679981565f;
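// `kAttack` and `kDecay` are one-pole smoothing coefficients used in
// `UpdateSaturationProtectorState()` below; the smaller attack coefficient
// lets the headroom grow faster than it decays.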
// Min/max margins are based on speech crest-factor.
constexpr float kMinMarginDb = 12.f;
constexpr float kMaxMarginDb = 25.f;
using saturation_protector_impl::RingBuffer;
} // namespace
bool RingBuffer::operator==(const RingBuffer& b) const {
RTC_DCHECK_LE(size_, buffer_.size());
RTC_DCHECK_LE(b.size_, b.buffer_.size());
if (size_ != b.size_) {
return false;
// Saturation protector state. Defined outside of `SaturationProtectorImpl` to
// implement check-point and restore ops.
struct SaturationProtectorState {
bool operator==(const SaturationProtectorState& s) const {
return headroom_db == s.headroom_db &&
peak_delay_buffer == s.peak_delay_buffer &&
max_peaks_dbfs == s.max_peaks_dbfs &&
time_since_push_ms == s.time_since_push_ms;
}
for (int i = 0, i0 = FrontIndex(), i1 = b.FrontIndex(); i < size_;
++i, ++i0, ++i1) {
if (buffer_[i0 % buffer_.size()] != b.buffer_[i1 % b.buffer_.size()]) {
return false;
}
inline bool operator!=(const SaturationProtectorState& s) const {
return !(*this == s);
}
return true;
}
void RingBuffer::Reset() {
next_ = 0;
size_ = 0;
}
float headroom_db;
SaturationProtectorBuffer peak_delay_buffer;
float max_peaks_dbfs;
int time_since_push_ms; // Time since the last ring buffer push operation.
};
void RingBuffer::PushBack(float v) {
RTC_DCHECK_GE(next_, 0);
RTC_DCHECK_GE(size_, 0);
RTC_DCHECK_LT(next_, buffer_.size());
RTC_DCHECK_LE(size_, buffer_.size());
buffer_[next_++] = v;
if (rtc::SafeEq(next_, buffer_.size())) {
next_ = 0;
}
if (rtc::SafeLt(size_, buffer_.size())) {
size_++;
}
}
absl::optional<float> RingBuffer::Front() const {
if (size_ == 0) {
return absl::nullopt;
}
RTC_DCHECK_LT(FrontIndex(), buffer_.size());
return buffer_[FrontIndex()];
}
bool SaturationProtectorState::operator==(
const SaturationProtectorState& b) const {
return margin_db == b.margin_db && peak_delay_buffer == b.peak_delay_buffer &&
max_peaks_dbfs == b.max_peaks_dbfs &&
time_since_push_ms == b.time_since_push_ms;
}
void ResetSaturationProtectorState(float initial_margin_db,
// Resets the saturation protector state.
void ResetSaturationProtectorState(float initial_headroom_db,
SaturationProtectorState& state) {
state.margin_db = initial_margin_db;
state.headroom_db = initial_headroom_db;
state.peak_delay_buffer.Reset();
state.max_peaks_dbfs = kMinLevelDbfs;
state.time_since_push_ms = 0;
}
void UpdateSaturationProtectorState(float speech_peak_dbfs,
// Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
// and the peak level `peak_dbfs` for an observed frame. `state` must not be
// modified without calling this function.
void UpdateSaturationProtectorState(float peak_dbfs,
float speech_level_dbfs,
SaturationProtectorState& state) {
// Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, speech_peak_dbfs);
state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
state.time_since_push_ms += kFrameDurationMs;
if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) {
// Push `max_peaks_dbfs` back into the ring buffer.
@ -97,25 +72,117 @@ void UpdateSaturationProtectorState(float speech_peak_dbfs,
state.time_since_push_ms = 0;
}
// Update margin by comparing the estimated speech level and the delayed max
// speech peak power.
// TODO(alessiob): Check with aleloi@ why we use a delay and how to tune it.
// Update the headroom by comparing the estimated speech level and the delayed
// max speech peak.
const float delayed_peak_dbfs =
state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
if (difference_db > state.margin_db) {
if (difference_db > state.headroom_db) {
// Attack.
state.margin_db =
state.margin_db * kSaturationProtectorAttackConstant +
difference_db * (1.f - kSaturationProtectorAttackConstant);
state.headroom_db =
state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
} else {
// Decay.
state.margin_db = state.margin_db * kSaturationProtectorDecayConstant +
difference_db * (1.f - kSaturationProtectorDecayConstant);
state.headroom_db =
state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
}
state.margin_db =
rtc::SafeClamp<float>(state.margin_db, kMinMarginDb, kMaxMarginDb);
state.headroom_db =
rtc::SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb);
}
// Saturation protector which recommends a headroom based on the recent peaks.
class SaturationProtectorImpl : public SaturationProtector {
public:
explicit SaturationProtectorImpl(float initial_headroom_db,
float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper)
: apm_data_dumper_(apm_data_dumper),
initial_headroom_db_(initial_headroom_db),
extra_headroom_db_(extra_headroom_db),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
Reset();
}
SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
~SaturationProtectorImpl() = default;
float HeadroomDb() override { return headroom_db_; }
void Analyze(float speech_probability,
float peak_dbfs,
float speech_level_dbfs) override {
if (speech_probability < kVadConfidenceThreshold) {
// Not a speech frame.
if (adjacent_speech_frames_threshold_ > 1) {
// When two or more adjacent speech frames are required in order to
// update the state, we need to decide whether to discard or confirm the
// updates based on the speech sequence length.
if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
// First non-speech frame after a long enough sequence of speech
// frames. Update the reliable state.
reliable_state_ = preliminary_state_;
} else if (num_adjacent_speech_frames_ > 0) {
// First non-speech frame after a too short sequence of speech frames.
// Reset to the last reliable state.
preliminary_state_ = reliable_state_;
}
}
num_adjacent_speech_frames_ = 0;
} else {
// Speech frame observed.
num_adjacent_speech_frames_++;
// Update the preliminary state.
UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
preliminary_state_);
if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
// `preliminary_state_` is now reliable. Update the headroom.
headroom_db_ = preliminary_state_.headroom_db + extra_headroom_db_;
}
}
DumpDebugData();
}
void Reset() override {
num_adjacent_speech_frames_ = 0;
headroom_db_ = initial_headroom_db_ + extra_headroom_db_;
ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
}
private:
void DumpDebugData() {
apm_data_dumper_->DumpRaw(
"agc2_saturation_protector_preliminary_max_peak_dbfs",
preliminary_state_.max_peaks_dbfs);
apm_data_dumper_->DumpRaw(
"agc2_saturation_protector_reliable_max_peak_dbfs",
reliable_state_.max_peaks_dbfs);
}
ApmDataDumper* const apm_data_dumper_;
const float initial_headroom_db_;
const float extra_headroom_db_;
const int adjacent_speech_frames_threshold_;
int num_adjacent_speech_frames_;
float headroom_db_;
SaturationProtectorState preliminary_state_;
SaturationProtectorState reliable_state_;
};
} // namespace
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
float initial_headroom_db,
float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper) {
return std::make_unique<SaturationProtectorImpl>(
initial_headroom_db, extra_headroom_db, adjacent_speech_frames_threshold,
apm_data_dumper);
}
} // namespace webrtc
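A minimal usage sketch of the refactored interface (illustrative only; the headroom and threshold values reuse defaults that appear elsewhere in this change):

  ApmDataDumper data_dumper(0);
  auto protector = CreateSaturationProtector(
      /*initial_headroom_db=*/20.0f, /*extra_headroom_db=*/5.0f,
      /*adjacent_speech_frames_threshold=*/12, &data_dumper);
  // Once per 10 ms frame: pass the VAD speech probability, the frame peak and
  // the current speech level estimate, then read the recommended headroom.
  protector->Analyze(/*speech_probability=*/0.99f, /*peak_dbfs=*/-5.0f,
                     /*speech_level_dbfs=*/-23.0f);
  const float headroom_db = protector->HeadroomDb();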

View File

@ -11,71 +11,36 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
#include <array>
#include "absl/types/optional.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "rtc_base/numerics/safe_compare.h"
#include <memory>
namespace webrtc {
namespace saturation_protector_impl {
class ApmDataDumper;
// Ring buffer which only supports (i) push back and (ii) read oldest item.
class RingBuffer {
// Saturation protector. Analyzes peak levels and recommends a headroom to
// reduce the chances of clipping.
class SaturationProtector {
public:
bool operator==(const RingBuffer& b) const;
inline bool operator!=(const RingBuffer& b) const { return !(*this == b); }
virtual ~SaturationProtector() = default;
// Maximum number of values that the buffer can contain.
int Capacity() const { return buffer_.size(); }
// Number of values in the buffer.
int Size() const { return size_; }
// Returns the recommended headroom in dB.
virtual float HeadroomDb() = 0;
void Reset();
// Pushes back `v`. If the buffer is full, the oldest value is replaced.
void PushBack(float v);
// Returns the oldest item in the buffer. Returns an empty value if the
// buffer is empty.
absl::optional<float> Front() const;
// Analyzes the peak level of a 10 ms frame along with its speech probability
// and the current speech level estimate to update the recommended headroom.
virtual void Analyze(float speech_probability,
float peak_dbfs,
float speech_level_dbfs) = 0;
private:
inline int FrontIndex() const {
return rtc::SafeEq(size_, buffer_.size()) ? next_ : 0;
}
// `buffer_` has `size_` elements (up to the size of `buffer_`) and `next_` is
// the position where the next new value is written in `buffer_`.
std::array<float, kPeakEnveloperBufferSize> buffer_;
int next_ = 0;
int size_ = 0;
// Resets the internal state.
virtual void Reset() = 0;
};
} // namespace saturation_protector_impl
// Saturation protector state. Exposed publicly for check-pointing and restore
// ops.
struct SaturationProtectorState {
bool operator==(const SaturationProtectorState& s) const;
inline bool operator!=(const SaturationProtectorState& s) const {
return !(*this == s);
}
float margin_db; // Recommended margin.
saturation_protector_impl::RingBuffer peak_delay_buffer;
float max_peaks_dbfs;
int time_since_push_ms; // Time since the last ring buffer push operation.
};
// Resets the saturation protector state.
void ResetSaturationProtectorState(float initial_margin_db,
SaturationProtectorState& state);
// Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
// and the peak power `speech_peak_dbfs` for an observed frame which is
// reliably classified as "speech". `state` must not be modified without calling
// this function.
void UpdateSaturationProtectorState(float speech_peak_dbfs,
float speech_level_dbfs,
SaturationProtectorState& state);
// Creates a saturation protector that starts at `initial_headroom_db`.
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
float initial_headroom_db,
float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper);
} // namespace webrtc

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_compare.h"
namespace webrtc {
SaturationProtectorBuffer::SaturationProtectorBuffer() = default;
SaturationProtectorBuffer::~SaturationProtectorBuffer() = default;
bool SaturationProtectorBuffer::operator==(
const SaturationProtectorBuffer& b) const {
RTC_DCHECK_LE(size_, buffer_.size());
RTC_DCHECK_LE(b.size_, b.buffer_.size());
if (size_ != b.size_) {
return false;
}
for (int i = 0, i0 = FrontIndex(), i1 = b.FrontIndex(); i < size_;
++i, ++i0, ++i1) {
if (buffer_[i0 % buffer_.size()] != b.buffer_[i1 % b.buffer_.size()]) {
return false;
}
}
return true;
}
int SaturationProtectorBuffer::Capacity() const {
return buffer_.size();
}
int SaturationProtectorBuffer::Size() const {
return size_;
}
void SaturationProtectorBuffer::Reset() {
next_ = 0;
size_ = 0;
}
void SaturationProtectorBuffer::PushBack(float v) {
RTC_DCHECK_GE(next_, 0);
RTC_DCHECK_GE(size_, 0);
RTC_DCHECK_LT(next_, buffer_.size());
RTC_DCHECK_LE(size_, buffer_.size());
buffer_[next_++] = v;
if (rtc::SafeEq(next_, buffer_.size())) {
next_ = 0;
}
if (rtc::SafeLt(size_, buffer_.size())) {
size_++;
}
}
absl::optional<float> SaturationProtectorBuffer::Front() const {
if (size_ == 0) {
return absl::nullopt;
}
RTC_DCHECK_LT(FrontIndex(), buffer_.size());
return buffer_[FrontIndex()];
}
int SaturationProtectorBuffer::FrontIndex() const {
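  // When the buffer is full, the oldest value sits in the slot that will be
  // overwritten next (`next_`); otherwise the oldest value is at index 0.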
return rtc::SafeEq(size_, buffer_.size()) ? next_ : 0;
}
} // namespace webrtc

View File

@ -0,0 +1,59 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_
#include <array>
#include "absl/types/optional.h"
#include "modules/audio_processing/agc2/agc2_common.h"
namespace webrtc {
// Ring buffer for the saturation protector which only supports (i) push back
// and (ii) read oldest item.
class SaturationProtectorBuffer {
public:
SaturationProtectorBuffer();
~SaturationProtectorBuffer();
bool operator==(const SaturationProtectorBuffer& b) const;
inline bool operator!=(const SaturationProtectorBuffer& b) const {
return !(*this == b);
}
// Maximum number of values that the buffer can contain.
int Capacity() const;
// Number of values in the buffer.
int Size() const;
void Reset();
// Pushes back `v`. If the buffer is full, the oldest value is replaced.
void PushBack(float v);
// Returns the oldest item in the buffer. Returns an empty value if the
// buffer is empty.
absl::optional<float> Front() const;
private:
int FrontIndex() const;
// `buffer_` has `size_` elements (up to the size of `buffer_`) and `next_` is
// the position where the next new value is written in `buffer_`.
std::array<float, kSaturationProtectorBufferSize> buffer_;
int next_ = 0;
int size_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_

View File

@ -0,0 +1,73 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
using ::testing::Eq;
using ::testing::Optional;
TEST(GainController2SaturationProtectorBuffer, Init) {
SaturationProtectorBuffer b;
EXPECT_EQ(b.Size(), 0);
EXPECT_FALSE(b.Front().has_value());
}
TEST(GainController2SaturationProtectorBuffer, PushBack) {
SaturationProtectorBuffer b;
constexpr float kValue = 123.0f;
b.PushBack(kValue);
EXPECT_EQ(b.Size(), 1);
EXPECT_THAT(b.Front(), Optional(Eq(kValue)));
}
TEST(GainController2SaturationProtectorBuffer, Reset) {
SaturationProtectorBuffer b;
b.PushBack(123.0f);
b.Reset();
EXPECT_EQ(b.Size(), 0);
EXPECT_FALSE(b.Front().has_value());
}
// Checks that the front value does not change until the ring buffer gets full.
TEST(GainController2SaturationProtectorBuffer, FrontUntilBufferIsFull) {
SaturationProtectorBuffer b;
constexpr float kValue = 123.0f;
b.PushBack(kValue);
for (int i = 1; i < b.Capacity(); ++i) {
SCOPED_TRACE(i);
EXPECT_THAT(b.Front(), Optional(Eq(kValue)));
b.PushBack(kValue + i);
}
}
// Checks that when the buffer is full it behaves as a shift register.
TEST(GainController2SaturationProtectorBuffer, FrontIsDelayed) {
SaturationProtectorBuffer b;
// Fill the buffer.
for (int i = 0; i < b.Capacity(); ++i) {
b.PushBack(i);
}
// The ring buffer should now behave as a shift register with a delay equal to
// its capacity.
for (int i = b.Capacity(); i < 2 * b.Capacity() + 1; ++i) {
SCOPED_TRACE(i);
EXPECT_THAT(b.Front(), Optional(Eq(i - b.Capacity())));
b.PushBack(i);
}
}
} // namespace
} // namespace webrtc

View File

@ -10,181 +10,166 @@
#include "modules/audio_processing/agc2/saturation_protector.h"
#include <algorithm>
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"
#include "test/gmock.h"
namespace webrtc {
namespace {
constexpr float kInitialMarginDb = 20.f;
constexpr float kInitialHeadroomDb = 20.0f;
constexpr float kNoExtraHeadroomDb = 0.0f;
constexpr int kNoAdjacentSpeechFramesRequired = 1;
constexpr float kMaxSpeechProbability = 1.0f;
using saturation_protector_impl::RingBuffer;
SaturationProtectorState CreateSaturationProtectorState() {
SaturationProtectorState state;
ResetSaturationProtectorState(kInitialMarginDb, state);
return state;
}
// Updates `state` for `num_iterations` times with constant speech level and
// peak powers and returns the maximum margin.
// Calls `Analyze(speech_probability, peak_dbfs, speech_level_dbfs)`
// `num_iterations` times on `saturation_protector` and returns the largest
// headroom difference between two consecutive calls.
float RunOnConstantLevel(int num_iterations,
float speech_peak_dbfs,
float speech_probability,
float peak_dbfs,
float speech_level_dbfs,
SaturationProtectorState& state) {
float last_margin = state.margin_db;
float max_difference = 0.f;
SaturationProtector& saturation_protector) {
float last_headroom = saturation_protector.HeadroomDb();
float max_difference = 0.0f;
for (int i = 0; i < num_iterations; ++i) {
UpdateSaturationProtectorState(speech_peak_dbfs, speech_level_dbfs, state);
const float new_margin = state.margin_db;
saturation_protector.Analyze(speech_probability, peak_dbfs,
speech_level_dbfs);
const float new_headroom = saturation_protector.HeadroomDb();
max_difference =
std::max(max_difference, std::abs(new_margin - last_margin));
last_margin = new_margin;
std::max(max_difference, std::fabs(new_headroom - last_headroom));
last_headroom = new_headroom;
}
return max_difference;
}
} // namespace
TEST(AutomaticGainController2SaturationProtector, RingBufferInit) {
RingBuffer b;
EXPECT_EQ(b.Size(), 0);
EXPECT_FALSE(b.Front().has_value());
}
TEST(AutomaticGainController2SaturationProtector, RingBufferPushBack) {
RingBuffer b;
constexpr float kValue = 123.f;
b.PushBack(kValue);
EXPECT_EQ(b.Size(), 1);
ASSERT_TRUE(b.Front().has_value());
EXPECT_EQ(b.Front().value(), kValue);
}
TEST(AutomaticGainController2SaturationProtector, RingBufferReset) {
RingBuffer b;
b.PushBack(123.f);
b.Reset();
EXPECT_EQ(b.Size(), 0);
EXPECT_FALSE(b.Front().has_value());
}
// Checks that the front value does not change until the ring buffer gets full.
TEST(AutomaticGainController2SaturationProtector,
RingBufferFrontUntilBufferIsFull) {
RingBuffer b;
constexpr float kValue = 123.f;
b.PushBack(kValue);
for (int i = 1; i < b.Capacity(); ++i) {
EXPECT_EQ(b.Front().value(), kValue);
b.PushBack(kValue + i);
}
}
// Checks that when the buffer is full it behaves as a shift register.
TEST(AutomaticGainController2SaturationProtector,
FullRingBufferFrontIsDelayed) {
RingBuffer b;
// Fill the buffer.
for (int i = 0; i < b.Capacity(); ++i) {
b.PushBack(i);
}
// The ring buffer should now behave as a shift register with a delay equal to
// its capacity.
for (int i = b.Capacity(); i < 2 * b.Capacity() + 1; ++i) {
EXPECT_EQ(b.Front().value(), i - b.Capacity());
b.PushBack(i);
}
}
// Checks that a state after reset equals a state after construction.
TEST(AutomaticGainController2SaturationProtector, ResetState) {
SaturationProtectorState init_state;
ResetSaturationProtectorState(kInitialMarginDb, init_state);
SaturationProtectorState state;
ResetSaturationProtectorState(kInitialMarginDb, state);
RunOnConstantLevel(/*num_iterations=*/10, /*speech_level_dbfs=*/-20.f,
/*speech_peak_dbfs=*/-10.f, state);
ASSERT_NE(init_state, state); // Make sure that there are side-effects.
ResetSaturationProtectorState(kInitialMarginDb, state);
EXPECT_EQ(init_state, state);
// Checks that the returned headroom value is correctly reset.
TEST(GainController2SaturationProtector, Reset) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/10, kMaxSpeechProbability,
/*peak_dbfs=*/0.0f,
/*speech_level_dbfs=*/-10.0f, *saturation_protector);
// Make sure that there are side-effects.
ASSERT_NE(initial_headroom_db, saturation_protector->HeadroomDb());
saturation_protector->Reset();
EXPECT_EQ(initial_headroom_db, saturation_protector->HeadroomDb());
}
// Checks that the estimate converges to the difference between the peak and
// the speech level (i.e., the crest factor) after a while.
TEST(AutomaticGainController2SaturationProtector,
ProtectorEstimatesCrestRatio) {
TEST(GainController2SaturationProtector, EstimatesCrestRatio) {
constexpr int kNumIterations = 2000;
constexpr float kPeakLevel = -20.f;
constexpr float kCrestFactor = kInitialMarginDb + 1.f;
constexpr float kSpeechLevel = kPeakLevel - kCrestFactor;
const float kMaxDifference = 0.5f * std::abs(kInitialMarginDb - kCrestFactor);
constexpr float kPeakLevelDbfs = -20.0f;
constexpr float kCrestFactorDb = kInitialHeadroomDb + 1.0f;
constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - kCrestFactorDb;
const float kMaxDifferenceDb =
0.5f * std::fabs(kInitialHeadroomDb - kCrestFactorDb);
auto state = CreateSaturationProtectorState();
RunOnConstantLevel(kNumIterations, kPeakLevel, kSpeechLevel, state);
EXPECT_NEAR(state.margin_db, kCrestFactor, kMaxDifference);
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
kSpeechLevelDbfs, *saturation_protector);
EXPECT_NEAR(saturation_protector->HeadroomDb(), kCrestFactorDb,
kMaxDifferenceDb);
}
// Checks that the margin does not change too quickly.
TEST(AutomaticGainController2SaturationProtector, ChangeSlowly) {
// Checks that the extra headroom is applied.
TEST(GainController2SaturationProtector, ExtraHeadroomApplied) {
constexpr float kExtraHeadroomDb = 5.1234f;
constexpr int kNumIterations = 10;
constexpr float kPeakLevelDbfs = -20.0f;
constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - 15.0f;
ApmDataDumper apm_data_dumper(0);
auto saturation_protector_no_extra = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
for (int i = 0; i < kNumIterations; ++i) {
saturation_protector_no_extra->Analyze(kMaxSpeechProbability,
kPeakLevelDbfs, kSpeechLevelDbfs);
}
auto saturation_protector_extra = CreateSaturationProtector(
kInitialHeadroomDb, kExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
for (int i = 0; i < kNumIterations; ++i) {
saturation_protector_extra->Analyze(kMaxSpeechProbability, kPeakLevelDbfs,
kSpeechLevelDbfs);
}
EXPECT_EQ(saturation_protector_no_extra->HeadroomDb() + kExtraHeadroomDb,
saturation_protector_extra->HeadroomDb());
}
// Checks that the headroom does not change too quickly.
TEST(GainController2SaturationProtector, ChangeSlowly) {
constexpr int kNumIterations = 1000;
constexpr float kPeakLevel = -20.f;
constexpr float kCrestFactor = kInitialMarginDb - 5.f;
constexpr float kOtherCrestFactor = kInitialMarginDb;
constexpr float kSpeechLevel = kPeakLevel - kCrestFactor;
constexpr float kOtherSpeechLevel = kPeakLevel - kOtherCrestFactor;
auto state = CreateSaturationProtectorState();
float max_difference =
RunOnConstantLevel(kNumIterations, kPeakLevel, kSpeechLevel, state);
max_difference = std::max(
RunOnConstantLevel(kNumIterations, kPeakLevel, kOtherSpeechLevel, state),
max_difference);
constexpr float kPeakLevelDbfs = -20.f;
constexpr float kCrestFactorDb = kInitialHeadroomDb - 5.f;
constexpr float kOtherCrestFactorDb = kInitialHeadroomDb;
constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - kCrestFactorDb;
constexpr float kOtherSpeechLevelDbfs = kPeakLevelDbfs - kOtherCrestFactorDb;
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
float max_difference_db =
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
kSpeechLevelDbfs, *saturation_protector);
max_difference_db = std::max(
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
kOtherSpeechLevelDbfs, *saturation_protector),
max_difference_db);
constexpr float kMaxChangeSpeedDbPerSecond = 0.5f; // 1 dB / 2 seconds.
EXPECT_LE(max_difference,
EXPECT_LE(max_difference_db,
kMaxChangeSpeedDbPerSecond / 1000 * kFrameDurationMs);
}
// Checks that there is a delay between input change and margin adaptations.
TEST(AutomaticGainController2SaturationProtector, AdaptToDelayedChanges) {
constexpr int kDelayIterations = kFullBufferSizeMs / kFrameDurationMs;
constexpr float kInitialSpeechLevelDbfs = -30.f;
constexpr float kLaterSpeechLevelDbfs = -15.f;
class SaturationProtectorParametrization
: public ::testing::TestWithParam<int> {
protected:
int adjacent_speech_frames_threshold() const { return GetParam(); }
};
auto state = CreateSaturationProtectorState();
// First run on initial level.
float max_difference = RunOnConstantLevel(
kDelayIterations, kInitialSpeechLevelDbfs + kInitialMarginDb,
kInitialSpeechLevelDbfs, state);
// Then peak changes, but not RMS.
max_difference =
std::max(RunOnConstantLevel(kDelayIterations,
kLaterSpeechLevelDbfs + kInitialMarginDb,
kInitialSpeechLevelDbfs, state),
max_difference);
// Then both change.
max_difference =
std::max(RunOnConstantLevel(kDelayIterations,
kLaterSpeechLevelDbfs + kInitialMarginDb,
kLaterSpeechLevelDbfs, state),
max_difference);
// The saturation protector expects that the RMS changes roughly
// 'kFullBufferSizeMs' after peaks change. This is to account for delay
// introduced by the level estimator. Therefore, the input above is 'normal'
// and 'expected', and shouldn't influence the margin by much.
const float total_difference = std::abs(state.margin_db - kInitialMarginDb);
EXPECT_LE(total_difference, 0.05f);
EXPECT_LE(max_difference, 0.01f);
TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb,
adjacent_speech_frames_threshold(), &apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() - 1,
kMaxSpeechProbability,
/*peak_dbfs=*/0.0f,
/*speech_level_dbfs=*/-10.0f, *saturation_protector);
// No adaptation expected.
EXPECT_EQ(initial_headroom_db, saturation_protector->HeadroomDb());
}
TEST_P(SaturationProtectorParametrization, AdaptToEnoughSpeechSegments) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb,
adjacent_speech_frames_threshold(), &apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() + 1,
kMaxSpeechProbability,
/*peak_dbfs=*/0.0f,
/*speech_level_dbfs=*/-10.0f, *saturation_protector);
// Adaptation expected.
EXPECT_NE(initial_headroom_db, saturation_protector->HeadroomDb());
}
INSTANTIATE_TEST_SUITE_P(GainController2,
SaturationProtectorParametrization,
::testing::Values(2, 9, 17));
} // namespace
} // namespace webrtc

View File

@ -65,43 +65,23 @@ class Vad : public VoiceActivityDetector {
rnn_vad::RnnVad rnn_vad_;
};
// Returns an updated version of `p_old` by using instant decay and the given
// `attack` on a new VAD probability value `p_new`.
float SmoothedVadProbability(float p_old, float p_new, float attack) {
RTC_DCHECK_GT(attack, 0.0f);
RTC_DCHECK_LE(attack, 1.0f);
if (p_new < p_old || attack == 1.0f) {
// Instant decay (or no smoothing).
return p_new;
} else {
// Attack phase.
return attack * p_new + (1.0f - attack) * p_old;
}
}
} // namespace
VadLevelAnalyzer::VadLevelAnalyzer()
: VadLevelAnalyzer(kDefaultVadRnnResetPeriodMs,
kDefaultSmoothedVadProbabilityAttack,
GetAvailableCpuFeatures()) {}
: VadLevelAnalyzer(kDefaultVadRnnResetPeriodMs, GetAvailableCpuFeatures()) {
}
VadLevelAnalyzer::VadLevelAnalyzer(int vad_reset_period_ms,
float vad_probability_attack,
const AvailableCpuFeatures& cpu_features)
: VadLevelAnalyzer(vad_reset_period_ms,
vad_probability_attack,
std::make_unique<Vad>(cpu_features)) {}
VadLevelAnalyzer::VadLevelAnalyzer(int vad_reset_period_ms,
float vad_probability_attack,
std::unique_ptr<VoiceActivityDetector> vad)
: vad_(std::move(vad)),
vad_reset_period_frames_(
rtc::CheckedDivExact(vad_reset_period_ms, kFrameDurationMs)),
vad_probability_attack_(vad_probability_attack),
time_to_vad_reset_(vad_reset_period_frames_),
vad_probability_(0.0f) {
time_to_vad_reset_(vad_reset_period_frames_) {
RTC_DCHECK(vad_);
RTC_DCHECK_GT(vad_reset_period_frames_, 1);
}
@ -123,11 +103,7 @@ VadLevelAnalyzer::Result VadLevelAnalyzer::AnalyzeFrame(
peak = std::max(std::fabs(x), peak);
rms += x * x;
}
// Compute smoothed speech probability.
vad_probability_ = SmoothedVadProbability(
/*p_old=*/vad_probability_, /*p_new=*/vad_->ComputeProbability(frame),
vad_probability_attack_);
return {vad_probability_,
return {vad_->ComputeProbability(frame),
FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel())),
FloatS16ToDbfs(peak)};
}

View File

@ -37,18 +37,15 @@ class VadLevelAnalyzer {
virtual float ComputeProbability(AudioFrameView<const float> frame) = 0;
};
// Ctor. Uses the default VAD.
// Ctor. Uses the default VAD with the default settings.
VadLevelAnalyzer();
// Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
// `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the
// duration of two frames. `vad_probability_attack` is a number in (0,1] used
// to smooth the speech probability (instant decay, slow attack).
// duration of two frames. Uses `cpu_features` to instantiate the default VAD.
VadLevelAnalyzer(int vad_reset_period_ms,
float vad_probability_attack,
const AvailableCpuFeatures& cpu_features);
// Ctor. Uses a custom `vad`.
VadLevelAnalyzer(int vad_reset_period_ms,
float vad_probability_attack,
std::unique_ptr<VoiceActivityDetector> vad);
VadLevelAnalyzer(const VadLevelAnalyzer&) = delete;
@ -61,9 +58,7 @@ class VadLevelAnalyzer {
private:
std::unique_ptr<VoiceActivityDetector> vad_;
const int vad_reset_period_frames_;
const float vad_probability_attack_;
int time_to_vad_reset_;
float vad_probability_;
};
} // namespace webrtc

View File

@ -29,9 +29,6 @@ using ::testing::ReturnRoundRobin;
constexpr int kNoVadPeriodicReset =
kFrameDurationMs * (std::numeric_limits<int>::max() / kFrameDurationMs);
constexpr float kInstantAttack = 1.0f;
constexpr float kSlowAttack = 0.1f;
constexpr int kSampleRateHz = 8000;
class MockVad : public VadLevelAnalyzer::VoiceActivityDetector {
@ -48,7 +45,6 @@ class MockVad : public VadLevelAnalyzer::VoiceActivityDetector {
// restart from the beginning.
std::unique_ptr<VadLevelAnalyzer> CreateVadLevelAnalyzerWithMockVad(
int vad_reset_period_ms,
float vad_probability_attack,
const std::vector<float>& speech_probabilities,
int expected_vad_reset_calls = 0) {
auto vad = std::make_unique<MockVad>();
@ -58,8 +54,8 @@ std::unique_ptr<VadLevelAnalyzer> CreateVadLevelAnalyzerWithMockVad(
if (expected_vad_reset_calls >= 0) {
EXPECT_CALL(*vad, Reset).Times(expected_vad_reset_calls);
}
return std::make_unique<VadLevelAnalyzer>(
vad_reset_period_ms, vad_probability_attack, std::move(vad));
return std::make_unique<VadLevelAnalyzer>(vad_reset_period_ms,
std::move(vad));
}
// 10 ms mono frame.
@ -75,7 +71,7 @@ struct FrameWithView {
const AudioFrameView<const float> view;
};
TEST(AutomaticGainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) {
TEST(GainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) {
// Handcrafted frame so that the average is lower than the peak value.
FrameWithView frame(1000.0f); // Constant frame.
frame.samples[10] = 2000.0f; // Except for one peak value.
@ -88,14 +84,13 @@ TEST(AutomaticGainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) {
EXPECT_LT(levels_and_vad_prob.rms_dbfs, levels_and_vad_prob.peak_dbfs);
}
// Checks that the unprocessed and the smoothed speech probabilities match when
// instant attack is used.
TEST(AutomaticGainController2VadLevelAnalyzer, NoSpeechProbabilitySmoothing) {
// Checks that the expected VAD probabilities are returned.
TEST(GainController2VadLevelAnalyzer, NoSpeechProbabilitySmoothing) {
const std::vector<float> speech_probabilities{0.709f, 0.484f, 0.882f, 0.167f,
0.44f, 0.525f, 0.858f, 0.314f,
0.653f, 0.965f, 0.413f, 0.0f};
auto analyzer = CreateVadLevelAnalyzerWithMockVad(
kNoVadPeriodicReset, kInstantAttack, speech_probabilities);
auto analyzer = CreateVadLevelAnalyzerWithMockVad(kNoVadPeriodicReset,
speech_probabilities);
FrameWithView frame;
for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) {
SCOPED_TRACE(i);
@ -104,45 +99,11 @@ TEST(AutomaticGainController2VadLevelAnalyzer, NoSpeechProbabilitySmoothing) {
}
}
// Checks that the smoothed speech probability does not instantly converge to
// the unprocessed one when slow attack is used.
TEST(AutomaticGainController2VadLevelAnalyzer,
SlowAttackSpeechProbabilitySmoothing) {
const std::vector<float> speech_probabilities{0.0f, 0.0f, 1.0f,
1.0f, 1.0f, 1.0f};
auto analyzer = CreateVadLevelAnalyzerWithMockVad(
kNoVadPeriodicReset, kSlowAttack, speech_probabilities);
FrameWithView frame;
float prev_probability = 0.0f;
for (int i = 0; rtc::SafeLt(i, speech_probabilities.size()); ++i) {
SCOPED_TRACE(i);
const float smoothed_probability =
analyzer->AnalyzeFrame(frame.view).speech_probability;
EXPECT_LT(smoothed_probability, 1.0f); // Not enough time to reach 1.
EXPECT_LE(prev_probability, smoothed_probability); // Converge towards 1.
prev_probability = smoothed_probability;
}
}
// Checks that the smoothed speech probability instantly decays to the
// unprocessed one when slow attack is used.
TEST(AutomaticGainController2VadLevelAnalyzer, SpeechProbabilityInstantDecay) {
const std::vector<float> speech_probabilities{1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 0.0f};
auto analyzer = CreateVadLevelAnalyzerWithMockVad(
kNoVadPeriodicReset, kSlowAttack, speech_probabilities);
FrameWithView frame;
for (int i = 0; rtc::SafeLt(i, speech_probabilities.size() - 1); ++i) {
analyzer->AnalyzeFrame(frame.view);
}
EXPECT_EQ(0.0f, analyzer->AnalyzeFrame(frame.view).speech_probability);
}
// Checks that the VAD is not periodically reset.
TEST(AutomaticGainController2VadLevelAnalyzer, VadNoPeriodicReset) {
TEST(GainController2VadLevelAnalyzer, VadNoPeriodicReset) {
constexpr int kNumFrames = 19;
auto analyzer = CreateVadLevelAnalyzerWithMockVad(
kNoVadPeriodicReset, kSlowAttack, /*speech_probabilities=*/{1.0f},
kNoVadPeriodicReset, /*speech_probabilities=*/{1.0f},
/*expected_vad_reset_calls=*/0);
FrameWithView frame;
for (int i = 0; i < kNumFrames; ++i) {
@ -161,7 +122,7 @@ class VadPeriodResetParametrization
TEST_P(VadPeriodResetParametrization, VadPeriodicReset) {
auto analyzer = CreateVadLevelAnalyzerWithMockVad(
/*vad_reset_period_ms=*/vad_reset_period_frames() * kFrameDurationMs,
kSlowAttack, /*speech_probabilities=*/{1.0f},
/*speech_probabilities=*/{1.0f},
/*expected_vad_reset_calls=*/num_frames() / vad_reset_period_frames());
FrameWithView frame;
for (int i = 0; i < num_frames(); ++i) {
@ -169,7 +130,7 @@ TEST_P(VadPeriodResetParametrization, VadPeriodicReset) {
}
}
INSTANTIATE_TEST_SUITE_P(AutomaticGainController2VadLevelAnalyzer,
INSTANTIATE_TEST_SUITE_P(GainController2VadLevelAnalyzer,
VadPeriodResetParametrization,
::testing::Combine(::testing::Values(1, 19, 123),
::testing::Values(2, 5, 20, 53)));

View File

@ -73,7 +73,7 @@ void GainController2::Process(AudioBuffer* audio) {
void GainController2::NotifyAnalogLevel(int level) {
if (analog_level_ != level && adaptive_agc_) {
adaptive_agc_->Reset();
adaptive_agc_->HandleInputGainChange();
}
analog_level_ = level;
}

View File

@ -11,6 +11,7 @@
#include "modules/audio_processing/gain_controller2.h"
#include <algorithm>
#include <cmath>
#include <memory>
#include "api/array_view.h"
@ -68,7 +69,8 @@ std::unique_ptr<GainController2> CreateAgc2FixedDigitalMode(
return agc2;
}
float GainAfterProcessingFile(GainController2* gain_controller) {
float GainDbAfterProcessingFile(GainController2& gain_controller,
int max_duration_ms) {
// Set up an AudioBuffer to be filled from the speech file.
constexpr size_t kStereo = 2u;
const StreamConfig capture_config(AudioProcessing::kSampleRate48kHz, kStereo,
@ -82,24 +84,29 @@ float GainAfterProcessingFile(GainController2* gain_controller) {
std::vector<float> capture_input(capture_config.num_frames() *
capture_config.num_channels());
// The file should contain at least this many frames. Every iteration, we put
// a frame through the gain controller.
const int kNumFramesToProcess = 100;
for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
// Process the input file, which must be long enough to cover
// `max_duration_ms`.
RTC_DCHECK_GT(max_duration_ms, 0);
const int num_frames = rtc::CheckedDivExact(max_duration_ms, 10);
for (int i = 0; i < num_frames; ++i) {
ReadFloatSamplesFromStereoFile(capture_config.num_frames(),
capture_config.num_channels(), &capture_file,
capture_input);
test::CopyVectorToAudioBuffer(capture_config, capture_input, &ab);
gain_controller->Process(&ab);
gain_controller.Process(&ab);
}
// Send in a last frame with values constant 1 (It's low enough to detect high
// gain, and for ease of computation). The applied gain is the result.
// Send in a last frame with minimum dBFS level.
constexpr float sample_value = 1.f;
SetAudioBufferSamples(sample_value, &ab);
gain_controller->Process(&ab);
return ab.channels()[0][0];
gain_controller.Process(&ab);
// Measure the RMS level after processing.
float rms = 0.0f;
for (size_t i = 0; i < capture_config.num_frames(); ++i) {
rms += ab.channels()[0][i] * ab.channels()[0][i];
}
// Return the applied gain in dB.
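  // Since the last frame contains constant samples equal to 1, the output RMS
  // in dB relative to 1 approximates the gain applied to that frame.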
return 20.0f * std::log10(std::sqrt(rms / capture_config.num_frames()));
}
} // namespace
@ -324,34 +331,20 @@ INSTANTIATE_TEST_SUITE_P(
48000,
true)));
TEST(GainController2, UsageSaturationMargin) {
// Checks that the gain applied at the end of a PCM samples file is close to the
// expected value.
TEST(GainController2, CheckGainAdaptiveDigital) {
constexpr float kExpectedGainDb = 4.3f;
constexpr float kToleranceDb = 0.5f;
GainController2 gain_controller2;
gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz);
AudioProcessing::Config::GainController2 config;
// Check that samples are not amplified as much when extra margin is
// high. They should not be amplified at all, but only after convergence. GC2
// starts with a gain, and it takes time until it's down to 0 dB.
config.fixed_digital.gain_db = 0.f;
config.adaptive_digital.enabled = true;
config.adaptive_digital.extra_saturation_margin_db = 50.f;
gain_controller2.ApplyConfig(config);
EXPECT_LT(GainAfterProcessingFile(&gain_controller2), 2.f);
}
TEST(GainController2, UsageNoSaturationMargin) {
GainController2 gain_controller2;
gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz);
AudioProcessing::Config::GainController2 config;
// Check that some gain is applied if there is no margin.
config.fixed_digital.gain_db = 0.f;
config.adaptive_digital.enabled = true;
config.adaptive_digital.extra_saturation_margin_db = 0.f;
gain_controller2.ApplyConfig(config);
EXPECT_GT(GainAfterProcessingFile(&gain_controller2), 1.9f);
EXPECT_NEAR(
GainDbAfterProcessingFile(gain_controller2, /*max_duration_ms=*/2000),
kExpectedGainDb, kToleranceDb);
}
} // namespace test

View File

@ -46,17 +46,6 @@ std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
RTC_CHECK_NOTREACHED();
}
std::string GainController2LevelEstimatorToString(
const Agc2Config::LevelEstimator& level) {
switch (level) {
case Agc2Config::LevelEstimator::kRms:
return "Rms";
case Agc2Config::LevelEstimator::kPeak:
return "Peak";
}
RTC_CHECK_NOTREACHED();
}
std::string GainController2NoiseEstimatorToString(
const Agc2Config::NoiseEstimator& type) {
switch (type) {
@ -174,20 +163,10 @@ std::string AudioProcessing::Config::ToString() const {
<< gain_controller2.adaptive_digital.enabled << ", noise_estimator: "
<< GainController2NoiseEstimatorToString(
gain_controller2.adaptive_digital.noise_estimator)
<< ", level_estimator: { vad_probability_attack: "
<< gain_controller2.adaptive_digital.vad_probability_attack << ", type: "
<< GainController2LevelEstimatorToString(
gain_controller2.adaptive_digital.level_estimator)
<< ", vad_reset_period_ms: "
<< gain_controller2.adaptive_digital.vad_reset_period_ms
<< ", adjacent_speech_frames_threshold: "
<< gain_controller2.adaptive_digital
.level_estimator_adjacent_speech_frames_threshold
<< ", initial_saturation_margin_db: "
<< gain_controller2.adaptive_digital.initial_saturation_margin_db
<< ", extra_saturation_margin_db: "
<< gain_controller2.adaptive_digital.extra_saturation_margin_db
<< " }, gain_applier: { adjacent_speech_frames_threshold: "
<< gain_controller2.adaptive_digital
.gain_applier_adjacent_speech_frames_threshold
<< gain_controller2.adaptive_digital.adjacent_speech_frames_threshold
<< ", max_gain_change_db_per_second: "
<< gain_controller2.adaptive_digital.max_gain_change_db_per_second
<< ", max_output_noise_level_dbfs: "
@ -195,7 +174,7 @@ std::string AudioProcessing::Config::ToString() const {
<< ", sse2_allowed: " << gain_controller2.adaptive_digital.sse2_allowed
<< ", avx2_allowed: " << gain_controller2.adaptive_digital.avx2_allowed
<< ", neon_allowed: " << gain_controller2.adaptive_digital.neon_allowed
<< " }}}, residual_echo_detector: { enabled: "
<< "}}, residual_echo_detector: { enabled: "
<< residual_echo_detector.enabled
<< " }, level_estimation: { enabled: " << level_estimation.enabled
<< " }}";

View File

@ -349,6 +349,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
return !(*this == rhs);
}
// TODO(crbug.com/webrtc/7494): Remove `LevelEstimator`.
enum LevelEstimator { kRms, kPeak };
enum NoiseEstimator { kStationaryNoise, kNoiseFloor };
bool enabled = false;
@ -359,19 +360,20 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
bool enabled = false;
NoiseEstimator noise_estimator = kNoiseFloor;
int vad_reset_period_ms = 1500;
float vad_probability_attack = 0.9f;
LevelEstimator level_estimator = kRms;
int level_estimator_adjacent_speech_frames_threshold = 11;
// TODO(crbug.com/webrtc/7494): Remove `use_saturation_protector`.
bool use_saturation_protector = true;
float initial_saturation_margin_db = 20.0f;
float extra_saturation_margin_db = 5.0f;
int gain_applier_adjacent_speech_frames_threshold = 11;
int adjacent_speech_frames_threshold = 12;
float max_gain_change_db_per_second = 3.0f;
float max_output_noise_level_dbfs = -55.0f;
float max_output_noise_level_dbfs = -50.0f;
bool sse2_allowed = true;
bool avx2_allowed = true;
bool neon_allowed = true;
// TODO(crbug.com/webrtc/7494): Remove deprecated settings below.
float vad_probability_attack = 1.0f;
LevelEstimator level_estimator = kRms;
int level_estimator_adjacent_speech_frames_threshold = 12;
bool use_saturation_protector = true;
float initial_saturation_margin_db = 25.0f;
float extra_saturation_margin_db = 5.0f;
int gain_applier_adjacent_speech_frames_threshold = 12;
} adaptive_digital;
} gain_controller2;
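For reference, a sketch of how the retuned adaptive digital mode can be enabled via `AudioProcessing::ApplyConfig()`; `apm` is assumed to be an existing `AudioProcessing` instance and the values shown are the new defaults:

  AudioProcessing::Config config;
  config.gain_controller2.enabled = true;
  config.gain_controller2.adaptive_digital.enabled = true;
  // Deprecated fields (e.g. `extra_saturation_margin_db`) no longer need to be
  // set.
  config.gain_controller2.adaptive_digital.adjacent_speech_frames_threshold = 12;
  config.gain_controller2.adaptive_digital.max_gain_change_db_per_second = 3.0f;
  config.gain_controller2.adaptive_digital.max_output_noise_level_dbfs = -50.0f;
  apm->ApplyConfig(config);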