AGC2 config: allow tuning of headroom, max gain and initial gain
This CL does *not* change the behavior of the AGC2 adaptive digital controller - bitexactness verified with audioproc_f on a collection of AEC dumps and Wav files (42 recordings in total).

Tested: compiled Chrome with this patch and made an appr.tc test call

Bug: webrtc:7494
Change-Id: Ia8a9f6fbc3a3459b888a2eed87e108f0d39cfe99
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/233520
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#35140}
This commit is contained in:

committed by
WebRTC LUCI CQ

parent
41b4397e1a
commit
a850e6c8b6
@ -178,6 +178,7 @@ rtc_library("adaptive_digital_unittests") {
|
||||
":common",
|
||||
":gain_applier",
|
||||
":test_utils",
|
||||
"..:api",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
|
@ -43,14 +43,9 @@ AvailableCpuFeatures GetAllowedCpuFeatures(
|
||||
|
||||
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
|
||||
const AdaptiveDigitalConfig& config)
|
||||
: speech_level_estimator_(apm_data_dumper,
|
||||
config.adjacent_speech_frames_threshold),
|
||||
: speech_level_estimator_(apm_data_dumper, config),
|
||||
vad_(config.vad_reset_period_ms, GetAllowedCpuFeatures(config)),
|
||||
gain_controller_(apm_data_dumper,
|
||||
config.adjacent_speech_frames_threshold,
|
||||
config.max_gain_change_db_per_second,
|
||||
config.max_output_noise_level_dbfs,
|
||||
config.dry_run),
|
||||
gain_controller_(apm_data_dumper, config),
|
||||
apm_data_dumper_(apm_data_dumper),
|
||||
noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
|
||||
saturation_protector_(
|
||||
|
@ -23,31 +23,38 @@
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
using AdaptiveDigitalConfig =
|
||||
AudioProcessing::Config::GainController2::AdaptiveDigital;
|
||||
|
||||
constexpr int kHeadroomHistogramMin = 0;
|
||||
constexpr int kHeadroomHistogramMax = 50;
|
||||
constexpr int kGainDbHistogramMax = 30;
|
||||
|
||||
// This function maps input level to desired applied gain. We want to
|
||||
// boost the signal so that peaks are at -kHeadroomDbfs. We can't
|
||||
// apply more than kMaxGainDb gain.
|
||||
float ComputeGainDb(float input_level_dbfs) {
|
||||
// If the level is very low, boost it as much as we can.
|
||||
if (input_level_dbfs < -(kHeadroomDbfs + kMaxGainDb)) {
|
||||
return kMaxGainDb;
|
||||
// Computes the gain for `input_level_dbfs` to reach `-config.headroom_db`.
|
||||
// Clamps the gain in [0, `config.max_gain_db`]. `config.headroom_db` is a
|
||||
// safety margin to allow transient peaks to exceed the target peak level
|
||||
// without clipping.
|
||||
float ComputeGainDb(float input_level_dbfs,
|
||||
const AdaptiveDigitalConfig& config) {
|
||||
// If the level is very low, apply the maximum gain.
|
||||
if (input_level_dbfs < -(config.headroom_db + config.max_gain_db)) {
|
||||
return config.max_gain_db;
|
||||
}
|
||||
// We expect to end up here most of the time: the level is below
|
||||
// -headroom, but we can boost it to -headroom.
|
||||
if (input_level_dbfs < -kHeadroomDbfs) {
|
||||
return -kHeadroomDbfs - input_level_dbfs;
|
||||
if (input_level_dbfs < -config.headroom_db) {
|
||||
return -config.headroom_db - input_level_dbfs;
|
||||
}
|
||||
// Otherwise, the level is too high and we can't boost.
|
||||
RTC_DCHECK_GE(input_level_dbfs, -kHeadroomDbfs);
|
||||
return 0.f;
|
||||
// The level is too high and we can't boost.
|
||||
RTC_DCHECK_GE(input_level_dbfs, -config.headroom_db);
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
// Returns `target_gain` if the output noise level is below
|
||||
// `max_output_noise_level_dbfs`; otherwise returns a capped gain so that the
|
||||
// output noise level equals `max_output_noise_level_dbfs`.
|
||||
float LimitGainByNoise(float target_gain,
|
||||
// Returns `target_gain_db` if applying such a gain to `input_noise_level_dbfs`
|
||||
// does not exceed `max_output_noise_level_dbfs`. Otherwise lowers and returns
|
||||
// `target_gain_db` so that the output noise level equals
|
||||
// `max_output_noise_level_dbfs`.
|
||||
float LimitGainByNoise(float target_gain_db,
|
||||
float input_noise_level_dbfs,
|
||||
float max_output_noise_level_dbfs,
|
||||
ApmDataDumper& apm_data_dumper) {
|
||||
@ -55,24 +62,25 @@ float LimitGainByNoise(float target_gain,
|
||||
max_output_noise_level_dbfs - input_noise_level_dbfs;
|
||||
apm_data_dumper.DumpRaw("agc2_adaptive_gain_applier_max_allowed_gain_db",
|
||||
max_allowed_gain_db);
|
||||
return std::min(target_gain, std::max(max_allowed_gain_db, 0.f));
|
||||
return std::min(target_gain_db, std::max(max_allowed_gain_db, 0.0f));
|
||||
}
|
||||
|
||||
float LimitGainByLowConfidence(float target_gain,
|
||||
float last_gain,
|
||||
float LimitGainByLowConfidence(float target_gain_db,
|
||||
float last_gain_db,
|
||||
float limiter_audio_level_dbfs,
|
||||
bool estimate_is_confident) {
|
||||
if (estimate_is_confident ||
|
||||
limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
|
||||
return target_gain;
|
||||
return target_gain_db;
|
||||
}
|
||||
const float limiter_level_before_gain = limiter_audio_level_dbfs - last_gain;
|
||||
const float limiter_level_dbfs_before_gain =
|
||||
limiter_audio_level_dbfs - last_gain_db;
|
||||
|
||||
// Compute a new gain so that `limiter_level_before_gain` + `new_target_gain`
|
||||
// is not greater than `kLimiterThresholdForAgcGainDbfs`.
|
||||
const float new_target_gain = std::max(
|
||||
kLimiterThresholdForAgcGainDbfs - limiter_level_before_gain, 0.f);
|
||||
return std::min(new_target_gain, target_gain);
|
||||
// Compute a new gain so that `limiter_level_dbfs_before_gain` +
|
||||
// `new_target_gain_db` is not greater than `kLimiterThresholdForAgcGainDbfs`.
|
||||
const float new_target_gain_db = std::max(
|
||||
kLimiterThresholdForAgcGainDbfs - limiter_level_dbfs_before_gain, 0.0f);
|
||||
return std::min(new_target_gain_db, target_gain_db);
|
||||
}
|
||||
|
||||
// Computes how the gain should change during this frame.
|
||||
@ -86,7 +94,7 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
|
||||
RTC_DCHECK_GT(max_gain_increase_db, 0);
|
||||
float target_gain_difference_db = target_gain_db - last_gain_db;
|
||||
if (!gain_increase_allowed) {
|
||||
target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
|
||||
target_gain_difference_db = std::min(target_gain_difference_db, 0.0f);
|
||||
}
|
||||
return rtc::SafeClamp(target_gain_difference_db, -max_gain_decrease_db,
|
||||
max_gain_increase_db);
|
||||
@ -110,32 +118,28 @@ void CopyAudio(AudioFrameView<const float> src,
|
||||
|
||||
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
int adjacent_speech_frames_threshold,
|
||||
float max_gain_change_db_per_second,
|
||||
float max_output_noise_level_dbfs,
|
||||
bool dry_run)
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
|
||||
: apm_data_dumper_(apm_data_dumper),
|
||||
gain_applier_(
|
||||
/*hard_clip_samples=*/false,
|
||||
/*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
|
||||
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
|
||||
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
|
||||
kFrameDurationMs / 1000.f),
|
||||
max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
|
||||
dry_run_(dry_run),
|
||||
/*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
|
||||
config_(config),
|
||||
max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
|
||||
kFrameDurationMs / 1000.0f),
|
||||
calls_since_last_gain_log_(0),
|
||||
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
|
||||
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
|
||||
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f);
|
||||
frames_to_gain_increase_allowed_(
|
||||
config_.adjacent_speech_frames_threshold),
|
||||
last_gain_db_(config_.initial_gain_db) {
|
||||
RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
|
||||
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
|
||||
RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f);
|
||||
RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f);
|
||||
RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
|
||||
RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
|
||||
Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1);
|
||||
}
|
||||
|
||||
void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
|
||||
int num_channels) {
|
||||
if (!dry_run_) {
|
||||
if (!config_.dry_run) {
|
||||
return;
|
||||
}
|
||||
RTC_DCHECK_GT(sample_rate_hz, 0);
|
||||
@ -159,7 +163,7 @@ void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
|
||||
|
||||
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
||||
AudioFrameView<float> frame) {
|
||||
RTC_DCHECK_GE(info.speech_level_dbfs, -150.f);
|
||||
RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
|
||||
RTC_DCHECK_GE(frame.num_channels(), 1);
|
||||
RTC_DCHECK(
|
||||
frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
|
||||
@ -172,15 +176,16 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
||||
const float input_level_dbfs = info.speech_level_dbfs + info.headroom_db;
|
||||
|
||||
const float target_gain_db = LimitGainByLowConfidence(
|
||||
LimitGainByNoise(ComputeGainDb(input_level_dbfs), info.noise_rms_dbfs,
|
||||
max_output_noise_level_dbfs_, *apm_data_dumper_),
|
||||
LimitGainByNoise(ComputeGainDb(input_level_dbfs, config_),
|
||||
info.noise_rms_dbfs, config_.max_output_noise_level_dbfs,
|
||||
*apm_data_dumper_),
|
||||
last_gain_db_, info.limiter_envelope_dbfs, info.speech_level_reliable);
|
||||
|
||||
// Forbid increasing the gain until enough adjacent speech frames are
|
||||
// observed.
|
||||
bool first_confident_speech_frame = false;
|
||||
if (info.speech_probability < kVadConfidenceThreshold) {
|
||||
frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
|
||||
frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold;
|
||||
} else if (frames_to_gain_increase_allowed_ > 0) {
|
||||
frames_to_gain_increase_allowed_--;
|
||||
first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
|
||||
@ -196,7 +201,7 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
||||
// No gain increase happened while waiting for a long enough speech
|
||||
// sequence. Therefore, temporarily allow a faster gain increase.
|
||||
RTC_DCHECK(gain_increase_allowed);
|
||||
max_gain_increase_db *= adjacent_speech_frames_threshold_;
|
||||
max_gain_increase_db *= config_.adjacent_speech_frames_threshold;
|
||||
}
|
||||
|
||||
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
|
||||
@ -217,7 +222,7 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
||||
}
|
||||
|
||||
// Modify `frame` only if not running in "dry run" mode.
|
||||
if (!dry_run_) {
|
||||
if (!config_.dry_run) {
|
||||
gain_applier_.ApplyGain(frame);
|
||||
} else {
|
||||
// Copy `frame` so that `ApplyGain()` is called (on a copy).
|
||||
@ -247,7 +252,8 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
||||
kHeadroomHistogramMax,
|
||||
kHeadroomHistogramMax - kHeadroomHistogramMin + 1);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
|
||||
last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
|
||||
last_gain_db_, 0, kGainDbHistogramMax,
|
||||
kGainDbHistogramMax + 1);
|
||||
RTC_LOG(LS_INFO) << "AGC2 adaptive digital"
|
||||
<< " | speech_dbfs: " << info.speech_level_dbfs
|
||||
<< " | noise_dbfs: " << info.noise_rms_dbfs
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
#include "modules/audio_processing/agc2/gain_applier.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -35,16 +36,9 @@ class AdaptiveDigitalGainApplier {
|
||||
float limiter_envelope_dbfs; // Envelope level from the limiter (dBFS).
|
||||
};
|
||||
|
||||
// Ctor. `adjacent_speech_frames_threshold` indicates how many adjacent speech
|
||||
// frames must be observed in order to consider the sequence as speech.
|
||||
// `max_gain_change_db_per_second` limits the adaptation speed (uniformly
|
||||
// operated across frames). `max_output_noise_level_dbfs` limits the output
|
||||
// noise level. If `dry_run` is true, `Process()` will not modify the audio.
|
||||
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
|
||||
int adjacent_speech_frames_threshold,
|
||||
float max_gain_change_db_per_second,
|
||||
float max_output_noise_level_dbfs,
|
||||
bool dry_run);
|
||||
AdaptiveDigitalGainApplier(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
|
||||
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
|
||||
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
|
||||
delete;
|
||||
@ -59,10 +53,8 @@ class AdaptiveDigitalGainApplier {
|
||||
ApmDataDumper* const apm_data_dumper_;
|
||||
GainApplier gain_applier_;
|
||||
|
||||
const int adjacent_speech_frames_threshold_;
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital config_;
|
||||
const float max_gain_change_db_per_10ms_;
|
||||
const float max_output_noise_level_dbfs_;
|
||||
const bool dry_run_;
|
||||
|
||||
int calls_since_last_gain_log_;
|
||||
int frames_to_gain_increase_allowed_;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/agc2/vector_float_frame.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/gunit.h"
|
||||
|
||||
@ -33,57 +34,68 @@ constexpr float kMaxSpeechProbability = 1.0f;
|
||||
constexpr float kNoNoiseDbfs = kMinLevelDbfs;
|
||||
constexpr float kWithNoiseDbfs = -20.0f;
|
||||
|
||||
constexpr float kMaxGainChangePerSecondDb = 3.0f;
|
||||
constexpr float kMaxGainChangePerFrameDb =
|
||||
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.0f;
|
||||
constexpr float kMaxOutputNoiseLevelDbfs = -50.0f;
|
||||
// Number of additional frames to process in the tests to ensure that the tested
|
||||
// adaptation processes have converged.
|
||||
constexpr int kNumExtraFrames = 10;
|
||||
|
||||
constexpr float GetMaxGainChangePerFrameDb(
|
||||
float max_gain_change_db_per_second) {
|
||||
return max_gain_change_db_per_second * kFrameDurationMs / 1000.0f;
|
||||
}
|
||||
|
||||
using AdaptiveDigitalConfig =
|
||||
AudioProcessing::Config::GainController2::AdaptiveDigital;
|
||||
|
||||
constexpr AdaptiveDigitalConfig kDefaultConfig{};
|
||||
|
||||
// Helper to create initialized `AdaptiveDigitalGainApplier` objects.
|
||||
struct GainApplierHelper {
|
||||
GainApplierHelper()
|
||||
: GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {}
|
||||
explicit GainApplierHelper(int adjacent_speech_frames_threshold)
|
||||
explicit GainApplierHelper(const AdaptiveDigitalConfig& config)
|
||||
: apm_data_dumper(0),
|
||||
gain_applier(std::make_unique<AdaptiveDigitalGainApplier>(
|
||||
&apm_data_dumper,
|
||||
adjacent_speech_frames_threshold,
|
||||
kMaxGainChangePerSecondDb,
|
||||
kMaxOutputNoiseLevelDbfs,
|
||||
/*dry_run=*/false)) {}
|
||||
gain_applier(
|
||||
std::make_unique<AdaptiveDigitalGainApplier>(&apm_data_dumper,
|
||||
config)) {}
|
||||
ApmDataDumper apm_data_dumper;
|
||||
std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
|
||||
};
|
||||
|
||||
// Sample frame information for the tests mocking noiseless speech detected
|
||||
// with maximum probability and with level, headroom and limiter envelope chosen
|
||||
// so that the resulting gain equals `kInitialAdaptiveDigitalGainDb` - i.e., no
|
||||
// gain adaptation is expected.
|
||||
constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
|
||||
/*speech_probability=*/kMaxSpeechProbability,
|
||||
/*speech_level_dbfs=*/kInitialSpeechLevelEstimateDbfs,
|
||||
/*speech_level_reliable=*/true,
|
||||
/*noise_rms_dbfs=*/kNoNoiseDbfs,
|
||||
/*headroom_db=*/kSaturationProtectorInitialHeadroomDb,
|
||||
/*limiter_envelope_dbfs=*/-2.0f};
|
||||
// Returns a `FrameInfo` sample to simulate noiseless speech detected with
|
||||
// maximum probability and with level, headroom and limiter envelope chosen
|
||||
// so that the resulting gain equals the default initial adaptive digital gain
|
||||
// - i.e., no gain adaptation is expected.
|
||||
AdaptiveDigitalGainApplier::FrameInfo GetFrameInfoToNotAdapt(
|
||||
const AdaptiveDigitalConfig& config) {
|
||||
AdaptiveDigitalGainApplier::FrameInfo info;
|
||||
info.speech_probability = kMaxSpeechProbability;
|
||||
info.speech_level_dbfs = -config.initial_gain_db - config.headroom_db;
|
||||
info.speech_level_reliable = true;
|
||||
info.noise_rms_dbfs = kNoNoiseDbfs;
|
||||
info.headroom_db = config.headroom_db;
|
||||
info.limiter_envelope_dbfs = -2.0f;
|
||||
return info;
|
||||
}
|
||||
|
||||
TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
|
||||
GainApplierHelper helper;
|
||||
GainApplierHelper helper(kDefaultConfig);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
|
||||
// Make one call with reasonable audio level values and settings.
|
||||
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
info.speech_level_dbfs = -5.0f;
|
||||
helper.gain_applier->Process(kFrameInfo, fake_audio.float_frame_view());
|
||||
helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
|
||||
fake_audio.float_frame_view());
|
||||
}
|
||||
|
||||
// Checks that the maximum allowed gain is applied.
|
||||
TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
|
||||
constexpr int kNumFramesToAdapt =
|
||||
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
|
||||
static_cast<int>(kDefaultConfig.max_gain_db /
|
||||
GetMaxGainChangePerFrameDb(
|
||||
kDefaultConfig.max_gain_change_db_per_second)) +
|
||||
kNumExtraFrames;
|
||||
|
||||
GainApplierHelper helper;
|
||||
GainApplierHelper helper(kDefaultConfig);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = -60.0f;
|
||||
float applied_gain;
|
||||
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
||||
@ -92,30 +104,33 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
|
||||
applied_gain = fake_audio.float_frame_view().channel(0)[0];
|
||||
}
|
||||
const float applied_gain_db = 20.0f * std::log10f(applied_gain);
|
||||
EXPECT_NEAR(applied_gain_db, kMaxGainDb, 0.1f);
|
||||
EXPECT_NEAR(applied_gain_db, kDefaultConfig.max_gain_db, 0.1f);
|
||||
}
|
||||
|
||||
TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
|
||||
GainApplierHelper helper;
|
||||
GainApplierHelper helper(kDefaultConfig);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
|
||||
constexpr float initial_level_dbfs = -25.0f;
|
||||
// A few extra frames for safety.
|
||||
constexpr float kMaxGainChangeDbPerFrame =
|
||||
GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
|
||||
constexpr int kNumFramesToAdapt =
|
||||
static_cast<int>(initial_level_dbfs / kMaxGainChangePerFrameDb) + 10;
|
||||
static_cast<int>(initial_level_dbfs / kMaxGainChangeDbPerFrame) +
|
||||
kNumExtraFrames;
|
||||
|
||||
const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);
|
||||
const float max_change_per_frame_linear = DbToRatio(kMaxGainChangeDbPerFrame);
|
||||
|
||||
float last_gain_linear = 1.f;
|
||||
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = initial_level_dbfs;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
|
||||
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
|
||||
kMaxChangePerFrameLinear);
|
||||
max_change_per_frame_linear);
|
||||
last_gain_linear = current_gain_linear;
|
||||
}
|
||||
|
||||
@ -123,56 +138,61 @@ TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
|
||||
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = 0.f;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
|
||||
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
|
||||
kMaxChangePerFrameLinear);
|
||||
max_change_per_frame_linear);
|
||||
last_gain_linear = current_gain_linear;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
|
||||
GainApplierHelper helper;
|
||||
GainApplierHelper helper(kDefaultConfig);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
constexpr float initial_level_dbfs = -25.0f;
|
||||
|
||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = initial_level_dbfs;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
float maximal_difference = 0.0f;
|
||||
float current_value = 1.0f * DbToRatio(kInitialAdaptiveDigitalGainDb);
|
||||
float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db);
|
||||
for (const auto& x : fake_audio.float_frame_view().channel(0)) {
|
||||
const float difference = std::abs(x - current_value);
|
||||
maximal_difference = std::max(maximal_difference, difference);
|
||||
current_value = x;
|
||||
}
|
||||
|
||||
const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);
|
||||
const float kMaxChangePerSample =
|
||||
kMaxChangePerFrameLinear / kFrameLen10ms48kHz;
|
||||
const float max_change_per_frame_linear = DbToRatio(
|
||||
GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second));
|
||||
const float max_change_per_sample =
|
||||
max_change_per_frame_linear / kFrameLen10ms48kHz;
|
||||
|
||||
EXPECT_LE(maximal_difference, kMaxChangePerSample);
|
||||
EXPECT_LE(maximal_difference, max_change_per_sample);
|
||||
}
|
||||
|
||||
TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
|
||||
GainApplierHelper helper;
|
||||
GainApplierHelper helper(kDefaultConfig);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
constexpr float initial_level_dbfs = -25.0f;
|
||||
constexpr int num_initial_frames =
|
||||
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
|
||||
kDefaultConfig.initial_gain_db /
|
||||
GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
|
||||
constexpr int num_frames = 50;
|
||||
|
||||
ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
|
||||
ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
|
||||
<< "kWithNoiseDbfs is too low";
|
||||
|
||||
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
|
||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = initial_level_dbfs;
|
||||
info.noise_rms_dbfs = kWithNoiseDbfs;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
@ -189,31 +209,34 @@ TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
|
||||
}
|
||||
|
||||
TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
|
||||
GainApplierHelper helper;
|
||||
GainApplierHelper helper(kDefaultConfig);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
|
||||
|
||||
// Make one call with positive audio level values and settings.
|
||||
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = 5.0f;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
}
|
||||
|
||||
TEST(GainController2GainApplier, AudioLevelLimitsGain) {
|
||||
GainApplierHelper helper;
|
||||
GainApplierHelper helper(kDefaultConfig);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
constexpr float initial_level_dbfs = -25.0f;
|
||||
constexpr int num_initial_frames =
|
||||
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
|
||||
kDefaultConfig.initial_gain_db /
|
||||
GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
|
||||
constexpr int num_frames = 50;
|
||||
|
||||
ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
|
||||
ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
|
||||
<< "kWithNoiseDbfs is too low";
|
||||
|
||||
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
|
||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = initial_level_dbfs;
|
||||
info.limiter_envelope_dbfs = 1.0f;
|
||||
info.speech_level_reliable = false;
|
||||
@ -232,21 +255,22 @@ TEST(GainController2GainApplier, AudioLevelLimitsGain) {
|
||||
|
||||
class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
|
||||
protected:
|
||||
int AdjacentSpeechFramesThreshold() const { return GetParam(); }
|
||||
int adjacent_speech_frames_threshold() const { return GetParam(); }
|
||||
};
|
||||
|
||||
TEST_P(AdaptiveDigitalGainApplierTest,
|
||||
DoNotIncreaseGainWithTooFewSpeechFrames) {
|
||||
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
|
||||
GainApplierHelper helper(adjacent_speech_frames_threshold);
|
||||
AdaptiveDigitalConfig config;
|
||||
config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
|
||||
GainApplierHelper helper(config);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
// Lower the speech level so that the target gain will be increased.
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
||||
info.speech_level_dbfs -= 12.0f;
|
||||
|
||||
float prev_gain = 0.0f;
|
||||
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
||||
for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
helper.gain_applier->Process(info, audio.float_frame_view());
|
||||
@ -259,16 +283,17 @@ TEST_P(AdaptiveDigitalGainApplierTest,
|
||||
}
|
||||
|
||||
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
|
||||
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
|
||||
GainApplierHelper helper(adjacent_speech_frames_threshold);
|
||||
AdaptiveDigitalConfig config;
|
||||
config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
|
||||
GainApplierHelper helper(config);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
// Lower the speech level so that the target gain will be increased.
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
||||
info.speech_level_dbfs -= 12.0f;
|
||||
|
||||
float prev_gain = 0.0f;
|
||||
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
||||
for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
helper.gain_applier->Process(info, audio.float_frame_view());
|
||||
@ -289,63 +314,65 @@ INSTANTIATE_TEST_SUITE_P(GainController2,
|
||||
|
||||
// Checks that the input is never modified when running in dry run mode.
|
||||
TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveDigitalGainApplier gain_applier(
|
||||
&apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
|
||||
kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
|
||||
AdaptiveDigitalConfig config;
|
||||
config.dry_run = true;
|
||||
GainApplierHelper helper(config);
|
||||
|
||||
// Simulate an input signal with low speech level.
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
||||
info.speech_level_dbfs = -60.0f;
|
||||
// Allow enough time to reach the maximum gain.
|
||||
constexpr int kNumFramesToAdapt =
|
||||
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
|
||||
const int num_frames_to_adapt =
|
||||
static_cast<int>(
|
||||
config.max_gain_db /
|
||||
GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) +
|
||||
kNumExtraFrames;
|
||||
constexpr float kPcmSamples = 123.456f;
|
||||
// Run the gain applier and check that the PCM samples are not modified.
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
for (int i = 0; i < num_frames_to_adapt; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
||||
gain_applier.Process(info, fake_audio.float_frame_view());
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that no sample is modified before and after the sample rate changes.
|
||||
TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveDigitalGainApplier gain_applier(
|
||||
&apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
|
||||
kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalConfig config;
|
||||
config.dry_run = true;
|
||||
GainApplierHelper helper(config);
|
||||
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
||||
info.speech_level_dbfs = -60.0f;
|
||||
constexpr float kPcmSamples = 123.456f;
|
||||
VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
gain_applier.Process(info, fake_audio_8k.float_frame_view());
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
|
||||
gain_applier.Process(info, fake_audio_48k.float_frame_view());
|
||||
helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
}
|
||||
|
||||
// Checks that no sample is modified before and after the number of channels
|
||||
// changes.
|
||||
TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveDigitalGainApplier gain_applier(
|
||||
&apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
|
||||
kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
AdaptiveDigitalConfig config;
|
||||
config.dry_run = true;
|
||||
GainApplierHelper helper(config);
|
||||
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
|
||||
info.speech_level_dbfs = -60.0f;
|
||||
constexpr float kPcmSamples = 123.456f;
|
||||
VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
gain_applier.Process(info, fake_audio_8k.float_frame_view());
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/8000, kStereo);
|
||||
gain_applier.Process(info, fake_audio_48k.float_frame_view());
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo);
|
||||
helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
|
||||
}
|
||||
|
@ -20,7 +20,14 @@ namespace webrtc {
|
||||
namespace {
|
||||
|
||||
float ClampLevelEstimateDbfs(float level_estimate_dbfs) {
|
||||
return rtc::SafeClamp<float>(level_estimate_dbfs, -90.f, 30.f);
|
||||
return rtc::SafeClamp<float>(level_estimate_dbfs, -90.0f, 30.0f);
|
||||
}
|
||||
|
||||
// Returns the initial speech level estimate needed to apply the initial gain.
|
||||
float GetInitialSpeechLevelEstimateDbfs(
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital& config) {
|
||||
return ClampLevelEstimateDbfs(-kSaturationProtectorInitialHeadroomDb -
|
||||
config.initial_gain_db - config.headroom_db);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -37,18 +44,14 @@ float AdaptiveModeLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
|
||||
return numerator / denominator;
|
||||
}
|
||||
|
||||
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper)
|
||||
: AdaptiveModeLevelEstimator(
|
||||
apm_data_dumper,
|
||||
kDefaultLevelEstimatorAdjacentSpeechFramesThreshold) {}
|
||||
|
||||
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
int adjacent_speech_frames_threshold)
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
|
||||
: apm_data_dumper_(apm_data_dumper),
|
||||
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
|
||||
level_dbfs_(ClampLevelEstimateDbfs(kInitialSpeechLevelEstimateDbfs)) {
|
||||
initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)),
|
||||
adjacent_speech_frames_threshold_(
|
||||
config.adjacent_speech_frames_threshold),
|
||||
level_dbfs_(initial_speech_level_dbfs_) {
|
||||
RTC_DCHECK(apm_data_dumper_);
|
||||
RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1);
|
||||
Reset();
|
||||
@ -128,14 +131,14 @@ bool AdaptiveModeLevelEstimator::IsConfident() const {
|
||||
// Resets the preliminary and the reliable estimator states, restores the level
// estimate to the initial speech level derived from the config and clears the
// counter of adjacent speech frames.
void AdaptiveModeLevelEstimator::Reset() {
  ResetLevelEstimatorState(preliminary_state_);
  ResetLevelEstimatorState(reliable_state_);
  level_dbfs_ = initial_speech_level_dbfs_;
  num_adjacent_speech_frames_ = 0;
}
|
||||
|
||||
// Re-initializes `state`: the time left to reach confidence is set back to
// `kLevelEstimatorTimeToConfidenceMs` and the level ratio is seeded with the
// initial speech level over a unit denominator, so that the ratio evaluates to
// the initial speech level.
void AdaptiveModeLevelEstimator::ResetLevelEstimatorState(
    LevelEstimatorState& state) const {
  state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs;
  state.level_dbfs.numerator = initial_speech_level_dbfs_;
  state.level_dbfs.denominator = 1.0f;
}
|
||||
|
||||
|
@ -24,12 +24,12 @@ class ApmDataDumper;
|
||||
// Level estimator for the digital adaptive gain controller.
|
||||
class AdaptiveModeLevelEstimator {
|
||||
public:
|
||||
explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
|
||||
AdaptiveModeLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
|
||||
AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete;
|
||||
AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) =
|
||||
delete;
|
||||
AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper,
|
||||
int adjacent_speech_frames_threshold);
|
||||
|
||||
// Updates the level estimation.
|
||||
void Update(const VadLevelAnalyzer::Result& vad_data);
|
||||
@ -63,6 +63,7 @@ class AdaptiveModeLevelEstimator {
|
||||
|
||||
ApmDataDumper* const apm_data_dumper_;
|
||||
|
||||
const float initial_speech_level_dbfs_;
|
||||
const int adjacent_speech_frames_threshold_;
|
||||
LevelEstimatorState preliminary_state_;
|
||||
LevelEstimatorState reliable_state_;
|
||||
|
@ -13,37 +13,22 @@
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/gunit.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
using AdaptiveDigitalConfig =
|
||||
AudioProcessing::Config::GainController2::AdaptiveDigital;
|
||||
|
||||
// Number of speech frames that the level estimator must observe in order to
|
||||
// become confident about the estimated level.
|
||||
constexpr int kNumFramesToConfidence =
|
||||
kLevelEstimatorTimeToConfidenceMs / kFrameDurationMs;
|
||||
static_assert(kNumFramesToConfidence > 0, "");
|
||||
|
||||
// Fake levels and speech probabilities used in the tests.
|
||||
static_assert(kInitialSpeechLevelEstimateDbfs < 0.0f, "");
|
||||
constexpr float kVadLevelRms = kInitialSpeechLevelEstimateDbfs / 2.0f;
|
||||
constexpr float kVadLevelPeak = kInitialSpeechLevelEstimateDbfs / 3.0f;
|
||||
static_assert(kVadLevelRms < kVadLevelPeak, "");
|
||||
static_assert(kVadLevelRms > kInitialSpeechLevelEstimateDbfs, "");
|
||||
static_assert(kVadLevelRms - kInitialSpeechLevelEstimateDbfs > 5.0f,
|
||||
"Adjust `kVadLevelRms` so that the difference from the initial "
|
||||
"level is wide enough for the tests.");
|
||||
|
||||
constexpr VadLevelAnalyzer::Result kVadDataSpeech{/*speech_probability=*/1.0f,
|
||||
kVadLevelRms, kVadLevelPeak};
|
||||
constexpr VadLevelAnalyzer::Result kVadDataNonSpeech{
|
||||
/*speech_probability=*/kVadConfidenceThreshold / 2.0f, kVadLevelRms,
|
||||
kVadLevelPeak};
|
||||
|
||||
constexpr float kMinSpeechProbability = 0.0f;
|
||||
constexpr float kMaxSpeechProbability = 1.0f;
|
||||
|
||||
constexpr float kConvergenceSpeedTestsLevelTolerance = 0.5f;
|
||||
|
||||
// Provides the `vad_level` value `num_iterations` times to `level_estimator`.
|
||||
@ -55,31 +40,51 @@ void RunOnConstantLevel(int num_iterations,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a default adaptive digital config in which only
// `adjacent_speech_frames_threshold` is overridden with the given value.
constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig(
    int adjacent_speech_frames_threshold) {
  AdaptiveDigitalConfig adaptive_digital;
  adaptive_digital.adjacent_speech_frames_threshold =
      adjacent_speech_frames_threshold;
  return adaptive_digital;
}
|
||||
|
||||
// Level estimator with data dumper.
|
||||
struct TestLevelEstimator {
|
||||
TestLevelEstimator()
|
||||
explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
|
||||
: data_dumper(0),
|
||||
estimator(std::make_unique<AdaptiveModeLevelEstimator>(
|
||||
&data_dumper,
|
||||
/*adjacent_speech_frames_threshold=*/1)) {}
|
||||
GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
|
||||
initial_speech_level_dbfs(estimator->level_dbfs()),
|
||||
vad_level_rms(initial_speech_level_dbfs / 2.0f),
|
||||
vad_level_peak(initial_speech_level_dbfs / 3.0f),
|
||||
vad_data_speech(
|
||||
{/*speech_probability=*/1.0f, vad_level_rms, vad_level_peak}),
|
||||
vad_data_non_speech(
|
||||
{/*speech_probability=*/kVadConfidenceThreshold / 2.0f,
|
||||
vad_level_rms, vad_level_peak}) {
|
||||
RTC_DCHECK_LT(vad_level_rms, vad_level_peak);
|
||||
RTC_DCHECK_LT(initial_speech_level_dbfs, vad_level_rms);
|
||||
RTC_DCHECK_GT(vad_level_rms - initial_speech_level_dbfs, 5.0f)
|
||||
<< "Adjust `vad_level_rms` so that the difference from the initial "
|
||||
"level is wide enough for the tests";
|
||||
}
|
||||
ApmDataDumper data_dumper;
|
||||
std::unique_ptr<AdaptiveModeLevelEstimator> estimator;
|
||||
const float initial_speech_level_dbfs;
|
||||
const float vad_level_rms;
|
||||
const float vad_level_peak;
|
||||
const VadLevelAnalyzer::Result vad_data_speech;
|
||||
const VadLevelAnalyzer::Result vad_data_non_speech;
|
||||
};
|
||||
|
||||
// Checks the initially estimated level.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator, CheckInitialEstimate) {
|
||||
TestLevelEstimator level_estimator;
|
||||
EXPECT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
|
||||
kInitialSpeechLevelEstimateDbfs);
|
||||
}
|
||||
|
||||
// Checks that the level estimator converges to a constant input speech level.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
|
||||
TestLevelEstimator level_estimator;
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
level_estimator.vad_data_speech,
|
||||
*level_estimator.estimator);
|
||||
const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
|
||||
RunOnConstantLevel(/*num_iterations=*/1, kVadDataSpeech,
|
||||
RunOnConstantLevel(/*num_iterations=*/1, level_estimator.vad_data_speech,
|
||||
*level_estimator.estimator);
|
||||
EXPECT_NEAR(level_estimator.estimator->level_dbfs(), estimated_level_dbfs,
|
||||
0.1f);
|
||||
@ -88,17 +93,19 @@ TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
|
||||
// Checks that the level controller does not become confident when too few
|
||||
// speech frames are observed.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {
|
||||
TestLevelEstimator level_estimator;
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
|
||||
kVadDataSpeech, *level_estimator.estimator);
|
||||
level_estimator.vad_data_speech,
|
||||
*level_estimator.estimator);
|
||||
EXPECT_FALSE(level_estimator.estimator->IsConfident());
|
||||
}
|
||||
|
||||
// Checks that the level controller becomes confident when enough speech frames
|
||||
// are observed.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
|
||||
TestLevelEstimator level_estimator;
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
level_estimator.vad_data_speech,
|
||||
*level_estimator.estimator);
|
||||
EXPECT_TRUE(level_estimator.estimator->IsConfident());
|
||||
}
|
||||
@ -107,14 +114,15 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
|
||||
// frames.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
EstimatorIgnoresNonSpeechFrames) {
|
||||
TestLevelEstimator level_estimator;
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
// Simulate speech.
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
level_estimator.vad_data_speech,
|
||||
*level_estimator.estimator);
|
||||
const float estimated_level_dbfs = level_estimator.estimator->level_dbfs();
|
||||
// Simulate full-scale non-speech.
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
VadLevelAnalyzer::Result{kMinSpeechProbability,
|
||||
VadLevelAnalyzer::Result{/*speech_probability=*/0.0f,
|
||||
/*rms_dbfs=*/0.0f,
|
||||
/*peak_dbfs=*/0.0f},
|
||||
*level_estimator.estimator);
|
||||
@ -126,28 +134,30 @@ TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
// Checks the convergence speed of the estimator before it becomes confident.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
ConvergenceSpeedBeforeConfidence) {
|
||||
TestLevelEstimator level_estimator;
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, kVadDataSpeech,
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
level_estimator.vad_data_speech,
|
||||
*level_estimator.estimator);
|
||||
EXPECT_NEAR(level_estimator.estimator->level_dbfs(), kVadDataSpeech.rms_dbfs,
|
||||
EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
|
||||
level_estimator.vad_data_speech.rms_dbfs,
|
||||
kConvergenceSpeedTestsLevelTolerance);
|
||||
}
|
||||
|
||||
// Checks the convergence speed of the estimator after it becomes confident.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
ConvergenceSpeedAfterConfidence) {
|
||||
TestLevelEstimator level_estimator;
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
// Reach confidence using the initial level estimate.
|
||||
RunOnConstantLevel(
|
||||
/*num_iterations=*/kNumFramesToConfidence,
|
||||
VadLevelAnalyzer::Result{
|
||||
kMaxSpeechProbability,
|
||||
/*rms_dbfs=*/kInitialSpeechLevelEstimateDbfs,
|
||||
/*peak_dbfs=*/kInitialSpeechLevelEstimateDbfs + 6.0f},
|
||||
/*speech_probability=*/1.0f,
|
||||
/*rms_dbfs=*/level_estimator.initial_speech_level_dbfs,
|
||||
/*peak_dbfs=*/level_estimator.initial_speech_level_dbfs + 6.0f},
|
||||
*level_estimator.estimator);
|
||||
// No estimate change should occur, but confidence is achieved.
|
||||
ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
|
||||
kInitialSpeechLevelEstimateDbfs);
|
||||
level_estimator.initial_speech_level_dbfs);
|
||||
ASSERT_TRUE(level_estimator.estimator->IsConfident());
|
||||
// After confidence.
|
||||
constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds.
|
||||
@ -155,8 +165,9 @@ TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
kConvergenceTimeAfterConfidenceNumFrames > kNumFramesToConfidence, "");
|
||||
RunOnConstantLevel(
|
||||
/*num_iterations=*/kConvergenceTimeAfterConfidenceNumFrames,
|
||||
kVadDataSpeech, *level_estimator.estimator);
|
||||
EXPECT_NEAR(level_estimator.estimator->level_dbfs(), kVadDataSpeech.rms_dbfs,
|
||||
level_estimator.vad_data_speech, *level_estimator.estimator);
|
||||
EXPECT_NEAR(level_estimator.estimator->level_dbfs(),
|
||||
level_estimator.vad_data_speech.rms_dbfs,
|
||||
kConvergenceSpeedTestsLevelTolerance);
|
||||
}
|
||||
|
||||
@ -168,30 +179,26 @@ class AdaptiveModeLevelEstimatorParametrization
|
||||
|
||||
// Checks that the level estimate does not move when the run of adjacent speech
// frames is one frame shorter than the configured threshold and is then
// interrupted by a non-speech frame.
TEST_P(AdaptiveModeLevelEstimatorParametrization,
       DoNotAdaptToShortSpeechSegments) {
  TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
  const float initial_level = level_estimator.estimator->level_dbfs();
  // The fake speech level must differ from the initial estimate, otherwise the
  // equality checks below could not detect an adaptation.
  ASSERT_LT(initial_level, level_estimator.vad_data_speech.peak_dbfs);
  for (int i = 0; i < adjacent_speech_frames_threshold() - 1; ++i) {
    SCOPED_TRACE(i);
    level_estimator.estimator->Update(level_estimator.vad_data_speech);
    EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
  }
  level_estimator.estimator->Update(level_estimator.vad_data_non_speech);
  EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
}
|
||||
|
||||
// Checks that the level estimate increases once the number of adjacent speech
// frames reaches the configured threshold.
TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
  TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
  const float initial_level = level_estimator.estimator->level_dbfs();
  ASSERT_LT(initial_level, level_estimator.vad_data_speech.peak_dbfs);
  for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
    level_estimator.estimator->Update(level_estimator.vad_data_speech);
  }
  EXPECT_LT(initial_level, level_estimator.estimator->level_dbfs());
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(GainController2,
|
||||
|
@ -24,38 +24,26 @@ constexpr int kFrameDurationMs = 10;
|
||||
constexpr int kSubFramesInFrame = 20;
|
||||
constexpr int kMaximalNumberOfSamplesPerChannel = 480;
|
||||
|
||||
// Adaptive digital gain applier settings below.
|
||||
constexpr float kHeadroomDbfs = 6.0f;
|
||||
constexpr float kMaxGainDb = 30.0f;
|
||||
constexpr float kInitialAdaptiveDigitalGainDb = 8.0f;
|
||||
// Adaptive digital gain applier settings.
|
||||
|
||||
// At what limiter levels should we start decreasing the adaptive digital gain.
|
||||
constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;
|
||||
|
||||
// This is the threshold for speech. Speech frames are used for updating the
|
||||
// speech level, measuring the amount of speech, and decide when to allow target
|
||||
// gain reduction.
|
||||
// gain changes.
|
||||
constexpr float kVadConfidenceThreshold = 0.95f;
|
||||
|
||||
// Adaptive digital level estimator parameters.
|
||||
// Number of milliseconds of speech frames to observe to make the estimator
|
||||
// confident.
|
||||
constexpr float kLevelEstimatorTimeToConfidenceMs = 400;
|
||||
constexpr float kLevelEstimatorLeakFactor =
|
||||
1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs;
|
||||
|
||||
// Robust VAD probability and speech decisions.
|
||||
constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;
|
||||
|
||||
// Saturation Protector settings.
|
||||
constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f;
|
||||
constexpr int kSaturationProtectorBufferSize = 4;
|
||||
|
||||
// Set the initial speech level estimate so that `kInitialAdaptiveDigitalGainDb`
|
||||
// is applied at the beginning of the call.
|
||||
constexpr float kInitialSpeechLevelEstimateDbfs =
|
||||
-kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
|
||||
kHeadroomDbfs;
|
||||
|
||||
// Number of interpolation points for each region of the limiter.
|
||||
// These values have been tuned to limit the interpolated gain curve error given
|
||||
// the limiter parameters and allowing a maximum error of +/- 32768^-1.
|
||||
|
@ -3107,6 +3107,18 @@ TEST(AudioProcessing, GainController2ConfigEqual) {
|
||||
b_adaptive.dry_run = a_adaptive.dry_run;
|
||||
EXPECT_EQ(a, b);
|
||||
|
||||
a_adaptive.headroom_db += 1.0f;
|
||||
b_adaptive.headroom_db = a_adaptive.headroom_db;
|
||||
EXPECT_EQ(a, b);
|
||||
|
||||
a_adaptive.max_gain_db += 1.0f;
|
||||
b_adaptive.max_gain_db = a_adaptive.max_gain_db;
|
||||
EXPECT_EQ(a, b);
|
||||
|
||||
a_adaptive.initial_gain_db += 1.0f;
|
||||
b_adaptive.initial_gain_db = a_adaptive.initial_gain_db;
|
||||
EXPECT_EQ(a, b);
|
||||
|
||||
a_adaptive.vad_reset_period_ms++;
|
||||
b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms;
|
||||
EXPECT_EQ(a, b);
|
||||
@ -3164,6 +3176,18 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) {
|
||||
EXPECT_NE(a, b);
|
||||
a_adaptive = b_adaptive;
|
||||
|
||||
a_adaptive.headroom_db += 1.0f;
|
||||
EXPECT_NE(a, b);
|
||||
a_adaptive = b_adaptive;
|
||||
|
||||
a_adaptive.max_gain_db += 1.0f;
|
||||
EXPECT_NE(a, b);
|
||||
a_adaptive = b_adaptive;
|
||||
|
||||
a_adaptive.initial_gain_db += 1.0f;
|
||||
EXPECT_NE(a, b);
|
||||
a_adaptive = b_adaptive;
|
||||
|
||||
a_adaptive.vad_reset_period_ms++;
|
||||
EXPECT_NE(a, b);
|
||||
a_adaptive = b_adaptive;
|
||||
|
@ -105,7 +105,9 @@ bool GainController2::Validate(
|
||||
const AudioProcessing::Config::GainController2& config) {
|
||||
const auto& fixed = config.fixed_digital;
|
||||
const auto& adaptive = config.adaptive_digital;
|
||||
return fixed.gain_db >= 0.f && fixed.gain_db < 50.f &&
|
||||
return fixed.gain_db >= 0.0f && fixed.gain_db < 50.f &&
|
||||
adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
|
||||
adaptive.initial_gain_db >= 0.0f &&
|
||||
adaptive.max_gain_change_db_per_second > 0.0f &&
|
||||
adaptive.max_output_noise_level_dbfs <= 0.0f;
|
||||
}
|
||||
|
@ -89,6 +89,36 @@ TEST(GainController2, CheckFixedDigitalConfig) {
|
||||
EXPECT_TRUE(GainController2::Validate(config));
|
||||
}
|
||||
|
||||
// Checks the validation of the adaptive digital headroom: a negative value is
// rejected, zero and positive values are accepted.
TEST(GainController2, CheckHeadroomDb) {
  AudioProcessing::Config::GainController2 config;
  float& headroom_db = config.adaptive_digital.headroom_db;

  headroom_db = -1.0f;
  EXPECT_FALSE(GainController2::Validate(config));

  headroom_db = 0.0f;
  EXPECT_TRUE(GainController2::Validate(config));

  headroom_db = 5.0f;
  EXPECT_TRUE(GainController2::Validate(config));
}
|
||||
|
||||
// Checks the validation of the adaptive digital maximum gain: negative and
// zero values are rejected, a positive value is accepted.
TEST(GainController2, CheckMaxGainDb) {
  AudioProcessing::Config::GainController2 config;
  float& max_gain_db = config.adaptive_digital.max_gain_db;

  max_gain_db = -1.0f;
  EXPECT_FALSE(GainController2::Validate(config));

  max_gain_db = 0.0f;
  EXPECT_FALSE(GainController2::Validate(config));

  max_gain_db = 5.0f;
  EXPECT_TRUE(GainController2::Validate(config));
}
|
||||
|
||||
// Checks the validation of the adaptive digital initial gain: a negative value
// is rejected, zero and positive values are accepted.
TEST(GainController2, CheckInitialGainDb) {
  AudioProcessing::Config::GainController2 config;
  float& initial_gain_db = config.adaptive_digital.initial_gain_db;

  initial_gain_db = -1.0f;
  EXPECT_FALSE(GainController2::Validate(config));

  initial_gain_db = 0.0f;
  EXPECT_TRUE(GainController2::Validate(config));

  initial_gain_db = 5.0f;
  EXPECT_TRUE(GainController2::Validate(config));
}
|
||||
|
||||
TEST(GainController2, CheckAdaptiveDigitalMaxGainChangeSpeedConfig) {
|
||||
AudioProcessing::Config::GainController2 config;
|
||||
config.adaptive_digital.max_gain_change_db_per_second = -1.0f;
|
||||
|
@ -90,6 +90,8 @@ bool Agc1Config::operator==(const Agc1Config& rhs) const {
|
||||
bool Agc2Config::AdaptiveDigital::operator==(
|
||||
const Agc2Config::AdaptiveDigital& rhs) const {
|
||||
return enabled == rhs.enabled && dry_run == rhs.dry_run &&
|
||||
headroom_db == rhs.headroom_db && max_gain_db == rhs.max_gain_db &&
|
||||
initial_gain_db == rhs.initial_gain_db &&
|
||||
vad_reset_period_ms == rhs.vad_reset_period_ms &&
|
||||
adjacent_speech_frames_threshold ==
|
||||
rhs.adjacent_speech_frames_threshold &&
|
||||
@ -197,6 +199,10 @@ std::string AudioProcessing::Config::ToString() const {
|
||||
<< " }, adaptive_digital: { enabled: "
|
||||
<< gain_controller2.adaptive_digital.enabled
|
||||
<< ", dry_run: " << gain_controller2.adaptive_digital.dry_run
|
||||
<< ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
|
||||
<< ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
|
||||
<< ", initial_gain_db: "
|
||||
<< gain_controller2.adaptive_digital.initial_gain_db
|
||||
<< ", vad_reset_period_ms: "
|
||||
<< gain_controller2.adaptive_digital.vad_reset_period_ms
|
||||
<< ", adjacent_speech_frames_threshold: "
|
||||
|
@ -367,12 +367,19 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
|
||||
}
|
||||
|
||||
bool enabled = false;
|
||||
// Run the adaptive digital controller but the signal is not modified.
|
||||
// When true, the adaptive digital controller runs but the signal is not
|
||||
// modified.
|
||||
bool dry_run = false;
|
||||
float headroom_db = 6.0f;
|
||||
// TODO(bugs.webrtc.org/7494): Consider removing and inferring from
|
||||
// `max_output_noise_level_dbfs`.
|
||||
float max_gain_db = 30.0f;
|
||||
float initial_gain_db = 8.0f;
|
||||
int vad_reset_period_ms = 1500;
|
||||
int adjacent_speech_frames_threshold = 12;
|
||||
float max_gain_change_db_per_second = 3.0f;
|
||||
float max_output_noise_level_dbfs = -50.0f;
|
||||
// TODO(bugs.webrtc.org/7494): Replace with field trials.
|
||||
bool sse2_allowed = true;
|
||||
bool avx2_allowed = true;
|
||||
bool neon_allowed = true;
|
||||
|
Reference in New Issue
Block a user