AGC2: max output noise level now part of config

Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ic42f09dc13560494963cdcd338a0c52a729e108d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186266
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32282}
This commit is contained in:
Alessio Bazzica
2020-10-01 17:16:56 +02:00
committed by Commit Bot
parent c082eba758
commit 9a625e7aef
6 changed files with 43 additions and 27 deletions

View File

@ -30,6 +30,7 @@ void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,
constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1; constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
constexpr float kMaxGainChangePerSecondDb = 3.f; constexpr float kMaxGainChangePerSecondDb = 3.f;
constexpr float kMaxOutputNoiseLevelDbfs = -50.f;
} // namespace } // namespace
@ -37,7 +38,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
: speech_level_estimator_(apm_data_dumper), : speech_level_estimator_(apm_data_dumper),
gain_applier_(apm_data_dumper, gain_applier_(apm_data_dumper,
kGainApplierAdjacentSpeechFramesThreshold, kGainApplierAdjacentSpeechFramesThreshold,
kMaxGainChangePerSecondDb), kMaxGainChangePerSecondDb,
kMaxOutputNoiseLevelDbfs),
apm_data_dumper_(apm_data_dumper), apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) { noise_level_estimator_(apm_data_dumper) {
RTC_DCHECK(apm_data_dumper); RTC_DCHECK(apm_data_dumper);
@ -56,7 +58,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
gain_applier_( gain_applier_(
apm_data_dumper, apm_data_dumper,
config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold, config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
config.adaptive_digital.max_gain_change_db_per_second), config.adaptive_digital.max_gain_change_db_per_second,
config.adaptive_digital.max_output_noise_level_dbfs),
apm_data_dumper_(apm_data_dumper), apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) { noise_level_estimator_(apm_data_dumper) {
RTC_DCHECK(apm_data_dumper); RTC_DCHECK(apm_data_dumper);

View File

@ -44,12 +44,16 @@ float ComputeGainDb(float input_level_dbfs) {
return 0.f; return 0.f;
} }
// We require 'gain + noise_level <= kMaxNoiseLevelDbfs'. // Returns `target_gain` if the output noise level is below
// `max_output_noise_level_dbfs`; otherwise returns a capped gain so that the
// output noise level equals `max_output_noise_level_dbfs`.
float LimitGainByNoise(float target_gain, float LimitGainByNoise(float target_gain,
float input_noise_level_dbfs, float input_noise_level_dbfs,
ApmDataDumper* apm_data_dumper) { float max_output_noise_level_dbfs,
const float noise_headroom_db = kMaxNoiseLevelDbfs - input_noise_level_dbfs; ApmDataDumper& apm_data_dumper) {
apm_data_dumper->DumpRaw("agc2_noise_headroom_db", noise_headroom_db); const float noise_headroom_db =
max_output_noise_level_dbfs - input_noise_level_dbfs;
apm_data_dumper.DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
return std::min(target_gain, std::max(noise_headroom_db, 0.f)); return std::min(target_gain, std::max(noise_headroom_db, 0.f));
} }
@ -89,7 +93,8 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper, ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold, int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second) float max_gain_change_db_per_second,
float max_output_noise_level_dbfs)
: apm_data_dumper_(apm_data_dumper), : apm_data_dumper_(apm_data_dumper),
gain_applier_( gain_applier_(
/*hard_clip_samples=*/false, /*hard_clip_samples=*/false,
@ -97,11 +102,14 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
max_gain_change_db_per_10ms_(max_gain_change_db_per_second * max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
kFrameDurationMs / 1000.f), kFrameDurationMs / 1000.f),
max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
calls_since_last_gain_log_(0), calls_since_last_gain_log_(0),
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_), frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
last_gain_db_(kInitialAdaptiveDigitalGainDb) { last_gain_db_(kInitialAdaptiveDigitalGainDb) {
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f); RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f);
RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f);
} }
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
@ -126,7 +134,8 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
const float target_gain_db = LimitGainByLowConfidence( const float target_gain_db = LimitGainByLowConfidence(
LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)), LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
info.input_noise_level_dbfs, apm_data_dumper_), info.input_noise_level_dbfs,
max_output_noise_level_dbfs_, *apm_data_dumper_),
last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident); last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);
// Forbid increasing the gain until enough adjacent speech frames are // Forbid increasing the gain until enough adjacent speech frames are

View File

@ -34,12 +34,15 @@ class AdaptiveDigitalGainApplier {
bool estimate_is_confident; bool estimate_is_confident;
}; };
// Ctor.
// `adjacent_speech_frames_threshold` indicates how many speech frames are // `adjacent_speech_frames_threshold` indicates how many speech frames are
// required before a gain increase is allowed. `max_gain_change_db_per_second` // required before a gain increase is allowed. `max_gain_change_db_per_second`
// limits the adaptation speed (uniformly operated across frames). // limits the adaptation speed (uniformly operated across frames).
// `max_output_noise_level_dbfs` limits the output noise level.
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper, AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold, int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second); float max_gain_change_db_per_second,
float max_output_noise_level_dbfs);
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete; AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) = AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
delete; delete;
@ -54,6 +57,7 @@ class AdaptiveDigitalGainApplier {
const int adjacent_speech_frames_threshold_; const int adjacent_speech_frames_threshold_;
const float max_gain_change_db_per_10ms_; const float max_gain_change_db_per_10ms_;
const float max_output_noise_level_dbfs_;
int calls_since_last_gain_log_; int calls_since_last_gain_log_;
int frames_to_gain_increase_allowed_; int frames_to_gain_increase_allowed_;

View File

@ -36,14 +36,18 @@ constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f};
constexpr float kMaxGainChangePerSecondDb = 3.f; constexpr float kMaxGainChangePerSecondDb = 3.f;
constexpr float kMaxGainChangePerFrameDb = constexpr float kMaxGainChangePerFrameDb =
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f; kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
constexpr float kMaxOutputNoiseLevelDbfs = -50.f;
// Helper to instantiate `AdaptiveDigitalGainApplier`. // Helper to instantiate `AdaptiveDigitalGainApplier`.
struct GainApplierHelper { struct GainApplierHelper {
GainApplierHelper() GainApplierHelper()
: GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {}
explicit GainApplierHelper(int adjacent_speech_frames_threshold)
: apm_data_dumper(0), : apm_data_dumper(0),
gain_applier(&apm_data_dumper, gain_applier(&apm_data_dumper,
/*adjacent_speech_frames_threshold=*/1, adjacent_speech_frames_threshold,
kMaxGainChangePerSecondDb) {} kMaxGainChangePerSecondDb,
kMaxOutputNoiseLevelDbfs) {}
ApmDataDumper apm_data_dumper; ApmDataDumper apm_data_dumper;
AdaptiveDigitalGainApplier gain_applier; AdaptiveDigitalGainApplier gain_applier;
}; };
@ -185,7 +189,8 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb; kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
constexpr int num_frames = 50; constexpr int num_frames = 50;
ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low"; ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
<< "kWithNoiseDbfs is too low";
for (int i = 0; i < num_initial_frames + num_frames; ++i) { for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f); VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
@ -223,7 +228,8 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb; kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
constexpr int num_frames = 50; constexpr int num_frames = 50;
ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low"; ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
<< "kWithNoiseDbfs is too low";
for (int i = 0; i < num_initial_frames + num_frames; ++i) { for (int i = 0; i < num_initial_frames + num_frames; ++i) {
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f); VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
@ -252,10 +258,8 @@ class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
TEST_P(AdaptiveDigitalGainApplierTest, TEST_P(AdaptiveDigitalGainApplierTest,
DoNotIncreaseGainWithTooFewSpeechFrames) { DoNotIncreaseGainWithTooFewSpeechFrames) {
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
ApmDataDumper apm_data_dumper(0); GainApplierHelper helper(adjacent_speech_frames_threshold);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
adjacent_speech_frames_threshold,
kMaxGainChangePerFrameDb);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0; info.input_level_dbfs = -25.0;
@ -263,7 +267,7 @@ TEST_P(AdaptiveDigitalGainApplierTest,
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) { for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
SCOPED_TRACE(i); SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f); VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
gain_applier.Process(info, audio.float_frame_view()); helper.gain_applier.Process(info, audio.float_frame_view());
const float gain = audio.float_frame_view().channel(0)[0]; const float gain = audio.float_frame_view().channel(0)[0];
if (i > 0) { if (i > 0) {
EXPECT_EQ(prev_gain, gain); // No gain increase. EXPECT_EQ(prev_gain, gain); // No gain increase.
@ -274,23 +278,21 @@ TEST_P(AdaptiveDigitalGainApplierTest,
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) { TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
ApmDataDumper apm_data_dumper(0); GainApplierHelper helper(adjacent_speech_frames_threshold);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
adjacent_speech_frames_threshold,
kMaxGainChangePerFrameDb);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0; info.input_level_dbfs = -25.0;
float prev_gain = 0.f; float prev_gain = 0.f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) { for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f); VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
gain_applier.Process(info, audio.float_frame_view()); helper.gain_applier.Process(info, audio.float_frame_view());
prev_gain = audio.float_frame_view().channel(0)[0]; prev_gain = audio.float_frame_view().channel(0)[0];
} }
// Process one more speech frame. // Process one more speech frame.
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f); VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
gain_applier.Process(info, audio.float_frame_view()); helper.gain_applier.Process(info, audio.float_frame_view());
// The gain has increased. // The gain has increased.
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain); EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);

View File

@ -32,9 +32,6 @@ constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
// At what limiter levels should we start decreasing the adaptive digital gain. // At what limiter levels should we start decreasing the adaptive digital gain.
constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs; constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
// This parameter must be tuned together with the noise estimator.
constexpr float kMaxNoiseLevelDbfs = -50.f;
// This is the threshold for speech. Speech frames are used for updating the // This is the threshold for speech. Speech frames are used for updating the
// speech level, measuring the amount of speech, and deciding when to allow target // speech level, measuring the amount of speech, and deciding when to allow target
// gain reduction. // gain reduction.

View File

@ -354,6 +354,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
float extra_saturation_margin_db = 2.f; float extra_saturation_margin_db = 2.f;
int gain_applier_adjacent_speech_frames_threshold = 1; int gain_applier_adjacent_speech_frames_threshold = 1;
float max_gain_change_db_per_second = 3.f; float max_gain_change_db_per_second = 3.f;
float max_output_noise_level_dbfs = -50.f;
} adaptive_digital; } adaptive_digital;
} gain_controller2; } gain_controller2;