AGC2: max output noise level now part of config
Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ic42f09dc13560494963cdcd338a0c52a729e108d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186266
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32282}
This commit is contained in:

committed by
Commit Bot

parent
c082eba758
commit
9a625e7aef
@ -30,6 +30,7 @@ void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,
|
|||||||
|
|
||||||
constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
|
constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
|
||||||
constexpr float kMaxGainChangePerSecondDb = 3.f;
|
constexpr float kMaxGainChangePerSecondDb = 3.f;
|
||||||
|
constexpr float kMaxOutputNoiseLevelDbfs = -50.f;
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
@ -37,7 +38,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
|
|||||||
: speech_level_estimator_(apm_data_dumper),
|
: speech_level_estimator_(apm_data_dumper),
|
||||||
gain_applier_(apm_data_dumper,
|
gain_applier_(apm_data_dumper,
|
||||||
kGainApplierAdjacentSpeechFramesThreshold,
|
kGainApplierAdjacentSpeechFramesThreshold,
|
||||||
kMaxGainChangePerSecondDb),
|
kMaxGainChangePerSecondDb,
|
||||||
|
kMaxOutputNoiseLevelDbfs),
|
||||||
apm_data_dumper_(apm_data_dumper),
|
apm_data_dumper_(apm_data_dumper),
|
||||||
noise_level_estimator_(apm_data_dumper) {
|
noise_level_estimator_(apm_data_dumper) {
|
||||||
RTC_DCHECK(apm_data_dumper);
|
RTC_DCHECK(apm_data_dumper);
|
||||||
@ -56,7 +58,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
|
|||||||
gain_applier_(
|
gain_applier_(
|
||||||
apm_data_dumper,
|
apm_data_dumper,
|
||||||
config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
|
config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
|
||||||
config.adaptive_digital.max_gain_change_db_per_second),
|
config.adaptive_digital.max_gain_change_db_per_second,
|
||||||
|
config.adaptive_digital.max_output_noise_level_dbfs),
|
||||||
apm_data_dumper_(apm_data_dumper),
|
apm_data_dumper_(apm_data_dumper),
|
||||||
noise_level_estimator_(apm_data_dumper) {
|
noise_level_estimator_(apm_data_dumper) {
|
||||||
RTC_DCHECK(apm_data_dumper);
|
RTC_DCHECK(apm_data_dumper);
|
||||||
|
@ -44,12 +44,16 @@ float ComputeGainDb(float input_level_dbfs) {
|
|||||||
return 0.f;
|
return 0.f;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We require 'gain + noise_level <= kMaxNoiseLevelDbfs'.
|
// Returns `target_gain` if the output noise level is below
|
||||||
|
// `max_output_noise_level_dbfs`; otherwise returns a capped gain so that the
|
||||||
|
// output noise level equals `max_output_noise_level_dbfs`.
|
||||||
float LimitGainByNoise(float target_gain,
|
float LimitGainByNoise(float target_gain,
|
||||||
float input_noise_level_dbfs,
|
float input_noise_level_dbfs,
|
||||||
ApmDataDumper* apm_data_dumper) {
|
float max_output_noise_level_dbfs,
|
||||||
const float noise_headroom_db = kMaxNoiseLevelDbfs - input_noise_level_dbfs;
|
ApmDataDumper& apm_data_dumper) {
|
||||||
apm_data_dumper->DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
|
const float noise_headroom_db =
|
||||||
|
max_output_noise_level_dbfs - input_noise_level_dbfs;
|
||||||
|
apm_data_dumper.DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
|
||||||
return std::min(target_gain, std::max(noise_headroom_db, 0.f));
|
return std::min(target_gain, std::max(noise_headroom_db, 0.f));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -89,7 +93,8 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
|
|||||||
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
|
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
|
||||||
ApmDataDumper* apm_data_dumper,
|
ApmDataDumper* apm_data_dumper,
|
||||||
int adjacent_speech_frames_threshold,
|
int adjacent_speech_frames_threshold,
|
||||||
float max_gain_change_db_per_second)
|
float max_gain_change_db_per_second,
|
||||||
|
float max_output_noise_level_dbfs)
|
||||||
: apm_data_dumper_(apm_data_dumper),
|
: apm_data_dumper_(apm_data_dumper),
|
||||||
gain_applier_(
|
gain_applier_(
|
||||||
/*hard_clip_samples=*/false,
|
/*hard_clip_samples=*/false,
|
||||||
@ -97,11 +102,14 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
|
|||||||
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
|
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
|
||||||
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
|
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
|
||||||
kFrameDurationMs / 1000.f),
|
kFrameDurationMs / 1000.f),
|
||||||
|
max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
|
||||||
calls_since_last_gain_log_(0),
|
calls_since_last_gain_log_(0),
|
||||||
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
|
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
|
||||||
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
|
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
|
||||||
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
|
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
|
||||||
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
|
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
|
||||||
|
RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f);
|
||||||
|
RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
||||||
@ -126,7 +134,8 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
|||||||
|
|
||||||
const float target_gain_db = LimitGainByLowConfidence(
|
const float target_gain_db = LimitGainByLowConfidence(
|
||||||
LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
|
LimitGainByNoise(ComputeGainDb(std::min(info.input_level_dbfs, 0.f)),
|
||||||
info.input_noise_level_dbfs, apm_data_dumper_),
|
info.input_noise_level_dbfs,
|
||||||
|
max_output_noise_level_dbfs_, *apm_data_dumper_),
|
||||||
last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);
|
last_gain_db_, info.limiter_envelope_dbfs, info.estimate_is_confident);
|
||||||
|
|
||||||
// Forbid increasing the gain until enough adjacent speech frames are
|
// Forbid increasing the gain until enough adjacent speech frames are
|
||||||
|
@ -34,12 +34,15 @@ class AdaptiveDigitalGainApplier {
|
|||||||
bool estimate_is_confident;
|
bool estimate_is_confident;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Ctor.
|
||||||
// `adjacent_speech_frames_threshold` indicates how many speech frames are
|
// `adjacent_speech_frames_threshold` indicates how many speech frames are
|
||||||
// required before a gain increase is allowed. `max_gain_change_db_per_second`
|
// required before a gain increase is allowed. `max_gain_change_db_per_second`
|
||||||
// limits the adaptation speed (uniformly operated across frames).
|
// limits the adaptation speed (uniformly operated across frames).
|
||||||
|
// `max_output_noise_level_dbfs` limits the output noise level.
|
||||||
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
|
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
|
||||||
int adjacent_speech_frames_threshold,
|
int adjacent_speech_frames_threshold,
|
||||||
float max_gain_change_db_per_second);
|
float max_gain_change_db_per_second,
|
||||||
|
float max_output_noise_level_dbfs);
|
||||||
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
|
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
|
||||||
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
|
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
|
||||||
delete;
|
delete;
|
||||||
@ -54,6 +57,7 @@ class AdaptiveDigitalGainApplier {
|
|||||||
|
|
||||||
const int adjacent_speech_frames_threshold_;
|
const int adjacent_speech_frames_threshold_;
|
||||||
const float max_gain_change_db_per_10ms_;
|
const float max_gain_change_db_per_10ms_;
|
||||||
|
const float max_output_noise_level_dbfs_;
|
||||||
|
|
||||||
int calls_since_last_gain_log_;
|
int calls_since_last_gain_log_;
|
||||||
int frames_to_gain_increase_allowed_;
|
int frames_to_gain_increase_allowed_;
|
||||||
|
@ -36,14 +36,18 @@ constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f};
|
|||||||
constexpr float kMaxGainChangePerSecondDb = 3.f;
|
constexpr float kMaxGainChangePerSecondDb = 3.f;
|
||||||
constexpr float kMaxGainChangePerFrameDb =
|
constexpr float kMaxGainChangePerFrameDb =
|
||||||
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
|
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
|
||||||
|
constexpr float kMaxOutputNoiseLevelDbfs = -50.f;
|
||||||
|
|
||||||
// Helper to instance `AdaptiveDigitalGainApplier`.
|
// Helper to instance `AdaptiveDigitalGainApplier`.
|
||||||
struct GainApplierHelper {
|
struct GainApplierHelper {
|
||||||
GainApplierHelper()
|
GainApplierHelper()
|
||||||
|
: GainApplierHelper(/*adjacent_speech_frames_threshold=*/1) {}
|
||||||
|
explicit GainApplierHelper(int adjacent_speech_frames_threshold)
|
||||||
: apm_data_dumper(0),
|
: apm_data_dumper(0),
|
||||||
gain_applier(&apm_data_dumper,
|
gain_applier(&apm_data_dumper,
|
||||||
/*adjacent_speech_frames_threshold=*/1,
|
adjacent_speech_frames_threshold,
|
||||||
kMaxGainChangePerSecondDb) {}
|
kMaxGainChangePerSecondDb,
|
||||||
|
kMaxOutputNoiseLevelDbfs) {}
|
||||||
ApmDataDumper apm_data_dumper;
|
ApmDataDumper apm_data_dumper;
|
||||||
AdaptiveDigitalGainApplier gain_applier;
|
AdaptiveDigitalGainApplier gain_applier;
|
||||||
};
|
};
|
||||||
@ -185,7 +189,8 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
|
|||||||
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
|
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
|
||||||
constexpr int num_frames = 50;
|
constexpr int num_frames = 50;
|
||||||
|
|
||||||
ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";
|
ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
|
||||||
|
<< "kWithNoiseDbfs is too low";
|
||||||
|
|
||||||
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
|
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
|
||||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
|
||||||
@ -223,7 +228,8 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
|
|||||||
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
|
kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
|
||||||
constexpr int num_frames = 50;
|
constexpr int num_frames = 50;
|
||||||
|
|
||||||
ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";
|
ASSERT_GT(kWithNoiseDbfs, kMaxOutputNoiseLevelDbfs)
|
||||||
|
<< "kWithNoiseDbfs is too low";
|
||||||
|
|
||||||
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
|
for (int i = 0; i < num_initial_frames + num_frames; ++i) {
|
||||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
|
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
|
||||||
@ -252,10 +258,8 @@ class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
|
|||||||
TEST_P(AdaptiveDigitalGainApplierTest,
|
TEST_P(AdaptiveDigitalGainApplierTest,
|
||||||
DoNotIncreaseGainWithTooFewSpeechFrames) {
|
DoNotIncreaseGainWithTooFewSpeechFrames) {
|
||||||
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
|
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
|
||||||
ApmDataDumper apm_data_dumper(0);
|
GainApplierHelper helper(adjacent_speech_frames_threshold);
|
||||||
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
|
|
||||||
adjacent_speech_frames_threshold,
|
|
||||||
kMaxGainChangePerFrameDb);
|
|
||||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||||
info.input_level_dbfs = -25.0;
|
info.input_level_dbfs = -25.0;
|
||||||
|
|
||||||
@ -263,7 +267,7 @@ TEST_P(AdaptiveDigitalGainApplierTest,
|
|||||||
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
|
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
|
||||||
gain_applier.Process(info, audio.float_frame_view());
|
helper.gain_applier.Process(info, audio.float_frame_view());
|
||||||
const float gain = audio.float_frame_view().channel(0)[0];
|
const float gain = audio.float_frame_view().channel(0)[0];
|
||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
EXPECT_EQ(prev_gain, gain); // No gain increase.
|
EXPECT_EQ(prev_gain, gain); // No gain increase.
|
||||||
@ -274,23 +278,21 @@ TEST_P(AdaptiveDigitalGainApplierTest,
|
|||||||
|
|
||||||
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
|
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
|
||||||
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
|
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
|
||||||
ApmDataDumper apm_data_dumper(0);
|
GainApplierHelper helper(adjacent_speech_frames_threshold);
|
||||||
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
|
|
||||||
adjacent_speech_frames_threshold,
|
|
||||||
kMaxGainChangePerFrameDb);
|
|
||||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||||
info.input_level_dbfs = -25.0;
|
info.input_level_dbfs = -25.0;
|
||||||
|
|
||||||
float prev_gain = 0.f;
|
float prev_gain = 0.f;
|
||||||
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
||||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
|
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
|
||||||
gain_applier.Process(info, audio.float_frame_view());
|
helper.gain_applier.Process(info, audio.float_frame_view());
|
||||||
prev_gain = audio.float_frame_view().channel(0)[0];
|
prev_gain = audio.float_frame_view().channel(0)[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process one more speech frame.
|
// Process one more speech frame.
|
||||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
|
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.f);
|
||||||
gain_applier.Process(info, audio.float_frame_view());
|
helper.gain_applier.Process(info, audio.float_frame_view());
|
||||||
|
|
||||||
// The gain has increased.
|
// The gain has increased.
|
||||||
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
|
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
|
||||||
|
@ -32,9 +32,6 @@ constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
|
|||||||
// At what limiter levels should we start decreasing the adaptive digital gain.
|
// At what limiter levels should we start decreasing the adaptive digital gain.
|
||||||
constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
|
constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
|
||||||
|
|
||||||
// This parameter must be tuned together with the noise estimator.
|
|
||||||
constexpr float kMaxNoiseLevelDbfs = -50.f;
|
|
||||||
|
|
||||||
// This is the threshold for speech. Speech frames are used for updating the
|
// This is the threshold for speech. Speech frames are used for updating the
|
||||||
// speech level, measuring the amount of speech, and decide when to allow target
|
// speech level, measuring the amount of speech, and decide when to allow target
|
||||||
// gain reduction.
|
// gain reduction.
|
||||||
|
@ -354,6 +354,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
|
|||||||
float extra_saturation_margin_db = 2.f;
|
float extra_saturation_margin_db = 2.f;
|
||||||
int gain_applier_adjacent_speech_frames_threshold = 1;
|
int gain_applier_adjacent_speech_frames_threshold = 1;
|
||||||
float max_gain_change_db_per_second = 3.f;
|
float max_gain_change_db_per_second = 3.f;
|
||||||
|
float max_output_noise_level_dbfs = -50.f;
|
||||||
} adaptive_digital;
|
} adaptive_digital;
|
||||||
} gain_controller2;
|
} gain_controller2;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user