AGC2: adding level estimation option (RMS or peak-based).
This CL makes it possible to choose the level estimator for the adaptive digital GC of AGC2. The options are RMS (default and currently used estimator) and peak-based (already computed, but not used). Besides adding the new AGC2 config param for the level estimator, this CL also refactors the config class by making it more structured. Bug: webrtc:7494 Change-Id: I20eb558ca50f13536aa7bdea08d21de3b630f8bc Reviewed-on: https://webrtc-review.googlesource.com/c/110144 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Alex Loiko <aleloi@webrtc.org> Cr-Commit-Position: refs/heads/master@{#25620}
This commit is contained in:

committed by
Commit Bot

parent
44ca9a392a
commit
1e2542f593
@ -429,6 +429,7 @@ if (rtc_include_tests) {
|
||||
"agc2:biquad_filter_unittests",
|
||||
"agc2:fixed_digital_unittests",
|
||||
"agc2:noise_estimator_unittests",
|
||||
"agc2:rnn_vad_with_level_unittests",
|
||||
"agc2:test_utils",
|
||||
"agc2/rnn_vad:unittests",
|
||||
"test/conversational_speech:unittest",
|
||||
|
@ -27,6 +27,7 @@ rtc_source_set("level_estimation_agc") {
|
||||
":gain_applier",
|
||||
":noise_level_estimator",
|
||||
":rnn_vad_with_level",
|
||||
"..:api",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
@ -58,6 +59,7 @@ rtc_source_set("adaptive_digital") {
|
||||
":gain_applier",
|
||||
":noise_level_estimator",
|
||||
":rnn_vad_with_level",
|
||||
"..:api",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
@ -257,6 +259,18 @@ rtc_source_set("noise_estimator_unittests") {
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("rnn_vad_with_level_unittests") {
|
||||
testonly = true
|
||||
sources = [
|
||||
"vad_with_level_unittest.cc",
|
||||
]
|
||||
deps = [
|
||||
":rnn_vad_with_level",
|
||||
"..:audio_frame_view",
|
||||
"../../../rtc_base:rtc_base_tests_utils",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("test_utils") {
|
||||
testonly = true
|
||||
visibility = [
|
||||
|
@ -26,8 +26,12 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
|
||||
}
|
||||
|
||||
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
|
||||
float extra_saturation_margin_db)
|
||||
: speech_level_estimator_(apm_data_dumper, extra_saturation_margin_db),
|
||||
const AudioProcessing::Config::GainController2& config)
|
||||
: speech_level_estimator_(
|
||||
apm_data_dumper,
|
||||
config.adaptive_digital.level_estimator,
|
||||
config.adaptive_digital.use_saturation_protector,
|
||||
config.adaptive_digital.extra_saturation_margin_db),
|
||||
gain_applier_(apm_data_dumper),
|
||||
apm_data_dumper_(apm_data_dumper),
|
||||
noise_level_estimator_(apm_data_dumper) {
|
||||
@ -44,9 +48,9 @@ void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
|
||||
signal_with_levels.vad_result.speech_probability);
|
||||
apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs",
|
||||
signal_with_levels.vad_result.speech_rms_dbfs);
|
||||
|
||||
apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs",
|
||||
signal_with_levels.vad_result.speech_peak_dbfs);
|
||||
|
||||
speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result);
|
||||
|
||||
signal_with_levels.input_level_dbfs =
|
||||
@ -68,7 +72,6 @@ void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
|
||||
|
||||
// The gain applier applies the gain.
|
||||
gain_applier_.Process(signal_with_levels);
|
||||
;
|
||||
}
|
||||
|
||||
void AdaptiveAgc::Reset() {
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "modules/audio_processing/agc2/noise_level_estimator.h"
|
||||
#include "modules/audio_processing/agc2/vad_with_level.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
|
||||
namespace webrtc {
|
||||
class ApmDataDumper;
|
||||
@ -23,7 +24,8 @@ class ApmDataDumper;
|
||||
class AdaptiveAgc {
|
||||
public:
|
||||
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
|
||||
AdaptiveAgc(ApmDataDumper* apm_data_dumper, float extra_saturation_margin_db);
|
||||
AdaptiveAgc(ApmDataDumper* apm_data_dumper,
|
||||
const AudioProcessing::Config::GainController2& config);
|
||||
~AdaptiveAgc();
|
||||
|
||||
void Process(AudioFrameView<float> float_frame, float last_audio_level);
|
||||
|
@ -19,13 +19,20 @@ namespace webrtc {
|
||||
|
||||
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper)
|
||||
: saturation_protector_(apm_data_dumper),
|
||||
: level_estimator_(
|
||||
AudioProcessing::Config::GainController2::LevelEstimator::kRms),
|
||||
use_saturation_protector_(true),
|
||||
saturation_protector_(apm_data_dumper),
|
||||
apm_data_dumper_(apm_data_dumper) {}
|
||||
|
||||
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
|
||||
bool use_saturation_protector,
|
||||
float extra_saturation_margin_db)
|
||||
: saturation_protector_(apm_data_dumper, extra_saturation_margin_db),
|
||||
: level_estimator_(level_estimator),
|
||||
use_saturation_protector_(use_saturation_protector),
|
||||
saturation_protector_(apm_data_dumper, extra_saturation_margin_db),
|
||||
apm_data_dumper_(apm_data_dumper) {}
|
||||
|
||||
void AdaptiveModeLevelEstimator::UpdateEstimation(
|
||||
@ -49,20 +56,38 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
|
||||
|
||||
const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f;
|
||||
|
||||
// Read speech level estimation.
|
||||
float speech_level_dbfs = 0.f;
|
||||
using LevelEstimatorType =
|
||||
AudioProcessing::Config::GainController2::LevelEstimator;
|
||||
switch (level_estimator_) {
|
||||
case LevelEstimatorType::kRms:
|
||||
speech_level_dbfs = vad_data.speech_rms_dbfs;
|
||||
break;
|
||||
case LevelEstimatorType::kPeak:
|
||||
speech_level_dbfs = vad_data.speech_peak_dbfs;
|
||||
break;
|
||||
}
|
||||
|
||||
// Update speech level estimation.
|
||||
estimate_numerator_ = estimate_numerator_ * leak_factor +
|
||||
vad_data.speech_rms_dbfs * vad_data.speech_probability;
|
||||
speech_level_dbfs * vad_data.speech_probability;
|
||||
estimate_denominator_ =
|
||||
estimate_denominator_ * leak_factor + vad_data.speech_probability;
|
||||
|
||||
last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_;
|
||||
|
||||
saturation_protector_.UpdateMargin(vad_data, last_estimate_with_offset_dbfs_);
|
||||
if (use_saturation_protector_) {
|
||||
saturation_protector_.UpdateMargin(vad_data,
|
||||
last_estimate_with_offset_dbfs_);
|
||||
DebugDumpEstimate();
|
||||
}
|
||||
}
|
||||
|
||||
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
|
||||
return rtc::SafeClamp<float>(
|
||||
last_estimate_with_offset_dbfs_ + saturation_protector_.LastMargin(),
|
||||
last_estimate_with_offset_dbfs_ +
|
||||
(use_saturation_protector_ ? saturation_protector_.LastMargin()
|
||||
: 0.f),
|
||||
-90.f, 30.f);
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "modules/audio_processing/agc2/agc2_common.h" // kFullBufferSizeMs...
|
||||
#include "modules/audio_processing/agc2/saturation_protector.h"
|
||||
#include "modules/audio_processing/agc2/vad_with_level.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
|
||||
namespace webrtc {
|
||||
class ApmDataDumper;
|
||||
@ -23,7 +24,10 @@ class ApmDataDumper;
|
||||
class AdaptiveModeLevelEstimator {
|
||||
public:
|
||||
explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
|
||||
AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper,
|
||||
AdaptiveModeLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
|
||||
bool use_saturation_protector,
|
||||
float extra_saturation_margin_db);
|
||||
void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
|
||||
float LatestLevelEstimate() const;
|
||||
@ -35,6 +39,9 @@ class AdaptiveModeLevelEstimator {
|
||||
private:
|
||||
void DebugDumpEstimate();
|
||||
|
||||
const AudioProcessing::Config::GainController2::LevelEstimator
|
||||
level_estimator_;
|
||||
const bool use_saturation_protector_;
|
||||
size_t buffer_size_ms_ = 0;
|
||||
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
|
||||
float estimate_numerator_ = 0.f;
|
||||
|
40
modules/audio_processing/agc2/vad_with_level_unittest.cc
Normal file
40
modules/audio_processing/agc2/vad_with_level_unittest.cc
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/vad_with_level.h"
|
||||
|
||||
#include "rtc_base/gunit.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace test {
|
||||
|
||||
TEST(AutomaticGainController2VadWithLevelEstimator,
|
||||
PeakLevelGreaterThanRmsLevel) {
|
||||
constexpr size_t kSampleRateHz = 8000;
|
||||
|
||||
// 10 ms input frame, constant except for one peak value.
|
||||
// Handcrafted so that the average is lower than the peak value.
|
||||
std::array<float, kSampleRateHz / 100> frame;
|
||||
frame.fill(1000.f);
|
||||
frame[10] = 2000.f;
|
||||
float* const channel0 = frame.data();
|
||||
AudioFrameView<float> frame_view(&channel0, 1, frame.size());
|
||||
|
||||
// Compute audio frame levels (the VAD result is ignored).
|
||||
VadWithLevel vad_with_level;
|
||||
auto levels_and_vad_prob = vad_with_level.AnalyzeFrame(frame_view);
|
||||
|
||||
// Compare peak and RMS levels.
|
||||
EXPECT_LT(levels_and_vad_prob.speech_rms_dbfs,
|
||||
levels_and_vad_prob.speech_peak_dbfs);
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace webrtc
|
@ -65,30 +65,52 @@ void GainController2::ApplyConfig(
|
||||
RTC_DCHECK(Validate(config))
|
||||
<< " the invalid config was " << ToString(config);
|
||||
|
||||
if (config.fixed_gain_db != config_.fixed_gain_db) {
|
||||
config_ = config;
|
||||
if (config.fixed_digital.gain_db != config_.fixed_digital.gain_db) {
|
||||
// Reset the limiter to quickly react on abrupt level changes caused by
|
||||
// large changes of the fixed gain.
|
||||
limiter_.Reset();
|
||||
}
|
||||
config_ = config;
|
||||
gain_applier_.SetGainFactor(DbToRatio(config_.fixed_gain_db));
|
||||
adaptive_digital_mode_ = config_.adaptive_digital_mode;
|
||||
adaptive_agc_.reset(
|
||||
new AdaptiveAgc(data_dumper_.get(), config_.extra_saturation_margin_db));
|
||||
gain_applier_.SetGainFactor(DbToRatio(config_.fixed_digital.gain_db));
|
||||
adaptive_digital_mode_ = config_.adaptive_digital.enabled;
|
||||
adaptive_agc_.reset(new AdaptiveAgc(data_dumper_.get(), config_));
|
||||
}
|
||||
|
||||
bool GainController2::Validate(
|
||||
const AudioProcessing::Config::GainController2& config) {
|
||||
return config.fixed_gain_db >= 0.f && config.fixed_gain_db < 50.f &&
|
||||
config.extra_saturation_margin_db >= 0.f &&
|
||||
config.extra_saturation_margin_db <= 100.f;
|
||||
return config.fixed_digital.gain_db >= 0.f &&
|
||||
config.fixed_digital.gain_db < 50.f &&
|
||||
config.adaptive_digital.extra_saturation_margin_db >= 0.f &&
|
||||
config.adaptive_digital.extra_saturation_margin_db <= 100.f;
|
||||
}
|
||||
|
||||
std::string GainController2::ToString(
|
||||
const AudioProcessing::Config::GainController2& config) {
|
||||
rtc::StringBuilder ss;
|
||||
ss << "{enabled: " << (config.enabled ? "true" : "false") << ", "
|
||||
<< "fixed_gain_dB: " << config.fixed_gain_db << "}";
|
||||
std::string adaptive_digital_level_estimator;
|
||||
using LevelEstimatorType =
|
||||
AudioProcessing::Config::GainController2::LevelEstimator;
|
||||
switch (config.adaptive_digital.level_estimator) {
|
||||
case LevelEstimatorType::kRms:
|
||||
adaptive_digital_level_estimator = "RMS";
|
||||
break;
|
||||
case LevelEstimatorType::kPeak:
|
||||
adaptive_digital_level_estimator = "peak";
|
||||
break;
|
||||
}
|
||||
// clang-format off
|
||||
// clang formatting doesn't respect custom nested style.
|
||||
ss << "{"
|
||||
<< "enabled: " << (config.enabled ? "true" : "false") << ", "
|
||||
<< "fixed_digital: {gain_db: " << config.fixed_digital.gain_db << "}, "
|
||||
<< "adaptive_digital: {"
|
||||
<< "enabled: "
|
||||
<< (config.adaptive_digital.enabled ? "true" : "false") << ", "
|
||||
<< "level_estimator: " << adaptive_digital_level_estimator << ", "
|
||||
<< "extra_saturation_margin_db:"
|
||||
<< config.adaptive_digital.extra_saturation_margin_db << "}"
|
||||
<< "}";
|
||||
// clang-format on
|
||||
return ss.Release();
|
||||
}
|
||||
|
||||
|
@ -52,8 +52,8 @@ float RunAgc2WithConstantInput(GainController2* agc2,
|
||||
AudioProcessing::Config::GainController2 CreateAgc2FixedDigitalModeConfig(
|
||||
float fixed_gain_db) {
|
||||
AudioProcessing::Config::GainController2 config;
|
||||
config.adaptive_digital_mode = false;
|
||||
config.fixed_gain_db = fixed_gain_db;
|
||||
config.adaptive_digital.enabled = false;
|
||||
config.fixed_digital.gain_db = fixed_gain_db;
|
||||
// TODO(alessiob): Check why ASSERT_TRUE() below does not compile.
|
||||
EXPECT_TRUE(GainController2::Validate(config));
|
||||
return config;
|
||||
@ -113,29 +113,26 @@ TEST(GainController2, CreateApplyConfig) {
|
||||
gain_controller2->ApplyConfig(config);
|
||||
|
||||
// Check that attenuation is not allowed.
|
||||
config.fixed_gain_db = -5.f;
|
||||
config.fixed_digital.gain_db = -5.f;
|
||||
EXPECT_FALSE(GainController2::Validate(config));
|
||||
|
||||
// Check that valid configurations are applied.
|
||||
for (const float& fixed_gain_db : {0.f, 5.f, 10.f, 40.f}) {
|
||||
config.fixed_gain_db = fixed_gain_db;
|
||||
config.fixed_digital.gain_db = fixed_gain_db;
|
||||
EXPECT_TRUE(GainController2::Validate(config));
|
||||
gain_controller2->ApplyConfig(config);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(GainController2, ToString) {
|
||||
// Tests GainController2::ToString().
|
||||
// Tests GainController2::ToString(). Only test the enabled property.
|
||||
AudioProcessing::Config::GainController2 config;
|
||||
config.fixed_gain_db = 5.f;
|
||||
|
||||
config.enabled = false;
|
||||
EXPECT_EQ("{enabled: false, fixed_gain_dB: 5}",
|
||||
GainController2::ToString(config));
|
||||
EXPECT_EQ("{enabled: false", GainController2::ToString(config).substr(0, 15));
|
||||
|
||||
config.enabled = true;
|
||||
EXPECT_EQ("{enabled: true, fixed_gain_dB: 5}",
|
||||
GainController2::ToString(config));
|
||||
EXPECT_EQ("{enabled: true", GainController2::ToString(config).substr(0, 14));
|
||||
}
|
||||
|
||||
TEST(GainController2FixedDigital, GainShouldChangeOnSetGain) {
|
||||
@ -263,8 +260,8 @@ TEST(GainController2, UsageSaturationMargin) {
|
||||
// Check that samples are not amplified as much when extra margin is
|
||||
// high. They should not be amplified at all, but only after convergence. GC2
|
||||
// starts with a gain, and it takes time until it's down to 0 dB.
|
||||
config.extra_saturation_margin_db = 50.f;
|
||||
config.fixed_gain_db = 0.f;
|
||||
config.fixed_digital.gain_db = 0.f;
|
||||
config.adaptive_digital.extra_saturation_margin_db = 50.f;
|
||||
gain_controller2.ApplyConfig(config);
|
||||
|
||||
EXPECT_LT(GainAfterProcessingFile(&gain_controller2), 2.f);
|
||||
@ -276,8 +273,8 @@ TEST(GainController2, UsageNoSaturationMargin) {
|
||||
|
||||
AudioProcessing::Config::GainController2 config;
|
||||
// Check that some gain is applied if there is no margin.
|
||||
config.extra_saturation_margin_db = 0.f;
|
||||
config.fixed_gain_db = 0.f;
|
||||
config.fixed_digital.gain_db = 0.f;
|
||||
config.adaptive_digital.extra_saturation_margin_db = 0.f;
|
||||
gain_controller2.ApplyConfig(config);
|
||||
|
||||
EXPECT_GT(GainAfterProcessingFile(&gain_controller2), 2.f);
|
||||
|
@ -270,10 +270,23 @@ class AudioProcessing : public rtc::RefCountInterface {
|
||||
// first applies a fixed gain. The adaptive digital AGC can be turned off by
|
||||
// setting |adaptive_digital.enabled=false|.
|
||||
struct GainController2 {
|
||||
enum LevelEstimator { kRms, kPeak };
|
||||
bool enabled = false;
|
||||
bool adaptive_digital_mode = true;
|
||||
struct {
|
||||
float gain_db = 0.f;
|
||||
} fixed_digital;
|
||||
struct {
|
||||
bool enabled = true;
|
||||
LevelEstimator level_estimator = kRms;
|
||||
bool use_saturation_protector = true;
|
||||
float extra_saturation_margin_db = 2.f;
|
||||
} adaptive_digital;
|
||||
// Deprecated.
|
||||
// TODO(webrtc:7494): Switch to fixed_digital.gain_db and remove.
|
||||
float fixed_gain_db = 0.f;
|
||||
// Deprecated.
|
||||
// TODO(webrtc:7494): Switch to adaptive_digital.enabled and remove.
|
||||
bool adaptive_digital_mode = false;
|
||||
} gain_controller2;
|
||||
|
||||
// Explicit copy assignment implementation to avoid issues with memory
|
||||
|
@ -351,9 +351,10 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
|
||||
}
|
||||
if (settings_.use_agc2) {
|
||||
apm_config.gain_controller2.enabled = *settings_.use_agc2;
|
||||
apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db;
|
||||
apm_config.gain_controller2.fixed_digital.gain_db =
|
||||
settings_.agc2_fixed_gain_db;
|
||||
if (settings_.agc2_use_adaptive_gain) {
|
||||
apm_config.gain_controller2.adaptive_digital_mode =
|
||||
apm_config.gain_controller2.adaptive_digital.enabled =
|
||||
*settings_.agc2_use_adaptive_gain;
|
||||
}
|
||||
}
|
||||
|
@ -151,7 +151,7 @@ std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
|
||||
apm_config.high_pass_filter.enabled = hpf;
|
||||
apm_config.gain_controller2.enabled = use_agc2_limiter;
|
||||
|
||||
apm_config.gain_controller2.fixed_gain_db = gain_controller2_gain_db;
|
||||
apm_config.gain_controller2.fixed_digital.gain_db = gain_controller2_gain_db;
|
||||
|
||||
apm->ApplyConfig(apm_config);
|
||||
|
||||
|
Reference in New Issue
Block a user