AGC2: adding level estimation option (RMS or peak-based).

This CL makes it possible to choose the level estimator used by the adaptive
digital GC of AGC2. The options are RMS (the default and currently used
estimator) and peak-based (already computed, but not used).

Besides adding the new AGC2 config param for the level estimator, this CL
also refactors the config class by making it more structured.

Bug: webrtc:7494
Change-Id: I20eb558ca50f13536aa7bdea08d21de3b630f8bc
Reviewed-on: https://webrtc-review.googlesource.com/c/110144
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25620}
This commit is contained in:
Alessio Bazzica
2018-11-13 14:44:15 +01:00
committed by Commit Bot
parent 44ca9a392a
commit 1e2542f593
12 changed files with 169 additions and 44 deletions

View File

@ -429,6 +429,7 @@ if (rtc_include_tests) {
"agc2:biquad_filter_unittests", "agc2:biquad_filter_unittests",
"agc2:fixed_digital_unittests", "agc2:fixed_digital_unittests",
"agc2:noise_estimator_unittests", "agc2:noise_estimator_unittests",
"agc2:rnn_vad_with_level_unittests",
"agc2:test_utils", "agc2:test_utils",
"agc2/rnn_vad:unittests", "agc2/rnn_vad:unittests",
"test/conversational_speech:unittest", "test/conversational_speech:unittest",

View File

@ -27,6 +27,7 @@ rtc_source_set("level_estimation_agc") {
":gain_applier", ":gain_applier",
":noise_level_estimator", ":noise_level_estimator",
":rnn_vad_with_level", ":rnn_vad_with_level",
"..:api",
"..:apm_logging", "..:apm_logging",
"..:audio_frame_view", "..:audio_frame_view",
"../../../api:array_view", "../../../api:array_view",
@ -58,6 +59,7 @@ rtc_source_set("adaptive_digital") {
":gain_applier", ":gain_applier",
":noise_level_estimator", ":noise_level_estimator",
":rnn_vad_with_level", ":rnn_vad_with_level",
"..:api",
"..:apm_logging", "..:apm_logging",
"..:audio_frame_view", "..:audio_frame_view",
"../../../api:array_view", "../../../api:array_view",
@ -257,6 +259,18 @@ rtc_source_set("noise_estimator_unittests") {
] ]
} }
# Unit test target built from vad_with_level_unittest.cc; it depends on the
# ":rnn_vad_with_level" target declared in this build file.
rtc_source_set("rnn_vad_with_level_unittests") {
# Test-only target.
testonly = true
sources = [
"vad_with_level_unittest.cc",
]
deps = [
":rnn_vad_with_level",
"..:audio_frame_view",
"../../../rtc_base:rtc_base_tests_utils",
]
}
rtc_source_set("test_utils") { rtc_source_set("test_utils") {
testonly = true testonly = true
visibility = [ visibility = [

View File

@ -26,8 +26,12 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
} }
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
float extra_saturation_margin_db) const AudioProcessing::Config::GainController2& config)
: speech_level_estimator_(apm_data_dumper, extra_saturation_margin_db), : speech_level_estimator_(
apm_data_dumper,
config.adaptive_digital.level_estimator,
config.adaptive_digital.use_saturation_protector,
config.adaptive_digital.extra_saturation_margin_db),
gain_applier_(apm_data_dumper), gain_applier_(apm_data_dumper),
apm_data_dumper_(apm_data_dumper), apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) { noise_level_estimator_(apm_data_dumper) {
@ -44,9 +48,9 @@ void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
signal_with_levels.vad_result.speech_probability); signal_with_levels.vad_result.speech_probability);
apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs",
signal_with_levels.vad_result.speech_rms_dbfs); signal_with_levels.vad_result.speech_rms_dbfs);
apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs",
signal_with_levels.vad_result.speech_peak_dbfs); signal_with_levels.vad_result.speech_peak_dbfs);
speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result); speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result);
signal_with_levels.input_level_dbfs = signal_with_levels.input_level_dbfs =
@ -68,7 +72,6 @@ void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
// The gain applier applies the gain. // The gain applier applies the gain.
gain_applier_.Process(signal_with_levels); gain_applier_.Process(signal_with_levels);
;
} }
void AdaptiveAgc::Reset() { void AdaptiveAgc::Reset() {

View File

@ -16,6 +16,7 @@
#include "modules/audio_processing/agc2/noise_level_estimator.h" #include "modules/audio_processing/agc2/noise_level_estimator.h"
#include "modules/audio_processing/agc2/vad_with_level.h" #include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_frame_view.h" #include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc { namespace webrtc {
class ApmDataDumper; class ApmDataDumper;
@ -23,7 +24,8 @@ class ApmDataDumper;
class AdaptiveAgc { class AdaptiveAgc {
public: public:
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
AdaptiveAgc(ApmDataDumper* apm_data_dumper, float extra_saturation_margin_db); AdaptiveAgc(ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2& config);
~AdaptiveAgc(); ~AdaptiveAgc();
void Process(AudioFrameView<float> float_frame, float last_audio_level); void Process(AudioFrameView<float> float_frame, float last_audio_level);

View File

@ -19,13 +19,20 @@ namespace webrtc {
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper) ApmDataDumper* apm_data_dumper)
: saturation_protector_(apm_data_dumper), : level_estimator_(
AudioProcessing::Config::GainController2::LevelEstimator::kRms),
use_saturation_protector_(true),
saturation_protector_(apm_data_dumper),
apm_data_dumper_(apm_data_dumper) {} apm_data_dumper_(apm_data_dumper) {}
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper, ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
bool use_saturation_protector,
float extra_saturation_margin_db) float extra_saturation_margin_db)
: saturation_protector_(apm_data_dumper, extra_saturation_margin_db), : level_estimator_(level_estimator),
use_saturation_protector_(use_saturation_protector),
saturation_protector_(apm_data_dumper, extra_saturation_margin_db),
apm_data_dumper_(apm_data_dumper) {} apm_data_dumper_(apm_data_dumper) {}
void AdaptiveModeLevelEstimator::UpdateEstimation( void AdaptiveModeLevelEstimator::UpdateEstimation(
@ -49,20 +56,38 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f; const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f;
// Read speech level estimation.
float speech_level_dbfs = 0.f;
using LevelEstimatorType =
AudioProcessing::Config::GainController2::LevelEstimator;
switch (level_estimator_) {
case LevelEstimatorType::kRms:
speech_level_dbfs = vad_data.speech_rms_dbfs;
break;
case LevelEstimatorType::kPeak:
speech_level_dbfs = vad_data.speech_peak_dbfs;
break;
}
// Update speech level estimation.
estimate_numerator_ = estimate_numerator_ * leak_factor + estimate_numerator_ = estimate_numerator_ * leak_factor +
vad_data.speech_rms_dbfs * vad_data.speech_probability; speech_level_dbfs * vad_data.speech_probability;
estimate_denominator_ = estimate_denominator_ =
estimate_denominator_ * leak_factor + vad_data.speech_probability; estimate_denominator_ * leak_factor + vad_data.speech_probability;
last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_; last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_;
saturation_protector_.UpdateMargin(vad_data, last_estimate_with_offset_dbfs_); if (use_saturation_protector_) {
saturation_protector_.UpdateMargin(vad_data,
last_estimate_with_offset_dbfs_);
DebugDumpEstimate(); DebugDumpEstimate();
} }
}
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const { float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
return rtc::SafeClamp<float>( return rtc::SafeClamp<float>(
last_estimate_with_offset_dbfs_ + saturation_protector_.LastMargin(), last_estimate_with_offset_dbfs_ +
(use_saturation_protector_ ? saturation_protector_.LastMargin()
: 0.f),
-90.f, 30.f); -90.f, 30.f);
} }

View File

@ -16,6 +16,7 @@
#include "modules/audio_processing/agc2/agc2_common.h" // kFullBufferSizeMs... #include "modules/audio_processing/agc2/agc2_common.h" // kFullBufferSizeMs...
#include "modules/audio_processing/agc2/saturation_protector.h" #include "modules/audio_processing/agc2/saturation_protector.h"
#include "modules/audio_processing/agc2/vad_with_level.h" #include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc { namespace webrtc {
class ApmDataDumper; class ApmDataDumper;
@ -23,7 +24,10 @@ class ApmDataDumper;
class AdaptiveModeLevelEstimator { class AdaptiveModeLevelEstimator {
public: public:
explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper); explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper, AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
bool use_saturation_protector,
float extra_saturation_margin_db); float extra_saturation_margin_db);
void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data); void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
float LatestLevelEstimate() const; float LatestLevelEstimate() const;
@ -35,6 +39,9 @@ class AdaptiveModeLevelEstimator {
private: private:
void DebugDumpEstimate(); void DebugDumpEstimate();
const AudioProcessing::Config::GainController2::LevelEstimator
level_estimator_;
const bool use_saturation_protector_;
size_t buffer_size_ms_ = 0; size_t buffer_size_ms_ = 0;
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs; float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
float estimate_numerator_ = 0.f; float estimate_numerator_ = 0.f;

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "rtc_base/gunit.h"
namespace webrtc {
namespace test {
TEST(AutomaticGainController2VadWithLevelEstimator,
     PeakLevelGreaterThanRmsLevel) {
  // A single 10 ms mono frame at 8 kHz.
  constexpr size_t kSampleRateHz = 8000;
  constexpr size_t kSamplesPerFrame = kSampleRateHz / 100;
  constexpr size_t kPeakSampleIndex = 10;

  // Flat signal with exactly one louder sample, so that the frame average
  // stays below the frame peak.
  std::array<float, kSamplesPerFrame> samples;
  samples.fill(1000.f);
  samples[kPeakSampleIndex] = 2000.f;

  // Wrap the buffer as a one-channel frame view.
  float* const mono_channel = samples.data();
  AudioFrameView<float> mono_view(&mono_channel, 1, samples.size());

  // Analyze the frame; only the level fields of the result are checked here.
  VadWithLevel analyzer;
  const auto analysis = analyzer.AnalyzeFrame(mono_view);

  // The RMS level must be strictly lower than the peak level.
  EXPECT_LT(analysis.speech_rms_dbfs, analysis.speech_peak_dbfs);
}
} // namespace test
} // namespace webrtc

View File

@ -65,30 +65,52 @@ void GainController2::ApplyConfig(
RTC_DCHECK(Validate(config)) RTC_DCHECK(Validate(config))
<< " the invalid config was " << ToString(config); << " the invalid config was " << ToString(config);
if (config.fixed_gain_db != config_.fixed_gain_db) { config_ = config;
if (config.fixed_digital.gain_db != config_.fixed_digital.gain_db) {
// Reset the limiter to quickly react on abrupt level changes caused by // Reset the limiter to quickly react on abrupt level changes caused by
// large changes of the fixed gain. // large changes of the fixed gain.
limiter_.Reset(); limiter_.Reset();
} }
config_ = config; gain_applier_.SetGainFactor(DbToRatio(config_.fixed_digital.gain_db));
gain_applier_.SetGainFactor(DbToRatio(config_.fixed_gain_db)); adaptive_digital_mode_ = config_.adaptive_digital.enabled;
adaptive_digital_mode_ = config_.adaptive_digital_mode; adaptive_agc_.reset(new AdaptiveAgc(data_dumper_.get(), config_));
adaptive_agc_.reset(
new AdaptiveAgc(data_dumper_.get(), config_.extra_saturation_margin_db));
} }
bool GainController2::Validate( bool GainController2::Validate(
const AudioProcessing::Config::GainController2& config) { const AudioProcessing::Config::GainController2& config) {
return config.fixed_gain_db >= 0.f && config.fixed_gain_db < 50.f && return config.fixed_digital.gain_db >= 0.f &&
config.extra_saturation_margin_db >= 0.f && config.fixed_digital.gain_db < 50.f &&
config.extra_saturation_margin_db <= 100.f; config.adaptive_digital.extra_saturation_margin_db >= 0.f &&
config.adaptive_digital.extra_saturation_margin_db <= 100.f;
} }
std::string GainController2::ToString( std::string GainController2::ToString(
const AudioProcessing::Config::GainController2& config) { const AudioProcessing::Config::GainController2& config) {
rtc::StringBuilder ss; rtc::StringBuilder ss;
ss << "{enabled: " << (config.enabled ? "true" : "false") << ", " std::string adaptive_digital_level_estimator;
<< "fixed_gain_dB: " << config.fixed_gain_db << "}"; using LevelEstimatorType =
AudioProcessing::Config::GainController2::LevelEstimator;
switch (config.adaptive_digital.level_estimator) {
case LevelEstimatorType::kRms:
adaptive_digital_level_estimator = "RMS";
break;
case LevelEstimatorType::kPeak:
adaptive_digital_level_estimator = "peak";
break;
}
// clang-format off
// clang formatting doesn't respect custom nested style.
ss << "{"
<< "enabled: " << (config.enabled ? "true" : "false") << ", "
<< "fixed_digital: {gain_db: " << config.fixed_digital.gain_db << "}, "
<< "adaptive_digital: {"
<< "enabled: "
<< (config.adaptive_digital.enabled ? "true" : "false") << ", "
<< "level_estimator: " << adaptive_digital_level_estimator << ", "
<< "extra_saturation_margin_db:"
<< config.adaptive_digital.extra_saturation_margin_db << "}"
<< "}";
// clang-format on
return ss.Release(); return ss.Release();
} }

View File

@ -52,8 +52,8 @@ float RunAgc2WithConstantInput(GainController2* agc2,
AudioProcessing::Config::GainController2 CreateAgc2FixedDigitalModeConfig( AudioProcessing::Config::GainController2 CreateAgc2FixedDigitalModeConfig(
float fixed_gain_db) { float fixed_gain_db) {
AudioProcessing::Config::GainController2 config; AudioProcessing::Config::GainController2 config;
config.adaptive_digital_mode = false; config.adaptive_digital.enabled = false;
config.fixed_gain_db = fixed_gain_db; config.fixed_digital.gain_db = fixed_gain_db;
// TODO(alessiob): Check why ASSERT_TRUE() below does not compile. // TODO(alessiob): Check why ASSERT_TRUE() below does not compile.
EXPECT_TRUE(GainController2::Validate(config)); EXPECT_TRUE(GainController2::Validate(config));
return config; return config;
@ -113,29 +113,26 @@ TEST(GainController2, CreateApplyConfig) {
gain_controller2->ApplyConfig(config); gain_controller2->ApplyConfig(config);
// Check that attenuation is not allowed. // Check that attenuation is not allowed.
config.fixed_gain_db = -5.f; config.fixed_digital.gain_db = -5.f;
EXPECT_FALSE(GainController2::Validate(config)); EXPECT_FALSE(GainController2::Validate(config));
// Check that valid configurations are applied. // Check that valid configurations are applied.
for (const float& fixed_gain_db : {0.f, 5.f, 10.f, 40.f}) { for (const float& fixed_gain_db : {0.f, 5.f, 10.f, 40.f}) {
config.fixed_gain_db = fixed_gain_db; config.fixed_digital.gain_db = fixed_gain_db;
EXPECT_TRUE(GainController2::Validate(config)); EXPECT_TRUE(GainController2::Validate(config));
gain_controller2->ApplyConfig(config); gain_controller2->ApplyConfig(config);
} }
} }
TEST(GainController2, ToString) { TEST(GainController2, ToString) {
// Tests GainController2::ToString(). // Tests GainController2::ToString(). Only test the enabled property.
AudioProcessing::Config::GainController2 config; AudioProcessing::Config::GainController2 config;
config.fixed_gain_db = 5.f;
config.enabled = false; config.enabled = false;
EXPECT_EQ("{enabled: false, fixed_gain_dB: 5}", EXPECT_EQ("{enabled: false", GainController2::ToString(config).substr(0, 15));
GainController2::ToString(config));
config.enabled = true; config.enabled = true;
EXPECT_EQ("{enabled: true, fixed_gain_dB: 5}", EXPECT_EQ("{enabled: true", GainController2::ToString(config).substr(0, 14));
GainController2::ToString(config));
} }
TEST(GainController2FixedDigital, GainShouldChangeOnSetGain) { TEST(GainController2FixedDigital, GainShouldChangeOnSetGain) {
@ -263,8 +260,8 @@ TEST(GainController2, UsageSaturationMargin) {
// Check that samples are not amplified as much when extra margin is // Check that samples are not amplified as much when extra margin is
// high. They should not be amplified at all, but only after convergence. GC2 // high. They should not be amplified at all, but only after convergence. GC2
// starts with a gain, and it takes time until it's down to 0 dB. // starts with a gain, and it takes time until it's down to 0 dB.
config.extra_saturation_margin_db = 50.f; config.fixed_digital.gain_db = 0.f;
config.fixed_gain_db = 0.f; config.adaptive_digital.extra_saturation_margin_db = 50.f;
gain_controller2.ApplyConfig(config); gain_controller2.ApplyConfig(config);
EXPECT_LT(GainAfterProcessingFile(&gain_controller2), 2.f); EXPECT_LT(GainAfterProcessingFile(&gain_controller2), 2.f);
@ -276,8 +273,8 @@ TEST(GainController2, UsageNoSaturationMargin) {
AudioProcessing::Config::GainController2 config; AudioProcessing::Config::GainController2 config;
// Check that some gain is applied if there is no margin. // Check that some gain is applied if there is no margin.
config.extra_saturation_margin_db = 0.f; config.fixed_digital.gain_db = 0.f;
config.fixed_gain_db = 0.f; config.adaptive_digital.extra_saturation_margin_db = 0.f;
gain_controller2.ApplyConfig(config); gain_controller2.ApplyConfig(config);
EXPECT_GT(GainAfterProcessingFile(&gain_controller2), 2.f); EXPECT_GT(GainAfterProcessingFile(&gain_controller2), 2.f);

View File

@ -270,10 +270,23 @@ class AudioProcessing : public rtc::RefCountInterface {
// first applies a fixed gain. The adaptive digital AGC can be turned off by // first applies a fixed gain. The adaptive digital AGC can be turned off by
// setting |adaptive_digital_mode=false|. // setting |adaptive_digital_mode=false|.
struct GainController2 { struct GainController2 {
enum LevelEstimator { kRms, kPeak };
bool enabled = false; bool enabled = false;
bool adaptive_digital_mode = true; struct {
float gain_db = 0.f;
} fixed_digital;
struct {
bool enabled = true;
LevelEstimator level_estimator = kRms;
bool use_saturation_protector = true;
float extra_saturation_margin_db = 2.f; float extra_saturation_margin_db = 2.f;
} adaptive_digital;
// Deprecated.
// TODO(webrtc:7494): Switch to fixed_digital.gain_db and remove.
float fixed_gain_db = 0.f; float fixed_gain_db = 0.f;
// Deprecated.
// TODO(webrtc:7494): Switch to adaptive_digital.enabled and remove.
bool adaptive_digital_mode = false;
} gain_controller2; } gain_controller2;
// Explicit copy assignment implementation to avoid issues with memory // Explicit copy assignment implementation to avoid issues with memory

View File

@ -351,9 +351,10 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
} }
if (settings_.use_agc2) { if (settings_.use_agc2) {
apm_config.gain_controller2.enabled = *settings_.use_agc2; apm_config.gain_controller2.enabled = *settings_.use_agc2;
apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db; apm_config.gain_controller2.fixed_digital.gain_db =
settings_.agc2_fixed_gain_db;
if (settings_.agc2_use_adaptive_gain) { if (settings_.agc2_use_adaptive_gain) {
apm_config.gain_controller2.adaptive_digital_mode = apm_config.gain_controller2.adaptive_digital.enabled =
*settings_.agc2_use_adaptive_gain; *settings_.agc2_use_adaptive_gain;
} }
} }

View File

@ -151,7 +151,7 @@ std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
apm_config.high_pass_filter.enabled = hpf; apm_config.high_pass_filter.enabled = hpf;
apm_config.gain_controller2.enabled = use_agc2_limiter; apm_config.gain_controller2.enabled = use_agc2_limiter;
apm_config.gain_controller2.fixed_gain_db = gain_controller2_gain_db; apm_config.gain_controller2.fixed_digital.gain_db = gain_controller2_gain_db;
apm->ApplyConfig(apm_config); apm->ApplyConfig(apm_config);