AGC2: adding level estimation option (RMS or peak-based).

This CL makes it possible to choose the level estimator used by the adaptive
digital GC of AGC2. The options are RMS (the default, and the estimator used
so far) and peak-based (already computed, but previously unused).

Besides adding the new AGC2 config param for the level estimator, this CL
also refactors the config class by making it more structured.

Bug: webrtc:7494
Change-Id: I20eb558ca50f13536aa7bdea08d21de3b630f8bc
Reviewed-on: https://webrtc-review.googlesource.com/c/110144
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#25620}
This commit is contained in:
Alessio Bazzica
2018-11-13 14:44:15 +01:00
committed by Commit Bot
parent 44ca9a392a
commit 1e2542f593
12 changed files with 169 additions and 44 deletions

View File

@ -429,6 +429,7 @@ if (rtc_include_tests) {
"agc2:biquad_filter_unittests",
"agc2:fixed_digital_unittests",
"agc2:noise_estimator_unittests",
"agc2:rnn_vad_with_level_unittests",
"agc2:test_utils",
"agc2/rnn_vad:unittests",
"test/conversational_speech:unittest",

View File

@ -27,6 +27,7 @@ rtc_source_set("level_estimation_agc") {
":gain_applier",
":noise_level_estimator",
":rnn_vad_with_level",
"..:api",
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
@ -58,6 +59,7 @@ rtc_source_set("adaptive_digital") {
":gain_applier",
":noise_level_estimator",
":rnn_vad_with_level",
"..:api",
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
@ -257,6 +259,18 @@ rtc_source_set("noise_estimator_unittests") {
]
}
# Test-only source set that builds vad_with_level_unittest.cc against the
# ":rnn_vad_with_level" target. It is listed among the test deps of the parent
# build file as "agc2:rnn_vad_with_level_unittests".
rtc_source_set("rnn_vad_with_level_unittests") {
testonly = true
sources = [
"vad_with_level_unittest.cc",
]
deps = [
":rnn_vad_with_level",
"..:audio_frame_view",
"../../../rtc_base:rtc_base_tests_utils",
]
}
rtc_source_set("test_utils") {
testonly = true
visibility = [

View File

@ -26,8 +26,12 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
}
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
float extra_saturation_margin_db)
: speech_level_estimator_(apm_data_dumper, extra_saturation_margin_db),
const AudioProcessing::Config::GainController2& config)
: speech_level_estimator_(
apm_data_dumper,
config.adaptive_digital.level_estimator,
config.adaptive_digital.use_saturation_protector,
config.adaptive_digital.extra_saturation_margin_db),
gain_applier_(apm_data_dumper),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) {
@ -44,9 +48,9 @@ void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
signal_with_levels.vad_result.speech_probability);
apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs",
signal_with_levels.vad_result.speech_rms_dbfs);
apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs",
signal_with_levels.vad_result.speech_peak_dbfs);
speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result);
signal_with_levels.input_level_dbfs =
@ -68,7 +72,6 @@ void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
// The gain applier applies the gain.
gain_applier_.Process(signal_with_levels);
;
}
void AdaptiveAgc::Reset() {

View File

@ -16,6 +16,7 @@
#include "modules/audio_processing/agc2/noise_level_estimator.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
class ApmDataDumper;
@ -23,7 +24,8 @@ class ApmDataDumper;
class AdaptiveAgc {
public:
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
AdaptiveAgc(ApmDataDumper* apm_data_dumper, float extra_saturation_margin_db);
AdaptiveAgc(ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2& config);
~AdaptiveAgc();
void Process(AudioFrameView<float> float_frame, float last_audio_level);

View File

@ -19,13 +19,20 @@ namespace webrtc {
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper)
: saturation_protector_(apm_data_dumper),
: level_estimator_(
AudioProcessing::Config::GainController2::LevelEstimator::kRms),
use_saturation_protector_(true),
saturation_protector_(apm_data_dumper),
apm_data_dumper_(apm_data_dumper) {}
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
bool use_saturation_protector,
float extra_saturation_margin_db)
: saturation_protector_(apm_data_dumper, extra_saturation_margin_db),
: level_estimator_(level_estimator),
use_saturation_protector_(use_saturation_protector),
saturation_protector_(apm_data_dumper, extra_saturation_margin_db),
apm_data_dumper_(apm_data_dumper) {}
void AdaptiveModeLevelEstimator::UpdateEstimation(
@ -49,20 +56,38 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f;
// Read speech level estimation.
float speech_level_dbfs = 0.f;
using LevelEstimatorType =
AudioProcessing::Config::GainController2::LevelEstimator;
switch (level_estimator_) {
case LevelEstimatorType::kRms:
speech_level_dbfs = vad_data.speech_rms_dbfs;
break;
case LevelEstimatorType::kPeak:
speech_level_dbfs = vad_data.speech_peak_dbfs;
break;
}
// Update speech level estimation.
estimate_numerator_ = estimate_numerator_ * leak_factor +
vad_data.speech_rms_dbfs * vad_data.speech_probability;
speech_level_dbfs * vad_data.speech_probability;
estimate_denominator_ =
estimate_denominator_ * leak_factor + vad_data.speech_probability;
last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_;
saturation_protector_.UpdateMargin(vad_data, last_estimate_with_offset_dbfs_);
if (use_saturation_protector_) {
saturation_protector_.UpdateMargin(vad_data,
last_estimate_with_offset_dbfs_);
DebugDumpEstimate();
}
}
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
return rtc::SafeClamp<float>(
last_estimate_with_offset_dbfs_ + saturation_protector_.LastMargin(),
last_estimate_with_offset_dbfs_ +
(use_saturation_protector_ ? saturation_protector_.LastMargin()
: 0.f),
-90.f, 30.f);
}

View File

@ -16,6 +16,7 @@
#include "modules/audio_processing/agc2/agc2_common.h" // kFullBufferSizeMs...
#include "modules/audio_processing/agc2/saturation_protector.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
class ApmDataDumper;
@ -23,7 +24,10 @@ class ApmDataDumper;
class AdaptiveModeLevelEstimator {
public:
explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper,
AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
bool use_saturation_protector,
float extra_saturation_margin_db);
void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
float LatestLevelEstimate() const;
@ -35,6 +39,9 @@ class AdaptiveModeLevelEstimator {
private:
void DebugDumpEstimate();
const AudioProcessing::Config::GainController2::LevelEstimator
level_estimator_;
const bool use_saturation_protector_;
size_t buffer_size_ms_ = 0;
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
float estimate_numerator_ = 0.f;

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/vad_with_level.h"

#include <array>
#include <cstddef>

#include "modules/audio_processing/include/audio_frame_view.h"
#include "rtc_base/gunit.h"

namespace webrtc {
namespace test {

// Checks that, for a frame that is not constant, the peak level reported by
// VadWithLevel::AnalyzeFrame() is strictly greater than the RMS level.
TEST(AutomaticGainController2VadWithLevelEstimator,
     PeakLevelGreaterThanRmsLevel) {
  constexpr size_t kSampleRateHz = 8000;
  // Number of samples in a 10 ms frame.
  constexpr size_t kFrameSize = kSampleRateHz / 100;

  // 10 ms input frame, constant except for one peak value.
  // Handcrafted so that the average is lower than the peak value.
  std::array<float, kFrameSize> frame;
  frame.fill(1000.f);
  frame[10] = 2000.f;
  float* const channel0 = frame.data();
  AudioFrameView<float> frame_view(&channel0, 1, frame.size());

  // Compute audio frame levels (the VAD result is ignored).
  VadWithLevel vad_with_level;
  const auto levels_and_vad_prob = vad_with_level.AnalyzeFrame(frame_view);

  // Compare peak and RMS levels.
  EXPECT_LT(levels_and_vad_prob.speech_rms_dbfs,
            levels_and_vad_prob.speech_peak_dbfs);
}

}  // namespace test
}  // namespace webrtc

View File

@ -65,30 +65,52 @@ void GainController2::ApplyConfig(
RTC_DCHECK(Validate(config))
<< " the invalid config was " << ToString(config);
if (config.fixed_gain_db != config_.fixed_gain_db) {
config_ = config;
if (config.fixed_digital.gain_db != config_.fixed_digital.gain_db) {
// Reset the limiter to quickly react on abrupt level changes caused by
// large changes of the fixed gain.
limiter_.Reset();
}
config_ = config;
gain_applier_.SetGainFactor(DbToRatio(config_.fixed_gain_db));
adaptive_digital_mode_ = config_.adaptive_digital_mode;
adaptive_agc_.reset(
new AdaptiveAgc(data_dumper_.get(), config_.extra_saturation_margin_db));
gain_applier_.SetGainFactor(DbToRatio(config_.fixed_digital.gain_db));
adaptive_digital_mode_ = config_.adaptive_digital.enabled;
adaptive_agc_.reset(new AdaptiveAgc(data_dumper_.get(), config_));
}
bool GainController2::Validate(
const AudioProcessing::Config::GainController2& config) {
return config.fixed_gain_db >= 0.f && config.fixed_gain_db < 50.f &&
config.extra_saturation_margin_db >= 0.f &&
config.extra_saturation_margin_db <= 100.f;
return config.fixed_digital.gain_db >= 0.f &&
config.fixed_digital.gain_db < 50.f &&
config.adaptive_digital.extra_saturation_margin_db >= 0.f &&
config.adaptive_digital.extra_saturation_margin_db <= 100.f;
}
std::string GainController2::ToString(
const AudioProcessing::Config::GainController2& config) {
rtc::StringBuilder ss;
ss << "{enabled: " << (config.enabled ? "true" : "false") << ", "
<< "fixed_gain_dB: " << config.fixed_gain_db << "}";
std::string adaptive_digital_level_estimator;
using LevelEstimatorType =
AudioProcessing::Config::GainController2::LevelEstimator;
switch (config.adaptive_digital.level_estimator) {
case LevelEstimatorType::kRms:
adaptive_digital_level_estimator = "RMS";
break;
case LevelEstimatorType::kPeak:
adaptive_digital_level_estimator = "peak";
break;
}
// clang-format off
// clang formatting doesn't respect custom nested style.
ss << "{"
<< "enabled: " << (config.enabled ? "true" : "false") << ", "
<< "fixed_digital: {gain_db: " << config.fixed_digital.gain_db << "}, "
<< "adaptive_digital: {"
<< "enabled: "
<< (config.adaptive_digital.enabled ? "true" : "false") << ", "
<< "level_estimator: " << adaptive_digital_level_estimator << ", "
<< "extra_saturation_margin_db:"
<< config.adaptive_digital.extra_saturation_margin_db << "}"
<< "}";
// clang-format on
return ss.Release();
}

View File

@ -52,8 +52,8 @@ float RunAgc2WithConstantInput(GainController2* agc2,
AudioProcessing::Config::GainController2 CreateAgc2FixedDigitalModeConfig(
float fixed_gain_db) {
AudioProcessing::Config::GainController2 config;
config.adaptive_digital_mode = false;
config.fixed_gain_db = fixed_gain_db;
config.adaptive_digital.enabled = false;
config.fixed_digital.gain_db = fixed_gain_db;
// TODO(alessiob): Check why ASSERT_TRUE() below does not compile.
EXPECT_TRUE(GainController2::Validate(config));
return config;
@ -113,29 +113,26 @@ TEST(GainController2, CreateApplyConfig) {
gain_controller2->ApplyConfig(config);
// Check that attenuation is not allowed.
config.fixed_gain_db = -5.f;
config.fixed_digital.gain_db = -5.f;
EXPECT_FALSE(GainController2::Validate(config));
// Check that valid configurations are applied.
for (const float& fixed_gain_db : {0.f, 5.f, 10.f, 40.f}) {
config.fixed_gain_db = fixed_gain_db;
config.fixed_digital.gain_db = fixed_gain_db;
EXPECT_TRUE(GainController2::Validate(config));
gain_controller2->ApplyConfig(config);
}
}
TEST(GainController2, ToString) {
// Tests GainController2::ToString().
// Tests GainController2::ToString(). Only test the enabled property.
AudioProcessing::Config::GainController2 config;
config.fixed_gain_db = 5.f;
config.enabled = false;
EXPECT_EQ("{enabled: false, fixed_gain_dB: 5}",
GainController2::ToString(config));
EXPECT_EQ("{enabled: false", GainController2::ToString(config).substr(0, 15));
config.enabled = true;
EXPECT_EQ("{enabled: true, fixed_gain_dB: 5}",
GainController2::ToString(config));
EXPECT_EQ("{enabled: true", GainController2::ToString(config).substr(0, 14));
}
TEST(GainController2FixedDigital, GainShouldChangeOnSetGain) {
@ -263,8 +260,8 @@ TEST(GainController2, UsageSaturationMargin) {
// Check that samples are not amplified as much when extra margin is
// high. They should not be amplified at all, but only after convergence. GC2
// starts with a gain, and it takes time until it's down to 0 dB.
config.extra_saturation_margin_db = 50.f;
config.fixed_gain_db = 0.f;
config.fixed_digital.gain_db = 0.f;
config.adaptive_digital.extra_saturation_margin_db = 50.f;
gain_controller2.ApplyConfig(config);
EXPECT_LT(GainAfterProcessingFile(&gain_controller2), 2.f);
@ -276,8 +273,8 @@ TEST(GainController2, UsageNoSaturationMargin) {
AudioProcessing::Config::GainController2 config;
// Check that some gain is applied if there is no margin.
config.extra_saturation_margin_db = 0.f;
config.fixed_gain_db = 0.f;
config.fixed_digital.gain_db = 0.f;
config.adaptive_digital.extra_saturation_margin_db = 0.f;
gain_controller2.ApplyConfig(config);
EXPECT_GT(GainAfterProcessingFile(&gain_controller2), 2.f);

View File

@ -270,10 +270,23 @@ class AudioProcessing : public rtc::RefCountInterface {
// first applies a fixed gain. The adaptive digital AGC can be turned off by
// setting |adaptive_digital.enabled=false|.
struct GainController2 {
enum LevelEstimator { kRms, kPeak };
bool enabled = false;
bool adaptive_digital_mode = true;
struct {
float gain_db = 0.f;
} fixed_digital;
struct {
bool enabled = true;
LevelEstimator level_estimator = kRms;
bool use_saturation_protector = true;
float extra_saturation_margin_db = 2.f;
} adaptive_digital;
// Deprecated.
// TODO(webrtc:7494): Switch to fixed_digital.gain_db and remove.
float fixed_gain_db = 0.f;
// Deprecated.
// TODO(webrtc:7494): Switch to adaptive_digital.enabled and remove.
bool adaptive_digital_mode = false;
} gain_controller2;
// Explicit copy assignment implementation to avoid issues with memory

View File

@ -351,9 +351,10 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
}
if (settings_.use_agc2) {
apm_config.gain_controller2.enabled = *settings_.use_agc2;
apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db;
apm_config.gain_controller2.fixed_digital.gain_db =
settings_.agc2_fixed_gain_db;
if (settings_.agc2_use_adaptive_gain) {
apm_config.gain_controller2.adaptive_digital_mode =
apm_config.gain_controller2.adaptive_digital.enabled =
*settings_.agc2_use_adaptive_gain;
}
}

View File

@ -151,7 +151,7 @@ std::unique_ptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
apm_config.high_pass_filter.enabled = hpf;
apm_config.gain_controller2.enabled = use_agc2_limiter;
apm_config.gain_controller2.fixed_gain_db = gain_controller2_gain_db;
apm_config.gain_controller2.fixed_digital.gain_db = gain_controller2_gain_db;
apm->ApplyConfig(apm_config);