AGC2: prepare to move speech level estimator into GainController2

- build target isolated
- `AdaptiveModeLevelEstimator` renamed to `SpeechLevelEstimator`

Bug: webrtc:7494
Change-Id: If16caec2269b2ed1b2ee27c3687a8f8875f55c8c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/280441
Reviewed-by: Hanna Silen <silen@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38469}
This commit is contained in:
Alessio Bazzica
2022-10-25 15:28:07 +02:00
committed by WebRTC LUCI CQ
parent 8d7273357d
commit d89dff767c
7 changed files with 79 additions and 48 deletions

View File

@ -421,6 +421,7 @@ if (rtc_include_tests) {
"agc2:input_volume_controller_unittests",
"agc2:input_volume_stats_reporter_unittests",
"agc2:noise_estimator_unittests",
"agc2:speech_level_estimator_unittest",
"agc2:test_utils",
"agc2:vad_wrapper_unittests",
"agc2/rnn_vad:unittests",

View File

@ -15,14 +15,36 @@ group("agc2") {
]
}
# Speech level estimator library (`speech_level_estimator.cc/.h`), declared as
# its own build target.
rtc_library("speech_level_estimator") {
sources = [
"speech_level_estimator.cc",
"speech_level_estimator.h",
]
# Only the `gain_controller2` target of the parent directory and targets
# matched by the `./*` pattern (this directory and its subdirectories) may
# depend on this target.
visibility = [
"..:gain_controller2",
"./*",
]
# Config declared in the parent directory's build file.
# NOTE(review): presumably controls whether APM debug dumping is compiled in -
# confirm against the definition of `apm_debug_dump`.
configs += [ "..:apm_debug_dump" ]
deps = [
":common",
"..:api",
"..:apm_logging",
"../../../api:array_view",
"../../../rtc_base:checks",
"../../../rtc_base:logging",
"../../../rtc_base:safe_minmax",
]
}
rtc_library("adaptive_digital") {
sources = [
"adaptive_digital_gain_applier.cc",
"adaptive_digital_gain_applier.h",
"adaptive_digital_gain_controller.cc",
"adaptive_digital_gain_controller.h",
"adaptive_mode_level_estimator.cc",
"adaptive_mode_level_estimator.h",
"saturation_protector.cc",
"saturation_protector.h",
"saturation_protector_buffer.cc",
@ -38,10 +60,9 @@ rtc_library("adaptive_digital") {
deps = [
":common",
":cpu_features",
":gain_applier",
":noise_level_estimator",
":vad_wrapper",
":speech_level_estimator",
"..:api",
"..:apm_logging",
"..:audio_frame_view",
@ -244,13 +265,27 @@ rtc_library("cpu_features") {
]
}
# Test-only library that builds `speech_level_estimator_unittest.cc` against
# the `:speech_level_estimator` target.
rtc_library("speech_level_estimator_unittest") {
testonly = true
# Same config as the one applied to `:speech_level_estimator`.
configs += [ "..:apm_debug_dump" ]
sources = [ "speech_level_estimator_unittest.cc" ]
deps = [
":common",
":speech_level_estimator",
"..:api",
"..:apm_logging",
"../../../rtc_base:gunit_helpers",
"../../../test:test_support",
]
}
rtc_library("adaptive_digital_unittests") {
testonly = true
configs += [ "..:apm_debug_dump" ]
sources = [
"adaptive_digital_gain_applier_unittest.cc",
"adaptive_mode_level_estimator_unittest.cc",
"gain_applier_unittest.cc",
"saturation_protector_buffer_unittest.cc",
"saturation_protector_unittest.cc",

View File

@ -13,7 +13,6 @@
#include <algorithm>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/vad_wrapper.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"

View File

@ -15,9 +15,9 @@
#include "absl/types/optional.h"
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
#include "modules/audio_processing/agc2/noise_level_estimator.h"
#include "modules/audio_processing/agc2/saturation_protector.h"
#include "modules/audio_processing/agc2/speech_level_estimator.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/include/audio_processing.h"
@ -56,7 +56,7 @@ class AdaptiveDigitalGainController {
absl::optional<float> GetSpeechLevelDbfsIfConfident() const;
private:
AdaptiveModeLevelEstimator speech_level_estimator_;
SpeechLevelEstimator speech_level_estimator_;
AdaptiveDigitalGainApplier gain_controller_;
ApmDataDumper* const apm_data_dumper_;
std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;

View File

@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
#include "modules/audio_processing/agc2/speech_level_estimator.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
@ -32,19 +32,19 @@ float GetInitialSpeechLevelEstimateDbfs(
} // namespace
bool AdaptiveModeLevelEstimator::LevelEstimatorState::operator==(
const AdaptiveModeLevelEstimator::LevelEstimatorState& b) const {
bool SpeechLevelEstimator::LevelEstimatorState::operator==(
const SpeechLevelEstimator::LevelEstimatorState& b) const {
return time_to_confidence_ms == b.time_to_confidence_ms &&
level_dbfs.numerator == b.level_dbfs.numerator &&
level_dbfs.denominator == b.level_dbfs.denominator;
}
float AdaptiveModeLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
RTC_DCHECK_NE(denominator, 0.f);
return numerator / denominator;
}
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
SpeechLevelEstimator::SpeechLevelEstimator(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
: apm_data_dumper_(apm_data_dumper),
@ -57,9 +57,9 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
Reset();
}
void AdaptiveModeLevelEstimator::Update(float rms_dbfs,
float peak_dbfs,
float speech_probability) {
void SpeechLevelEstimator::Update(float rms_dbfs,
float peak_dbfs,
float speech_probability) {
RTC_DCHECK_GT(rms_dbfs, -150.0f);
RTC_DCHECK_LT(rms_dbfs, 50.0f);
RTC_DCHECK_GT(peak_dbfs, -150.0f);
@ -113,7 +113,7 @@ void AdaptiveModeLevelEstimator::Update(float rms_dbfs,
DumpDebugData();
}
bool AdaptiveModeLevelEstimator::IsConfident() const {
bool SpeechLevelEstimator::IsConfident() const {
if (adjacent_speech_frames_threshold_ == 1) {
// Ignore `reliable_state_` when a single frame is enough to update the
// level estimate (because it is not used).
@ -129,21 +129,21 @@ bool AdaptiveModeLevelEstimator::IsConfident() const {
preliminary_state_.time_to_confidence_ms == 0);
}
void AdaptiveModeLevelEstimator::Reset() {
void SpeechLevelEstimator::Reset() {
ResetLevelEstimatorState(preliminary_state_);
ResetLevelEstimatorState(reliable_state_);
level_dbfs_ = initial_speech_level_dbfs_;
num_adjacent_speech_frames_ = 0;
}
void AdaptiveModeLevelEstimator::ResetLevelEstimatorState(
void SpeechLevelEstimator::ResetLevelEstimatorState(
LevelEstimatorState& state) const {
state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs;
state.level_dbfs.numerator = initial_speech_level_dbfs_;
state.level_dbfs.denominator = 1.0f;
}
void AdaptiveModeLevelEstimator::DumpDebugData() const {
void SpeechLevelEstimator::DumpDebugData() const {
apm_data_dumper_->DumpRaw(
"agc2_adaptive_level_estimator_num_adjacent_speech_frames",
num_adjacent_speech_frames_);

View File

@ -8,29 +8,29 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
#include <stddef.h>
#include <type_traits>
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/vad_wrapper.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
class ApmDataDumper;
// Level estimator for the digital adaptive gain controller.
class AdaptiveModeLevelEstimator {
// Active speech level estimator based on the analysis of the following
// framewise properties: RMS level (dBFS), peak level (dBFS), speech
// probability.
class SpeechLevelEstimator {
public:
AdaptiveModeLevelEstimator(
SpeechLevelEstimator(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete;
AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) =
delete;
SpeechLevelEstimator(const SpeechLevelEstimator&) = delete;
SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete;
// Updates the level estimation.
void Update(float rms_dbfs, float peak_dbfs, float speech_probability);
@ -74,4 +74,4 @@ class AdaptiveModeLevelEstimator {
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
#endif // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_

View File

@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
#include "modules/audio_processing/agc2/speech_level_estimator.h"
#include <memory>
@ -36,7 +36,7 @@ void RunOnConstantLevel(int num_iterations,
float rms_dbfs,
float peak_dbfs,
float speech_probability,
AdaptiveModeLevelEstimator& level_estimator) {
SpeechLevelEstimator& level_estimator) {
for (int i = 0; i < num_iterations; ++i) {
level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability);
}
@ -57,7 +57,7 @@ constexpr float kMaxSpeechProbability = 1.0f;
struct TestLevelEstimator {
explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
: data_dumper(0),
estimator(std::make_unique<AdaptiveModeLevelEstimator>(
estimator(std::make_unique<SpeechLevelEstimator>(
&data_dumper,
GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
initial_speech_level_dbfs(estimator->level_dbfs()),
@ -70,14 +70,14 @@ struct TestLevelEstimator {
"level is wide enough for the tests";
}
ApmDataDumper data_dumper;
std::unique_ptr<AdaptiveModeLevelEstimator> estimator;
std::unique_ptr<SpeechLevelEstimator> estimator;
const float initial_speech_level_dbfs;
const float level_rms_dbfs;
const float level_peak_dbfs;
};
// Checks that the level estimator converges to a constant input speech level.
TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
TEST(GainController2SpeechLevelEstimator, LevelStabilizes) {
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
level_estimator.level_rms_dbfs,
@ -93,7 +93,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
// Checks that the level estimator does not become confident when too few
// speech frames are observed.
TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {
TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
level_estimator.level_rms_dbfs,
@ -104,7 +104,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {
// Checks that the level estimator becomes confident when enough speech frames
// are observed.
TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
TEST(GainController2SpeechLevelEstimator, IsConfident) {
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
level_estimator.level_rms_dbfs,
@ -115,8 +115,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
// Checks that the estimated level is not affected by the level of non-speech
// frames.
TEST(GainController2AdaptiveModeLevelEstimator,
EstimatorIgnoresNonSpeechFrames) {
TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) {
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
// Simulate speech.
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
@ -134,8 +133,7 @@ TEST(GainController2AdaptiveModeLevelEstimator,
}
// Checks the convergence speed of the estimator before it becomes confident.
TEST(GainController2AdaptiveModeLevelEstimator,
ConvergenceSpeedBeforeConfidence) {
TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) {
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
level_estimator.level_rms_dbfs,
@ -147,8 +145,7 @@ TEST(GainController2AdaptiveModeLevelEstimator,
}
// Checks the convergence speed of the estimator after it becomes confident.
TEST(GainController2AdaptiveModeLevelEstimator,
ConvergenceSpeedAfterConfidence) {
TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) {
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
// Reach confidence using the initial level estimate.
RunOnConstantLevel(
@ -173,14 +170,13 @@ TEST(GainController2AdaptiveModeLevelEstimator,
kConvergenceSpeedTestsLevelTolerance);
}
class AdaptiveModeLevelEstimatorParametrization
class SpeechLevelEstimatorParametrization
: public ::testing::TestWithParam<int> {
protected:
int adjacent_speech_frames_threshold() const { return GetParam(); }
};
TEST_P(AdaptiveModeLevelEstimatorParametrization,
DoNotAdaptToShortSpeechSegments) {
TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) {
TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
const float initial_level = level_estimator.estimator->level_dbfs();
ASSERT_LT(initial_level, level_estimator.level_peak_dbfs);
@ -197,7 +193,7 @@ TEST_P(AdaptiveModeLevelEstimatorParametrization,
EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
}
TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
const float initial_level = level_estimator.estimator->level_dbfs();
ASSERT_LT(initial_level, level_estimator.level_peak_dbfs);
@ -210,7 +206,7 @@ TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
}
INSTANTIATE_TEST_SUITE_P(GainController2,
AdaptiveModeLevelEstimatorParametrization,
SpeechLevelEstimatorParametrization,
::testing::Values(1, 9, 17));
} // namespace