diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index 9af11db449..13fb9025d8 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -421,6 +421,7 @@ if (rtc_include_tests) { "agc2:input_volume_controller_unittests", "agc2:input_volume_stats_reporter_unittests", "agc2:noise_estimator_unittests", + "agc2:speech_level_estimator_unittest", "agc2:test_utils", "agc2:vad_wrapper_unittests", "agc2/rnn_vad:unittests", diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn index b7a4030448..0e20ae6768 100644 --- a/modules/audio_processing/agc2/BUILD.gn +++ b/modules/audio_processing/agc2/BUILD.gn @@ -15,14 +15,36 @@ group("agc2") { ] } +rtc_library("speech_level_estimator") { + sources = [ + "speech_level_estimator.cc", + "speech_level_estimator.h", + ] + + visibility = [ + "..:gain_controller2", + "./*", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:api", + "..:apm_logging", + "../../../api:array_view", + "../../../rtc_base:checks", + "../../../rtc_base:logging", + "../../../rtc_base:safe_minmax", + ] +} + rtc_library("adaptive_digital") { sources = [ "adaptive_digital_gain_applier.cc", "adaptive_digital_gain_applier.h", "adaptive_digital_gain_controller.cc", "adaptive_digital_gain_controller.h", - "adaptive_mode_level_estimator.cc", - "adaptive_mode_level_estimator.h", "saturation_protector.cc", "saturation_protector.h", "saturation_protector_buffer.cc", @@ -38,10 +60,9 @@ rtc_library("adaptive_digital") { deps = [ ":common", - ":cpu_features", ":gain_applier", ":noise_level_estimator", - ":vad_wrapper", + ":speech_level_estimator", "..:api", "..:apm_logging", "..:audio_frame_view", @@ -244,13 +265,27 @@ rtc_library("cpu_features") { ] } +rtc_library("speech_level_estimator_unittest") { + testonly = true + configs += [ "..:apm_debug_dump" ] + + sources = [ "speech_level_estimator_unittest.cc" ] + deps = [ + ":common", + ":speech_level_estimator", + "..:api", + "..:apm_logging", + "../../../rtc_base:gunit_helpers", + "../../../test:test_support", + ] +} + rtc_library("adaptive_digital_unittests") { testonly = true configs += [ "..:apm_debug_dump" ] sources = [ "adaptive_digital_gain_applier_unittest.cc", - "adaptive_mode_level_estimator_unittest.cc", "gain_applier_unittest.cc", "saturation_protector_buffer_unittest.cc", "saturation_protector_unittest.cc", diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc index c579ced55d..c396ee044a 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc @@ -13,7 +13,6 @@ #include #include "common_audio/include/audio_util.h" -#include "modules/audio_processing/agc2/vad_wrapper.h" #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" #include "rtc_base/logging.h" diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h index af7f0238ec..78c508836b 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h @@ -15,9 +15,9 @@ #include "absl/types/optional.h" #include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h" -#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h" #include "modules/audio_processing/agc2/noise_level_estimator.h" #include "modules/audio_processing/agc2/saturation_protector.h" +#include "modules/audio_processing/agc2/speech_level_estimator.h" #include "modules/audio_processing/include/audio_frame_view.h" #include "modules/audio_processing/include/audio_processing.h" @@ -56,7 +56,7 @@ class AdaptiveDigitalGainController { absl::optional GetSpeechLevelDbfsIfConfident() const; private: - AdaptiveModeLevelEstimator speech_level_estimator_; + SpeechLevelEstimator speech_level_estimator_; AdaptiveDigitalGainApplier gain_controller_; ApmDataDumper* const apm_data_dumper_; std::unique_ptr noise_level_estimator_; diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/speech_level_estimator.cc similarity index 88% rename from modules/audio_processing/agc2/adaptive_mode_level_estimator.cc rename to modules/audio_processing/agc2/speech_level_estimator.cc index fe021fec05..8e234f7d7f 100644 --- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc +++ b/modules/audio_processing/agc2/speech_level_estimator.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h" +#include "modules/audio_processing/agc2/speech_level_estimator.h" #include "modules/audio_processing/agc2/agc2_common.h" #include "modules/audio_processing/logging/apm_data_dumper.h" @@ -32,19 +32,19 @@ float GetInitialSpeechLevelEstimateDbfs( } // namespace -bool AdaptiveModeLevelEstimator::LevelEstimatorState::operator==( - const AdaptiveModeLevelEstimator::LevelEstimatorState& b) const { +bool SpeechLevelEstimator::LevelEstimatorState::operator==( + const SpeechLevelEstimator::LevelEstimatorState& b) const { return time_to_confidence_ms == b.time_to_confidence_ms && level_dbfs.numerator == b.level_dbfs.numerator && level_dbfs.denominator == b.level_dbfs.denominator; } -float AdaptiveModeLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const { +float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const { RTC_DCHECK_NE(denominator, 0.f); return numerator / denominator; } -AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( +SpeechLevelEstimator::SpeechLevelEstimator( ApmDataDumper* apm_data_dumper, const AudioProcessing::Config::GainController2::AdaptiveDigital& config) : apm_data_dumper_(apm_data_dumper), @@ -57,9 +57,9 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( Reset(); } -void AdaptiveModeLevelEstimator::Update(float rms_dbfs, - float peak_dbfs, - float speech_probability) { +void SpeechLevelEstimator::Update(float rms_dbfs, + float peak_dbfs, + float speech_probability) { RTC_DCHECK_GT(rms_dbfs, -150.0f); RTC_DCHECK_LT(rms_dbfs, 50.0f); RTC_DCHECK_GT(peak_dbfs, -150.0f); @@ -113,7 +113,7 @@ void AdaptiveModeLevelEstimator::Update(float rms_dbfs, DumpDebugData(); } -bool AdaptiveModeLevelEstimator::IsConfident() const { +bool SpeechLevelEstimator::IsConfident() const { if (adjacent_speech_frames_threshold_ == 1) { // Ignore `reliable_state_` when a single frame is enough to update the // level estimate (because it is not used). @@ -129,21 +129,21 @@ bool AdaptiveModeLevelEstimator::IsConfident() const { preliminary_state_.time_to_confidence_ms == 0); } -void AdaptiveModeLevelEstimator::Reset() { +void SpeechLevelEstimator::Reset() { ResetLevelEstimatorState(preliminary_state_); ResetLevelEstimatorState(reliable_state_); level_dbfs_ = initial_speech_level_dbfs_; num_adjacent_speech_frames_ = 0; } -void AdaptiveModeLevelEstimator::ResetLevelEstimatorState( +void SpeechLevelEstimator::ResetLevelEstimatorState( LevelEstimatorState& state) const { state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs; state.level_dbfs.numerator = initial_speech_level_dbfs_; state.level_dbfs.denominator = 1.0f; } -void AdaptiveModeLevelEstimator::DumpDebugData() const { +void SpeechLevelEstimator::DumpDebugData() const { apm_data_dumper_->DumpRaw( "agc2_adaptive_level_estimator_num_adjacent_speech_frames", num_adjacent_speech_frames_); diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/speech_level_estimator.h similarity index 78% rename from modules/audio_processing/agc2/adaptive_mode_level_estimator.h rename to modules/audio_processing/agc2/speech_level_estimator.h index 989c8c3572..25e949119c 100644 --- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h +++ b/modules/audio_processing/agc2/speech_level_estimator.h @@ -8,29 +8,29 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ -#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ +#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_ #include #include #include "modules/audio_processing/agc2/agc2_common.h" -#include "modules/audio_processing/agc2/vad_wrapper.h" #include "modules/audio_processing/include/audio_processing.h" namespace webrtc { class ApmDataDumper; -// Level estimator for the digital adaptive gain controller. -class AdaptiveModeLevelEstimator { +// Active speech level estimator based on the analysis of the following +// framewise properties: RMS level (dBFS), peak level (dBFS), speech +// probability. +class SpeechLevelEstimator { public: - AdaptiveModeLevelEstimator( + SpeechLevelEstimator( ApmDataDumper* apm_data_dumper, const AudioProcessing::Config::GainController2::AdaptiveDigital& config); - AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete; - AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) = - delete; + SpeechLevelEstimator(const SpeechLevelEstimator&) = delete; + SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete; // Updates the level estimation. void Update(float rms_dbfs, float peak_dbfs, float speech_probability); @@ -74,4 +74,4 @@ class AdaptiveModeLevelEstimator { } // namespace webrtc -#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ +#endif // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_ diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc similarity index 89% rename from modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc rename to modules/audio_processing/agc2/speech_level_estimator_unittest.cc index 684fca188a..57208de014 100644 --- a/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc +++ b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h" +#include "modules/audio_processing/agc2/speech_level_estimator.h" #include @@ -36,7 +36,7 @@ void RunOnConstantLevel(int num_iterations, float rms_dbfs, float peak_dbfs, float speech_probability, - AdaptiveModeLevelEstimator& level_estimator) { + SpeechLevelEstimator& level_estimator) { for (int i = 0; i < num_iterations; ++i) { level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability); } @@ -57,7 +57,7 @@ constexpr float kMaxSpeechProbability = 1.0f; struct TestLevelEstimator { explicit TestLevelEstimator(int adjacent_speech_frames_threshold) : data_dumper(0), - estimator(std::make_unique( + estimator(std::make_unique( &data_dumper, GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))), initial_speech_level_dbfs(estimator->level_dbfs()), @@ -70,14 +70,14 @@ struct TestLevelEstimator { "level is wide enough for the tests"; } ApmDataDumper data_dumper; - std::unique_ptr estimator; + std::unique_ptr estimator; const float initial_speech_level_dbfs; const float level_rms_dbfs; const float level_peak_dbfs; }; // Checks that the level estimator converges to a constant input speech level. -TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) { +TEST(GainController2SpeechLevelEstimator, LevelStabilizes) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, level_estimator.level_rms_dbfs, @@ -93,7 +93,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) { // Checks that the level controller does not become confident when too few // speech frames are observed. -TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) { +TEST(GainController2SpeechLevelEstimator, IsNotConfident) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2, level_estimator.level_rms_dbfs, @@ -104,7 +104,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) { // Checks that the level controller becomes confident when enough speech frames // are observed. -TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) { +TEST(GainController2SpeechLevelEstimator, IsConfident) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, level_estimator.level_rms_dbfs, @@ -115,8 +115,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) { // Checks that the estimated level is not affected by the level of non-speech // frames. -TEST(GainController2AdaptiveModeLevelEstimator, - EstimatorIgnoresNonSpeechFrames) { +TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); // Simulate speech. RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, @@ -134,8 +133,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, } // Checks the convergence speed of the estimator before it becomes confident. -TEST(GainController2AdaptiveModeLevelEstimator, - ConvergenceSpeedBeforeConfidence) { +TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence, level_estimator.level_rms_dbfs, @@ -147,8 +145,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, } // Checks the convergence speed of the estimator after it becomes confident. -TEST(GainController2AdaptiveModeLevelEstimator, - ConvergenceSpeedAfterConfidence) { +TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) { TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1); // Reach confidence using the initial level estimate. RunOnConstantLevel( @@ -173,14 +170,13 @@ TEST(GainController2AdaptiveModeLevelEstimator, kConvergenceSpeedTestsLevelTolerance); } -class AdaptiveModeLevelEstimatorParametrization +class SpeechLevelEstimatorParametrization : public ::testing::TestWithParam { protected: int adjacent_speech_frames_threshold() const { return GetParam(); } }; -TEST_P(AdaptiveModeLevelEstimatorParametrization, - DoNotAdaptToShortSpeechSegments) { +TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) { TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); const float initial_level = level_estimator.estimator->level_dbfs(); ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); @@ -197,7 +193,7 @@ TEST_P(AdaptiveModeLevelEstimatorParametrization, EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs()); } -TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) { +TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) { TestLevelEstimator level_estimator(adjacent_speech_frames_threshold()); const float initial_level = level_estimator.estimator->level_dbfs(); ASSERT_LT(initial_level, level_estimator.level_peak_dbfs); @@ -210,7 +206,7 @@ TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) { } INSTANTIATE_TEST_SUITE_P(GainController2, - AdaptiveModeLevelEstimatorParametrization, + SpeechLevelEstimatorParametrization, ::testing::Values(1, 9, 17)); } // namespace