AGC2: prepare to move speech level estimator into GainController2
- build target isolated
- `AdaptiveModeLevelEstimator` renamed to `SpeechLevelEstimator`

Bug: webrtc:7494
Change-Id: If16caec2269b2ed1b2ee27c3687a8f8875f55c8c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/280441
Reviewed-by: Hanna Silen <silen@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38469}
This commit is contained in:
Committed by: WebRTC LUCI CQ
Parent: 8d7273357d
Commit: d89dff767c
@ -421,6 +421,7 @@ if (rtc_include_tests) {
|
||||
"agc2:input_volume_controller_unittests",
|
||||
"agc2:input_volume_stats_reporter_unittests",
|
||||
"agc2:noise_estimator_unittests",
|
||||
"agc2:speech_level_estimator_unittest",
|
||||
"agc2:test_utils",
|
||||
"agc2:vad_wrapper_unittests",
|
||||
"agc2/rnn_vad:unittests",
|
||||
|
||||
@ -15,14 +15,36 @@ group("agc2") {
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("speech_level_estimator") {
|
||||
sources = [
|
||||
"speech_level_estimator.cc",
|
||||
"speech_level_estimator.h",
|
||||
]
|
||||
|
||||
visibility = [
|
||||
"..:gain_controller2",
|
||||
"./*",
|
||||
]
|
||||
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
|
||||
deps = [
|
||||
":common",
|
||||
"..:api",
|
||||
"..:apm_logging",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:logging",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("adaptive_digital") {
|
||||
sources = [
|
||||
"adaptive_digital_gain_applier.cc",
|
||||
"adaptive_digital_gain_applier.h",
|
||||
"adaptive_digital_gain_controller.cc",
|
||||
"adaptive_digital_gain_controller.h",
|
||||
"adaptive_mode_level_estimator.cc",
|
||||
"adaptive_mode_level_estimator.h",
|
||||
"saturation_protector.cc",
|
||||
"saturation_protector.h",
|
||||
"saturation_protector_buffer.cc",
|
||||
@ -38,10 +60,9 @@ rtc_library("adaptive_digital") {
|
||||
|
||||
deps = [
|
||||
":common",
|
||||
":cpu_features",
|
||||
":gain_applier",
|
||||
":noise_level_estimator",
|
||||
":vad_wrapper",
|
||||
":speech_level_estimator",
|
||||
"..:api",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
@ -244,13 +265,27 @@ rtc_library("cpu_features") {
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("speech_level_estimator_unittest") {
|
||||
testonly = true
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
|
||||
sources = [ "speech_level_estimator_unittest.cc" ]
|
||||
deps = [
|
||||
":common",
|
||||
":speech_level_estimator",
|
||||
"..:api",
|
||||
"..:apm_logging",
|
||||
"../../../rtc_base:gunit_helpers",
|
||||
"../../../test:test_support",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("adaptive_digital_unittests") {
|
||||
testonly = true
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
|
||||
sources = [
|
||||
"adaptive_digital_gain_applier_unittest.cc",
|
||||
"adaptive_mode_level_estimator_unittest.cc",
|
||||
"gain_applier_unittest.cc",
|
||||
"saturation_protector_buffer_unittest.cc",
|
||||
"saturation_protector_unittest.cc",
|
||||
|
||||
@ -13,7 +13,6 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/agc2/vad_wrapper.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
@ -15,9 +15,9 @@
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
|
||||
#include "modules/audio_processing/agc2/noise_level_estimator.h"
|
||||
#include "modules/audio_processing/agc2/saturation_protector.h"
|
||||
#include "modules/audio_processing/agc2/speech_level_estimator.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
|
||||
@ -56,7 +56,7 @@ class AdaptiveDigitalGainController {
|
||||
absl::optional<float> GetSpeechLevelDbfsIfConfident() const;
|
||||
|
||||
private:
|
||||
AdaptiveModeLevelEstimator speech_level_estimator_;
|
||||
SpeechLevelEstimator speech_level_estimator_;
|
||||
AdaptiveDigitalGainApplier gain_controller_;
|
||||
ApmDataDumper* const apm_data_dumper_;
|
||||
std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
|
||||
#include "modules/audio_processing/agc2/speech_level_estimator.h"
|
||||
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
@ -32,19 +32,19 @@ float GetInitialSpeechLevelEstimateDbfs(
|
||||
|
||||
} // namespace
|
||||
|
||||
bool AdaptiveModeLevelEstimator::LevelEstimatorState::operator==(
|
||||
const AdaptiveModeLevelEstimator::LevelEstimatorState& b) const {
|
||||
bool SpeechLevelEstimator::LevelEstimatorState::operator==(
|
||||
const SpeechLevelEstimator::LevelEstimatorState& b) const {
|
||||
return time_to_confidence_ms == b.time_to_confidence_ms &&
|
||||
level_dbfs.numerator == b.level_dbfs.numerator &&
|
||||
level_dbfs.denominator == b.level_dbfs.denominator;
|
||||
}
|
||||
|
||||
float AdaptiveModeLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
|
||||
float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
|
||||
RTC_DCHECK_NE(denominator, 0.f);
|
||||
return numerator / denominator;
|
||||
}
|
||||
|
||||
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
|
||||
SpeechLevelEstimator::SpeechLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
|
||||
: apm_data_dumper_(apm_data_dumper),
|
||||
@ -57,9 +57,9 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
|
||||
Reset();
|
||||
}
|
||||
|
||||
void AdaptiveModeLevelEstimator::Update(float rms_dbfs,
|
||||
float peak_dbfs,
|
||||
float speech_probability) {
|
||||
void SpeechLevelEstimator::Update(float rms_dbfs,
|
||||
float peak_dbfs,
|
||||
float speech_probability) {
|
||||
RTC_DCHECK_GT(rms_dbfs, -150.0f);
|
||||
RTC_DCHECK_LT(rms_dbfs, 50.0f);
|
||||
RTC_DCHECK_GT(peak_dbfs, -150.0f);
|
||||
@ -113,7 +113,7 @@ void AdaptiveModeLevelEstimator::Update(float rms_dbfs,
|
||||
DumpDebugData();
|
||||
}
|
||||
|
||||
bool AdaptiveModeLevelEstimator::IsConfident() const {
|
||||
bool SpeechLevelEstimator::IsConfident() const {
|
||||
if (adjacent_speech_frames_threshold_ == 1) {
|
||||
// Ignore `reliable_state_` when a single frame is enough to update the
|
||||
// level estimate (because it is not used).
|
||||
@ -129,21 +129,21 @@ bool AdaptiveModeLevelEstimator::IsConfident() const {
|
||||
preliminary_state_.time_to_confidence_ms == 0);
|
||||
}
|
||||
|
||||
void AdaptiveModeLevelEstimator::Reset() {
|
||||
void SpeechLevelEstimator::Reset() {
|
||||
ResetLevelEstimatorState(preliminary_state_);
|
||||
ResetLevelEstimatorState(reliable_state_);
|
||||
level_dbfs_ = initial_speech_level_dbfs_;
|
||||
num_adjacent_speech_frames_ = 0;
|
||||
}
|
||||
|
||||
void AdaptiveModeLevelEstimator::ResetLevelEstimatorState(
|
||||
void SpeechLevelEstimator::ResetLevelEstimatorState(
|
||||
LevelEstimatorState& state) const {
|
||||
state.time_to_confidence_ms = kLevelEstimatorTimeToConfidenceMs;
|
||||
state.level_dbfs.numerator = initial_speech_level_dbfs_;
|
||||
state.level_dbfs.denominator = 1.0f;
|
||||
}
|
||||
|
||||
void AdaptiveModeLevelEstimator::DumpDebugData() const {
|
||||
void SpeechLevelEstimator::DumpDebugData() const {
|
||||
apm_data_dumper_->DumpRaw(
|
||||
"agc2_adaptive_level_estimator_num_adjacent_speech_frames",
|
||||
num_adjacent_speech_frames_);
|
||||
@ -8,29 +8,29 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/agc2/vad_wrapper.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
|
||||
namespace webrtc {
|
||||
class ApmDataDumper;
|
||||
|
||||
// Level estimator for the digital adaptive gain controller.
|
||||
class AdaptiveModeLevelEstimator {
|
||||
// Active speech level estimator based on the analysis of the following
|
||||
// framewise properties: RMS level (dBFS), peak level (dBFS), speech
|
||||
// probability.
|
||||
class SpeechLevelEstimator {
|
||||
public:
|
||||
AdaptiveModeLevelEstimator(
|
||||
SpeechLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
|
||||
AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete;
|
||||
AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) =
|
||||
delete;
|
||||
SpeechLevelEstimator(const SpeechLevelEstimator&) = delete;
|
||||
SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete;
|
||||
|
||||
// Updates the level estimation.
|
||||
void Update(float rms_dbfs, float peak_dbfs, float speech_probability);
|
||||
@ -74,4 +74,4 @@ class AdaptiveModeLevelEstimator {
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC2_SPEECH_LEVEL_ESTIMATOR_H_
|
||||
@ -8,7 +8,7 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
|
||||
#include "modules/audio_processing/agc2/speech_level_estimator.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
@ -36,7 +36,7 @@ void RunOnConstantLevel(int num_iterations,
|
||||
float rms_dbfs,
|
||||
float peak_dbfs,
|
||||
float speech_probability,
|
||||
AdaptiveModeLevelEstimator& level_estimator) {
|
||||
SpeechLevelEstimator& level_estimator) {
|
||||
for (int i = 0; i < num_iterations; ++i) {
|
||||
level_estimator.Update(rms_dbfs, peak_dbfs, speech_probability);
|
||||
}
|
||||
@ -57,7 +57,7 @@ constexpr float kMaxSpeechProbability = 1.0f;
|
||||
struct TestLevelEstimator {
|
||||
explicit TestLevelEstimator(int adjacent_speech_frames_threshold)
|
||||
: data_dumper(0),
|
||||
estimator(std::make_unique<AdaptiveModeLevelEstimator>(
|
||||
estimator(std::make_unique<SpeechLevelEstimator>(
|
||||
&data_dumper,
|
||||
GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
|
||||
initial_speech_level_dbfs(estimator->level_dbfs()),
|
||||
@ -70,14 +70,14 @@ struct TestLevelEstimator {
|
||||
"level is wide enough for the tests";
|
||||
}
|
||||
ApmDataDumper data_dumper;
|
||||
std::unique_ptr<AdaptiveModeLevelEstimator> estimator;
|
||||
std::unique_ptr<SpeechLevelEstimator> estimator;
|
||||
const float initial_speech_level_dbfs;
|
||||
const float level_rms_dbfs;
|
||||
const float level_peak_dbfs;
|
||||
};
|
||||
|
||||
// Checks that the level estimator converges to a constant input speech level.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
|
||||
TEST(GainController2SpeechLevelEstimator, LevelStabilizes) {
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
level_estimator.level_rms_dbfs,
|
||||
@ -93,7 +93,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, LevelStabilizes) {
|
||||
|
||||
// Checks that the level controller does not become confident when too few
|
||||
// speech frames are observed.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {
|
||||
TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence / 2,
|
||||
level_estimator.level_rms_dbfs,
|
||||
@ -104,7 +104,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsNotConfident) {
|
||||
|
||||
// Checks that the level controller becomes confident when enough speech frames
|
||||
// are observed.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
|
||||
TEST(GainController2SpeechLevelEstimator, IsConfident) {
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
level_estimator.level_rms_dbfs,
|
||||
@ -115,8 +115,7 @@ TEST(GainController2AdaptiveModeLevelEstimator, IsConfident) {
|
||||
|
||||
// Checks that the estimated level is not affected by the level of non-speech
|
||||
// frames.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
EstimatorIgnoresNonSpeechFrames) {
|
||||
TEST(GainController2SpeechLevelEstimator, EstimatorIgnoresNonSpeechFrames) {
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
// Simulate speech.
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
@ -134,8 +133,7 @@ TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
}
|
||||
|
||||
// Checks the convergence speed of the estimator before it becomes confident.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
ConvergenceSpeedBeforeConfidence) {
|
||||
TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedBeforeConfidence) {
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
RunOnConstantLevel(/*num_iterations=*/kNumFramesToConfidence,
|
||||
level_estimator.level_rms_dbfs,
|
||||
@ -147,8 +145,7 @@ TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
}
|
||||
|
||||
// Checks the convergence speed of the estimator after it becomes confident.
|
||||
TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
ConvergenceSpeedAfterConfidence) {
|
||||
TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) {
|
||||
TestLevelEstimator level_estimator(/*adjacent_speech_frames_threshold=*/1);
|
||||
// Reach confidence using the initial level estimate.
|
||||
RunOnConstantLevel(
|
||||
@ -173,14 +170,13 @@ TEST(GainController2AdaptiveModeLevelEstimator,
|
||||
kConvergenceSpeedTestsLevelTolerance);
|
||||
}
|
||||
|
||||
class AdaptiveModeLevelEstimatorParametrization
|
||||
class SpeechLevelEstimatorParametrization
|
||||
: public ::testing::TestWithParam<int> {
|
||||
protected:
|
||||
int adjacent_speech_frames_threshold() const { return GetParam(); }
|
||||
};
|
||||
|
||||
TEST_P(AdaptiveModeLevelEstimatorParametrization,
|
||||
DoNotAdaptToShortSpeechSegments) {
|
||||
TEST_P(SpeechLevelEstimatorParametrization, DoNotAdaptToShortSpeechSegments) {
|
||||
TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
|
||||
const float initial_level = level_estimator.estimator->level_dbfs();
|
||||
ASSERT_LT(initial_level, level_estimator.level_peak_dbfs);
|
||||
@ -197,7 +193,7 @@ TEST_P(AdaptiveModeLevelEstimatorParametrization,
|
||||
EXPECT_EQ(initial_level, level_estimator.estimator->level_dbfs());
|
||||
}
|
||||
|
||||
TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
|
||||
TEST_P(SpeechLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
|
||||
TestLevelEstimator level_estimator(adjacent_speech_frames_threshold());
|
||||
const float initial_level = level_estimator.estimator->level_dbfs();
|
||||
ASSERT_LT(initial_level, level_estimator.level_peak_dbfs);
|
||||
@ -210,7 +206,7 @@ TEST_P(AdaptiveModeLevelEstimatorParametrization, AdaptToEnoughSpeechSegments) {
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(GainController2,
|
||||
AdaptiveModeLevelEstimatorParametrization,
|
||||
SpeechLevelEstimatorParametrization,
|
||||
::testing::Values(1, 9, 17));
|
||||
|
||||
} // namespace
|
||||
Reference in New Issue
Block a user