From 27fed4513f8d38e294a549b0389f237df7acb724 Mon Sep 17 00:00:00 2001 From: Hanna Silen Date: Tue, 22 Nov 2022 15:00:58 +0100 Subject: [PATCH] InputVolumeController: Make speech_probability non-optional Make the argument speech_probability non-optional in InputVolumeController::Process() and MonoInputVolumeController::Process(). Additional clean-up: Remove the flag enabled in the config. Add unit tests for MonoInputVolumeController. Bug: webrtc:7494 Change-Id: Ie28af77dc628bf71d09ce1ff033d39031f77a21e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/283700 Reviewed-by: Alessio Bazzica Commit-Queue: Hanna Silen Cr-Commit-Position: refs/heads/main@{#38710} --- .../agc2/input_volume_controller.cc | 37 +- .../agc2/input_volume_controller.h | 8 +- .../agc2/input_volume_controller_unittest.cc | 778 ++++++++++++------ modules/audio_processing/gain_controller2.cc | 9 +- .../gain_controller2_unittest.cc | 3 + 5 files changed, 568 insertions(+), 267 deletions(-) diff --git a/modules/audio_processing/agc2/input_volume_controller.cc b/modules/audio_processing/agc2/input_volume_controller.cc index fecb090fbc..ef03aaad4a 100644 --- a/modules/audio_processing/agc2/input_volume_controller.cc +++ b/modules/audio_processing/agc2/input_volume_controller.cc @@ -187,14 +187,13 @@ void MonoInputVolumeController::Initialize() { is_first_frame_ = true; } -// A speech segment is considered active if at least +// A speeh segment is considered active if at least // `update_input_volume_wait_frames_` new frames have been processed since the // previous update and the ratio of non-silence frames (i.e., frames with a -// non-empty `speech_probability` value above `speech_probability_threshold_`) -// is at least `speech_ratio_threshold_`. -void MonoInputVolumeController::Process( - absl::optional rms_error_dbfs, - absl::optional speech_probability) { +// `speech_probability` higher than `speech_probability_threshold_`) is at least +// `speech_ratio_threshold_`. +void MonoInputVolumeController::Process(absl::optional rms_error_dbfs, + float speech_probability) { if (check_volume_on_next_process_) { check_volume_on_next_process_ = false; // We have to wait until the first process call to check the volume, @@ -203,8 +202,7 @@ void MonoInputVolumeController::Process( } // Count frames with a high speech probability as speech. - if (speech_probability.has_value() && - *speech_probability >= speech_probability_threshold_) { + if (speech_probability >= speech_probability_threshold_) { ++speech_frames_since_update_input_volume_; } @@ -364,8 +362,7 @@ void MonoInputVolumeController::UpdateInputVolume(int rms_error_dbfs) { InputVolumeController::InputVolumeController(int num_capture_channels, const Config& config) - : analog_controller_enabled_(config.enabled), - num_capture_channels_(num_capture_channels), + : num_capture_channels_(num_capture_channels), min_mic_level_override_(GetMinMicLevelOverride()), capture_output_used_(true), clipped_level_step_(config.clipped_level_step), @@ -384,9 +381,8 @@ InputVolumeController::InputVolumeController(int num_capture_channels, target_range_max_dbfs_(config.target_range_max_dbfs), target_range_min_dbfs_(config.target_range_min_dbfs), channel_controllers_(num_capture_channels) { - RTC_LOG(LS_INFO) << "[agc] analog controller enabled: " - << (analog_controller_enabled_ ? "yes" : "no"); const int min_mic_level = min_mic_level_override_.value_or(kMinMicLevel); + RTC_LOG(LS_INFO) << "[agc] Input volume controller enabled"; RTC_LOG(LS_INFO) << "[agc] Min mic level: " << min_mic_level << " (overridden: " << (min_mic_level_override_.has_value() ? "yes" : "no") @@ -475,10 +471,12 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) { } } } + if (clipping_detected) { RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" << clipped_ratio; } + int step = clipped_level_step_; if (clipping_predicted) { predicted_step = std::max(predicted_step, clipped_level_step_); @@ -487,6 +485,7 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) { step = predicted_step; } } + if (clipping_detected || (clipping_predicted && use_clipping_predictor_step_)) { for (auto& state_ch : channel_controllers_) { @@ -497,10 +496,11 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) { clipping_predictor_->Reset(); } } + AggregateChannelLevels(); } -void InputVolumeController::Process(absl::optional speech_probability, +void InputVolumeController::Process(float speech_probability, absl::optional speech_level_dbfs) { AggregateChannelLevels(); @@ -509,7 +509,7 @@ void InputVolumeController::Process(absl::optional speech_probability, } absl::optional rms_error_dbfs; - if (speech_probability.has_value() && speech_level_dbfs.has_value()) { + if (speech_level_dbfs.has_value()) { // Compute the error for all frames (both speech and non-speech frames). rms_error_dbfs = GetSpeechLevelErrorDb( *speech_level_dbfs, target_range_min_dbfs_, target_range_max_dbfs_); @@ -527,14 +527,11 @@ void InputVolumeController::HandleCaptureOutputUsedChange( for (auto& controller : channel_controllers_) { controller->HandleCaptureOutputUsedChange(capture_output_used); } + capture_output_used_ = capture_output_used; } void InputVolumeController::set_stream_analog_level(int level) { - if (!analog_controller_enabled_) { - recommended_input_volume_ = level; - } - for (auto& controller : channel_controllers_) { controller->set_stream_analog_level(level); } @@ -559,9 +556,7 @@ void InputVolumeController::AggregateChannelLevels() { std::max(new_recommended_input_volume, *min_mic_level_override_); } - if (analog_controller_enabled_) { - recommended_input_volume_ = new_recommended_input_volume; - } + recommended_input_volume_ = new_recommended_input_volume; } } // namespace webrtc diff --git a/modules/audio_processing/agc2/input_volume_controller.h b/modules/audio_processing/agc2/input_volume_controller.h index d2f3970c81..2cd764e6ee 100644 --- a/modules/audio_processing/agc2/input_volume_controller.h +++ b/modules/audio_processing/agc2/input_volume_controller.h @@ -35,7 +35,6 @@ class InputVolumeController final { public: // Config for the constructor. struct Config { - bool enabled = false; // Lowest input volume level that will be applied in response to clipping. int clipped_level_min = 70; // Amount input volume level is lowered with every clipping event. Limited @@ -98,7 +97,7 @@ class InputVolumeController final { // `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value // of `speech_probability` is expected to be in the range [0.0f, 1.0f] and // `speech_level_dbfs` in the the range [-90.f, 30.0f]. - void Process(absl::optional speech_probability, + void Process(float speech_probability, absl::optional speech_level_dbfs); // TODO(bugs.webrtc.org/7494): Return recommended input volume and remove @@ -142,8 +141,6 @@ class InputVolumeController final { void AggregateChannelLevels(); - const bool analog_controller_enabled_; - const int num_capture_channels_; // If not empty, the value is used to override the minimum input volume. @@ -213,8 +210,7 @@ class MonoInputVolumeController { // whether `rms_error_dbfs` is positive or negative. Updates are only allowed // for active speech segments and when `rms_error_dbfs` is not empty. Must be // called after `HandleClipping()`. - void Process(absl::optional rms_error_dbfs, - absl::optional speech_probability); + void Process(absl::optional rms_error_dbfs, float speech_probability); // Returns the recommended input volume. Must be called after `Process()`. int recommended_analog_level() const { return recommended_input_volume_; } diff --git a/modules/audio_processing/agc2/input_volume_controller_unittest.cc b/modules/audio_processing/agc2/input_volume_controller_unittest.cc index e9be177a7f..4c3413bef3 100644 --- a/modules/audio_processing/agc2/input_volume_controller_unittest.cc +++ b/modules/audio_processing/agc2/input_volume_controller_unittest.cc @@ -43,6 +43,7 @@ constexpr int kClippedLevelStep = 15; constexpr float kClippedRatioThreshold = 0.1f; constexpr int kClippedWaitFrames = 300; constexpr float kHighSpeechProbability = 0.7f; +constexpr float kLowSpeechProbability = 0.1f; constexpr float kSpeechLevel = -25.0f; constexpr float kSpeechProbabilityThreshold = 0.5f; constexpr float kSpeechRatioThreshold = 0.8f; @@ -65,7 +66,6 @@ std::unique_ptr CreateInputVolumeController( bool enable_clipping_predictor = false, int update_input_volume_wait_frames = 0) { InputVolumeControllerConfig config{ - .enabled = true, .clipped_level_min = kClippedMin, .clipped_level_step = clipped_level_step, .clipped_ratio_threshold = clipped_ratio_threshold, @@ -180,7 +180,7 @@ void WriteAudioBufferSamples(float samples_value, // `InputVolumeControllerTestHelper::CallAgcSequence()` instead. void CallPreProcessAndProcess(int num_calls, const AudioBuffer& audio_buffer, - absl::optional speech_probability, + float speech_probability, absl::optional speech_level, InputVolumeController& manager) { for (int n = 0; n < num_calls; ++n) { @@ -221,7 +221,7 @@ class SpeechSamplesReader { // `Process()`. void Feed(int num_frames, int gain_db, - absl::optional speech_probability, + float speech_probability, absl::optional speech_level, InputVolumeController& agc) { float gain = std::pow(10.0f, gain_db / 20.0f); // From dB to linear gain. @@ -253,13 +253,24 @@ class SpeechSamplesReader { const std::streamsize buffer_num_bytes_; }; +// Runs the MonoInputVolumeControl processing sequence following the API +// contract. Returns the updated recommended input volume. +float UpdateRecommendedInputVolume(MonoInputVolumeController& controller, + int applied_input_volume, + float speech_probability, + const absl::optional rms_error_dbfs) { + controller.set_stream_analog_level(applied_input_volume); + EXPECT_EQ(controller.recommended_analog_level(), applied_input_volume); + controller.Process(rms_error_dbfs, speech_probability); + return controller.recommended_analog_level(); +} + } // namespace // TODO(bugs.webrtc.org/12874): Use constexpr struct with designated // initializers once fixed. constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() { InputVolumeControllerConfig config{ - .enabled = true, .clipped_level_min = kClippedMin, .clipped_level_step = kClippedLevelStep, .clipped_ratio_threshold = kClippedRatioThreshold, @@ -274,12 +285,6 @@ constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() { return config; } -constexpr InputVolumeControllerConfig GetDisabledInputVolumeControllerConfig() { - InputVolumeControllerConfig config = GetInputVolumeControllerTestConfig(); - config.enabled = false; - return config; -} - // Helper class that provides an `InputVolumeController` instance with an // `AudioBuffer` instance and `CallAgcSequence()`, a helper method that runs the // `InputVolumeController` instance on the `AudioBuffer` one by sticking to the @@ -307,7 +312,7 @@ class InputVolumeControllerTestHelper { // - Uses `audio_buffer` to call `AnalyzePreProcess()` and `Process()`; // Returns the recommended input volume. int CallAgcSequence(int applied_input_volume, - absl::optional speech_probability, + float speech_probability, absl::optional speech_level) { manager.set_stream_analog_level(applied_input_volume); manager.AnalyzePreProcess(audio_buffer); @@ -320,7 +325,7 @@ class InputVolumeControllerTestHelper { // TODO(bugs.webrtc.org/7494): Let the caller write `audio_buffer` and use // `CallAgcSequence()`. void CallProcess(int num_calls, - absl::optional speech_probability, + float speech_probability, absl::optional speech_level) { for (int i = 0; i < num_calls; ++i) { manager.Process(speech_probability, speech_level); @@ -388,7 +393,9 @@ class InputVolumeControllerParametrizedTest return std::get<0>(GetParam()).value_or(kMinMicLevel); } + // TODO(webrtc:7494): Remove, `RmsErrorHasValue()` always returns true. bool RmsErrorHasValue() const { return std::get<1>(GetParam()); } + absl::optional GetValueOrEmpty(float value) const { return RmsErrorHasValue() ? absl::optional(value) : absl::nullopt; } @@ -403,199 +410,109 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine(testing::Values(absl::nullopt, 12, 20), testing::Values(true))); -// Checks that when the analog controller is disabled, no downward adaptation -// takes place. -// TODO(webrtc:7494): Revisit the test after moving the number of update wait -// frames to AMP config. The test passes but internally the gain update timing -// differs. -TEST_P(InputVolumeControllerParametrizedTest, - DisabledAnalogAgcDoesNotAdaptDownwards) { - InputVolumeController manager_no_analog_agc( - kNumChannels, GetDisabledInputVolumeControllerConfig()); - manager_no_analog_agc.Initialize(); - InputVolumeController manager_with_analog_agc( - kNumChannels, GetInputVolumeControllerTestConfig()); - manager_with_analog_agc.Initialize(); - - AudioBuffer audio_buffer(kSampleRateHz, kNumChannels, kSampleRateHz, - kNumChannels, kSampleRateHz, kNumChannels); - - constexpr int kInputVolume = 250; - static_assert(kInputVolume > kInitialInputVolume, "Increase `kInputVolume`."); - manager_no_analog_agc.set_stream_analog_level(kInputVolume); - manager_with_analog_agc.set_stream_analog_level(kInputVolume); - - // Make a first call with input that doesn't clip in order to let the - // controller read the input volume. That is needed because clipping input - // causes the controller to stay in idle state for - // `InputVolumeControllerConfig::clipped_wait_frames` frames. - WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipping_ratio=*/0.0f, - audio_buffer); - manager_no_analog_agc.AnalyzePreProcess(audio_buffer); - manager_with_analog_agc.AnalyzePreProcess(audio_buffer); - manager_no_analog_agc.Process(GetValueOrEmpty(kHighSpeechProbability), - GetValueOrEmpty(-18.0f)); - manager_with_analog_agc.Process(GetValueOrEmpty(kHighSpeechProbability), - GetValueOrEmpty(-18.0f)); - - // Feed clipping input to trigger a downward adapation of the analog level. - WriteAudioBufferSamples(/*samples_value=*/0.0f, /*clipping_ratio=*/0.2f, - audio_buffer); - manager_no_analog_agc.AnalyzePreProcess(audio_buffer); - manager_with_analog_agc.AnalyzePreProcess(audio_buffer); - manager_no_analog_agc.Process(GetValueOrEmpty(kHighSpeechProbability), - GetValueOrEmpty(-10.0f)); - manager_with_analog_agc.Process(GetValueOrEmpty(kHighSpeechProbability), - GetValueOrEmpty(-10.0f)); - - // Check that no adaptation occurs when the analog controller is disabled - // and make sure that the test triggers a downward adaptation otherwise. - EXPECT_EQ(manager_no_analog_agc.recommended_analog_level(), kInputVolume); - ASSERT_LT(manager_with_analog_agc.recommended_analog_level(), kInputVolume); -} - -// Checks that when the analog controller is disabled, no upward adaptation -// takes place. -// TODO(webrtc:7494): Revisit the test after moving the number of update wait -// frames to APM config. The test passes but internally the gain update timing -// differs. -TEST_P(InputVolumeControllerParametrizedTest, - DisabledAnalogAgcDoesNotAdaptUpwards) { - InputVolumeController manager_no_analog_agc( - kNumChannels, GetDisabledInputVolumeControllerConfig()); - manager_no_analog_agc.Initialize(); - InputVolumeController manager_with_analog_agc( - kNumChannels, GetInputVolumeControllerTestConfig()); - manager_with_analog_agc.Initialize(); - - constexpr int kInputVolume = kInitialInputVolume; - manager_no_analog_agc.set_stream_analog_level(kInputVolume); - manager_with_analog_agc.set_stream_analog_level(kInputVolume); - - // Feed speech with low energy to trigger an upward adapation of the analog - // level. - constexpr int kNumFrames = 125; - constexpr int kGainDb = -20; - SpeechSamplesReader reader; - reader.Feed(kNumFrames, kGainDb, GetValueOrEmpty(kHighSpeechProbability), - GetValueOrEmpty(-42.0f), manager_no_analog_agc); - reader.Feed(kNumFrames, kGainDb, GetValueOrEmpty(kHighSpeechProbability), - GetValueOrEmpty(-42.0f), manager_with_analog_agc); - - // Check that no adaptation occurs when the analog controller is disabled - // and make sure that the test triggers an upward adaptation otherwise. - EXPECT_EQ(manager_no_analog_agc.recommended_analog_level(), kInputVolume); - ASSERT_GT(manager_with_analog_agc.recommended_analog_level(), kInputVolume); -} - TEST_P(InputVolumeControllerParametrizedTest, StartupMinVolumeConfigurationIsRespected) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); EXPECT_EQ(kInitialInputVolume, helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, MicVolumeResponseToRmsError) { - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, speech_probability, + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); // Inside the digital gain's window; no change of volume. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-23.0f)); // Inside the digital gain's window; no change of volume. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-28.0f)); // Above the digital gain's window; volume should be increased. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-29.0f)); EXPECT_EQ(128, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-38.0f)); EXPECT_EQ(156, helper.manager.recommended_analog_level()); // Inside the digital gain's window; no change of volume. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-23.0f)); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-18.0f)); // Below the digial gain's window; volume should be decreased. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-17.0f)); EXPECT_EQ(155, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-17.0f)); EXPECT_EQ(151, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-9.0f)); EXPECT_EQ(119, helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, MicVolumeIsLimited) { - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, speech_probability, + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); // Maximum upwards change is limited. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(183, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(243, helper.manager.recommended_analog_level()); // Won't go higher than the maximum. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(255, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-17.0f)); EXPECT_EQ(254, helper.manager.recommended_analog_level()); // Maximum downwards change is limited. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(22.0f)); EXPECT_EQ(194, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(22.0f)); EXPECT_EQ(137, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(22.0f)); EXPECT_EQ(88, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(22.0f)); EXPECT_EQ(54, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(22.0f)); EXPECT_EQ(33, helper.manager.recommended_analog_level()); // Won't go lower than the minimum. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(22.0f)); EXPECT_EQ(std::max(18, GetMinMicLevel()), helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(22.0f)); EXPECT_EQ(std::max(12, GetMinMicLevel()), helper.manager.recommended_analog_level()); @@ -603,20 +520,17 @@ TEST_P(InputVolumeControllerParametrizedTest, MicVolumeIsLimited) { TEST_P(InputVolumeControllerParametrizedTest, NoActionWhileMuted) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.manager.HandleCaptureOutputUsedChange(false); - helper.manager.Process(GetValueOrEmpty(kHighSpeechProbability), - GetValueOrEmpty(kSpeechLevel)); + helper.manager.Process(kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); } TEST_P(InputVolumeControllerParametrizedTest, UnmutingChecksVolumeWithoutRaising) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.manager.HandleCaptureOutputUsedChange(false); @@ -626,15 +540,14 @@ TEST_P(InputVolumeControllerParametrizedTest, helper.manager.set_stream_analog_level(kInputVolume); // SetMicVolume should not be called. - helper.CallProcess(/*num_calls=*/1, GetValueOrEmpty(kHighSpeechProbability), + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); EXPECT_EQ(127, helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, UnmutingRaisesTooLowVolume) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.manager.HandleCaptureOutputUsedChange(false); @@ -643,67 +556,63 @@ TEST_P(InputVolumeControllerParametrizedTest, UnmutingRaisesTooLowVolume) { constexpr int kInputVolume = 11; helper.manager.set_stream_analog_level(kInputVolume); - helper.CallProcess(/*num_calls=*/1, GetValueOrEmpty(kHighSpeechProbability), + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); EXPECT_EQ(GetMinMicLevel(), helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, ManualLevelChangeResultsInNoSetMicCall) { - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, speech_probability, + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); // GetMicVolume returns a value outside of the quantization slack, indicating // a manual volume change. ASSERT_NE(helper.manager.recommended_analog_level(), 154); helper.manager.set_stream_analog_level(154); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-29.0f)); EXPECT_EQ(154, helper.manager.recommended_analog_level()); // Do the same thing, except downwards now. helper.manager.set_stream_analog_level(100); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-17.0f)); EXPECT_EQ(100, helper.manager.recommended_analog_level()); // And finally verify the AGC continues working without a manual change. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-17.0f)); EXPECT_EQ(99, helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, RecoveryAfterManualLevelChangeFromMax) { - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, speech_probability, + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); // Force the mic up to max volume. Takes a few steps due to the residual // gain limitation. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(183, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(243, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(255, helper.manager.recommended_analog_level()); // Manual change does not result in SetMicVolume call. helper.manager.set_stream_analog_level(50); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-17.0f)); EXPECT_EQ(50, helper.manager.recommended_analog_level()); // Continues working as usual afterwards. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-38.0f)); EXPECT_EQ(65, helper.manager.recommended_analog_level()); @@ -718,29 +627,27 @@ TEST_P(InputVolumeControllerParametrizedTest, GTEST_SKIP() << "Skipped. Min mic level overridden."; } - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, speech_probability, + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); // Manual change below min, but strictly positive, otherwise AGC won't take // any action. helper.manager.set_stream_analog_level(1); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-17.0f)); EXPECT_EQ(1, helper.manager.recommended_analog_level()); // Continues working as usual afterwards. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-29.0f)); EXPECT_EQ(1, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(10, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-38.0f)); EXPECT_EQ(16, helper.manager.recommended_analog_level()); } @@ -754,24 +661,21 @@ TEST_P(InputVolumeControllerParametrizedTest, GTEST_SKIP() << "Skipped. Min mic level not overridden."; } - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, speech_probability, + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); // Manual change below min, but strictly positive, otherwise // AGC won't take any action. helper.manager.set_stream_analog_level(1); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-17.0f)); EXPECT_EQ(GetMinMicLevel(), helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, NoClippingHasNoImpact) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.CallPreProc(/*num_calls=*/100, /*clipped_ratio=*/0); @@ -781,8 +685,7 @@ TEST_P(InputVolumeControllerParametrizedTest, NoClippingHasNoImpact) { TEST_P(InputVolumeControllerParametrizedTest, ClippingUnderThresholdHasNoImpact) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/0.099); @@ -791,8 +694,7 @@ TEST_P(InputVolumeControllerParametrizedTest, TEST_P(InputVolumeControllerParametrizedTest, ClippingLowersVolume) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/255, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/0.2); @@ -802,8 +704,7 @@ TEST_P(InputVolumeControllerParametrizedTest, ClippingLowersVolume) { TEST_P(InputVolumeControllerParametrizedTest, WaitingPeriodBetweenClippingChecks) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/255, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); @@ -819,8 +720,7 @@ TEST_P(InputVolumeControllerParametrizedTest, TEST_P(InputVolumeControllerParametrizedTest, ClippingLoweringIsLimited) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/180, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(/*applied_input_volume=*/180, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); @@ -833,44 +733,38 @@ TEST_P(InputVolumeControllerParametrizedTest, ClippingLoweringIsLimited) { TEST_P(InputVolumeControllerParametrizedTest, ClippingMaxIsRespectedWhenEqualToLevel) { - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/255, speech_probability, + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); EXPECT_EQ(240, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/10, speech_probability, + helper.CallProcess(/*num_calls=*/10, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(240, helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, ClippingMaxIsRespectedWhenHigherThanLevel) { - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/200, speech_probability, + helper.CallAgcSequence(/*applied_input_volume=*/200, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); EXPECT_EQ(185, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-58.0f)); EXPECT_EQ(240, helper.manager.recommended_analog_level()); - helper.CallProcess(/*num_calls=*/10, speech_probability, + helper.CallProcess(/*num_calls=*/10, kHighSpeechProbability, GetValueOrEmpty(-58.0f)); EXPECT_EQ(240, helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, UserCanRaiseVolumeAfterClipping) { - const auto speech_probability = GetValueOrEmpty(kHighSpeechProbability); - InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/225, speech_probability, + helper.CallAgcSequence(/*applied_input_volume=*/225, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.CallPreProc(/*num_calls=*/1, /*clipped_ratio=*/kAboveClippedThreshold); @@ -878,20 +772,20 @@ TEST_P(InputVolumeControllerParametrizedTest, UserCanRaiseVolumeAfterClipping) { // User changed the volume. helper.manager.set_stream_analog_level(250); - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-32.0f)); EXPECT_EQ(250, helper.manager.recommended_analog_level()); // Move down... - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-8.0f)); EXPECT_EQ(210, helper.manager.recommended_analog_level()); // And back up to the new max established by the user. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-58.0f)); EXPECT_EQ(250, helper.manager.recommended_analog_level()); // Will not move above new maximum. - helper.CallProcess(/*num_calls=*/1, speech_probability, + helper.CallProcess(/*num_calls=*/1, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(250, helper.manager.recommended_analog_level()); } @@ -899,8 +793,7 @@ TEST_P(InputVolumeControllerParametrizedTest, UserCanRaiseVolumeAfterClipping) { TEST_P(InputVolumeControllerParametrizedTest, ClippingDoesNotPullLowVolumeBackUp) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/80, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(/*applied_input_volume=*/80, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); int initial_volume = helper.manager.recommended_analog_level(); @@ -910,20 +803,18 @@ TEST_P(InputVolumeControllerParametrizedTest, TEST_P(InputVolumeControllerParametrizedTest, TakesNoActionOnZeroMicVolume) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(kInitialInputVolume, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(kInitialInputVolume, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); helper.manager.set_stream_analog_level(0); - helper.CallProcess(/*num_calls=*/10, GetValueOrEmpty(kHighSpeechProbability), + helper.CallProcess(/*num_calls=*/10, kHighSpeechProbability, GetValueOrEmpty(-48.0f)); EXPECT_EQ(0, helper.manager.recommended_analog_level()); } TEST_P(InputVolumeControllerParametrizedTest, ClippingDetectionLowersVolume) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/255, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); EXPECT_EQ(255, helper.manager.recommended_analog_level()); @@ -936,8 +827,7 @@ TEST_P(InputVolumeControllerParametrizedTest, ClippingDetectionLowersVolume) { TEST_P(InputVolumeControllerParametrizedTest, DisabledClippingPredictorDoesNotLowerVolume) { InputVolumeControllerTestHelper helper; - helper.CallAgcSequence(/*applied_input_volume=*/255, - GetValueOrEmpty(kHighSpeechProbability), + helper.CallAgcSequence(/*applied_input_volume=*/255, kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); EXPECT_FALSE(helper.manager.clipping_predictor_enabled()); @@ -1023,11 +913,11 @@ TEST(InputVolumeControllerTest, return manager; }; std::unique_ptr manager = factory(); - std::unique_ptr manager_with_rms; + std::unique_ptr manager_with_override; { test::ScopedFieldTrials field_trial( GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride)); - manager_with_rms = factory(); + manager_with_override = factory(); } // Create a test input signal which containts 80% of clipped samples. @@ -1037,24 +927,26 @@ TEST(InputVolumeControllerTest, audio_buffer); // Simulate 4 seconds of clipping; it is expected to trigger a downward - // adjustment of the analog gain. + // adjustment of the analog gain. Use low speech probability to limit the + // volume changes to clipping handling. CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer, - /*speech_probability=*/absl::nullopt, - /*speech_level=*/absl::nullopt, *manager); + kLowSpeechProbability, /*speech_level=*/-42.0f, + *manager); CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer, - /*speech_probability=*/absl::nullopt, - /*speech_level=*/absl::nullopt, *manager_with_rms); + kLowSpeechProbability, /*speech_level=*/-42.0f, + *manager_with_override); // Make sure that an adaptation occurred. ASSERT_GT(manager->recommended_analog_level(), 0); // Check that the test signal triggers a larger downward adaptation for // `manager`, which is allowed to reach a lower gain. - EXPECT_GT(manager_with_rms->recommended_analog_level(), + EXPECT_GT(manager_with_override->recommended_analog_level(), manager->recommended_analog_level()); - // Check that the gain selected by `manager_with_rms` equals the minimum - // value overridden via field trial. - EXPECT_EQ(manager_with_rms->recommended_analog_level(), kMinMicLevelOverride); + // Check that the gain selected by `manager_with_override` equals the + // minimum value overridden via field trial. + EXPECT_EQ(manager_with_override->recommended_analog_level(), + kMinMicLevelOverride); } // Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is @@ -1078,11 +970,11 @@ TEST(InputVolumeControllerTest, return manager; }; std::unique_ptr manager = factory(); - std::unique_ptr manager_with_rms; + std::unique_ptr manager_with_override; { test::ScopedFieldTrials field_trial( GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride)); - manager_with_rms = factory(); + manager_with_override = factory(); } // Create a test input signal which containts 80% of clipped samples. @@ -1094,24 +986,23 @@ TEST(InputVolumeControllerTest, // Simulate 4 seconds of clipping; it is expected to trigger a downward // adjustment of the analog gain. CallPreProcessAndProcess( - /*num_calls=*/400, audio_buffer, - /*speech_probability=*/0.7f, + /*num_calls=*/400, audio_buffer, kHighSpeechProbability, /*speech_level=*/-18.0f, *manager); CallPreProcessAndProcess( - /*num_calls=*/400, audio_buffer, - /*speech_probability=*/0.7f, - /*speech_level=*/-18.0f, *manager_with_rms); + /*num_calls=*/400, audio_buffer, kHighSpeechProbability, + /*speech_level=*/-18.0f, *manager_with_override); // Make sure that an adaptation occurred. ASSERT_GT(manager->recommended_analog_level(), 0); // Check that the test signal triggers a larger downward adaptation for // `manager`, which is allowed to reach a lower gain. - EXPECT_GT(manager_with_rms->recommended_analog_level(), + EXPECT_GT(manager_with_override->recommended_analog_level(), manager->recommended_analog_level()); - // Check that the gain selected by `manager_with_rms` equals the minimum + // Check that the gain selected by `manager_with_override` equals the minimum // value overridden via field trial. - EXPECT_EQ(manager_with_rms->recommended_analog_level(), kMinMicLevelOverride); + EXPECT_EQ(manager_with_override->recommended_analog_level(), + kMinMicLevelOverride); } // Checks that, when the "WebRTC-Audio-AgcMinMicLevelExperiment" field trial is @@ -1126,7 +1017,6 @@ TEST(InputVolumeControllerTest, // Use a large clipped level step to more quickly decrease the analog gain // with clipping. InputVolumeControllerConfig config = kDefaultInputVolumeControllerConfig; - config.enabled = true; config.clipped_level_step = 64; config.clipped_ratio_threshold = kClippedRatioThreshold; config.clipped_wait_frames = kClippedWaitFrames; @@ -1137,7 +1027,7 @@ TEST(InputVolumeControllerTest, return controller; }; std::unique_ptr manager = factory(); - std::unique_ptr manager_with_rms; + std::unique_ptr manager_with_override; { constexpr int kMinMicLevelOverride = 20; static_assert(kDefaultInputVolumeControllerConfig.clipped_level_min >= @@ -1145,7 +1035,7 @@ TEST(InputVolumeControllerTest, "Use a lower override value."); test::ScopedFieldTrials field_trial( GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride)); - manager_with_rms = factory(); + manager_with_override = factory(); } // Create a test input signal which containts 80% of clipped samples. @@ -1155,13 +1045,14 @@ TEST(InputVolumeControllerTest, audio_buffer); // Simulate 4 seconds of clipping; it is expected to trigger a downward - // adjustment of the analog gain. + // adjustment of the analog gain. Use low speech probability to limit the + // volume changes to clipping handling. CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer, - /*speech_probability=*/absl::nullopt, - /*speech_level=*/absl::nullopt, *manager); + kLowSpeechProbability, /*speech_level=*/-18, + *manager); CallPreProcessAndProcess(/*num_calls=*/400, audio_buffer, - /*speech_probability=*/absl::nullopt, - /*speech_level=*/absl::nullopt, *manager_with_rms); + kLowSpeechProbability, /*speech_level=*/-18, + *manager_with_override); // Make sure that an adaptation occurred. ASSERT_GT(manager->recommended_analog_level(), 0); @@ -1171,8 +1062,8 @@ TEST(InputVolumeControllerTest, // expected because the minimum microphone level override is less than the // minimum level used when clipping is detected. EXPECT_EQ(manager->recommended_analog_level(), - manager_with_rms->recommended_analog_level()); - EXPECT_EQ(manager_with_rms->recommended_analog_level(), + manager_with_override->recommended_analog_level()); + EXPECT_EQ(manager_with_override->recommended_analog_level(), kDefaultInputVolumeControllerConfig.clipped_level_min); } @@ -1191,7 +1082,6 @@ TEST(InputVolumeControllerTest, // Use a large clipped level step to more quickly decrease the analog gain // with clipping. InputVolumeControllerConfig config = kDefaultInputVolumeControllerConfig; - config.enabled = true; config.clipped_level_step = 64; config.clipped_ratio_threshold = kClippedRatioThreshold; config.clipped_wait_frames = kClippedWaitFrames; @@ -1202,7 +1092,7 @@ TEST(InputVolumeControllerTest, return controller; }; std::unique_ptr manager = factory(); - std::unique_ptr manager_with_rms; + std::unique_ptr manager_with_override; { constexpr int kMinMicLevelOverride = 20; static_assert(kDefaultInputVolumeControllerConfig.clipped_level_min >= @@ -1210,7 +1100,7 @@ TEST(InputVolumeControllerTest, "Use a lower override value."); test::ScopedFieldTrials field_trial( GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride)); - manager_with_rms = factory(); + manager_with_override = factory(); } // Create a test input signal which containts 80% of clipped samples. @@ -1226,7 +1116,7 @@ TEST(InputVolumeControllerTest, CallPreProcessAndProcess( /*num_calls=*/400, audio_buffer, /*speech_probability=*/0.7f, - /*speech_level=*/-18.0f, *manager_with_rms); + /*speech_level=*/-18.0f, *manager_with_override); // Make sure that an adaptation occurred. ASSERT_GT(manager->recommended_analog_level(), 0); @@ -1236,8 +1126,8 @@ TEST(InputVolumeControllerTest, // expected because the minimum microphone level override is less than the // minimum level used when clipping is detected. EXPECT_EQ(manager->recommended_analog_level(), - manager_with_rms->recommended_analog_level()); - EXPECT_EQ(manager_with_rms->recommended_analog_level(), + manager_with_override->recommended_analog_level()); + EXPECT_EQ(manager_with_override->recommended_analog_level(), kDefaultInputVolumeControllerConfig.clipped_level_min); } @@ -1319,8 +1209,7 @@ TEST_P(InputVolumeControllerParametrizedTest, EXPECT_FALSE(manager.clipping_predictor_enabled()); EXPECT_FALSE(manager.use_clipping_predictor_step()); EXPECT_EQ(manager.recommended_analog_level(), 255); - manager.Process(GetValueOrEmpty(kHighSpeechProbability), - GetValueOrEmpty(kSpeechLevel)); + manager.Process(kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); CallPreProcessAudioBuffer(/*num_calls=*/10, /*peak_ratio=*/0.99f, manager); EXPECT_EQ(manager.recommended_analog_level(), 255); CallPreProcessAudioBuffer(/*num_calls=*/300, /*peak_ratio=*/0.99f, manager); @@ -1357,9 +1246,9 @@ TEST_P(InputVolumeControllerParametrizedTest, manager_with_prediction.set_stream_analog_level(kInitialLevel); manager_without_prediction.set_stream_analog_level(kInitialLevel); - manager_with_prediction.Process(GetValueOrEmpty(kHighSpeechProbability), + manager_with_prediction.Process(kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); - manager_without_prediction.Process(GetValueOrEmpty(kHighSpeechProbability), + manager_without_prediction.Process(kHighSpeechProbability, GetValueOrEmpty(kSpeechLevel)); EXPECT_TRUE(manager_with_prediction.clipping_predictor_enabled()); @@ -1450,11 +1339,12 @@ TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) { manager.set_stream_analog_level(kInputVolume); // Feed speech with low energy that would trigger an upward adapation of - // the analog level if an speech level and RMS values were not empty. + // the analog level if an speech level was not low and the RMS level empty. constexpr int kNumFrames = 125; constexpr int kGainDb = -20; SpeechSamplesReader reader; - reader.Feed(kNumFrames, kGainDb, absl::nullopt, absl::nullopt, manager); + reader.Feed(kNumFrames, kGainDb, kLowSpeechProbability, absl::nullopt, + manager); // Check that no adaptation occurs. ASSERT_EQ(manager.recommended_analog_level(), kInputVolume); @@ -1610,4 +1500,416 @@ TEST(InputVolumeControllerTest, SpeechProbabilityThresholdIsEffective) { ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume); } +TEST(MonoInputVolumeControllerTest, CheckHandleClippingLowersVolume) { + constexpr int kInitialInputVolume = 100; + constexpr int kInputVolumeStep = 29; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/70, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + UpdateRecommendedInputVolume(mono_controller, kInitialInputVolume, + kLowSpeechProbability, + /*rms_error_dbfs*/ -10.0f); + + mono_controller.HandleClipping(kInputVolumeStep); + + EXPECT_EQ(mono_controller.recommended_analog_level(), + kInitialInputVolume - kInputVolumeStep); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessNegativeRmsErrorDecreasesInputVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, -10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, -10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, 10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, 10.0f); + volume = UpdateRecommendedInputVolume(mono_controller, volume, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessNegativeRmsErrorDecreasesInputVolumeWithLimit) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_3( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, + /*speech_probability_threshold=*/0.7, + /*speech_ratio_threshold=*/0.8); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + mono_controller_3.Initialize(); + + // Process RMS errors in the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -14.0f); + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -14.0f); + // Process RMS errors outside the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -15.0f); + int volume_3 = UpdateRecommendedInputVolume( + mono_controller_3, kInitialInputVolume, kHighSpeechProbability, -30.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -15.0f); + volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3, + kHighSpeechProbability, -30.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); + EXPECT_EQ(volume_2, volume_3); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolumeWithLimit) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_3( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + mono_controller_3.Initialize(); + + // Process RMS errors in the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, 14.0f); + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, 14.0f); + // Process RMS errors outside the range + // [`-kMaxResidualGainChange`, `kMaxResidualGainChange`]. + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, 15.0f); + int volume_3 = UpdateRecommendedInputVolume( + mono_controller_3, kInitialInputVolume, kHighSpeechProbability, 30.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, 15.0f); + volume_3 = UpdateRecommendedInputVolume(mono_controller_3, volume_3, + kHighSpeechProbability, 30.0f); + + EXPECT_GT(volume_1, kInitialInputVolume); + EXPECT_GT(volume_2, volume_1); + EXPECT_EQ(volume_2, volume_3); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessRmsErrorDecreasesInputVolumeRepeatedly) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume_before = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, -10.0f); + volume_before = UpdateRecommendedInputVolume(mono_controller, volume_before, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_before, kInitialInputVolume); + + int volume_after = UpdateRecommendedInputVolume( + mono_controller, volume_before, kHighSpeechProbability, -10.0f); + volume_after = UpdateRecommendedInputVolume(mono_controller, volume_after, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_after, volume_before); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessPositiveRmsErrorIncreasesInputVolumeRepeatedly) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller( + /*clipped_level_min=*/64, + /*min_mic_level=*/32, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller.Initialize(); + + int volume_before = UpdateRecommendedInputVolume( + mono_controller, kInitialInputVolume, kHighSpeechProbability, 10.0f); + volume_before = UpdateRecommendedInputVolume(mono_controller, volume_before, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume_before, kInitialInputVolume); + + int volume_after = UpdateRecommendedInputVolume( + mono_controller, volume_before, kHighSpeechProbability, 10.0f); + volume_after = UpdateRecommendedInputVolume(mono_controller, volume_after, + kHighSpeechProbability, 10.0f); + + EXPECT_GT(volume_after, volume_before); +} + +TEST(MonoInputVolumeControllerTest, CheckClippedLevelMinIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr int kClippedLevelMin = 70; + MonoInputVolumeController mono_controller_1( + kClippedLevelMin, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + kClippedLevelMin, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + // Process one frame to reset the state for `HandleClipping()`. + EXPECT_EQ(UpdateRecommendedInputVolume(mono_controller_1, kInitialInputVolume, + kLowSpeechProbability, -10.0f), + kInitialInputVolume); + EXPECT_EQ(UpdateRecommendedInputVolume(mono_controller_2, kInitialInputVolume, + kLowSpeechProbability, -10.0f), + kInitialInputVolume); + + mono_controller_1.HandleClipping(29); + mono_controller_2.HandleClipping(31); + + EXPECT_EQ(mono_controller_2.recommended_analog_level(), kClippedLevelMin); + EXPECT_LT(mono_controller_2.recommended_analog_level(), + mono_controller_1.recommended_analog_level()); +} + +TEST(MonoInputVolumeControllerTest, CheckMinMicLevelIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr int kMinMicLevel = 64; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, kMinMicLevel, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, kMinMicLevel, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -30.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); + EXPECT_EQ(volume_2, kMinMicLevel); +} + +TEST(MonoInputVolumeControllerTest, + CheckUpdateInputVolumeWaitFramesIsEffective) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/1, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/3, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_LT(volume_2, kInitialInputVolume); +} + +TEST(MonoInputVolumeControllerTest, + CheckSpeechProbabilityThresholdIsEffective) { + constexpr int kInitialInputVolume = 100; + constexpr float kSpeechProbabilityThreshold = 0.8f; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kSpeechProbabilityThreshold, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kSpeechProbabilityThreshold, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = + UpdateRecommendedInputVolume(mono_controller_1, kInitialInputVolume, + kSpeechProbabilityThreshold, -10.0f); + int volume_2 = + UpdateRecommendedInputVolume(mono_controller_2, kInitialInputVolume, + kSpeechProbabilityThreshold, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, volume_1, kSpeechProbabilityThreshold - 0.1f, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kSpeechProbabilityThreshold, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); +} + +TEST(MonoInputVolumeControllerTest, CheckSpeechRatioThresholdIsEffective) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/4, kHighSpeechProbability, + /*speech_ratio_threshold=*/0.75f); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/4, kHighSpeechProbability, + /*speech_ratio_threshold=*/0.75f); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kHighSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kLowSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kLowSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume(mono_controller_1, volume_1, + kLowSpeechProbability, -10.0f); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); +} + +TEST(MonoInputVolumeControllerTest, + CheckProcessEmptyRmsErrorDoesNotLowerVolume) { + constexpr int kInitialInputVolume = 100; + MonoInputVolumeController mono_controller_1( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + MonoInputVolumeController mono_controller_2( + /*clipped_level_min=*/64, + /*min_mic_level=*/84, + /*update_input_volume_wait_frames=*/2, kHighSpeechProbability, + kSpeechRatioThreshold); + mono_controller_1.Initialize(); + mono_controller_2.Initialize(); + + int volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, kInitialInputVolume, kHighSpeechProbability, -10.0f); + int volume_2 = UpdateRecommendedInputVolume( + mono_controller_2, kInitialInputVolume, kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_EQ(volume_2, kInitialInputVolume); + + volume_1 = UpdateRecommendedInputVolume( + mono_controller_1, volume_1, kHighSpeechProbability, absl::nullopt); + volume_2 = UpdateRecommendedInputVolume(mono_controller_2, volume_2, + kHighSpeechProbability, -10.0f); + + EXPECT_EQ(volume_1, kInitialInputVolume); + EXPECT_LT(volume_2, volume_1); +} + } // namespace webrtc diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 8b8231e59c..70b598df9c 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -67,7 +67,7 @@ std::unique_ptr CreateInputVolumeController( int num_channels) { if (enabled) { return std::make_unique( - num_channels, InputVolumeController::Config{.enabled = enabled}); + num_channels, InputVolumeController::Config()); } return nullptr; } @@ -170,12 +170,17 @@ void GainController2::Process(absl::optional speech_probability, } if (input_volume_controller_) { + // TODO(bugs.webrtc.org/7494): A temprorary check, remove once not needed. + RTC_DCHECK(adaptive_digital_controller_); absl::optional speech_level; if (adaptive_digital_controller_) { speech_level = adaptive_digital_controller_->GetSpeechLevelDbfsIfConfident(); } - input_volume_controller_->Process(speech_probability, speech_level); + RTC_DCHECK(speech_probability.has_value()); + if (speech_probability.has_value()) { + input_volume_controller_->Process(*speech_probability, speech_level); + } } fixed_gain_applier_.ApplyGain(float_frame); diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc index eaf0859d3a..7fb0c26fc8 100644 --- a/modules/audio_processing/gain_controller2_unittest.cc +++ b/modules/audio_processing/gain_controller2_unittest.cc @@ -159,6 +159,7 @@ TEST(GainController2, Agc2Config config; config.input_volume_controller.enabled = false; + auto gain_controller = std::make_unique(config, kSampleRateHz, kNumChannels, /*use_internal_vad=*/true); @@ -189,6 +190,8 @@ TEST(GainController2, Agc2Config config; config.input_volume_controller.enabled = true; + config.adaptive_digital.enabled = true; + auto gain_controller = std::make_unique(config, kSampleRateHz, kNumChannels, /*use_internal_vad=*/true);