From bf28277774c916731f9d60d65b0d69d16d9d4006 Mon Sep 17 00:00:00 2001 From: Hanna Silen Date: Fri, 18 Nov 2022 19:36:34 +0100 Subject: [PATCH] InputVolumeController: Add configurable speech probability aggregation Make speech probability threshold configurable by replacing kSpeechProbabilitySilenceThreshold with speech_probability_threshold in InputVolumeController::Config. Make the processing more robust against outliers in speech probability estimation by computing an aggregate speech activity over a speech segment. In MonoInputVolumeController::Process(), use the passed non-empty speech probabilities to compute the speech activity over the speech segment and only allow updates for segments with a high enough ratio of speech frames. Pass RMS error and speech probability for every frame in Process(): If rms_error_dbfs is empty, volume updates are not allowed; if speech_probability is empty, the frame counts as a non-speech frame. Remove startup_min_volume from the config since it's no longer used after https://webrtc-review.googlesource.com/c/src/+/282821. Bug: webrtc:7494 Change-Id: I0ab81b03371496315348f552133aa9909bd36f26 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/283523 Commit-Queue: Hanna Silen Reviewed-by: Alessio Bazzica Cr-Commit-Position: refs/heads/main@{#38685} --- .../agc2/input_volume_controller.cc | 93 +++++---- .../agc2/input_volume_controller.h | 43 +++-- .../agc2/input_volume_controller_unittest.cc | 176 ++++++++++++++---- 3 files changed, 232 insertions(+), 80 deletions(-) diff --git a/modules/audio_processing/agc2/input_volume_controller.cc b/modules/audio_processing/agc2/input_volume_controller.cc index 2e5b9e3083..fecb090fbc 100644 --- a/modules/audio_processing/agc2/input_volume_controller.cc +++ b/modules/audio_processing/agc2/input_volume_controller.cc @@ -37,11 +37,6 @@ constexpr int kMinMicLevel = 12; // Prevent very large microphone level changes. 
constexpr int kMaxResidualGainChange = 15; -// Target speech level (dBFs) and speech probability threshold used to compute -// the RMS error in `GetSpeechLevelErrorDb()`. -// TODO(webrtc:7494): Move this to a config and pass in the ctor. -constexpr float kSpeechProbabilitySilenceThreshold = 0.5f; - using Agc1ClippingPredictorConfig = AudioProcessing::Config::GainController1:: AnalogGainController::ClippingPredictor; @@ -128,25 +123,16 @@ void LogClippingMetrics(int clipping_rate) { } // Computes the speech level error in dB. The value of `speech_level_dbfs` is -// required to be in the range [-90.0f, 30.0f] and `speech_probability` in the -// range [0.0f, 1.0f]. Returns a positive value when the speech level is below -// the target range and a negative value when the speech level is above the -// target range. +// required to be in the range [-90.0f, 30.0f]. Returns a positive value when +// the speech level is below the target range and a negative value when the +// speech level is above the target range. int GetSpeechLevelErrorDb(float speech_level_dbfs, - float speech_probability, int target_range_min_dbfs, int target_range_max_dbfs) { constexpr float kMinSpeechLevelDbfs = -90.0f; constexpr float kMaxSpeechLevelDbfs = 30.0f; RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs); RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs); - RTC_DCHECK_GE(speech_probability, 0.0f); - RTC_DCHECK_LE(speech_probability, 1.0f); - - // TODO(webrtc:7494): Replace with the use of `SpeechProbabilityBuffer`. - if (speech_probability < kSpeechProbabilitySilenceThreshold) { - return 0; - } // Ensure the speech level is in the range [-90.0f, 30.0f]. 
speech_level_dbfs = rtc::SafeClamp( @@ -169,11 +155,26 @@ int GetSpeechLevelErrorDb(float speech_level_dbfs, MonoInputVolumeController::MonoInputVolumeController( int clipped_level_min, int min_mic_level, - int update_input_volume_wait_frames) + int update_input_volume_wait_frames, + float speech_probability_threshold, + float speech_ratio_threshold) : min_mic_level_(min_mic_level), max_level_(kMaxMicLevel), clipped_level_min_(clipped_level_min), - update_input_volume_wait_frames_(update_input_volume_wait_frames) {} + update_input_volume_wait_frames_( + std::max(update_input_volume_wait_frames, 1)), + speech_probability_threshold_(speech_probability_threshold), + speech_ratio_threshold_(speech_ratio_threshold) { + RTC_DCHECK_GE(clipped_level_min_, 0); + RTC_DCHECK_LE(clipped_level_min_, 255); + RTC_DCHECK_GE(min_mic_level_, 0); + RTC_DCHECK_LE(min_mic_level_, 255); + RTC_DCHECK_GE(update_input_volume_wait_frames_, 0); + RTC_DCHECK_GE(speech_probability_threshold_, 0.0f); + RTC_DCHECK_LE(speech_probability_threshold_, 1.0f); + RTC_DCHECK_GE(speech_ratio_threshold_, 0.0f); + RTC_DCHECK_LE(speech_ratio_threshold_, 1.0f); +} MonoInputVolumeController::~MonoInputVolumeController() = default; @@ -182,10 +183,18 @@ void MonoInputVolumeController::Initialize() { capture_output_used_ = true; check_volume_on_next_process_ = true; frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; is_first_frame_ = true; } -void MonoInputVolumeController::Process(absl::optional rms_error_dbfs) { +// A speech segment is considered active if at least +// `update_input_volume_wait_frames_` new frames have been processed since the +// previous update and the ratio of non-silence frames (i.e., frames with a +// non-empty `speech_probability` value above `speech_probability_threshold_`) +// is at least `speech_ratio_threshold_`. 
+void MonoInputVolumeController::Process( + absl::optional rms_error_dbfs, + absl::optional speech_probability) { if (check_volume_on_next_process_) { check_volume_on_next_process_ = false; // We have to wait until the first process call to check the volume, @@ -193,9 +202,29 @@ void MonoInputVolumeController::Process(absl::optional rms_error_dbfs) { CheckVolumeAndReset(); } - if (++frames_since_update_input_volume_ >= update_input_volume_wait_frames_ && - rms_error_dbfs.has_value() && !is_first_frame_) { - UpdateInputVolume(*rms_error_dbfs); + // Count frames with a high speech probability as speech. + if (speech_probability.has_value() && + *speech_probability >= speech_probability_threshold_) { + ++speech_frames_since_update_input_volume_; + } + + // Reset the counters and maybe update the input volume. + if (++frames_since_update_input_volume_ >= update_input_volume_wait_frames_) { + const float speech_ratio = + static_cast(speech_frames_since_update_input_volume_) / + static_cast(update_input_volume_wait_frames_); + + // Always reset the counters regardless of whether the volume changes or + // not. + frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; + + // Update the input volume if allowed. + if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_) { + if (rms_error_dbfs.has_value()) { + UpdateInputVolume(*rms_error_dbfs); + } + } } is_first_frame_ = false; @@ -216,6 +245,7 @@ void MonoInputVolumeController::HandleClipping(int clipped_level_step) { // will still not react until the postproc updates the level. SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step)); frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; is_first_frame_ = false; } } @@ -250,6 +280,7 @@ void MonoInputVolumeController::SetLevel(int new_level) { // Take no action in this case, since we can't be sure when the volume // was manually adjusted. 
frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; is_first_frame_ = false; return; } @@ -311,16 +342,13 @@ int MonoInputVolumeController::CheckVolumeAndReset() { level_ = level; startup_ = false; frames_since_update_input_volume_ = 0; + speech_frames_since_update_input_volume_ = 0; is_first_frame_ = true; return 0; } void MonoInputVolumeController::UpdateInputVolume(int rms_error_dbfs) { - // Always reset the counter regardless of whether the gain is changed - // or not. - frames_since_update_input_volume_ = 0; - const int residual_gain = rtc::SafeClamp( rms_error_dbfs, -kMaxResidualGainChange, kMaxResidualGainChange); @@ -367,7 +395,8 @@ InputVolumeController::InputVolumeController(int num_capture_channels, for (auto& controller : channel_controllers_) { controller = std::make_unique( config.clipped_level_min, min_mic_level, - config.update_input_volume_wait_frames); + config.update_input_volume_wait_frames, + config.speech_probability_threshold, config.speech_ratio_threshold); } RTC_DCHECK(!channel_controllers_.empty()); @@ -481,13 +510,13 @@ void InputVolumeController::Process(absl::optional speech_probability, absl::optional rms_error_dbfs; if (speech_probability.has_value() && speech_level_dbfs.has_value()) { - rms_error_dbfs = - GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability, - target_range_min_dbfs_, target_range_max_dbfs_); + // Compute the error for all frames (both speech and non-speech frames). 
+ rms_error_dbfs = GetSpeechLevelErrorDb( + *speech_level_dbfs, target_range_min_dbfs_, target_range_max_dbfs_); } for (auto& controller : channel_controllers_) { - controller->Process(rms_error_dbfs); + controller->Process(rms_error_dbfs, speech_probability); } AggregateChannelLevels(); diff --git a/modules/audio_processing/agc2/input_volume_controller.h b/modules/audio_processing/agc2/input_volume_controller.h index 8c1bac7ccc..d2f3970c81 100644 --- a/modules/audio_processing/agc2/input_volume_controller.h +++ b/modules/audio_processing/agc2/input_volume_controller.h @@ -36,16 +36,13 @@ class InputVolumeController final { // Config for the constructor. struct Config { bool enabled = false; - // TODO(bugs.webrtc.org/1275566): Describe `startup_min_volume`. - int startup_min_volume = 0; - // Lowest analog microphone level that will be applied in response to - // clipping. + // Lowest input volume level that will be applied in response to clipping. int clipped_level_min = 70; - // Amount the microphone level is lowered with every clipping event. - // Limited to (0, 255]. + // Amount input volume level is lowered with every clipping event. Limited + // to (0, 255]. int clipped_level_step = 15; // Proportion of clipped samples required to declare a clipping event. - // Limited to (0.f, 1.f). + // Limited to (0.0f, 1.0f). float clipped_ratio_threshold = 0.1f; // Time in frames to wait after a clipping event before checking again. // Limited to values higher than 0. @@ -65,6 +62,12 @@ class InputVolumeController final { int target_range_min_dbfs = -48; // Number of wait frames between the recommended input volume updates. int update_input_volume_wait_frames = 100; + // Speech probability threshold: speech probabilities below the threshold + // are considered silence. Limited to [0.0f, 1.0f]. + float speech_probability_threshold = 0.7f; + // Minimum speech frame ratio for volume updates to be allowed. Limited to + // [0.0f, 1.0f]. 
+ float speech_ratio_threshold = 0.9f; }; // Ctor. `num_capture_channels` specifies the number of channels for the audio @@ -90,6 +93,7 @@ class InputVolumeController final { // prediction (if enabled). Must be called after `set_stream_analog_level()`. void AnalyzePreProcess(const AudioBuffer& audio_buffer); + // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore. // Adjusts the recommended input volume upwards/downwards based on // `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value // of `speech_probability` is expected to be in the range [0.0f, 1.0f] and @@ -185,7 +189,9 @@ class MonoInputVolumeController { public: MonoInputVolumeController(int clipped_level_min, int min_mic_level, - int update_input_volume_wait_frames); + int update_input_volume_wait_frames, + float speech_probability_threshold, + float speech_ratio_threshold); ~MonoInputVolumeController(); MonoInputVolumeController(const MonoInputVolumeController&) = delete; MonoInputVolumeController& operator=(const MonoInputVolumeController&) = @@ -202,10 +208,13 @@ class MonoInputVolumeController { // `set_stream_analog_level()`. void HandleClipping(int clipped_level_step); - // Adjusts the recommended input volume upwards/downwards depending on whether - // `rms_error_dbfs` is positive or negative. Must be called after - // `HandleClipping()`. - void Process(absl::optional rms_error_dbfs); + // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore. + // Adjusts the recommended input volume upwards/downwards depending on + // whether `rms_error_dbfs` is positive or negative. Updates are only allowed + // for active speech segments and when `rms_error_dbfs` is not empty. Must be + // called after `HandleClipping()`. + void Process(absl::optional rms_error_dbfs, + absl::optional speech_probability); // Returns the recommended input volume. Must be called after `Process()`. 
int recommended_analog_level() const { return recommended_input_volume_; } @@ -254,10 +263,18 @@ class MonoInputVolumeController { const int clipped_level_min_; - // Number of frames waited between the calls to `UpdateInputVolume()`. + // Counters for frames and speech frames since the last update in the + // recommended input volume. const int update_input_volume_wait_frames_; int frames_since_update_input_volume_ = 0; + int speech_frames_since_update_input_volume_ = 0; bool is_first_frame_ = true; + + // Speech probability threshold for a frame to be considered speech (instead + // of silence). Limited to [0.0f, 1.0f]. + const float speech_probability_threshold_; + // Minimum ratio of speech frames. Limited to [0.0f, 1.0f]. + const float speech_ratio_threshold_; }; } // namespace webrtc diff --git a/modules/audio_processing/agc2/input_volume_controller_unittest.cc b/modules/audio_processing/agc2/input_volume_controller_unittest.cc index 489d99f158..e9be177a7f 100644 --- a/modules/audio_processing/agc2/input_volume_controller_unittest.cc +++ b/modules/audio_processing/agc2/input_volume_controller_unittest.cc @@ -44,6 +44,8 @@ constexpr float kClippedRatioThreshold = 0.1f; constexpr int kClippedWaitFrames = 300; constexpr float kHighSpeechProbability = 0.7f; constexpr float kSpeechLevel = -25.0f; +constexpr float kSpeechProbabilityThreshold = 0.5f; +constexpr float kSpeechRatioThreshold = 0.8f; constexpr float kMinSample = std::numeric_limits::min(); constexpr float kMaxSample = std::numeric_limits::max(); @@ -57,7 +59,6 @@ constexpr InputVolumeControllerConfig kDefaultInputVolumeControllerConfig{}; constexpr ClippingPredictorConfig kDefaultClippingPredictorConfig{}; std::unique_ptr CreateInputVolumeController( - int startup_min_volume, int clipped_level_step, float clipped_ratio_threshold, int clipped_wait_frames, @@ -65,7 +66,6 @@ std::unique_ptr CreateInputVolumeController( int update_input_volume_wait_frames = 0) { InputVolumeControllerConfig config{ .enabled = 
true, - .startup_min_volume = startup_min_volume, .clipped_level_min = kClippedMin, .clipped_level_step = clipped_level_step, .clipped_ratio_threshold = clipped_ratio_threshold, @@ -74,6 +74,8 @@ std::unique_ptr CreateInputVolumeController( .target_range_max_dbfs = -18, .target_range_min_dbfs = -30, .update_input_volume_wait_frames = update_input_volume_wait_frames, + .speech_probability_threshold = kSpeechProbabilityThreshold, + .speech_ratio_threshold = kSpeechRatioThreshold, }; return std::make_unique(/*num_capture_channels=*/1, @@ -258,7 +260,6 @@ class SpeechSamplesReader { constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() { InputVolumeControllerConfig config{ .enabled = true, - .startup_min_volume = kInitialInputVolume, .clipped_level_min = kClippedMin, .clipped_level_step = kClippedLevelStep, .clipped_ratio_threshold = kClippedRatioThreshold, @@ -267,6 +268,8 @@ constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() { .target_range_max_dbfs = -18, .target_range_min_dbfs = -30, .update_input_volume_wait_frames = 0, + .speech_probability_threshold = 0.5f, + .speech_ratio_threshold = 1.0f, }; return config; } @@ -946,9 +949,8 @@ TEST_P(InputVolumeControllerParametrizedTest, } TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentDefault) { - std::unique_ptr manager = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames); + std::unique_ptr manager = CreateInputVolumeController( + kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel); } @@ -957,8 +959,8 @@ TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentDisabled) { test::ScopedFieldTrials field_trial( GetAgcMinMicLevelExperimentFieldTrial("Disabled" + field_trial_suffix)); std::unique_ptr manager = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames); + 
CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel); } @@ -969,9 +971,8 @@ TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentDisabled) { TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentOutOfRangeAbove) { test::ScopedFieldTrials field_trial( GetAgcMinMicLevelExperimentFieldTrial("Enabled-256")); - std::unique_ptr manager = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames); + std::unique_ptr manager = CreateInputVolumeController( + kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel); } @@ -980,9 +981,8 @@ TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentOutOfRangeAbove) { TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentOutOfRangeBelow) { test::ScopedFieldTrials field_trial( GetAgcMinMicLevelExperimentFieldTrial("Enabled--1")); - std::unique_ptr manager = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames); + std::unique_ptr manager = CreateInputVolumeController( + kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel); } @@ -997,8 +997,8 @@ TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentEnabled50) { GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride, field_trial_suffix)); std::unique_ptr manager = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames); + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevelOverride); @@ -1016,8 +1016,8 @@ TEST(InputVolumeControllerTest, // relevant field trial. 
const auto factory = []() { std::unique_ptr manager = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames); + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames); manager->Initialize(); manager->set_stream_analog_level(kInitialInputVolume); return manager; @@ -1071,8 +1071,8 @@ TEST(InputVolumeControllerTest, // relevant field trial. const auto factory = []() { std::unique_ptr manager = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames); + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames); manager->Initialize(); manager->set_stream_analog_level(kInitialInputVolume); return manager; @@ -1127,7 +1127,6 @@ TEST(InputVolumeControllerTest, // with clipping. InputVolumeControllerConfig config = kDefaultInputVolumeControllerConfig; config.enabled = true; - config.startup_min_volume = kInitialInputVolume; config.clipped_level_step = 64; config.clipped_ratio_threshold = kClippedRatioThreshold; config.clipped_wait_frames = kClippedWaitFrames; @@ -1193,7 +1192,6 @@ TEST(InputVolumeControllerTest, // with clipping. InputVolumeControllerConfig config = kDefaultInputVolumeControllerConfig; config.enabled = true; - config.startup_min_volume = kInitialInputVolume; config.clipped_level_step = 64; config.clipped_ratio_threshold = kClippedRatioThreshold; config.clipped_wait_frames = kClippedWaitFrames; @@ -1252,16 +1250,14 @@ TEST_P(InputVolumeControllerParametrizedTest, ClippingParametersVerified) { GTEST_SKIP() << "Skipped. 
RMS error does not affect the test."; } - std::unique_ptr manager = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames); + std::unique_ptr manager = CreateInputVolumeController( + kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames); manager->Initialize(); EXPECT_EQ(manager->clipped_level_step_, kClippedLevelStep); EXPECT_EQ(manager->clipped_ratio_threshold_, kClippedRatioThreshold); EXPECT_EQ(manager->clipped_wait_frames_, kClippedWaitFrames); std::unique_ptr manager_custom = - CreateInputVolumeController(kInitialInputVolume, - /*clipped_level_step=*/10, + CreateInputVolumeController(/*clipped_level_step=*/10, /*clipped_ratio_threshold=*/0.2f, /*clipped_wait_frames=*/50); manager_custom->Initialize(); @@ -1277,8 +1273,8 @@ TEST_P(InputVolumeControllerParametrizedTest, } std::unique_ptr manager = CreateInputVolumeController( - kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold, - kClippedWaitFrames, /*enable_clipping_predictor=*/false); + kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames, + /*enable_clipping_predictor=*/false); manager->Initialize(); EXPECT_FALSE(manager->clipping_predictor_enabled()); @@ -1302,8 +1298,8 @@ TEST_P(InputVolumeControllerParametrizedTest, } std::unique_ptr manager = CreateInputVolumeController( - kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold, - kClippedWaitFrames, /*enable_clipping_predictor=*/true); + kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames, + /*enable_clipping_predictor=*/true); manager->Initialize(); EXPECT_TRUE(manager->clipping_predictor_enabled()); @@ -1469,13 +1465,13 @@ TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) { TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) { constexpr int kInputVolume = kInitialInputVolume; std::unique_ptr controller_wait_0 = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - 
kClippedRatioThreshold, kClippedWaitFrames, + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, /*enable_clipping_predictor=*/false, /*update_input_volume_wait_frames=*/0); std::unique_ptr controller_wait_100 = - CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, - kClippedRatioThreshold, kClippedWaitFrames, + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, /*enable_clipping_predictor=*/false, /*update_input_volume_wait_frames=*/100); controller_wait_0->Initialize(); @@ -1504,4 +1500,114 @@ TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) { ASSERT_GT(controller_wait_100->recommended_analog_level(), kInputVolume); } +TEST(InputVolumeControllerTest, SpeechRatioThresholdIsEffective) { + constexpr int kInputVolume = kInitialInputVolume; + // Create two input volume controllers with 10 frames between volume updates + // and the minimum speech ratio of 0.8 and speech probability threshold 0.5. 
+ std::unique_ptr controller_1 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/10); + std::unique_ptr controller_2 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/10); + controller_1->Initialize(); + controller_2->Initialize(); + controller_1->set_stream_analog_level(kInputVolume); + controller_2->set_stream_analog_level(kInputVolume); + + SpeechSamplesReader reader_1; + SpeechSamplesReader reader_2; + + reader_1.Feed(/*num_frames=*/1, /*gain_db=*/0, + /*speech_probability=*/0.7f, /*speech_level=*/-42.0f, + *controller_1); + reader_2.Feed(/*num_frames=*/1, /*gain_db=*/0, + /*speech_probability=*/0.4f, /*speech_level=*/-42.0f, + *controller_2); + + ASSERT_EQ(controller_1->recommended_analog_level(), kInputVolume); + ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume); + + reader_1.Feed(/*num_frames=*/2, /*gain_db=*/0, + /*speech_probability=*/0.4f, /*speech_level=*/-42.0f, + *controller_1); + reader_2.Feed(/*num_frames=*/2, /*gain_db=*/0, + /*speech_probability=*/0.4f, /*speech_level=*/-42.0f, + *controller_2); + + ASSERT_EQ(controller_1->recommended_analog_level(), kInputVolume); + ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume); + + reader_1.Feed(/*num_frames=*/7, /*gain_db=*/0, + /*speech_probability=*/0.7f, /*speech_level=*/-42.0f, + *controller_1); + reader_2.Feed(/*num_frames=*/7, /*gain_db=*/0, + /*speech_probability=*/0.7f, /*speech_level=*/-42.0f, + *controller_2); + + ASSERT_GT(controller_1->recommended_analog_level(), kInputVolume); + ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume); +} + +TEST(InputVolumeControllerTest, SpeechProbabilityThresholdIsEffective) { + constexpr int kInputVolume = kInitialInputVolume; + // Create two input volume controllers with 
the exact same settings and + // 10 frames between volume updates. + std::unique_ptr controller_1 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/10); + std::unique_ptr controller_2 = + CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold, + kClippedWaitFrames, + /*enable_clipping_predictor=*/false, + /*update_input_volume_wait_frames=*/10); + controller_1->Initialize(); + controller_2->Initialize(); + controller_1->set_stream_analog_level(kInputVolume); + controller_2->set_stream_analog_level(kInputVolume); + + SpeechSamplesReader reader_1; + SpeechSamplesReader reader_2; + + // Process with two sets of inputs: Use `reader_1` to process inputs + // that make the volume to be adjusted after enough frames have been + // processed and `reader_2` to process inputs that won't make the volume + // to be adjusted. + reader_1.Feed(/*num_frames=*/1, /*gain_db=*/0, + /*speech_probability=*/0.5f, /*speech_level=*/-42.0f, + *controller_1); + reader_2.Feed(/*num_frames=*/1, /*gain_db=*/0, + /*speech_probability=*/0.49f, /*speech_level=*/-42.0f, + *controller_2); + + ASSERT_EQ(controller_1->recommended_analog_level(), kInputVolume); + ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume); + + reader_1.Feed(/*num_frames=*/2, /*gain_db=*/0, + /*speech_probability=*/0.49f, /*speech_level=*/-42.0f, + *controller_1); + reader_2.Feed(/*num_frames=*/2, /*gain_db=*/0, + /*speech_probability=*/0.49f, /*speech_level=*/-42.0f, + *controller_2); + + ASSERT_EQ(controller_1->recommended_analog_level(), kInputVolume); + ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume); + + reader_1.Feed(/*num_frames=*/7, /*gain_db=*/0, + /*speech_probability=*/0.5f, /*speech_level=*/-42.0f, + *controller_1); + reader_2.Feed(/*num_frames=*/7, /*gain_db=*/0, + /*speech_probability=*/0.5f, /*speech_level=*/-42.0f, + *controller_2); + + 
ASSERT_GT(controller_1->recommended_analog_level(), kInputVolume); + ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume); +} + } // namespace webrtc