InputVolumeController: Add configurable speech probability aggregation

Make speech probability threshold configurable by replacing
kSpeechProbabilitySilenceThreshold with speech_probability_threshold in
InputVolumeController::Config.

Make the processing more robust against outliers in speech probability
estimaton by computing an aggregate speech activity over a speech
segment. In MonoInputVolumeController::Process(), use the passed
non-empty speech probabilities to compute the speech activity over the
speech segment and only allow updates for segments with a high enough
ratio of speech frames. Pass RMS error and speech probability for every
frame in Process(): If rms_error_dbfs is empty, volume updates are not
allowed; if speech_probability is empty, the frame counts as a non-
speech frame.

Remove startup_min_volume from the config since it's no longer used
after https://webrtc-review.googlesource.com/c/src/+/282821.

Bug: webrtc:7494
Change-Id: I0ab81b03371496315348f552133aa9909bd36f26
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/283523
Commit-Queue: Hanna Silen <silen@webrtc.org>
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38685}
This commit is contained in:
Hanna Silen
2022-11-18 19:36:34 +01:00
committed by WebRTC LUCI CQ
parent f45f823541
commit bf28277774
3 changed files with 232 additions and 80 deletions

View File

@ -37,11 +37,6 @@ constexpr int kMinMicLevel = 12;
// Prevent very large microphone level changes. // Prevent very large microphone level changes.
constexpr int kMaxResidualGainChange = 15; constexpr int kMaxResidualGainChange = 15;
// Target speech level (dBFs) and speech probability threshold used to compute
// the RMS error in `GetSpeechLevelErrorDb()`.
// TODO(webrtc:7494): Move this to a config and pass in the ctor.
constexpr float kSpeechProbabilitySilenceThreshold = 0.5f;
using Agc1ClippingPredictorConfig = AudioProcessing::Config::GainController1:: using Agc1ClippingPredictorConfig = AudioProcessing::Config::GainController1::
AnalogGainController::ClippingPredictor; AnalogGainController::ClippingPredictor;
@ -128,25 +123,16 @@ void LogClippingMetrics(int clipping_rate) {
} }
// Computes the speech level error in dB. The value of `speech_level_dbfs` is // Computes the speech level error in dB. The value of `speech_level_dbfs` is
// required to be in the range [-90.0f, 30.0f] and `speech_probability` in the // required to be in the range [-90.0f, 30.0f]. Returns a positive value when
// range [0.0f, 1.0f]. Returns a positive value when the speech level is below // the speech level is below the target range and a negative value when the
// the target range and a negative value when the speech level is above the // speech level is above the target range.
// target range.
int GetSpeechLevelErrorDb(float speech_level_dbfs, int GetSpeechLevelErrorDb(float speech_level_dbfs,
float speech_probability,
int target_range_min_dbfs, int target_range_min_dbfs,
int target_range_max_dbfs) { int target_range_max_dbfs) {
constexpr float kMinSpeechLevelDbfs = -90.0f; constexpr float kMinSpeechLevelDbfs = -90.0f;
constexpr float kMaxSpeechLevelDbfs = 30.0f; constexpr float kMaxSpeechLevelDbfs = 30.0f;
RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs); RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs);
RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs); RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs);
RTC_DCHECK_GE(speech_probability, 0.0f);
RTC_DCHECK_LE(speech_probability, 1.0f);
// TODO(webrtc:7494): Replace with the use of `SpeechProbabilityBuffer`.
if (speech_probability < kSpeechProbabilitySilenceThreshold) {
return 0;
}
// Ensure the speech level is in the range [-90.0f, 30.0f]. // Ensure the speech level is in the range [-90.0f, 30.0f].
speech_level_dbfs = rtc::SafeClamp<float>( speech_level_dbfs = rtc::SafeClamp<float>(
@ -169,11 +155,26 @@ int GetSpeechLevelErrorDb(float speech_level_dbfs,
MonoInputVolumeController::MonoInputVolumeController( MonoInputVolumeController::MonoInputVolumeController(
int clipped_level_min, int clipped_level_min,
int min_mic_level, int min_mic_level,
int update_input_volume_wait_frames) int update_input_volume_wait_frames,
float speech_probability_threshold,
float speech_ratio_threshold)
: min_mic_level_(min_mic_level), : min_mic_level_(min_mic_level),
max_level_(kMaxMicLevel), max_level_(kMaxMicLevel),
clipped_level_min_(clipped_level_min), clipped_level_min_(clipped_level_min),
update_input_volume_wait_frames_(update_input_volume_wait_frames) {} update_input_volume_wait_frames_(
std::max(update_input_volume_wait_frames, 1)),
speech_probability_threshold_(speech_probability_threshold),
speech_ratio_threshold_(speech_ratio_threshold) {
RTC_DCHECK_GE(clipped_level_min_, 0);
RTC_DCHECK_LE(clipped_level_min_, 255);
RTC_DCHECK_GE(min_mic_level_, 0);
RTC_DCHECK_LE(min_mic_level_, 255);
RTC_DCHECK_GE(update_input_volume_wait_frames_, 0);
RTC_DCHECK_GE(speech_probability_threshold_, 0.0f);
RTC_DCHECK_LE(speech_probability_threshold_, 1.0f);
RTC_DCHECK_GE(speech_ratio_threshold_, 0.0f);
RTC_DCHECK_LE(speech_ratio_threshold_, 1.0f);
}
MonoInputVolumeController::~MonoInputVolumeController() = default; MonoInputVolumeController::~MonoInputVolumeController() = default;
@ -182,10 +183,18 @@ void MonoInputVolumeController::Initialize() {
capture_output_used_ = true; capture_output_used_ = true;
check_volume_on_next_process_ = true; check_volume_on_next_process_ = true;
frames_since_update_input_volume_ = 0; frames_since_update_input_volume_ = 0;
speech_frames_since_update_input_volume_ = 0;
is_first_frame_ = true; is_first_frame_ = true;
} }
void MonoInputVolumeController::Process(absl::optional<int> rms_error_dbfs) { // A speech segment is considered active if at least
// `update_input_volume_wait_frames_` new frames have been processed since the
// previous update and the ratio of non-silence frames (i.e., frames with a
// non-empty `speech_probability` value above `speech_probability_threshold_`)
// is at least `speech_ratio_threshold_`.
void MonoInputVolumeController::Process(
absl::optional<int> rms_error_dbfs,
absl::optional<float> speech_probability) {
if (check_volume_on_next_process_) { if (check_volume_on_next_process_) {
check_volume_on_next_process_ = false; check_volume_on_next_process_ = false;
// We have to wait until the first process call to check the volume, // We have to wait until the first process call to check the volume,
@ -193,9 +202,29 @@ void MonoInputVolumeController::Process(absl::optional<int> rms_error_dbfs) {
CheckVolumeAndReset(); CheckVolumeAndReset();
} }
if (++frames_since_update_input_volume_ >= update_input_volume_wait_frames_ && // Count frames with a high speech probability as speech.
rms_error_dbfs.has_value() && !is_first_frame_) { if (speech_probability.has_value() &&
UpdateInputVolume(*rms_error_dbfs); *speech_probability >= speech_probability_threshold_) {
++speech_frames_since_update_input_volume_;
}
// Reset the counters and maybe update the input volume.
if (++frames_since_update_input_volume_ >= update_input_volume_wait_frames_) {
const float speech_ratio =
static_cast<float>(speech_frames_since_update_input_volume_) /
static_cast<float>(update_input_volume_wait_frames_);
// Always reset the counters regardless of whether the volume changes or
// not.
frames_since_update_input_volume_ = 0;
speech_frames_since_update_input_volume_ = 0;
// Update the input volume if allowed.
if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_) {
if (rms_error_dbfs.has_value()) {
UpdateInputVolume(*rms_error_dbfs);
}
}
} }
is_first_frame_ = false; is_first_frame_ = false;
@ -216,6 +245,7 @@ void MonoInputVolumeController::HandleClipping(int clipped_level_step) {
// will still not react until the postproc updates the level. // will still not react until the postproc updates the level.
SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step)); SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
frames_since_update_input_volume_ = 0; frames_since_update_input_volume_ = 0;
speech_frames_since_update_input_volume_ = 0;
is_first_frame_ = false; is_first_frame_ = false;
} }
} }
@ -250,6 +280,7 @@ void MonoInputVolumeController::SetLevel(int new_level) {
// Take no action in this case, since we can't be sure when the volume // Take no action in this case, since we can't be sure when the volume
// was manually adjusted. // was manually adjusted.
frames_since_update_input_volume_ = 0; frames_since_update_input_volume_ = 0;
speech_frames_since_update_input_volume_ = 0;
is_first_frame_ = false; is_first_frame_ = false;
return; return;
} }
@ -311,16 +342,13 @@ int MonoInputVolumeController::CheckVolumeAndReset() {
level_ = level; level_ = level;
startup_ = false; startup_ = false;
frames_since_update_input_volume_ = 0; frames_since_update_input_volume_ = 0;
speech_frames_since_update_input_volume_ = 0;
is_first_frame_ = true; is_first_frame_ = true;
return 0; return 0;
} }
void MonoInputVolumeController::UpdateInputVolume(int rms_error_dbfs) { void MonoInputVolumeController::UpdateInputVolume(int rms_error_dbfs) {
// Always reset the counter regardless of whether the gain is changed
// or not.
frames_since_update_input_volume_ = 0;
const int residual_gain = rtc::SafeClamp( const int residual_gain = rtc::SafeClamp(
rms_error_dbfs, -kMaxResidualGainChange, kMaxResidualGainChange); rms_error_dbfs, -kMaxResidualGainChange, kMaxResidualGainChange);
@ -367,7 +395,8 @@ InputVolumeController::InputVolumeController(int num_capture_channels,
for (auto& controller : channel_controllers_) { for (auto& controller : channel_controllers_) {
controller = std::make_unique<MonoInputVolumeController>( controller = std::make_unique<MonoInputVolumeController>(
config.clipped_level_min, min_mic_level, config.clipped_level_min, min_mic_level,
config.update_input_volume_wait_frames); config.update_input_volume_wait_frames,
config.speech_probability_threshold, config.speech_ratio_threshold);
} }
RTC_DCHECK(!channel_controllers_.empty()); RTC_DCHECK(!channel_controllers_.empty());
@ -481,13 +510,13 @@ void InputVolumeController::Process(absl::optional<float> speech_probability,
absl::optional<int> rms_error_dbfs; absl::optional<int> rms_error_dbfs;
if (speech_probability.has_value() && speech_level_dbfs.has_value()) { if (speech_probability.has_value() && speech_level_dbfs.has_value()) {
rms_error_dbfs = // Compute the error for all frames (both speech and non-speech frames).
GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability, rms_error_dbfs = GetSpeechLevelErrorDb(
target_range_min_dbfs_, target_range_max_dbfs_); *speech_level_dbfs, target_range_min_dbfs_, target_range_max_dbfs_);
} }
for (auto& controller : channel_controllers_) { for (auto& controller : channel_controllers_) {
controller->Process(rms_error_dbfs); controller->Process(rms_error_dbfs, speech_probability);
} }
AggregateChannelLevels(); AggregateChannelLevels();

View File

@ -36,16 +36,13 @@ class InputVolumeController final {
// Config for the constructor. // Config for the constructor.
struct Config { struct Config {
bool enabled = false; bool enabled = false;
// TODO(bugs.webrtc.org/1275566): Describe `startup_min_volume`. // Lowest input volume level that will be applied in response to clipping.
int startup_min_volume = 0;
// Lowest analog microphone level that will be applied in response to
// clipping.
int clipped_level_min = 70; int clipped_level_min = 70;
// Amount the microphone level is lowered with every clipping event. // Amount input volume level is lowered with every clipping event. Limited
// Limited to (0, 255]. // to (0, 255].
int clipped_level_step = 15; int clipped_level_step = 15;
// Proportion of clipped samples required to declare a clipping event. // Proportion of clipped samples required to declare a clipping event.
// Limited to (0.f, 1.f). // Limited to (0.0f, 1.0f).
float clipped_ratio_threshold = 0.1f; float clipped_ratio_threshold = 0.1f;
// Time in frames to wait after a clipping event before checking again. // Time in frames to wait after a clipping event before checking again.
// Limited to values higher than 0. // Limited to values higher than 0.
@ -65,6 +62,12 @@ class InputVolumeController final {
int target_range_min_dbfs = -48; int target_range_min_dbfs = -48;
// Number of wait frames between the recommended input volume updates. // Number of wait frames between the recommended input volume updates.
int update_input_volume_wait_frames = 100; int update_input_volume_wait_frames = 100;
// Speech probability threshold: speech probabilities below the threshold
// are considered silence. Limited to [0.0f, 1.0f].
float speech_probability_threshold = 0.7f;
// Minimum speech frame ratio for volume updates to be allowed. Limited to
// [0.0f, 1.0f].
float speech_ratio_threshold = 0.9f;
}; };
// Ctor. `num_capture_channels` specifies the number of channels for the audio // Ctor. `num_capture_channels` specifies the number of channels for the audio
@ -90,6 +93,7 @@ class InputVolumeController final {
// prediction (if enabled). Must be called after `set_stream_analog_level()`. // prediction (if enabled). Must be called after `set_stream_analog_level()`.
void AnalyzePreProcess(const AudioBuffer& audio_buffer); void AnalyzePreProcess(const AudioBuffer& audio_buffer);
// TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
// Adjusts the recommended input volume upwards/downwards based on // Adjusts the recommended input volume upwards/downwards based on
// `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value // `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value
// of `speech_probability` is expected to be in the range [0.0f, 1.0f] and // of `speech_probability` is expected to be in the range [0.0f, 1.0f] and
@ -185,7 +189,9 @@ class MonoInputVolumeController {
public: public:
MonoInputVolumeController(int clipped_level_min, MonoInputVolumeController(int clipped_level_min,
int min_mic_level, int min_mic_level,
int update_input_volume_wait_frames); int update_input_volume_wait_frames,
float speech_probability_threshold,
float speech_ratio_threshold);
~MonoInputVolumeController(); ~MonoInputVolumeController();
MonoInputVolumeController(const MonoInputVolumeController&) = delete; MonoInputVolumeController(const MonoInputVolumeController&) = delete;
MonoInputVolumeController& operator=(const MonoInputVolumeController&) = MonoInputVolumeController& operator=(const MonoInputVolumeController&) =
@ -202,10 +208,13 @@ class MonoInputVolumeController {
// `set_stream_analog_level()`. // `set_stream_analog_level()`.
void HandleClipping(int clipped_level_step); void HandleClipping(int clipped_level_step);
// Adjusts the recommended input volume upwards/downwards depending on whether // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
// `rms_error_dbfs` is positive or negative. Must be called after // Adjusts the recommended input volume upwards/downwards depending on
// `HandleClipping()`. // whether `rms_error_dbfs` is positive or negative. Updates are only allowed
void Process(absl::optional<int> rms_error_dbfs); // for active speech segments and when `rms_error_dbfs` is not empty. Must be
// called after `HandleClipping()`.
void Process(absl::optional<int> rms_error_dbfs,
absl::optional<float> speech_probability);
// Returns the recommended input volume. Must be called after `Process()`. // Returns the recommended input volume. Must be called after `Process()`.
int recommended_analog_level() const { return recommended_input_volume_; } int recommended_analog_level() const { return recommended_input_volume_; }
@ -254,10 +263,18 @@ class MonoInputVolumeController {
const int clipped_level_min_; const int clipped_level_min_;
// Number of frames waited between the calls to `UpdateInputVolume()`. // Counters for frames and speech frames since the last update in the
// recommended input volume.
const int update_input_volume_wait_frames_; const int update_input_volume_wait_frames_;
int frames_since_update_input_volume_ = 0; int frames_since_update_input_volume_ = 0;
int speech_frames_since_update_input_volume_ = 0;
bool is_first_frame_ = true; bool is_first_frame_ = true;
// Speech probability threshold for a frame to be considered speech (instead
// of silence). Limited to [0.0f, 1.0f].
const float speech_probability_threshold_;
// Minimum ratio of speech frames. Limited to [0.0f, 1.0f].
const float speech_ratio_threshold_;
}; };
} // namespace webrtc } // namespace webrtc

View File

@ -44,6 +44,8 @@ constexpr float kClippedRatioThreshold = 0.1f;
constexpr int kClippedWaitFrames = 300; constexpr int kClippedWaitFrames = 300;
constexpr float kHighSpeechProbability = 0.7f; constexpr float kHighSpeechProbability = 0.7f;
constexpr float kSpeechLevel = -25.0f; constexpr float kSpeechLevel = -25.0f;
constexpr float kSpeechProbabilityThreshold = 0.5f;
constexpr float kSpeechRatioThreshold = 0.8f;
constexpr float kMinSample = std::numeric_limits<int16_t>::min(); constexpr float kMinSample = std::numeric_limits<int16_t>::min();
constexpr float kMaxSample = std::numeric_limits<int16_t>::max(); constexpr float kMaxSample = std::numeric_limits<int16_t>::max();
@ -57,7 +59,6 @@ constexpr InputVolumeControllerConfig kDefaultInputVolumeControllerConfig{};
constexpr ClippingPredictorConfig kDefaultClippingPredictorConfig{}; constexpr ClippingPredictorConfig kDefaultClippingPredictorConfig{};
std::unique_ptr<InputVolumeController> CreateInputVolumeController( std::unique_ptr<InputVolumeController> CreateInputVolumeController(
int startup_min_volume,
int clipped_level_step, int clipped_level_step,
float clipped_ratio_threshold, float clipped_ratio_threshold,
int clipped_wait_frames, int clipped_wait_frames,
@ -65,7 +66,6 @@ std::unique_ptr<InputVolumeController> CreateInputVolumeController(
int update_input_volume_wait_frames = 0) { int update_input_volume_wait_frames = 0) {
InputVolumeControllerConfig config{ InputVolumeControllerConfig config{
.enabled = true, .enabled = true,
.startup_min_volume = startup_min_volume,
.clipped_level_min = kClippedMin, .clipped_level_min = kClippedMin,
.clipped_level_step = clipped_level_step, .clipped_level_step = clipped_level_step,
.clipped_ratio_threshold = clipped_ratio_threshold, .clipped_ratio_threshold = clipped_ratio_threshold,
@ -74,6 +74,8 @@ std::unique_ptr<InputVolumeController> CreateInputVolumeController(
.target_range_max_dbfs = -18, .target_range_max_dbfs = -18,
.target_range_min_dbfs = -30, .target_range_min_dbfs = -30,
.update_input_volume_wait_frames = update_input_volume_wait_frames, .update_input_volume_wait_frames = update_input_volume_wait_frames,
.speech_probability_threshold = kSpeechProbabilityThreshold,
.speech_ratio_threshold = kSpeechRatioThreshold,
}; };
return std::make_unique<InputVolumeController>(/*num_capture_channels=*/1, return std::make_unique<InputVolumeController>(/*num_capture_channels=*/1,
@ -258,7 +260,6 @@ class SpeechSamplesReader {
constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() { constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() {
InputVolumeControllerConfig config{ InputVolumeControllerConfig config{
.enabled = true, .enabled = true,
.startup_min_volume = kInitialInputVolume,
.clipped_level_min = kClippedMin, .clipped_level_min = kClippedMin,
.clipped_level_step = kClippedLevelStep, .clipped_level_step = kClippedLevelStep,
.clipped_ratio_threshold = kClippedRatioThreshold, .clipped_ratio_threshold = kClippedRatioThreshold,
@ -267,6 +268,8 @@ constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() {
.target_range_max_dbfs = -18, .target_range_max_dbfs = -18,
.target_range_min_dbfs = -30, .target_range_min_dbfs = -30,
.update_input_volume_wait_frames = 0, .update_input_volume_wait_frames = 0,
.speech_probability_threshold = 0.5f,
.speech_ratio_threshold = 1.0f,
}; };
return config; return config;
} }
@ -946,9 +949,8 @@ TEST_P(InputVolumeControllerParametrizedTest,
} }
TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentDefault) { TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentDefault) {
std::unique_ptr<InputVolumeController> manager = std::unique_ptr<InputVolumeController> manager = CreateInputVolumeController(
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames);
kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel);
} }
@ -957,8 +959,8 @@ TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentDisabled) {
test::ScopedFieldTrials field_trial( test::ScopedFieldTrials field_trial(
GetAgcMinMicLevelExperimentFieldTrial("Disabled" + field_trial_suffix)); GetAgcMinMicLevelExperimentFieldTrial("Disabled" + field_trial_suffix));
std::unique_ptr<InputVolumeController> manager = std::unique_ptr<InputVolumeController> manager =
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedRatioThreshold, kClippedWaitFrames); kClippedWaitFrames);
EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel);
} }
@ -969,9 +971,8 @@ TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentDisabled) {
TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentOutOfRangeAbove) { TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentOutOfRangeAbove) {
test::ScopedFieldTrials field_trial( test::ScopedFieldTrials field_trial(
GetAgcMinMicLevelExperimentFieldTrial("Enabled-256")); GetAgcMinMicLevelExperimentFieldTrial("Enabled-256"));
std::unique_ptr<InputVolumeController> manager = std::unique_ptr<InputVolumeController> manager = CreateInputVolumeController(
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames);
kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel);
} }
@ -980,9 +981,8 @@ TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentOutOfRangeAbove) {
TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentOutOfRangeBelow) { TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentOutOfRangeBelow) {
test::ScopedFieldTrials field_trial( test::ScopedFieldTrials field_trial(
GetAgcMinMicLevelExperimentFieldTrial("Enabled--1")); GetAgcMinMicLevelExperimentFieldTrial("Enabled--1"));
std::unique_ptr<InputVolumeController> manager = std::unique_ptr<InputVolumeController> manager = CreateInputVolumeController(
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames);
kClippedRatioThreshold, kClippedWaitFrames);
EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel); EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), kMinMicLevel);
} }
@ -997,8 +997,8 @@ TEST(InputVolumeControllerTest, AgcMinMicLevelExperimentEnabled50) {
GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride, GetAgcMinMicLevelExperimentFieldTrialEnabled(kMinMicLevelOverride,
field_trial_suffix)); field_trial_suffix));
std::unique_ptr<InputVolumeController> manager = std::unique_ptr<InputVolumeController> manager =
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedRatioThreshold, kClippedWaitFrames); kClippedWaitFrames);
EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(), EXPECT_EQ(manager->channel_controllers_[0]->min_mic_level(),
kMinMicLevelOverride); kMinMicLevelOverride);
@ -1016,8 +1016,8 @@ TEST(InputVolumeControllerTest,
// relevant field trial. // relevant field trial.
const auto factory = []() { const auto factory = []() {
std::unique_ptr<InputVolumeController> manager = std::unique_ptr<InputVolumeController> manager =
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedRatioThreshold, kClippedWaitFrames); kClippedWaitFrames);
manager->Initialize(); manager->Initialize();
manager->set_stream_analog_level(kInitialInputVolume); manager->set_stream_analog_level(kInitialInputVolume);
return manager; return manager;
@ -1071,8 +1071,8 @@ TEST(InputVolumeControllerTest,
// relevant field trial. // relevant field trial.
const auto factory = []() { const auto factory = []() {
std::unique_ptr<InputVolumeController> manager = std::unique_ptr<InputVolumeController> manager =
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedRatioThreshold, kClippedWaitFrames); kClippedWaitFrames);
manager->Initialize(); manager->Initialize();
manager->set_stream_analog_level(kInitialInputVolume); manager->set_stream_analog_level(kInitialInputVolume);
return manager; return manager;
@ -1127,7 +1127,6 @@ TEST(InputVolumeControllerTest,
// with clipping. // with clipping.
InputVolumeControllerConfig config = kDefaultInputVolumeControllerConfig; InputVolumeControllerConfig config = kDefaultInputVolumeControllerConfig;
config.enabled = true; config.enabled = true;
config.startup_min_volume = kInitialInputVolume;
config.clipped_level_step = 64; config.clipped_level_step = 64;
config.clipped_ratio_threshold = kClippedRatioThreshold; config.clipped_ratio_threshold = kClippedRatioThreshold;
config.clipped_wait_frames = kClippedWaitFrames; config.clipped_wait_frames = kClippedWaitFrames;
@ -1193,7 +1192,6 @@ TEST(InputVolumeControllerTest,
// with clipping. // with clipping.
InputVolumeControllerConfig config = kDefaultInputVolumeControllerConfig; InputVolumeControllerConfig config = kDefaultInputVolumeControllerConfig;
config.enabled = true; config.enabled = true;
config.startup_min_volume = kInitialInputVolume;
config.clipped_level_step = 64; config.clipped_level_step = 64;
config.clipped_ratio_threshold = kClippedRatioThreshold; config.clipped_ratio_threshold = kClippedRatioThreshold;
config.clipped_wait_frames = kClippedWaitFrames; config.clipped_wait_frames = kClippedWaitFrames;
@ -1252,16 +1250,14 @@ TEST_P(InputVolumeControllerParametrizedTest, ClippingParametersVerified) {
GTEST_SKIP() << "Skipped. RMS error does not affect the test."; GTEST_SKIP() << "Skipped. RMS error does not affect the test.";
} }
std::unique_ptr<InputVolumeController> manager = std::unique_ptr<InputVolumeController> manager = CreateInputVolumeController(
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames);
kClippedRatioThreshold, kClippedWaitFrames);
manager->Initialize(); manager->Initialize();
EXPECT_EQ(manager->clipped_level_step_, kClippedLevelStep); EXPECT_EQ(manager->clipped_level_step_, kClippedLevelStep);
EXPECT_EQ(manager->clipped_ratio_threshold_, kClippedRatioThreshold); EXPECT_EQ(manager->clipped_ratio_threshold_, kClippedRatioThreshold);
EXPECT_EQ(manager->clipped_wait_frames_, kClippedWaitFrames); EXPECT_EQ(manager->clipped_wait_frames_, kClippedWaitFrames);
std::unique_ptr<InputVolumeController> manager_custom = std::unique_ptr<InputVolumeController> manager_custom =
CreateInputVolumeController(kInitialInputVolume, CreateInputVolumeController(/*clipped_level_step=*/10,
/*clipped_level_step=*/10,
/*clipped_ratio_threshold=*/0.2f, /*clipped_ratio_threshold=*/0.2f,
/*clipped_wait_frames=*/50); /*clipped_wait_frames=*/50);
manager_custom->Initialize(); manager_custom->Initialize();
@ -1277,8 +1273,8 @@ TEST_P(InputVolumeControllerParametrizedTest,
} }
std::unique_ptr<InputVolumeController> manager = CreateInputVolumeController( std::unique_ptr<InputVolumeController> manager = CreateInputVolumeController(
kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold, kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames,
kClippedWaitFrames, /*enable_clipping_predictor=*/false); /*enable_clipping_predictor=*/false);
manager->Initialize(); manager->Initialize();
EXPECT_FALSE(manager->clipping_predictor_enabled()); EXPECT_FALSE(manager->clipping_predictor_enabled());
@ -1302,8 +1298,8 @@ TEST_P(InputVolumeControllerParametrizedTest,
} }
std::unique_ptr<InputVolumeController> manager = CreateInputVolumeController( std::unique_ptr<InputVolumeController> manager = CreateInputVolumeController(
kInitialInputVolume, kClippedLevelStep, kClippedRatioThreshold, kClippedLevelStep, kClippedRatioThreshold, kClippedWaitFrames,
kClippedWaitFrames, /*enable_clipping_predictor=*/true); /*enable_clipping_predictor=*/true);
manager->Initialize(); manager->Initialize();
EXPECT_TRUE(manager->clipping_predictor_enabled()); EXPECT_TRUE(manager->clipping_predictor_enabled());
@ -1469,13 +1465,13 @@ TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) {
TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) { TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) {
constexpr int kInputVolume = kInitialInputVolume; constexpr int kInputVolume = kInitialInputVolume;
std::unique_ptr<InputVolumeController> controller_wait_0 = std::unique_ptr<InputVolumeController> controller_wait_0 =
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedRatioThreshold, kClippedWaitFrames, kClippedWaitFrames,
/*enable_clipping_predictor=*/false, /*enable_clipping_predictor=*/false,
/*update_input_volume_wait_frames=*/0); /*update_input_volume_wait_frames=*/0);
std::unique_ptr<InputVolumeController> controller_wait_100 = std::unique_ptr<InputVolumeController> controller_wait_100 =
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep, CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedRatioThreshold, kClippedWaitFrames, kClippedWaitFrames,
/*enable_clipping_predictor=*/false, /*enable_clipping_predictor=*/false,
/*update_input_volume_wait_frames=*/100); /*update_input_volume_wait_frames=*/100);
controller_wait_0->Initialize(); controller_wait_0->Initialize();
@ -1504,4 +1500,114 @@ TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) {
ASSERT_GT(controller_wait_100->recommended_analog_level(), kInputVolume); ASSERT_GT(controller_wait_100->recommended_analog_level(), kInputVolume);
} }
TEST(InputVolumeControllerTest, SpeechRatioThresholdIsEffective) {
constexpr int kInputVolume = kInitialInputVolume;
// Create two input volume controllers with 10 frames between volume updates
// and the minimum speech ratio of 0.8 and speech probability threshold 0.5.
std::unique_ptr<InputVolumeController> controller_1 =
CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedWaitFrames,
/*enable_clipping_predictor=*/false,
/*update_input_volume_wait_frames=*/10);
std::unique_ptr<InputVolumeController> controller_2 =
CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedWaitFrames,
/*enable_clipping_predictor=*/false,
/*update_input_volume_wait_frames=*/10);
controller_1->Initialize();
controller_2->Initialize();
controller_1->set_stream_analog_level(kInputVolume);
controller_2->set_stream_analog_level(kInputVolume);
SpeechSamplesReader reader_1;
SpeechSamplesReader reader_2;
reader_1.Feed(/*num_frames=*/1, /*gain_db=*/0,
/*speech_probability=*/0.7f, /*speech_level=*/-42.0f,
*controller_1);
reader_2.Feed(/*num_frames=*/1, /*gain_db=*/0,
/*speech_probability=*/0.4f, /*speech_level=*/-42.0f,
*controller_2);
ASSERT_EQ(controller_1->recommended_analog_level(), kInputVolume);
ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume);
reader_1.Feed(/*num_frames=*/2, /*gain_db=*/0,
/*speech_probability=*/0.4f, /*speech_level=*/-42.0f,
*controller_1);
reader_2.Feed(/*num_frames=*/2, /*gain_db=*/0,
/*speech_probability=*/0.4f, /*speech_level=*/-42.0f,
*controller_2);
ASSERT_EQ(controller_1->recommended_analog_level(), kInputVolume);
ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume);
reader_1.Feed(/*num_frames=*/7, /*gain_db=*/0,
/*speech_probability=*/0.7f, /*speech_level=*/-42.0f,
*controller_1);
reader_2.Feed(/*num_frames=*/7, /*gain_db=*/0,
/*speech_probability=*/0.7f, /*speech_level=*/-42.0f,
*controller_2);
ASSERT_GT(controller_1->recommended_analog_level(), kInputVolume);
ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume);
}
TEST(InputVolumeControllerTest, SpeechProbabilityThresholdIsEffective) {
constexpr int kInputVolume = kInitialInputVolume;
// Create two input volume controllers with the exact same settings and
// 10 frames between volume updates.
std::unique_ptr<InputVolumeController> controller_1 =
CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedWaitFrames,
/*enable_clipping_predictor=*/false,
/*update_input_volume_wait_frames=*/10);
std::unique_ptr<InputVolumeController> controller_2 =
CreateInputVolumeController(kClippedLevelStep, kClippedRatioThreshold,
kClippedWaitFrames,
/*enable_clipping_predictor=*/false,
/*update_input_volume_wait_frames=*/10);
controller_1->Initialize();
controller_2->Initialize();
controller_1->set_stream_analog_level(kInputVolume);
controller_2->set_stream_analog_level(kInputVolume);
SpeechSamplesReader reader_1;
SpeechSamplesReader reader_2;
// Process with two sets of inputs: Use `reader_1` to process inputs
// that make the volume to be adjusted after enough frames have been
// processsed and `reader_2` to process inputs that won't make the volume
// to be adjusted.
reader_1.Feed(/*num_frames=*/1, /*gain_db=*/0,
/*speech_probability=*/0.5f, /*speech_level=*/-42.0f,
*controller_1);
reader_2.Feed(/*num_frames=*/1, /*gain_db=*/0,
/*speech_probability=*/0.49f, /*speech_level=*/-42.0f,
*controller_2);
ASSERT_EQ(controller_1->recommended_analog_level(), kInputVolume);
ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume);
reader_1.Feed(/*num_frames=*/2, /*gain_db=*/0,
/*speech_probability=*/0.49f, /*speech_level=*/-42.0f,
*controller_1);
reader_2.Feed(/*num_frames=*/2, /*gain_db=*/0,
/*speech_probability=*/0.49f, /*speech_level=*/-42.0f,
*controller_2);
ASSERT_EQ(controller_1->recommended_analog_level(), kInputVolume);
ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume);
reader_1.Feed(/*num_frames=*/7, /*gain_db=*/0,
/*speech_probability=*/0.5f, /*speech_level=*/-42.0f,
*controller_1);
reader_2.Feed(/*num_frames=*/7, /*gain_db=*/0,
/*speech_probability=*/0.5f, /*speech_level=*/-42.0f,
*controller_2);
ASSERT_GT(controller_1->recommended_analog_level(), kInputVolume);
ASSERT_EQ(controller_2->recommended_analog_level(), kInputVolume);
}
} // namespace webrtc } // namespace webrtc