InputVolumeController: Make speech_probability non-optional
Make the argument speech_probability non-optional in InputVolumeController::Process() and MonoInputVolumeController::Process(). Additional clean-up: Remove the flag enabled in the config. Add unit tests for MonoInputVolumeController. Bug: webrtc:7494 Change-Id: Ie28af77dc628bf71d09ce1ff033d39031f77a21e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/283700 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Hanna Silen <silen@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38710}
This commit is contained in:
committed by
WebRTC LUCI CQ
parent
310e0624aa
commit
27fed4513f
@ -187,14 +187,13 @@ void MonoInputVolumeController::Initialize() {
|
||||
is_first_frame_ = true;
|
||||
}
|
||||
|
||||
// A speech segment is considered active if at least
|
||||
// A speech segment is considered active if at least
|
||||
// `update_input_volume_wait_frames_` new frames have been processed since the
|
||||
// previous update and the ratio of non-silence frames (i.e., frames with a
|
||||
// non-empty `speech_probability` value above `speech_probability_threshold_`)
|
||||
// is at least `speech_ratio_threshold_`.
|
||||
void MonoInputVolumeController::Process(
|
||||
absl::optional<int> rms_error_dbfs,
|
||||
absl::optional<float> speech_probability) {
|
||||
// `speech_probability` higher than `speech_probability_threshold_`) is at least
|
||||
// `speech_ratio_threshold_`.
|
||||
void MonoInputVolumeController::Process(absl::optional<int> rms_error_dbfs,
|
||||
float speech_probability) {
|
||||
if (check_volume_on_next_process_) {
|
||||
check_volume_on_next_process_ = false;
|
||||
// We have to wait until the first process call to check the volume,
|
||||
@ -203,8 +202,7 @@ void MonoInputVolumeController::Process(
|
||||
}
|
||||
|
||||
// Count frames with a high speech probability as speech.
|
||||
if (speech_probability.has_value() &&
|
||||
*speech_probability >= speech_probability_threshold_) {
|
||||
if (speech_probability >= speech_probability_threshold_) {
|
||||
++speech_frames_since_update_input_volume_;
|
||||
}
|
||||
|
||||
@ -364,8 +362,7 @@ void MonoInputVolumeController::UpdateInputVolume(int rms_error_dbfs) {
|
||||
|
||||
InputVolumeController::InputVolumeController(int num_capture_channels,
|
||||
const Config& config)
|
||||
: analog_controller_enabled_(config.enabled),
|
||||
num_capture_channels_(num_capture_channels),
|
||||
: num_capture_channels_(num_capture_channels),
|
||||
min_mic_level_override_(GetMinMicLevelOverride()),
|
||||
capture_output_used_(true),
|
||||
clipped_level_step_(config.clipped_level_step),
|
||||
@ -384,9 +381,8 @@ InputVolumeController::InputVolumeController(int num_capture_channels,
|
||||
target_range_max_dbfs_(config.target_range_max_dbfs),
|
||||
target_range_min_dbfs_(config.target_range_min_dbfs),
|
||||
channel_controllers_(num_capture_channels) {
|
||||
RTC_LOG(LS_INFO) << "[agc] analog controller enabled: "
|
||||
<< (analog_controller_enabled_ ? "yes" : "no");
|
||||
const int min_mic_level = min_mic_level_override_.value_or(kMinMicLevel);
|
||||
RTC_LOG(LS_INFO) << "[agc] Input volume controller enabled";
|
||||
RTC_LOG(LS_INFO) << "[agc] Min mic level: " << min_mic_level
|
||||
<< " (overridden: "
|
||||
<< (min_mic_level_override_.has_value() ? "yes" : "no")
|
||||
@ -475,10 +471,12 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (clipping_detected) {
|
||||
RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
|
||||
<< clipped_ratio;
|
||||
}
|
||||
|
||||
int step = clipped_level_step_;
|
||||
if (clipping_predicted) {
|
||||
predicted_step = std::max(predicted_step, clipped_level_step_);
|
||||
@ -487,6 +485,7 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
|
||||
step = predicted_step;
|
||||
}
|
||||
}
|
||||
|
||||
if (clipping_detected ||
|
||||
(clipping_predicted && use_clipping_predictor_step_)) {
|
||||
for (auto& state_ch : channel_controllers_) {
|
||||
@ -497,10 +496,11 @@ void InputVolumeController::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
|
||||
clipping_predictor_->Reset();
|
||||
}
|
||||
}
|
||||
|
||||
AggregateChannelLevels();
|
||||
}
|
||||
|
||||
void InputVolumeController::Process(absl::optional<float> speech_probability,
|
||||
void InputVolumeController::Process(float speech_probability,
|
||||
absl::optional<float> speech_level_dbfs) {
|
||||
AggregateChannelLevels();
|
||||
|
||||
@ -509,7 +509,7 @@ void InputVolumeController::Process(absl::optional<float> speech_probability,
|
||||
}
|
||||
|
||||
absl::optional<int> rms_error_dbfs;
|
||||
if (speech_probability.has_value() && speech_level_dbfs.has_value()) {
|
||||
if (speech_level_dbfs.has_value()) {
|
||||
// Compute the error for all frames (both speech and non-speech frames).
|
||||
rms_error_dbfs = GetSpeechLevelErrorDb(
|
||||
*speech_level_dbfs, target_range_min_dbfs_, target_range_max_dbfs_);
|
||||
@ -527,14 +527,11 @@ void InputVolumeController::HandleCaptureOutputUsedChange(
|
||||
for (auto& controller : channel_controllers_) {
|
||||
controller->HandleCaptureOutputUsedChange(capture_output_used);
|
||||
}
|
||||
|
||||
capture_output_used_ = capture_output_used;
|
||||
}
|
||||
|
||||
void InputVolumeController::set_stream_analog_level(int level) {
|
||||
if (!analog_controller_enabled_) {
|
||||
recommended_input_volume_ = level;
|
||||
}
|
||||
|
||||
for (auto& controller : channel_controllers_) {
|
||||
controller->set_stream_analog_level(level);
|
||||
}
|
||||
@ -559,9 +556,7 @@ void InputVolumeController::AggregateChannelLevels() {
|
||||
std::max(new_recommended_input_volume, *min_mic_level_override_);
|
||||
}
|
||||
|
||||
if (analog_controller_enabled_) {
|
||||
recommended_input_volume_ = new_recommended_input_volume;
|
||||
}
|
||||
recommended_input_volume_ = new_recommended_input_volume;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -35,7 +35,6 @@ class InputVolumeController final {
|
||||
public:
|
||||
// Config for the constructor.
|
||||
struct Config {
|
||||
bool enabled = false;
|
||||
// Lowest input volume level that will be applied in response to clipping.
|
||||
int clipped_level_min = 70;
|
||||
// Amount input volume level is lowered with every clipping event. Limited
|
||||
@ -98,7 +97,7 @@ class InputVolumeController final {
|
||||
// `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value
|
||||
// of `speech_probability` is expected to be in the range [0.0f, 1.0f] and
|
||||
// `speech_level_dbfs` in the range [-90.f, 30.0f].
|
||||
void Process(absl::optional<float> speech_probability,
|
||||
void Process(float speech_probability,
|
||||
absl::optional<float> speech_level_dbfs);
|
||||
|
||||
// TODO(bugs.webrtc.org/7494): Return recommended input volume and remove
|
||||
@ -142,8 +141,6 @@ class InputVolumeController final {
|
||||
|
||||
void AggregateChannelLevels();
|
||||
|
||||
const bool analog_controller_enabled_;
|
||||
|
||||
const int num_capture_channels_;
|
||||
|
||||
// If not empty, the value is used to override the minimum input volume.
|
||||
@ -213,8 +210,7 @@ class MonoInputVolumeController {
|
||||
// whether `rms_error_dbfs` is positive or negative. Updates are only allowed
|
||||
// for active speech segments and when `rms_error_dbfs` is not empty. Must be
|
||||
// called after `HandleClipping()`.
|
||||
void Process(absl::optional<int> rms_error_dbfs,
|
||||
absl::optional<float> speech_probability);
|
||||
void Process(absl::optional<int> rms_error_dbfs, float speech_probability);
|
||||
|
||||
// Returns the recommended input volume. Must be called after `Process()`.
|
||||
int recommended_analog_level() const { return recommended_input_volume_; }
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -67,7 +67,7 @@ std::unique_ptr<InputVolumeController> CreateInputVolumeController(
|
||||
int num_channels) {
|
||||
if (enabled) {
|
||||
return std::make_unique<InputVolumeController>(
|
||||
num_channels, InputVolumeController::Config{.enabled = enabled});
|
||||
num_channels, InputVolumeController::Config());
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@ -170,12 +170,17 @@ void GainController2::Process(absl::optional<float> speech_probability,
|
||||
}
|
||||
|
||||
if (input_volume_controller_) {
|
||||
// TODO(bugs.webrtc.org/7494): A temporary check, remove once not needed.
|
||||
RTC_DCHECK(adaptive_digital_controller_);
|
||||
absl::optional<float> speech_level;
|
||||
if (adaptive_digital_controller_) {
|
||||
speech_level =
|
||||
adaptive_digital_controller_->GetSpeechLevelDbfsIfConfident();
|
||||
}
|
||||
input_volume_controller_->Process(speech_probability, speech_level);
|
||||
RTC_DCHECK(speech_probability.has_value());
|
||||
if (speech_probability.has_value()) {
|
||||
input_volume_controller_->Process(*speech_probability, speech_level);
|
||||
}
|
||||
}
|
||||
|
||||
fixed_gain_applier_.ApplyGain(float_frame);
|
||||
|
||||
@ -159,6 +159,7 @@ TEST(GainController2,
|
||||
|
||||
Agc2Config config;
|
||||
config.input_volume_controller.enabled = false;
|
||||
|
||||
auto gain_controller =
|
||||
std::make_unique<GainController2>(config, kSampleRateHz, kNumChannels,
|
||||
/*use_internal_vad=*/true);
|
||||
@ -189,6 +190,8 @@ TEST(GainController2,
|
||||
|
||||
Agc2Config config;
|
||||
config.input_volume_controller.enabled = true;
|
||||
config.adaptive_digital.enabled = true;
|
||||
|
||||
auto gain_controller =
|
||||
std::make_unique<GainController2>(config, kSampleRateHz, kNumChannels,
|
||||
/*use_internal_vad=*/true);
|
||||
|
||||
Reference in New Issue
Block a user