APM: remove LevelEstimator
`LevelEstimator` is only used in unit tests and duplicates what `capture_output_rms_` already does. This CL also removes `AudioProcessingStats::output_rms_dbfs`, which is now unused.

Bug: webrtc:5298
Fix: chromium:1261339
Change-Id: I6e583c11d4abb58444c440509a8495a7f5ebc589
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/235664
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#35246}
This commit is contained in:

committed by
WebRTC LUCI CQ

parent
00c62eddf4
commit
183c64ce19
@ -157,8 +157,6 @@ rtc_library("audio_processing") {
|
|||||||
"echo_detector/normalized_covariance_estimator.h",
|
"echo_detector/normalized_covariance_estimator.h",
|
||||||
"gain_control_impl.cc",
|
"gain_control_impl.cc",
|
||||||
"gain_control_impl.h",
|
"gain_control_impl.h",
|
||||||
"level_estimator.cc",
|
|
||||||
"level_estimator.h",
|
|
||||||
"render_queue_item_verifier.h",
|
"render_queue_item_verifier.h",
|
||||||
"residual_echo_detector.cc",
|
"residual_echo_detector.cc",
|
||||||
"residual_echo_detector.h",
|
"residual_echo_detector.h",
|
||||||
@ -445,7 +443,6 @@ if (rtc_include_tests) {
|
|||||||
"echo_detector/normalized_covariance_estimator_unittest.cc",
|
"echo_detector/normalized_covariance_estimator_unittest.cc",
|
||||||
"gain_control_unittest.cc",
|
"gain_control_unittest.cc",
|
||||||
"high_pass_filter_unittest.cc",
|
"high_pass_filter_unittest.cc",
|
||||||
"level_estimator_unittest.cc",
|
|
||||||
"residual_echo_detector_unittest.cc",
|
"residual_echo_detector_unittest.cc",
|
||||||
"rms_level_unittest.cc",
|
"rms_level_unittest.cc",
|
||||||
"test/debug_dump_replayer.cc",
|
"test/debug_dump_replayer.cc",
|
||||||
|
@ -418,7 +418,6 @@ void AudioProcessingImpl::InitializeLocked() {
|
|||||||
InitializePostProcessor();
|
InitializePostProcessor();
|
||||||
InitializePreProcessor();
|
InitializePreProcessor();
|
||||||
InitializeCaptureLevelsAdjuster();
|
InitializeCaptureLevelsAdjuster();
|
||||||
InitializeLevelEstimator();
|
|
||||||
|
|
||||||
if (aec_dump_) {
|
if (aec_dump_) {
|
||||||
aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
|
aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
|
||||||
@ -596,8 +595,6 @@ void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
|
|||||||
InitializeCaptureLevelsAdjuster();
|
InitializeCaptureLevelsAdjuster();
|
||||||
}
|
}
|
||||||
|
|
||||||
InitializeLevelEstimator();
|
|
||||||
|
|
||||||
if (voice_detection_config_changed) {
|
if (voice_detection_config_changed) {
|
||||||
InitializeVoiceDetector();
|
InitializeVoiceDetector();
|
||||||
}
|
}
|
||||||
@ -1279,7 +1276,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
|||||||
capture_buffer->MergeFrequencyBands();
|
capture_buffer->MergeFrequencyBands();
|
||||||
}
|
}
|
||||||
|
|
||||||
capture_.stats.output_rms_dbfs = absl::nullopt;
|
|
||||||
if (capture_.capture_output_used) {
|
if (capture_.capture_output_used) {
|
||||||
if (capture_.capture_fullband_audio) {
|
if (capture_.capture_fullband_audio) {
|
||||||
const auto& ec = submodules_.echo_controller;
|
const auto& ec = submodules_.echo_controller;
|
||||||
@ -1332,13 +1328,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
|||||||
submodules_.capture_post_processor->Process(capture_buffer);
|
submodules_.capture_post_processor->Process(capture_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (submodules_.output_level_estimator) {
|
|
||||||
// The level estimator operates on the recombined data.
|
|
||||||
submodules_.output_level_estimator->ProcessStream(*capture_buffer);
|
|
||||||
capture_.stats.output_rms_dbfs =
|
|
||||||
submodules_.output_level_estimator->RMS();
|
|
||||||
}
|
|
||||||
|
|
||||||
capture_output_rms_.Analyze(rtc::ArrayView<const float>(
|
capture_output_rms_.Analyze(rtc::ArrayView<const float>(
|
||||||
capture_buffer->channels_const()[0],
|
capture_buffer->channels_const()[0],
|
||||||
capture_nonlocked_.capture_processing_format.num_frames()));
|
capture_nonlocked_.capture_processing_format.num_frames()));
|
||||||
@ -2014,16 +2003,6 @@ void AudioProcessingImpl::InitializeAnalyzer() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioProcessingImpl::InitializeLevelEstimator() {
|
|
||||||
if (!config_.level_estimation.enabled) {
|
|
||||||
submodules_.output_level_estimator.reset();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!submodules_.output_level_estimator) {
|
|
||||||
submodules_.output_level_estimator = std::make_unique<LevelEstimator>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AudioProcessingImpl::InitializePostProcessor() {
|
void AudioProcessingImpl::InitializePostProcessor() {
|
||||||
if (submodules_.capture_post_processor) {
|
if (submodules_.capture_post_processor) {
|
||||||
submodules_.capture_post_processor->Initialize(
|
submodules_.capture_post_processor->Initialize(
|
||||||
|
@ -32,7 +32,6 @@
|
|||||||
#include "modules/audio_processing/include/audio_frame_proxies.h"
|
#include "modules/audio_processing/include/audio_frame_proxies.h"
|
||||||
#include "modules/audio_processing/include/audio_processing.h"
|
#include "modules/audio_processing/include/audio_processing.h"
|
||||||
#include "modules/audio_processing/include/audio_processing_statistics.h"
|
#include "modules/audio_processing/include/audio_processing_statistics.h"
|
||||||
#include "modules/audio_processing/level_estimator.h"
|
|
||||||
#include "modules/audio_processing/ns/noise_suppressor.h"
|
#include "modules/audio_processing/ns/noise_suppressor.h"
|
||||||
#include "modules/audio_processing/optionally_built_submodule_creators.h"
|
#include "modules/audio_processing/optionally_built_submodule_creators.h"
|
||||||
#include "modules/audio_processing/render_queue_item_verifier.h"
|
#include "modules/audio_processing/render_queue_item_verifier.h"
|
||||||
@ -284,7 +283,6 @@ class AudioProcessingImpl : public AudioProcessing {
|
|||||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
|
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
|
||||||
void InitializePostProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
|
void InitializePostProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
|
||||||
void InitializeAnalyzer() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
|
void InitializeAnalyzer() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
|
||||||
void InitializeLevelEstimator() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
|
|
||||||
|
|
||||||
// Initializations of render-only submodules, requiring the render lock
|
// Initializations of render-only submodules, requiring the render lock
|
||||||
// already acquired.
|
// already acquired.
|
||||||
@ -405,7 +403,6 @@ class AudioProcessingImpl : public AudioProcessing {
|
|||||||
std::unique_ptr<CustomProcessing> capture_post_processor;
|
std::unique_ptr<CustomProcessing> capture_post_processor;
|
||||||
std::unique_ptr<CustomProcessing> render_pre_processor;
|
std::unique_ptr<CustomProcessing> render_pre_processor;
|
||||||
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer;
|
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer;
|
||||||
std::unique_ptr<LevelEstimator> output_level_estimator;
|
|
||||||
std::unique_ptr<VoiceDetection> voice_detector;
|
std::unique_ptr<VoiceDetection> voice_detector;
|
||||||
std::unique_ptr<CaptureLevelsAdjuster> capture_levels_adjuster;
|
std::unique_ptr<CaptureLevelsAdjuster> capture_levels_adjuster;
|
||||||
} submodules_;
|
} submodules_;
|
||||||
|
@ -522,7 +522,6 @@ void AudioProcessingImplLockTest::SetUp() {
|
|||||||
AudioProcessing::Config::GainController1::kAdaptiveDigital;
|
AudioProcessing::Config::GainController1::kAdaptiveDigital;
|
||||||
apm_config.noise_suppression.enabled = true;
|
apm_config.noise_suppression.enabled = true;
|
||||||
apm_config.voice_detection.enabled = true;
|
apm_config.voice_detection.enabled = true;
|
||||||
apm_config.level_estimation.enabled = true;
|
|
||||||
apm_->ApplyConfig(apm_config);
|
apm_->ApplyConfig(apm_config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -443,7 +443,6 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
|
|||||||
apm_config.gain_controller1.enabled = true;
|
apm_config.gain_controller1.enabled = true;
|
||||||
apm_config.gain_controller1.mode =
|
apm_config.gain_controller1.mode =
|
||||||
AudioProcessing::Config::GainController1::kAdaptiveDigital;
|
AudioProcessing::Config::GainController1::kAdaptiveDigital;
|
||||||
apm_config.level_estimation.enabled = true;
|
|
||||||
apm_config.voice_detection.enabled = true;
|
apm_config.voice_detection.enabled = true;
|
||||||
apm->ApplyConfig(apm_config);
|
apm->ApplyConfig(apm_config);
|
||||||
};
|
};
|
||||||
@ -456,7 +455,6 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
|
|||||||
apm_config.noise_suppression.enabled = true;
|
apm_config.noise_suppression.enabled = true;
|
||||||
apm_config.gain_controller1.mode =
|
apm_config.gain_controller1.mode =
|
||||||
AudioProcessing::Config::GainController1::kAdaptiveDigital;
|
AudioProcessing::Config::GainController1::kAdaptiveDigital;
|
||||||
apm_config.level_estimation.enabled = true;
|
|
||||||
apm_config.voice_detection.enabled = true;
|
apm_config.voice_detection.enabled = true;
|
||||||
apm->ApplyConfig(apm_config);
|
apm->ApplyConfig(apm_config);
|
||||||
};
|
};
|
||||||
@ -467,7 +465,6 @@ class CallSimulator : public ::testing::TestWithParam<SimulationConfig> {
|
|||||||
AudioProcessing::Config apm_config = apm->GetConfig();
|
AudioProcessing::Config apm_config = apm->GetConfig();
|
||||||
apm_config.echo_canceller.enabled = false;
|
apm_config.echo_canceller.enabled = false;
|
||||||
apm_config.gain_controller1.enabled = false;
|
apm_config.gain_controller1.enabled = false;
|
||||||
apm_config.level_estimation.enabled = false;
|
|
||||||
apm_config.noise_suppression.enabled = false;
|
apm_config.noise_suppression.enabled = false;
|
||||||
apm_config.voice_detection.enabled = false;
|
apm_config.voice_detection.enabled = false;
|
||||||
apm->ApplyConfig(apm_config);
|
apm->ApplyConfig(apm_config);
|
||||||
|
@ -207,7 +207,6 @@ void EnableAllAPComponents(AudioProcessing* ap) {
|
|||||||
apm_config.noise_suppression.enabled = true;
|
apm_config.noise_suppression.enabled = true;
|
||||||
|
|
||||||
apm_config.high_pass_filter.enabled = true;
|
apm_config.high_pass_filter.enabled = true;
|
||||||
apm_config.level_estimation.enabled = true;
|
|
||||||
apm_config.voice_detection.enabled = true;
|
apm_config.voice_detection.enabled = true;
|
||||||
apm_config.pipeline.maximum_internal_processing_rate = 48000;
|
apm_config.pipeline.maximum_internal_processing_rate = 48000;
|
||||||
ap->ApplyConfig(apm_config);
|
ap->ApplyConfig(apm_config);
|
||||||
@ -1272,7 +1271,6 @@ TEST_F(ApmTest, AllProcessingDisabledByDefault) {
|
|||||||
EXPECT_FALSE(config.echo_canceller.enabled);
|
EXPECT_FALSE(config.echo_canceller.enabled);
|
||||||
EXPECT_FALSE(config.high_pass_filter.enabled);
|
EXPECT_FALSE(config.high_pass_filter.enabled);
|
||||||
EXPECT_FALSE(config.gain_controller1.enabled);
|
EXPECT_FALSE(config.gain_controller1.enabled);
|
||||||
EXPECT_FALSE(config.level_estimation.enabled);
|
|
||||||
EXPECT_FALSE(config.noise_suppression.enabled);
|
EXPECT_FALSE(config.noise_suppression.enabled);
|
||||||
EXPECT_FALSE(config.voice_detection.enabled);
|
EXPECT_FALSE(config.voice_detection.enabled);
|
||||||
}
|
}
|
||||||
@ -1399,7 +1397,6 @@ TEST_F(ApmTest, SplittingFilter) {
|
|||||||
auto apm_config = apm_->GetConfig();
|
auto apm_config = apm_->GetConfig();
|
||||||
SetFrameTo(&frame_, 1000);
|
SetFrameTo(&frame_, 1000);
|
||||||
frame_copy.CopyFrom(frame_);
|
frame_copy.CopyFrom(frame_);
|
||||||
apm_config.level_estimation.enabled = true;
|
|
||||||
apm_->ApplyConfig(apm_config);
|
apm_->ApplyConfig(apm_config);
|
||||||
EXPECT_EQ(apm_->kNoError,
|
EXPECT_EQ(apm_->kNoError,
|
||||||
apm_->ProcessStream(
|
apm_->ProcessStream(
|
||||||
@ -1414,7 +1411,6 @@ TEST_F(ApmTest, SplittingFilter) {
|
|||||||
StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
|
StreamConfig(frame_.sample_rate_hz, frame_.num_channels),
|
||||||
frame_.data.data()));
|
frame_.data.data()));
|
||||||
EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
|
EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
|
||||||
apm_config.level_estimation.enabled = false;
|
|
||||||
apm_->ApplyConfig(apm_config);
|
apm_->ApplyConfig(apm_config);
|
||||||
|
|
||||||
// 3. Only GetStatistics-reporting VAD is enabled...
|
// 3. Only GetStatistics-reporting VAD is enabled...
|
||||||
@ -1438,11 +1434,10 @@ TEST_F(ApmTest, SplittingFilter) {
|
|||||||
apm_config.voice_detection.enabled = false;
|
apm_config.voice_detection.enabled = false;
|
||||||
apm_->ApplyConfig(apm_config);
|
apm_->ApplyConfig(apm_config);
|
||||||
|
|
||||||
// 4. Both the VAD and the level estimator are enabled...
|
// 4. The VAD is enabled...
|
||||||
SetFrameTo(&frame_, 1000);
|
SetFrameTo(&frame_, 1000);
|
||||||
frame_copy.CopyFrom(frame_);
|
frame_copy.CopyFrom(frame_);
|
||||||
apm_config.voice_detection.enabled = true;
|
apm_config.voice_detection.enabled = true;
|
||||||
apm_config.level_estimation.enabled = true;
|
|
||||||
apm_->ApplyConfig(apm_config);
|
apm_->ApplyConfig(apm_config);
|
||||||
EXPECT_EQ(apm_->kNoError,
|
EXPECT_EQ(apm_->kNoError,
|
||||||
apm_->ProcessStream(
|
apm_->ProcessStream(
|
||||||
@ -1458,7 +1453,6 @@ TEST_F(ApmTest, SplittingFilter) {
|
|||||||
frame_.data.data()));
|
frame_.data.data()));
|
||||||
EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
|
EXPECT_TRUE(FrameDataAreEqual(frame_, frame_copy));
|
||||||
apm_config.voice_detection.enabled = false;
|
apm_config.voice_detection.enabled = false;
|
||||||
apm_config.level_estimation.enabled = false;
|
|
||||||
apm_->ApplyConfig(apm_config);
|
apm_->ApplyConfig(apm_config);
|
||||||
|
|
||||||
// Check the test is valid. We should have distortion from the filter
|
// Check the test is valid. We should have distortion from the filter
|
||||||
@ -1790,7 +1784,6 @@ TEST_F(ApmTest, Process) {
|
|||||||
int analog_level = 127;
|
int analog_level = 127;
|
||||||
int analog_level_average = 0;
|
int analog_level_average = 0;
|
||||||
int max_output_average = 0;
|
int max_output_average = 0;
|
||||||
float rms_dbfs_average = 0.0f;
|
|
||||||
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
|
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
|
||||||
int stats_index = 0;
|
int stats_index = 0;
|
||||||
#endif
|
#endif
|
||||||
@ -1824,9 +1817,7 @@ TEST_F(ApmTest, Process) {
|
|||||||
analog_level_average += analog_level;
|
analog_level_average += analog_level;
|
||||||
AudioProcessingStats stats = apm_->GetStatistics();
|
AudioProcessingStats stats = apm_->GetStatistics();
|
||||||
EXPECT_TRUE(stats.voice_detected);
|
EXPECT_TRUE(stats.voice_detected);
|
||||||
EXPECT_TRUE(stats.output_rms_dbfs);
|
|
||||||
has_voice_count += *stats.voice_detected ? 1 : 0;
|
has_voice_count += *stats.voice_detected ? 1 : 0;
|
||||||
rms_dbfs_average += *stats.output_rms_dbfs;
|
|
||||||
|
|
||||||
size_t frame_size = frame_.samples_per_channel * frame_.num_channels;
|
size_t frame_size = frame_.samples_per_channel * frame_.num_channels;
|
||||||
size_t write_count =
|
size_t write_count =
|
||||||
@ -1879,7 +1870,6 @@ TEST_F(ApmTest, Process) {
|
|||||||
}
|
}
|
||||||
max_output_average /= frame_count;
|
max_output_average /= frame_count;
|
||||||
analog_level_average /= frame_count;
|
analog_level_average /= frame_count;
|
||||||
rms_dbfs_average /= frame_count;
|
|
||||||
|
|
||||||
if (!absl::GetFlag(FLAGS_write_apm_ref_data)) {
|
if (!absl::GetFlag(FLAGS_write_apm_ref_data)) {
|
||||||
const int kIntNear = 1;
|
const int kIntNear = 1;
|
||||||
@ -1907,19 +1897,11 @@ TEST_F(ApmTest, Process) {
|
|||||||
EXPECT_NEAR(test->max_output_average(),
|
EXPECT_NEAR(test->max_output_average(),
|
||||||
max_output_average - kMaxOutputAverageOffset,
|
max_output_average - kMaxOutputAverageOffset,
|
||||||
kMaxOutputAverageNear);
|
kMaxOutputAverageNear);
|
||||||
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
|
|
||||||
const double kFloatNear = 0.002;
|
|
||||||
EXPECT_NEAR(test->rms_dbfs_average(), rms_dbfs_average, kFloatNear);
|
|
||||||
#endif
|
|
||||||
} else {
|
} else {
|
||||||
test->set_has_voice_count(has_voice_count);
|
test->set_has_voice_count(has_voice_count);
|
||||||
|
|
||||||
test->set_analog_level_average(analog_level_average);
|
test->set_analog_level_average(analog_level_average);
|
||||||
test->set_max_output_average(max_output_average);
|
test->set_max_output_average(max_output_average);
|
||||||
|
|
||||||
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
|
|
||||||
test->set_rms_dbfs_average(rms_dbfs_average);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rewind(far_file_);
|
rewind(far_file_);
|
||||||
@ -2721,7 +2703,6 @@ rtc::scoped_refptr<AudioProcessing> CreateApm(bool mobile_aec) {
|
|||||||
apm_config.echo_canceller.enabled = true;
|
apm_config.echo_canceller.enabled = true;
|
||||||
apm_config.echo_canceller.mobile_mode = mobile_aec;
|
apm_config.echo_canceller.mobile_mode = mobile_aec;
|
||||||
apm_config.noise_suppression.enabled = false;
|
apm_config.noise_suppression.enabled = false;
|
||||||
apm_config.level_estimation.enabled = false;
|
|
||||||
apm_config.voice_detection.enabled = false;
|
apm_config.voice_detection.enabled = false;
|
||||||
apm->ApplyConfig(apm_config);
|
apm->ApplyConfig(apm_config);
|
||||||
return apm;
|
return apm;
|
||||||
@ -2835,60 +2816,6 @@ TEST(MAYBE_ApmStatistics, AECMEnabledTest) {
|
|||||||
EXPECT_FALSE(stats.echo_return_loss_enhancement);
|
EXPECT_FALSE(stats.echo_return_loss_enhancement);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(ApmStatistics, ReportOutputRmsDbfs) {
|
|
||||||
ProcessingConfig processing_config = {
|
|
||||||
{{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}};
|
|
||||||
AudioProcessing::Config config;
|
|
||||||
|
|
||||||
// Set up an audioframe.
|
|
||||||
Int16FrameData frame;
|
|
||||||
frame.num_channels = 1;
|
|
||||||
SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz);
|
|
||||||
|
|
||||||
// Fill the audio frame with a sawtooth pattern.
|
|
||||||
int16_t* ptr = frame.data.data();
|
|
||||||
for (size_t i = 0; i < frame.kMaxDataSizeSamples; i++) {
|
|
||||||
ptr[i] = 10000 * ((i % 3) - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
rtc::scoped_refptr<AudioProcessing> apm =
|
|
||||||
AudioProcessingBuilderForTesting().Create();
|
|
||||||
apm->Initialize(processing_config);
|
|
||||||
|
|
||||||
// If not enabled, no metric should be reported.
|
|
||||||
EXPECT_EQ(
|
|
||||||
apm->ProcessStream(frame.data.data(),
|
|
||||||
StreamConfig(frame.sample_rate_hz, frame.num_channels),
|
|
||||||
StreamConfig(frame.sample_rate_hz, frame.num_channels),
|
|
||||||
frame.data.data()),
|
|
||||||
0);
|
|
||||||
EXPECT_FALSE(apm->GetStatistics().output_rms_dbfs);
|
|
||||||
|
|
||||||
// If enabled, metrics should be reported.
|
|
||||||
config.level_estimation.enabled = true;
|
|
||||||
apm->ApplyConfig(config);
|
|
||||||
EXPECT_EQ(
|
|
||||||
apm->ProcessStream(frame.data.data(),
|
|
||||||
StreamConfig(frame.sample_rate_hz, frame.num_channels),
|
|
||||||
StreamConfig(frame.sample_rate_hz, frame.num_channels),
|
|
||||||
frame.data.data()),
|
|
||||||
0);
|
|
||||||
auto stats = apm->GetStatistics();
|
|
||||||
EXPECT_TRUE(stats.output_rms_dbfs);
|
|
||||||
EXPECT_GE(*stats.output_rms_dbfs, 0);
|
|
||||||
|
|
||||||
// If re-disabled, the value is again not reported.
|
|
||||||
config.level_estimation.enabled = false;
|
|
||||||
apm->ApplyConfig(config);
|
|
||||||
EXPECT_EQ(
|
|
||||||
apm->ProcessStream(frame.data.data(),
|
|
||||||
StreamConfig(frame.sample_rate_hz, frame.num_channels),
|
|
||||||
StreamConfig(frame.sample_rate_hz, frame.num_channels),
|
|
||||||
frame.data.data()),
|
|
||||||
0);
|
|
||||||
EXPECT_FALSE(apm->GetStatistics().output_rms_dbfs);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(ApmStatistics, ReportHasVoice) {
|
TEST(ApmStatistics, ReportHasVoice) {
|
||||||
ProcessingConfig processing_config = {
|
ProcessingConfig processing_config = {
|
||||||
{{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}};
|
{{32000, 1}, {32000, 1}, {32000, 1}, {32000, 1}}};
|
||||||
|
@ -210,9 +210,7 @@ std::string AudioProcessing::Config::ToString() const {
|
|||||||
<< ", max_output_noise_level_dbfs: "
|
<< ", max_output_noise_level_dbfs: "
|
||||||
<< gain_controller2.adaptive_digital.max_output_noise_level_dbfs
|
<< gain_controller2.adaptive_digital.max_output_noise_level_dbfs
|
||||||
<< "}}, residual_echo_detector: { enabled: "
|
<< "}}, residual_echo_detector: { enabled: "
|
||||||
<< residual_echo_detector.enabled
|
<< residual_echo_detector.enabled << " }}";
|
||||||
<< " }, level_estimation: { enabled: " << level_estimation.enabled
|
|
||||||
<< " }}";
|
|
||||||
return builder.str();
|
return builder.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -386,11 +386,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
|
|||||||
bool enabled = true;
|
bool enabled = true;
|
||||||
} residual_echo_detector;
|
} residual_echo_detector;
|
||||||
|
|
||||||
// Enables reporting of `output_rms_dbfs` in webrtc::AudioProcessingStats.
|
|
||||||
struct LevelEstimation {
|
|
||||||
bool enabled = false;
|
|
||||||
} level_estimation;
|
|
||||||
|
|
||||||
std::string ToString() const;
|
std::string ToString() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -24,14 +24,6 @@ struct RTC_EXPORT AudioProcessingStats {
|
|||||||
AudioProcessingStats(const AudioProcessingStats& other);
|
AudioProcessingStats(const AudioProcessingStats& other);
|
||||||
~AudioProcessingStats();
|
~AudioProcessingStats();
|
||||||
|
|
||||||
// The root mean square (RMS) level in dBFS (decibels from digital
|
|
||||||
// full-scale) of the last capture frame, after processing. It is
|
|
||||||
// constrained to [-127, 0].
|
|
||||||
// The computation follows: https://tools.ietf.org/html/rfc6465
|
|
||||||
// with the intent that it can provide the RTP audio level indication.
|
|
||||||
// Only reported if level estimation is enabled in AudioProcessing::Config.
|
|
||||||
absl::optional<int> output_rms_dbfs;
|
|
||||||
|
|
||||||
// True if voice is detected in the last capture frame, after processing.
|
// True if voice is detected in the last capture frame, after processing.
|
||||||
// It is conservative in flagging audio as speech, with low likelihood of
|
// It is conservative in flagging audio as speech, with low likelihood of
|
||||||
// incorrectly flagging a frame as voice.
|
// incorrectly flagging a frame as voice.
|
||||||
|
@ -1,29 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Use of this source code is governed by a BSD-style license
|
|
||||||
* that can be found in the LICENSE file in the root of the source
|
|
||||||
* tree. An additional intellectual property rights grant can be found
|
|
||||||
* in the file PATENTS. All contributing project authors may
|
|
||||||
* be found in the AUTHORS file in the root of the source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "modules/audio_processing/level_estimator.h"
|
|
||||||
|
|
||||||
#include "api/array_view.h"
|
|
||||||
|
|
||||||
namespace webrtc {
|
|
||||||
|
|
||||||
LevelEstimator::LevelEstimator() {
|
|
||||||
rms_.Reset();
|
|
||||||
}
|
|
||||||
|
|
||||||
LevelEstimator::~LevelEstimator() = default;
|
|
||||||
|
|
||||||
void LevelEstimator::ProcessStream(const AudioBuffer& audio) {
|
|
||||||
for (size_t i = 0; i < audio.num_channels(); i++) {
|
|
||||||
rms_.Analyze(rtc::ArrayView<const float>(audio.channels_const()[i],
|
|
||||||
audio.num_frames()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // namespace webrtc
|
|
@ -1,47 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Use of this source code is governed by a BSD-style license
|
|
||||||
* that can be found in the LICENSE file in the root of the source
|
|
||||||
* tree. An additional intellectual property rights grant can be found
|
|
||||||
* in the file PATENTS. All contributing project authors may
|
|
||||||
* be found in the AUTHORS file in the root of the source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_H_
|
|
||||||
#define MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_H_
|
|
||||||
|
|
||||||
#include "modules/audio_processing/audio_buffer.h"
|
|
||||||
#include "modules/audio_processing/rms_level.h"
|
|
||||||
|
|
||||||
namespace webrtc {
|
|
||||||
|
|
||||||
// An estimation component used to retrieve level metrics.
|
|
||||||
class LevelEstimator {
|
|
||||||
public:
|
|
||||||
LevelEstimator();
|
|
||||||
~LevelEstimator();
|
|
||||||
|
|
||||||
LevelEstimator(LevelEstimator&) = delete;
|
|
||||||
LevelEstimator& operator=(LevelEstimator&) = delete;
|
|
||||||
|
|
||||||
void ProcessStream(const AudioBuffer& audio);
|
|
||||||
|
|
||||||
// Returns the root mean square (RMS) level in dBFs (decibels from digital
|
|
||||||
// full-scale), or alternately dBov. It is computed over all primary stream
|
|
||||||
// frames since the last call to RMS(). The returned value is positive but
|
|
||||||
// should be interpreted as negative. It is constrained to [0, 127].
|
|
||||||
//
|
|
||||||
// The computation follows: https://tools.ietf.org/html/rfc6465
|
|
||||||
// with the intent that it can provide the RTP audio level indication.
|
|
||||||
//
|
|
||||||
// Frames passed to ProcessStream() with an `_energy` of zero are considered
|
|
||||||
// to have been muted. The RMS of the frame will be interpreted as -127.
|
|
||||||
int RMS() { return rms_.Average(); }
|
|
||||||
|
|
||||||
private:
|
|
||||||
RmsLevel rms_;
|
|
||||||
};
|
|
||||||
} // namespace webrtc
|
|
||||||
|
|
||||||
#endif // MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_H_
|
|
@ -1,89 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
|
||||||
*
|
|
||||||
* Use of this source code is governed by a BSD-style license
|
|
||||||
* that can be found in the LICENSE file in the root of the source
|
|
||||||
* tree. An additional intellectual property rights grant can be found
|
|
||||||
* in the file PATENTS. All contributing project authors may
|
|
||||||
* be found in the AUTHORS file in the root of the source tree.
|
|
||||||
*/
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "api/array_view.h"
|
|
||||||
#include "modules/audio_processing/audio_buffer.h"
|
|
||||||
#include "modules/audio_processing/level_estimator.h"
|
|
||||||
#include "modules/audio_processing/test/audio_buffer_tools.h"
|
|
||||||
#include "modules/audio_processing/test/bitexactness_tools.h"
|
|
||||||
#include "test/gtest.h"
|
|
||||||
|
|
||||||
namespace webrtc {
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
const int kNumFramesToProcess = 1000;
|
|
||||||
|
|
||||||
// Processes a specified amount of frames, verifies the results and reports
|
|
||||||
// any errors.
|
|
||||||
void RunBitexactnessTest(int sample_rate_hz,
|
|
||||||
size_t num_channels,
|
|
||||||
int rms_reference) {
|
|
||||||
LevelEstimator level_estimator;
|
|
||||||
int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
|
|
||||||
StreamConfig capture_config(sample_rate_hz, num_channels, false);
|
|
||||||
AudioBuffer capture_buffer(
|
|
||||||
capture_config.sample_rate_hz(), capture_config.num_channels(),
|
|
||||||
capture_config.sample_rate_hz(), capture_config.num_channels(),
|
|
||||||
capture_config.sample_rate_hz(), capture_config.num_channels());
|
|
||||||
|
|
||||||
test::InputAudioFile capture_file(
|
|
||||||
test::GetApmCaptureTestVectorFileName(sample_rate_hz));
|
|
||||||
std::vector<float> capture_input(samples_per_channel * num_channels);
|
|
||||||
for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
|
|
||||||
ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
|
|
||||||
&capture_file, capture_input);
|
|
||||||
|
|
||||||
test::CopyVectorToAudioBuffer(capture_config, capture_input,
|
|
||||||
&capture_buffer);
|
|
||||||
|
|
||||||
level_estimator.ProcessStream(capture_buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract test results.
|
|
||||||
int rms = level_estimator.RMS();
|
|
||||||
|
|
||||||
// Compare the output to the reference.
|
|
||||||
EXPECT_EQ(rms_reference, rms);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
TEST(LevelEstimatorBitExactnessTest, Mono8kHz) {
|
|
||||||
const int kRmsReference = 31;
|
|
||||||
|
|
||||||
RunBitexactnessTest(8000, 1, kRmsReference);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(LevelEstimatorBitExactnessTest, Mono16kHz) {
|
|
||||||
const int kRmsReference = 31;
|
|
||||||
|
|
||||||
RunBitexactnessTest(16000, 1, kRmsReference);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(LevelEstimatorBitExactnessTest, Mono32kHz) {
|
|
||||||
const int kRmsReference = 31;
|
|
||||||
|
|
||||||
RunBitexactnessTest(32000, 1, kRmsReference);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(LevelEstimatorBitExactnessTest, Mono48kHz) {
|
|
||||||
const int kRmsReference = 31;
|
|
||||||
|
|
||||||
RunBitexactnessTest(48000, 1, kRmsReference);
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(LevelEstimatorBitExactnessTest, Stereo16kHz) {
|
|
||||||
const int kRmsReference = 30;
|
|
||||||
|
|
||||||
RunBitexactnessTest(16000, 2, kRmsReference);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace webrtc
|
|
@ -538,10 +538,6 @@ void AudioProcessingSimulator::ConfigureAudioProcessor() {
|
|||||||
apm_config.high_pass_filter.enabled = *settings_.use_hpf;
|
apm_config.high_pass_filter.enabled = *settings_.use_hpf;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (settings_.use_le) {
|
|
||||||
apm_config.level_estimation.enabled = *settings_.use_le;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (settings_.use_vad) {
|
if (settings_.use_vad) {
|
||||||
apm_config.voice_detection.enabled = *settings_.use_vad;
|
apm_config.voice_detection.enabled = *settings_.use_vad;
|
||||||
}
|
}
|
||||||
|
@ -106,7 +106,6 @@ struct SimulationSettings {
|
|||||||
absl::optional<int> use_ts;
|
absl::optional<int> use_ts;
|
||||||
absl::optional<bool> use_analog_agc;
|
absl::optional<bool> use_analog_agc;
|
||||||
absl::optional<bool> use_vad;
|
absl::optional<bool> use_vad;
|
||||||
absl::optional<bool> use_le;
|
|
||||||
absl::optional<bool> use_all;
|
absl::optional<bool> use_all;
|
||||||
absl::optional<bool> analog_agc_disable_digital_adaptive;
|
absl::optional<bool> analog_agc_disable_digital_adaptive;
|
||||||
absl::optional<int> agc_mode;
|
absl::optional<int> agc_mode;
|
||||||
|
@ -121,10 +121,6 @@ ABSL_FLAG(int,
|
|||||||
vad,
|
vad,
|
||||||
kParameterNotSpecifiedValue,
|
kParameterNotSpecifiedValue,
|
||||||
"Activate (1) or deactivate (0) the voice activity detector");
|
"Activate (1) or deactivate (0) the voice activity detector");
|
||||||
ABSL_FLAG(int,
|
|
||||||
le,
|
|
||||||
kParameterNotSpecifiedValue,
|
|
||||||
"Activate (1) or deactivate (0) the level estimator");
|
|
||||||
ABSL_FLAG(bool,
|
ABSL_FLAG(bool,
|
||||||
all_default,
|
all_default,
|
||||||
false,
|
false,
|
||||||
@ -369,7 +365,6 @@ void SetSettingIfFlagSet(int32_t flag, absl::optional<bool>* parameter) {
|
|||||||
SimulationSettings CreateSettings() {
|
SimulationSettings CreateSettings() {
|
||||||
SimulationSettings settings;
|
SimulationSettings settings;
|
||||||
if (absl::GetFlag(FLAGS_all_default)) {
|
if (absl::GetFlag(FLAGS_all_default)) {
|
||||||
settings.use_le = true;
|
|
||||||
settings.use_vad = true;
|
settings.use_vad = true;
|
||||||
settings.use_ts = true;
|
settings.use_ts = true;
|
||||||
settings.use_analog_agc = true;
|
settings.use_analog_agc = true;
|
||||||
@ -423,7 +418,6 @@ SimulationSettings CreateSettings() {
|
|||||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc),
|
SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc),
|
||||||
&settings.use_analog_agc);
|
&settings.use_analog_agc);
|
||||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_vad), &settings.use_vad);
|
SetSettingIfFlagSet(absl::GetFlag(FLAGS_vad), &settings.use_vad);
|
||||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_le), &settings.use_le);
|
|
||||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc_disable_digital_adaptive),
|
SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc_disable_digital_adaptive),
|
||||||
&settings.analog_agc_disable_digital_adaptive);
|
&settings.analog_agc_disable_digital_adaptive);
|
||||||
SetSettingIfSpecified(absl::GetFlag(FLAGS_agc_mode), &settings.agc_mode);
|
SetSettingIfSpecified(absl::GetFlag(FLAGS_agc_mode), &settings.agc_mode);
|
||||||
|
@ -52,7 +52,7 @@ rtc::scoped_refptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
|
|||||||
bool use_aecm = fuzz_data->ReadOrDefaultValue(true);
|
bool use_aecm = fuzz_data->ReadOrDefaultValue(true);
|
||||||
bool use_agc = fuzz_data->ReadOrDefaultValue(true);
|
bool use_agc = fuzz_data->ReadOrDefaultValue(true);
|
||||||
bool use_ns = fuzz_data->ReadOrDefaultValue(true);
|
bool use_ns = fuzz_data->ReadOrDefaultValue(true);
|
||||||
bool use_le = fuzz_data->ReadOrDefaultValue(true);
|
static_cast<void>(fuzz_data->ReadOrDefaultValue(true));
|
||||||
bool use_vad = fuzz_data->ReadOrDefaultValue(true);
|
bool use_vad = fuzz_data->ReadOrDefaultValue(true);
|
||||||
bool use_agc_limiter = fuzz_data->ReadOrDefaultValue(true);
|
bool use_agc_limiter = fuzz_data->ReadOrDefaultValue(true);
|
||||||
bool use_agc2 = fuzz_data->ReadOrDefaultValue(true);
|
bool use_agc2 = fuzz_data->ReadOrDefaultValue(true);
|
||||||
@ -115,7 +115,6 @@ rtc::scoped_refptr<AudioProcessing> CreateApm(test::FuzzDataHelper* fuzz_data,
|
|||||||
apm_config.noise_suppression.enabled = use_ns;
|
apm_config.noise_suppression.enabled = use_ns;
|
||||||
apm_config.transient_suppression.enabled = use_ts;
|
apm_config.transient_suppression.enabled = use_ts;
|
||||||
apm_config.voice_detection.enabled = use_vad;
|
apm_config.voice_detection.enabled = use_vad;
|
||||||
apm_config.level_estimation.enabled = use_le;
|
|
||||||
|
|
||||||
rtc::scoped_refptr<AudioProcessing> apm =
|
rtc::scoped_refptr<AudioProcessing> apm =
|
||||||
AudioProcessingBuilderForTesting()
|
AudioProcessingBuilderForTesting()
|
||||||
|
Reference in New Issue
Block a user