Level estimation and saturation protection stub.
The level estimator (AdaptiveModeLevelEstimator) produces a biased estimate of the speech level. In our model, we use another module (the SaturationProtector) to compute the bias. This CL contains the estimator and a stub of the saturation protector. Bug: webrtc:7494 Change-Id: I0df736d0346063f544fa680b4cc84177ea548545 Reviewed-on: https://webrtc-review.googlesource.com/64820 Commit-Queue: Alex Loiko <aleloi@webrtc.org> Reviewed-by: Ivo Creusen <ivoc@webrtc.org> Cr-Commit-Position: refs/heads/master@{#22641}
This commit is contained in:
@ -548,6 +548,7 @@ if (rtc_include_tests) {
|
||||
"../../test:test_support",
|
||||
"../audio_coding:neteq_input_audio_tools",
|
||||
"aec_dump:mock_aec_dump_unittests",
|
||||
"agc2:adaptive_digital_unittests",
|
||||
"agc2:fixed_digital_unittests",
|
||||
"test/conversational_speech:unittest",
|
||||
"vad:vad_unittests",
|
||||
|
||||
@ -25,6 +25,8 @@ rtc_source_set("adaptive_digital") {
|
||||
"adaptive_mode_level_estimator.h",
|
||||
"noise_level_estimator.cc",
|
||||
"noise_level_estimator.h",
|
||||
"saturation_protector.cc",
|
||||
"saturation_protector.h",
|
||||
]
|
||||
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
@ -126,3 +128,25 @@ rtc_source_set("fixed_digital_unittests") {
|
||||
"../../../rtc_base:rtc_base_tests_utils",
|
||||
]
|
||||
}
|
||||
|
||||
# Test-only target for the adaptive digital part of AGC2. Its only source is
# the AdaptiveModeLevelEstimator unit test; it depends on the
# ":adaptive_digital" target under test plus test and logging helpers.
rtc_source_set("adaptive_digital_unittests") {
|
||||
testonly = true
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
|
||||
sources = [
|
||||
"adaptive_mode_level_estimator_unittest.cc",
|
||||
]
|
||||
deps = [
|
||||
":adaptive_digital",
|
||||
":common",
|
||||
":test_utils",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
"../../../common_audio",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:rtc_base_tests_utils",
|
||||
"../vad:vad_with_level",
|
||||
|
||||
]
|
||||
}
|
||||
|
||||
@ -17,7 +17,9 @@
|
||||
namespace webrtc {
|
||||
|
||||
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
|
||||
ApmDataDumper* apm_data_dumper) {}
|
||||
ApmDataDumper* apm_data_dumper)
|
||||
: saturation_protector_(apm_data_dumper),
|
||||
apm_data_dumper_(apm_data_dumper) {}
|
||||
|
||||
void AdaptiveModeLevelEstimator::UpdateEstimation(
|
||||
const VadWithLevel::LevelAndProbability& vad_data) {
|
||||
@ -27,10 +29,40 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
|
||||
RTC_DCHECK_LT(vad_data.speech_peak_dbfs, 50.f);
|
||||
RTC_DCHECK_GE(vad_data.speech_probability, 0.f);
|
||||
RTC_DCHECK_LE(vad_data.speech_probability, 1.f);
|
||||
|
||||
if (vad_data.speech_probability < kVadConfidenceThreshold) {
|
||||
DebugDumpEstimate();
|
||||
return;
|
||||
}
|
||||
|
||||
const bool buffer_is_full = buffer_size_ms_ >= kFullBufferSizeMs;
|
||||
if (!buffer_is_full) {
|
||||
buffer_size_ms_ += kFrameDurationMs;
|
||||
}
|
||||
|
||||
const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f;
|
||||
|
||||
estimate_numerator_ = estimate_numerator_ * leak_factor +
|
||||
vad_data.speech_rms_dbfs * vad_data.speech_probability;
|
||||
estimate_denominator_ =
|
||||
estimate_denominator_ * leak_factor + vad_data.speech_probability;
|
||||
|
||||
last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_;
|
||||
|
||||
saturation_protector_.UpdateMargin(vad_data, last_estimate_with_offset_dbfs_);
|
||||
DebugDumpEstimate();
|
||||
}
|
||||
|
||||
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
|
||||
// TODO(webrtc:7494): This is a stub. Add implementation.
|
||||
return 0.f;
|
||||
return rtc::SafeClamp<float>(
|
||||
last_estimate_with_offset_dbfs_ + saturation_protector_.LastMargin(),
|
||||
-90.f, 0.f);
|
||||
}
|
||||
|
||||
// Writes two values to the data dumper, each under its own dump name:
// |last_estimate_with_offset_dbfs_| and the value currently returned by
// LatestLevelEstimate().
void AdaptiveModeLevelEstimator::DebugDumpEstimate() {
|
||||
apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_with_offset_dbfs",
|
||||
last_estimate_with_offset_dbfs_);
|
||||
apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs",
|
||||
LatestLevelEstimate());
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
|
||||
|
||||
#include "modules/audio_processing/agc2/saturation_protector.h"
|
||||
#include "modules/audio_processing/vad/vad_with_level.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -21,6 +22,16 @@ class AdaptiveModeLevelEstimator {
|
||||
explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
|
||||
void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
|
||||
float LatestLevelEstimate() const;
|
||||
|
||||
private:
|
||||
void DebugDumpEstimate();
|
||||
|
||||
int buffer_size_ms_ = 0;
|
||||
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
|
||||
float estimate_numerator_ = 0.f;
|
||||
float estimate_denominator_ = 0.f;
|
||||
SaturationProtector saturation_protector_;
|
||||
ApmDataDumper* const apm_data_dumper_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
|
||||
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/gunit.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
// Test helper: calls |level_estimator|->UpdateEstimation() with the same
// |vad_data| value |num_iterations| times in a row.
void RunOnConstantLevel(int num_iterations,
|
||||
VadWithLevel::LevelAndProbability vad_data,
|
||||
AdaptiveModeLevelEstimator* level_estimator) {
|
||||
for (int i = 0; i < num_iterations; ++i) {
|
||||
level_estimator->UpdateEstimation(vad_data); // By copy
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Smoke test: performs a single update and a single read of the estimate.
// No expectation is placed on the value; the result is explicitly discarded.
TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
|
||||
EstimatorShouldNotCrash) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
|
||||
|
||||
VadWithLevel::LevelAndProbability vad_data(1.f, -20.f, -10.f);
|
||||
level_estimator.UpdateEstimation(vad_data);
|
||||
static_cast<void>(level_estimator.LatestLevelEstimate());
|
||||
}
|
||||
|
||||
// Feeds 100 identical frames with probability 1 and expects the reported
// level to end up within 0.1 dB of kSpeechRmsDbfs.
// NOTE(review): the second constructor argument is lowered by
// kInitialSaturationMarginDb, presumably so that estimate + margin lands on
// kSpeechRmsDbfs; assumes argument order (probability, rms, peak) - confirm
// against VadWithLevel::LevelAndProbability.
TEST(AutomaticGainController2AdaptiveModeLevelEstimator, LevelShouldStabilize) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
|
||||
|
||||
constexpr float kSpeechRmsDbfs = -15.f;
|
||||
RunOnConstantLevel(
|
||||
100,
|
||||
VadWithLevel::LevelAndProbability(
|
||||
1.f, kSpeechRmsDbfs - kInitialSaturationMarginDb, kSpeechRmsDbfs),
|
||||
&level_estimator);
|
||||
|
||||
EXPECT_NEAR(level_estimator.LatestLevelEstimate(), kSpeechRmsDbfs, 0.1f);
|
||||
}
|
||||
|
||||
// Checks that frames with speech probability 0 do not move the estimate:
// the level reached after the speech frames must still be reported after an
// equally long run of much louder zero-probability frames.
TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
|
||||
EstimatorIgnoresZeroProbabilityFrames) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
|
||||
|
||||
// Run for one second of fake audio.
|
||||
constexpr float kSpeechRmsDbfs = -25.f;
|
||||
RunOnConstantLevel(
|
||||
100,
|
||||
VadWithLevel::LevelAndProbability(
|
||||
1.f, kSpeechRmsDbfs - kInitialSaturationMarginDb, kSpeechRmsDbfs),
|
||||
&level_estimator);
|
||||
|
||||
// Run for one more second, but mark as not speech.
|
||||
constexpr float kNoiseRmsDbfs = 0.f;
|
||||
RunOnConstantLevel(
|
||||
100, VadWithLevel::LevelAndProbability(0.f, kNoiseRmsDbfs, kNoiseRmsDbfs),
|
||||
&level_estimator);
|
||||
|
||||
// Level should not have changed.
|
||||
EXPECT_NEAR(level_estimator.LatestLevelEstimate(), kSpeechRmsDbfs, 0.1f);
|
||||
}
|
||||
|
||||
// Checks the adaptation speed after a level step from
// kInitialSpeechRmsDbfs to kDifferentSpeechRmsDbfs: half a window of the new
// level must not be enough to adapt, while a further three windows must be.
TEST(AutomaticGainController2AdaptiveModeLevelEstimator, TimeToAdapt) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
|
||||
|
||||
// Run for one 'window size' interval.
|
||||
constexpr float kInitialSpeechRmsDbfs = -30.f;
|
||||
RunOnConstantLevel(
|
||||
kFullBufferSizeMs / kFrameDurationMs,
|
||||
VadWithLevel::LevelAndProbability(
|
||||
1.f, kInitialSpeechRmsDbfs - kInitialSaturationMarginDb,
|
||||
kInitialSpeechRmsDbfs),
|
||||
&level_estimator);
|
||||
|
||||
// Run for one half 'window size' interval. This should not be enough to
|
||||
// adapt.
|
||||
constexpr float kDifferentSpeechRmsDbfs = -10.f;
|
||||
// Tolerance: 25% of the size of the level step. After the half-window run
// the estimate must still be farther than this from the new level; after
// the longer run further below it must be within it.
|
||||
const float kMaxDifferenceDb =
|
||||
0.25 * std::abs(kDifferentSpeechRmsDbfs - kInitialSpeechRmsDbfs);
|
||||
RunOnConstantLevel(
|
||||
static_cast<int>(kFullBufferSizeMs / kFrameDurationMs / 2),
|
||||
VadWithLevel::LevelAndProbability(
|
||||
1.f, kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
|
||||
kDifferentSpeechRmsDbfs),
|
||||
&level_estimator);
|
||||
EXPECT_GT(
|
||||
std::abs(kDifferentSpeechRmsDbfs - level_estimator.LatestLevelEstimate()),
|
||||
kMaxDifferenceDb);
|
||||
|
||||
// Run for some more time. Afterwards, we should have adapted.
|
||||
RunOnConstantLevel(
|
||||
static_cast<int>(3 * kFullBufferSizeMs / kFrameDurationMs),
|
||||
VadWithLevel::LevelAndProbability(
|
||||
1.f, kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
|
||||
kDifferentSpeechRmsDbfs),
|
||||
&level_estimator);
|
||||
EXPECT_NEAR(level_estimator.LatestLevelEstimate(), kDifferentSpeechRmsDbfs,
|
||||
kMaxDifferenceDb);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -27,6 +27,18 @@ constexpr size_t kMaximalNumberOfSamplesPerChannel = 480;
|
||||
|
||||
constexpr float kAttackFilterConstant = 0.f;
|
||||
|
||||
// Used in the Level Estimator for deciding when to update the speech
|
||||
// level estimate.
|
||||
constexpr float kVadConfidenceThreshold = 0.9f;
|
||||
|
||||
// The amount of 'memory' of the Level Estimator. Decides leak factors.
|
||||
constexpr float kFullBufferSizeMs = 1000.f;
|
||||
constexpr float kFullBufferLeakFactor = 1.f - 1.f / kFullBufferSizeMs;
|
||||
|
||||
constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;
|
||||
|
||||
constexpr float kInitialSaturationMarginDb = 17.f;
|
||||
|
||||
// This is computed from kDecayMs by
|
||||
// 10 ** (-1/20 * subframe_duration / kDecayMs).
|
||||
// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
|
||||
|
||||
@ -20,7 +20,7 @@ class NoiseLevelEstimator {
|
||||
public:
|
||||
NoiseLevelEstimator() {}
|
||||
|
||||
// Returns the estimated noise level in DbFS.
|
||||
// Returns the estimated noise level in dBFS.
|
||||
float Analyze(AudioFrameView<const float> frame);
|
||||
|
||||
private:
|
||||
|
||||
29
modules/audio_processing/agc2/saturation_protector.cc
Normal file
29
modules/audio_processing/agc2/saturation_protector.cc
Normal file
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/saturation_protector.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stub constructor: the body is empty and |apm_data_dumper| is not used.
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) {}
|
||||
|
||||
// Stub: the body is empty, so both arguments are ignored and no state
// changes.
void SaturationProtector::UpdateMargin(
|
||||
const VadWithLevel::LevelAndProbability& vad_data,
|
||||
float last_speech_level_estimate) {}
|
||||
|
||||
// Stub: always returns the constant kInitialSaturationMarginDb.
float SaturationProtector::LastMargin() const {
|
||||
return kInitialSaturationMarginDb;
|
||||
}
|
||||
} // namespace webrtc
|
||||
41
modules/audio_processing/agc2/saturation_protector.h
Normal file
41
modules/audio_processing/agc2/saturation_protector.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/vad/vad_with_level.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
// Provides the saturation margin that AdaptiveModeLevelEstimator adds to its
// speech level estimate.
class SaturationProtector {
|
||||
public:
|
||||
explicit SaturationProtector(ApmDataDumper* apm_data_dumper);
|
||||
|
||||
// Updates the margin estimate. This method should be called
|
||||
// whenever a frame is reliably classified as 'speech'.
|
||||
//
|
||||
// The margin (in dB) is not returned here; read it with LastMargin().
|
||||
void UpdateMargin(const VadWithLevel::LevelAndProbability& vad_data,
|
||||
float last_speech_level_estimate_dbfs);
|
||||
|
||||
// Returns latest computed margin. Used in cases when speech is not
|
||||
// detected.
|
||||
float LastMargin() const;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_H_
|
||||
Reference in New Issue
Block a user