AGC2: removed unused noise estimator implementation

This CL also includes the following changes:
- `AudioProcessing::Config::GainController2::noise_estimator`
  deprecated
- `EnergyToDbfs()` optimized by removing unnecessary `sqrt`
- Minor unit test fix: the estimated noise level was stored in an `int`
  instead of a `float`

Bug: webrtc:7494
Change-Id: I88a6672d6f7cd03fcf6a3031883522d256880140
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/230940
Reviewed-by: Jesus de Vicente Pena <devicentepena@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#34893}
This commit is contained in:
Alessio Bazzica
2021-09-01 10:54:47 +02:00
committed by WebRTC LUCI CQ
parent 0d51a5fc00
commit b8a19df71c
15 changed files with 18 additions and 794 deletions

View File

@ -110,24 +110,15 @@ rtc_library("gain_applier") {
rtc_library("noise_level_estimator") {
sources = [
"down_sampler.cc",
"down_sampler.h",
"noise_level_estimator.cc",
"noise_level_estimator.h",
"noise_spectrum_estimator.cc",
"noise_spectrum_estimator.h",
"signal_classifier.cc",
"signal_classifier.h",
]
deps = [
":biquad_filter",
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
"../../../common_audio",
"../../../common_audio/third_party/ooura:fft_size_128",
"../../../rtc_base:checks",
"../../../rtc_base:macromagic",
"../../../system_wrappers",
]
@ -241,10 +232,7 @@ rtc_library("noise_estimator_unittests") {
testonly = true
configs += [ "..:apm_debug_dump" ]
sources = [
"noise_level_estimator_unittest.cc",
"signal_classifier_unittest.cc",
]
sources = [ "noise_level_estimator_unittest.cc" ]
deps = [
":noise_level_estimator",
":test_utils",

View File

@ -41,17 +41,6 @@ AvailableCpuFeatures GetAllowedCpuFeatures(
return features;
}
// Creates the noise level estimator selected by `estimator_type`, forwarding
// `apm_data_dumper` to the corresponding factory function.
// NOTE(review): there is no return statement after the switch, so this relies
// on every `NoiseEstimatorType` enumerator being handled by a case label.
std::unique_ptr<NoiseLevelEstimator> CreateNoiseLevelEstimator(
NoiseEstimatorType estimator_type,
ApmDataDumper* apm_data_dumper) {
switch (estimator_type) {
case NoiseEstimatorType::kStationaryNoise:
return CreateStationaryNoiseEstimator(apm_data_dumper);
case NoiseEstimatorType::kNoiseFloor:
return CreateNoiseFloorEstimator(apm_data_dumper);
}
}
} // namespace
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
@ -65,8 +54,7 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
config.max_output_noise_level_dbfs,
config.dry_run),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(
CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)),
noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
saturation_protector_(
CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
kSaturationProtectorExtraHeadroomDb,

View File

@ -1,99 +0,0 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/down_sampler.h"
#include <string.h>
#include <algorithm>
#include "modules/audio_processing/agc2/biquad_filter.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
constexpr int kChunkSizeMs = 10;
constexpr int kSampleRate8kHz = 8000;
constexpr int kSampleRate16kHz = 16000;
constexpr int kSampleRate32kHz = 32000;
constexpr int kSampleRate48kHz = 48000;
// Bandlimiter coefficients computed based on that only
// the first 40 bins of the spectrum for the downsampled
// signal are used.
// [B,A] = butter(2,(41/64*4000)/8000)
const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_16kHz = {
{0.1455f, 0.2911f, 0.1455f},
{-0.6698f, 0.2520f}};
// [B,A] = butter(2,(41/64*4000)/16000)
const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_32kHz = {
{0.0462f, 0.0924f, 0.0462f},
{-1.3066f, 0.4915f}};
// [B,A] = butter(2,(41/64*4000)/24000)
const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_48kHz = {
{0.0226f, 0.0452f, 0.0226f},
{-1.5320f, 0.6224f}};
} // namespace
// Stores `data_dumper` (used to dump the input and output of `DownSample()`)
// and configures the instance for 48 kHz input. Call `Initialize()` to switch
// to another supported sample rate.
DownSampler::DownSampler(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper) {
  Initialize(/*sample_rate_hz=*/48000);
}
// Configures the down-sampler for input at `sample_rate_hz`, which must be
// 8, 16, 32 or 48 kHz (DCHECKed). Records the rate, derives the integer
// decimation factor relative to 8 kHz and loads the low-pass coefficients
// matching the input rate.
void DownSampler::Initialize(int sample_rate_hz) {
  RTC_DCHECK(
      sample_rate_hz == kSampleRate8kHz || sample_rate_hz == kSampleRate16kHz ||
      sample_rate_hz == kSampleRate32kHz || sample_rate_hz == kSampleRate48kHz);

  sample_rate_hz_ = sample_rate_hz;
  down_sampling_factor_ = rtc::CheckedDivExact(sample_rate_hz_, 8000);

  switch (sample_rate_hz_) {
    case kSampleRate16kHz:
      low_pass_filter_.Initialize(kLowPassFilterCoefficients_16kHz);
      break;
    case kSampleRate32kHz:
      low_pass_filter_.Initialize(kLowPassFilterCoefficients_32kHz);
      break;
    case kSampleRate48kHz:
      low_pass_filter_.Initialize(kLowPassFilterCoefficients_48kHz);
      break;
    default:
      // The down-sampling filter is not used at 8 kHz, so it is left as is.
      break;
  }
}
// Converts one 10 ms frame `in`, sampled at the configured rate, into a 10 ms
// frame `out` at 8 kHz. Both frame sizes are DCHECKed. The input and the
// output are dumped as WAV data through the data dumper.
void DownSampler::DownSample(rtc::ArrayView<const float> in,
                             rtc::ArrayView<float> out) {
  data_dumper_->DumpWav("agc2_down_sampler_input", in, sample_rate_hz_, 1);
  RTC_DCHECK_EQ(sample_rate_hz_ * kChunkSizeMs / 1000, in.size());
  RTC_DCHECK_EQ(kSampleRate8kHz * kChunkSizeMs / 1000, out.size());

  if (sample_rate_hz_ == kSampleRate8kHz) {
    // Already at the target rate: pass the frame through unchanged.
    std::copy(in.data(), in.data() + in.size(), out.data());
  } else {
    // Scratch buffer sized for the largest supported frame (48 kHz, 10 ms).
    const size_t kMaxNumFrames = kSampleRate48kHz * kChunkSizeMs / 1000;
    float band_limited[kMaxNumFrames];

    // Band-limit the signal to 4 kHz.
    low_pass_filter_.Process(in,
                             rtc::ArrayView<float>(band_limited, in.size()));

    // Decimate: keep every `down_sampling_factor_`-th filtered sample.
    for (size_t j = 0, k = 0; j < out.size();
         ++j, k += down_sampling_factor_) {
      RTC_DCHECK_GT(kMaxNumFrames, k);
      out[j] = band_limited[k];
    }
  }

  data_dumper_->DumpWav("agc2_down_sampler_output", out, kSampleRate8kHz, 1);
}
} // namespace webrtc

View File

@ -1,42 +0,0 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_DOWN_SAMPLER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_DOWN_SAMPLER_H_
#include "api/array_view.h"
#include "modules/audio_processing/agc2/biquad_filter.h"
namespace webrtc {
class ApmDataDumper;
// Down-samples 10 ms mono frames to 8 kHz. Not copyable.
class DownSampler {
public:
// `data_dumper` is stored as a raw pointer and used by `DownSample()` to dump
// the input and output signals; the constructor also calls `Initialize()`
// with 48 kHz.
explicit DownSampler(ApmDataDumper* data_dumper);
DownSampler() = delete;
DownSampler(const DownSampler&) = delete;
DownSampler& operator=(const DownSampler&) = delete;
// Sets the input sample rate. Only 8, 16, 32 and 48 kHz are accepted
// (DCHECKed in the implementation).
void Initialize(int sample_rate_hz);
// Writes the 8 kHz version of the 10 ms frame `in` into `out`. The sizes of
// `in` and `out` are DCHECKed against the configured rate and 8 kHz.
void DownSample(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);
private:
ApmDataDumper* const data_dumper_;
// Input sample rate set by the last call to `Initialize()`.
int sample_rate_hz_;
// `sample_rate_hz_` divided by 8000.
int down_sampling_factor_;
// Band-limiting filter applied before decimation when the input is not 8 kHz.
BiQuadFilter low_pass_filter_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_DOWN_SAMPLER_H_

View File

@ -17,13 +17,12 @@
#include <numeric>
#include "api/array_view.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/signal_classifier.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
constexpr int kFramesPerSecond = 100;
float FrameEnergy(const AudioFrameView<const float>& audio) {
@ -37,108 +36,16 @@ float FrameEnergy(const AudioFrameView<const float>& audio) {
return energy;
}
float EnergyToDbfs(float signal_energy, size_t num_samples) {
const float rms = std::sqrt(signal_energy / num_samples);
return FloatS16ToDbfs(rms);
// Converts `signal_energy`, the energy accumulated over `num_samples` samples,
// into a level in dBFS. The level is derived from the mean square with
// 10 * log10(), which avoids taking a square root to obtain the RMS first.
// Mean squares not greater than 1 map to the minimum level.
float EnergyToDbfs(float signal_energy, int num_samples) {
  RTC_DCHECK_GE(signal_energy, 0.0f);
  // Equals 20 * log10(1 / 32768).
  constexpr float kMinDbfs = -90.30899869919436f;
  const float mean_square = signal_energy / num_samples;
  return mean_square <= 1.0f ? kMinDbfs
                             : 10.0f * std::log10(mean_square) + kMinDbfs;
}
// Noise level estimator driven by a stationarity classifier. The noise energy
// estimate follows the frame energy when the frame is classified as
// stationary and decays slowly otherwise. `Analyze()` returns the estimate
// converted to dBFS.
class NoiseLevelEstimatorImpl : public NoiseLevelEstimator {
public:
NoiseLevelEstimatorImpl(ApmDataDumper* data_dumper)
: data_dumper_(data_dumper), signal_classifier_(data_dumper) {
// Initially assume that 48 kHz will be used. `Analyze()` will detect the
// used sample rate and call `Initialize()` again if needed.
Initialize(/*sample_rate_hz=*/48000);
}
NoiseLevelEstimatorImpl(const NoiseLevelEstimatorImpl&) = delete;
NoiseLevelEstimatorImpl& operator=(const NoiseLevelEstimatorImpl&) = delete;
~NoiseLevelEstimatorImpl() = default;
// Updates the noise estimate with `frame` and returns the current noise level
// in dBFS. Only the first channel is passed to the signal classifier.
float Analyze(const AudioFrameView<const float>& frame) override {
data_dumper_->DumpRaw("agc2_noise_level_estimator_hold_counter",
noise_energy_hold_counter_);
// Infer the sample rate from the frame size and reset all the state when it
// changes.
const int sample_rate_hz =
static_cast<int>(frame.samples_per_channel() * kFramesPerSecond);
if (sample_rate_hz != sample_rate_hz_) {
Initialize(sample_rate_hz);
}
const float frame_energy = FrameEnergy(frame);
// Frames without energy leave the estimate untouched.
if (frame_energy <= 0.f) {
RTC_DCHECK_GE(frame_energy, 0.f);
data_dumper_->DumpRaw("agc2_noise_level_estimator_signal_type", -1);
return EnergyToDbfs(noise_energy_, frame.samples_per_channel());
}
if (first_update_) {
// Initialize the noise energy to the frame energy.
first_update_ = false;
data_dumper_->DumpRaw("agc2_noise_level_estimator_signal_type", -1);
noise_energy_ = std::max(frame_energy, min_noise_energy_);
return EnergyToDbfs(noise_energy_, frame.samples_per_channel());
}
const SignalClassifier::SignalType signal_type =
signal_classifier_.Analyze(frame.channel(0));
data_dumper_->DumpRaw("agc2_noise_level_estimator_signal_type",
static_cast<int>(signal_type));
// Update the noise estimate in a minimum statistics-type manner.
if (signal_type == SignalClassifier::SignalType::kStationary) {
if (frame_energy > noise_energy_) {
// Leak the estimate upwards towards the frame energy if no recent
// downward update.
noise_energy_hold_counter_ =
std::max(noise_energy_hold_counter_ - 1, 0);
if (noise_energy_hold_counter_ == 0) {
// Grow by at most 1% per frame and never beyond the frame energy.
constexpr float kMaxNoiseEnergyFactor = 1.01f;
noise_energy_ =
std::min(noise_energy_ * kMaxNoiseEnergyFactor, frame_energy);
}
} else {
// Update smoothly downwards with a limited maximum update magnitude.
constexpr float kMinNoiseEnergyFactor = 0.9f;
constexpr float kNoiseEnergyDeltaFactor = 0.05f;
noise_energy_ =
std::max(noise_energy_ * kMinNoiseEnergyFactor,
noise_energy_ - kNoiseEnergyDeltaFactor *
(noise_energy_ - frame_energy));
// Prevent an energy increase for the next 10 seconds.
constexpr int kNumFramesToEnergyIncreaseAllowed = 1000;
noise_energy_hold_counter_ = kNumFramesToEnergyIncreaseAllowed;
}
} else {
// TODO(bugs.webrtc.org/7494): Remove to not forget the estimated level.
// For a non-stationary signal, leak the estimate downwards in order to
// avoid estimate locking due to incorrect signal classification.
noise_energy_ = noise_energy_ * 0.99f;
}
// Ensure a minimum of the estimate.
noise_energy_ = std::max(noise_energy_, min_noise_energy_);
return EnergyToDbfs(noise_energy_, frame.samples_per_channel());
}
private:
// Resets the whole estimator state, including the signal classifier, for
// `sample_rate_hz`.
void Initialize(int sample_rate_hz) {
sample_rate_hz_ = sample_rate_hz;
noise_energy_ = 1.0f;
first_update_ = true;
// Initialize the minimum noise energy to -84 dBFS.
min_noise_energy_ = sample_rate_hz * 2.0f * 2.0f / kFramesPerSecond;
noise_energy_hold_counter_ = 0;
signal_classifier_.Initialize(sample_rate_hz);
}
ApmDataDumper* const data_dumper_;
// Sample rate that the current state was initialized for.
int sample_rate_hz_;
// Lower bound applied to `noise_energy_`.
float min_noise_energy_;
// True until the first frame with non-zero energy has been analyzed.
bool first_update_;
// Current noise estimate, expressed as a frame energy.
float noise_energy_;
// Number of louder stationary frames that must still be observed before the
// estimate may increase again.
int noise_energy_hold_counter_;
SignalClassifier signal_classifier_;
};
// Updates the noise floor with instant decay and slow attack. This tuning is
// specific for AGC2, so that (i) it can promptly increase the gain if the noise
// floor drops (instant decay) and (ii) in case of music or fast speech, due to
@ -186,7 +93,8 @@ class NoiseFloorEstimator : public NoiseLevelEstimator {
// Ignore frames when muted or below the minimum measurable energy.
data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level",
noise_energy_);
return EnergyToDbfs(noise_energy_, frame.samples_per_channel());
return EnergyToDbfs(noise_energy_,
static_cast<int>(frame.samples_per_channel()));
}
if (preliminary_noise_energy_set_) {
@ -220,7 +128,8 @@ class NoiseFloorEstimator : public NoiseLevelEstimator {
noise_energy_ = std::min(noise_energy_, preliminary_noise_energy_);
counter_--;
}
return EnergyToDbfs(noise_energy_, frame.samples_per_channel());
return EnergyToDbfs(noise_energy_,
static_cast<int>(frame.samples_per_channel()));
}
private:
@ -247,11 +156,6 @@ class NoiseFloorEstimator : public NoiseLevelEstimator {
} // namespace
// Returns a new `NoiseLevelEstimatorImpl`, the estimator based on signal
// stationarity classification. `data_dumper` is forwarded to its constructor.
std::unique_ptr<NoiseLevelEstimator> CreateStationaryNoiseEstimator(
ApmDataDumper* data_dumper) {
return std::make_unique<NoiseLevelEstimatorImpl>(data_dumper);
}
std::unique_ptr<NoiseLevelEstimator> CreateNoiseFloorEstimator(
ApmDataDumper* data_dumper) {
return std::make_unique<NoiseFloorEstimator>(data_dumper);

View File

@ -27,10 +27,6 @@ class NoiseLevelEstimator {
virtual float Analyze(const AudioFrameView<const float>& frame) = 0;
};
// Creates a noise level estimator based on stationarity detection.
std::unique_ptr<NoiseLevelEstimator> CreateStationaryNoiseEstimator(
ApmDataDumper* data_dumper);
// Creates a noise level estimator based on noise floor detection.
std::unique_ptr<NoiseLevelEstimator> CreateNoiseFloorEstimator(
ApmDataDumper* data_dumper);

View File

@ -50,45 +50,6 @@ class NoiseEstimatorParametrization : public ::testing::TestWithParam<int> {
int sample_rate_hz() const { return GetParam(); }
};
// White random noise is stationary, but does not trigger the detector
// every frame due to the randomness.
TEST_P(NoiseEstimatorParametrization, StationaryNoiseEstimatorWithRandomNoise) {
  // Level expected for full-scale white noise and the accepted deviation.
  constexpr float kExpectedNoiseLevelDbfs = -5.5f;
  constexpr float kToleranceDb = 1.0f;

  ApmDataDumper data_dumper(0);
  auto estimator = CreateStationaryNoiseEstimator(&data_dumper);
  test::WhiteNoiseGenerator noise_generator(/*min_amplitude=*/test::kMinS16,
                                            /*max_amplitude=*/test::kMaxS16);

  const float noise_level_dbfs =
      RunEstimator(noise_generator, *estimator, sample_rate_hz());

  EXPECT_NEAR(noise_level_dbfs, kExpectedNoiseLevelDbfs, kToleranceDb);
}
// Sine curves are (very) stationary. They trigger the detector all
// the time. Except for a few initial frames.
TEST_P(NoiseEstimatorParametrization, StationaryNoiseEstimatorWithSineTone) {
  // Level expected for a full-scale sine tone and the accepted deviation.
  constexpr float kExpectedNoiseLevelDbfs = -3.0f;
  constexpr float kToleranceDb = 1.0f;

  ApmDataDumper data_dumper(0);
  auto estimator = CreateStationaryNoiseEstimator(&data_dumper);
  test::SineGenerator sine_generator(/*amplitude=*/test::kMaxS16,
                                     /*frequency_hz=*/600.0f,
                                     sample_rate_hz());

  const float noise_level_dbfs =
      RunEstimator(sine_generator, *estimator, sample_rate_hz());

  EXPECT_NEAR(noise_level_dbfs, kExpectedNoiseLevelDbfs, kToleranceDb);
}
// Pulses are transient if they are far enough apart. They shouldn't
// trigger the noise detector.
TEST_P(NoiseEstimatorParametrization, StationaryNoiseEstimatorWithPulseTone) {
  ApmDataDumper data_dumper(0);
  auto estimator = CreateStationaryNoiseEstimator(&data_dumper);
  test::PulseGenerator gen(/*pulse_amplitude=*/test::kMaxS16,
                           /*no_pulse_amplitude=*/10.0f, /*frequency_hz=*/20.0f,
                           sample_rate_hz());
  // Keep the estimate as a float: storing it in an `int` truncates the
  // fractional part before it is compared against the tolerance below.
  const float noise_level_dbfs =
      RunEstimator(gen, *estimator, sample_rate_hz());
  EXPECT_NEAR(noise_level_dbfs, -79.0f, 1.0f);
}
// Checks that full scale white noise maps to about -5.5 dBFS.
TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithRandomNoise) {
ApmDataDumper data_dumper(0);
@ -122,7 +83,8 @@ TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithPulseTone) {
constexpr float kNoPulseAmplitude = 10.0f;
test::PulseGenerator gen(/*pulse_amplitude=*/test::kMaxS16, kNoPulseAmplitude,
/*frequency_hz=*/20.0f, sample_rate_hz());
const int noise_level_dbfs = RunEstimator(gen, *estimator, sample_rate_hz());
const float noise_level_dbfs =
RunEstimator(gen, *estimator, sample_rate_hz());
const float expected_noise_floor_dbfs =
20.0f * std::log10f(kNoPulseAmplitude / test::kMaxS16);
EXPECT_NEAR(noise_level_dbfs, expected_noise_floor_dbfs, 0.5f);

View File

@ -1,70 +0,0 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/noise_spectrum_estimator.h"
#include <string.h>
#include <algorithm>
#include "api/array_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
constexpr float kMinNoisePower = 100.f;
} // namespace
// Stores `data_dumper`, which `Update()` uses to dump the noise and signal
// spectra, and resets the noise spectrum to its minimum value.
NoiseSpectrumEstimator::NoiseSpectrumEstimator(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper) {
  Initialize();
}
// Resets every bin of the noise spectrum estimate to the minimum noise power.
void NoiseSpectrumEstimator::Initialize() {
  for (float& bin_power : noise_spectrum_) {
    bin_power = kMinNoisePower;
  }
}
// Updates the noise spectrum estimate from the 65-bin power `spectrum`.
// When `first_update` is true the estimate is replaced by `spectrum`;
// otherwise each bin moves 5% of the way towards `spectrum`, with the change
// limited to +/-1% of the current bin value. Bins never drop below the minimum
// noise power. Both spectra are dumped afterwards.
void NoiseSpectrumEstimator::Update(rtc::ArrayView<const float> spectrum,
                                    bool first_update) {
  RTC_DCHECK_EQ(65, spectrum.size());

  if (first_update) {
    // Initialize the noise spectral estimate with the signal spectrum.
    std::copy(spectrum.data(), spectrum.data() + spectrum.size(),
              noise_spectrum_);
  } else {
    for (size_t k = 0; k < spectrum.size(); ++k) {
      float& noise = noise_spectrum_[k];
      // Smoothed step towards the signal spectrum.
      const float smoothed = noise + 0.05f * (spectrum[k] - noise);
      // Limit the magnitude of the update in the direction of the change.
      noise = noise < spectrum[k] ? std::min(1.01f * noise, smoothed)
                                  : std::max(0.99f * noise, smoothed);
    }
  }

  // Ensure that the noise spectral estimate does not become too low.
  for (float& bin_power : noise_spectrum_) {
    bin_power = std::max(bin_power, kMinNoisePower);
  }

  data_dumper_->DumpRaw("agc2_noise_spectrum", 65, noise_spectrum_);
  data_dumper_->DumpRaw("agc2_signal_spectrum", spectrum);
}
} // namespace webrtc

View File

@ -1,42 +0,0 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_NOISE_SPECTRUM_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AGC2_NOISE_SPECTRUM_ESTIMATOR_H_
#include "api/array_view.h"
namespace webrtc {
class ApmDataDumper;
// Maintains a smoothed 65-bin estimate of the noise power spectrum.
// Not copyable.
class NoiseSpectrumEstimator {
public:
// `data_dumper` is stored as a raw pointer and used by `Update()`.
explicit NoiseSpectrumEstimator(ApmDataDumper* data_dumper);
NoiseSpectrumEstimator() = delete;
NoiseSpectrumEstimator(const NoiseSpectrumEstimator&) = delete;
NoiseSpectrumEstimator& operator=(const NoiseSpectrumEstimator&) = delete;
// Resets the estimate.
void Initialize();
// Updates the estimate from `spectrum`, which must hold 65 values. When
// `first_update` is true, `spectrum` is used to initialize the estimate.
void Update(rtc::ArrayView<const float> spectrum, bool first_update);
// Returns a view over the internal estimate; the view refers to storage owned
// by this object.
rtc::ArrayView<const float> GetNoiseSpectrum() const {
return rtc::ArrayView<const float>(noise_spectrum_);
}
private:
ApmDataDumper* data_dumper_;
float noise_spectrum_[65];
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_NOISE_SPECTRUM_ESTIMATOR_H_

View File

@ -1,177 +0,0 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/signal_classifier.h"
#include <algorithm>
#include <numeric>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/agc2/down_sampler.h"
#include "modules/audio_processing/agc2/noise_spectrum_estimator.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
namespace webrtc {
namespace {
// Returns true if SSE2 can be used. On x86-family builds this queries the CPU
// via `GetCPUInfo(kSSE2)`; on every other architecture it is always false.
bool IsSse2Available() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
return GetCPUInfo(kSSE2) != 0;
#else
return false;
#endif
}
// Subtracts the mean of `x` from every element of `x`, in place. `x` must not
// be empty.
void RemoveDcLevel(rtc::ArrayView<float> x) {
  RTC_DCHECK_LT(0, x.size());
  const float sum = std::accumulate(x.data(), x.data() + x.size(), 0.f);
  const float dc_level = sum / x.size();
  for (size_t i = 0; i < x.size(); ++i) {
    x[i] -= dc_level;
  }
}
// Computes the 65-bin power spectrum of the 128-sample signal `x` and writes
// it to `spectrum`. `x` is copied first because the FFT works in place.
void PowerSpectrum(const OouraFft* ooura_fft,
                   rtc::ArrayView<const float> x,
                   rtc::ArrayView<float> spectrum) {
  RTC_DCHECK_EQ(65, spectrum.size());
  RTC_DCHECK_EQ(128, x.size());

  float fft_buffer[128];
  std::copy(x.data(), x.data() + x.size(), fft_buffer);
  ooura_fft->Fft(fft_buffer);

  // The first two output values are single-valued bins: element 0 feeds
  // bin 0 and element 1 feeds bin 64.
  // NOTE(review): presumably the packed DC/Nyquist layout of the Ooura FFT -
  // confirm against its documentation.
  spectrum[0] = fft_buffer[0] * fft_buffer[0];
  spectrum[64] = fft_buffer[1] * fft_buffer[1];

  // Every other bin k is the sum of the squares of elements 2k and 2k + 1.
  for (int k = 1; k < 64; ++k) {
    const float first = fft_buffer[2 * k];
    const float second = fft_buffer[2 * k + 1];
    spectrum[k] = first * first;
    spectrum[k] += second * second;
  }
}
// Classifies a frame by comparing `signal_spectrum` with `noise_spectrum` over
// bins 1 to 39. A bin counts as stationary when the signal power is within a
// factor of 3 of the noise power, and as highly non-stationary when it exceeds
// 9 times the noise power. Both counts are dumped via `data_dumper`; only the
// stationary count decides the result.
webrtc::SignalClassifier::SignalType ClassifySignal(
    rtc::ArrayView<const float> signal_spectrum,
    rtc::ArrayView<const float> noise_spectrum,
    ApmDataDumper* data_dumper) {
  int num_stationary_bands = 0;
  int num_highly_nonstationary_bands = 0;

  // Detect stationary and highly nonstationary bands.
  for (size_t k = 1; k < 40; k++) {
    const float signal_power = signal_spectrum[k];
    const float noise_power = noise_spectrum[k];
    const bool close_to_noise =
        signal_power < 3 * noise_power && signal_power * 3 > noise_power;
    if (close_to_noise) {
      ++num_stationary_bands;
    } else if (signal_power > 9 * noise_power) {
      ++num_highly_nonstationary_bands;
    }
  }

  data_dumper->DumpRaw("agc2_num_stationary_bands", 1, &num_stationary_bands);
  data_dumper->DumpRaw("agc2_num_highly_nonstationary_bands", 1,
                       &num_highly_nonstationary_bands);

  // Use the detected number of bands to classify the overall signal
  // stationarity.
  return num_stationary_bands > 15
             ? SignalClassifier::SignalType::kStationary
             : SignalClassifier::SignalType::kNonStationary;
}
} // namespace
// Creates an extender whose history buffer holds
// `extended_frame_size - frame_size` samples, all initialized to zero.
// NOTE(review): the subtraction is unsigned, so `extended_frame_size` is
// presumably expected to be at least `frame_size` - confirm with callers.
SignalClassifier::FrameExtender::FrameExtender(size_t frame_size,
size_t extended_frame_size)
: x_old_(extended_frame_size - frame_size, 0.f) {}
SignalClassifier::FrameExtender::~FrameExtender() = default;
// Writes the stored history followed by the new frame `x` into `x_extended`,
// then keeps the tail of `x_extended` as the history for the next call.
// `x_extended` must be exactly as long as the history plus `x`.
void SignalClassifier::FrameExtender::ExtendFrame(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float> x_extended) {
  RTC_DCHECK_EQ(x_old_.size() + x.size(), x_extended.size());

  const size_t history_size = x_old_.size();
  float* const extended_begin = x_extended.data();
  float* const extended_end = extended_begin + x_extended.size();

  // Old samples first, new samples right after them.
  std::copy(x_old_.begin(), x_old_.end(), extended_begin);
  std::copy(x.data(), x.data() + x.size(), extended_begin + history_size);

  // Remember the most recent `history_size` samples.
  std::copy(extended_end - history_size, extended_end, x_old_.begin());
}
// Builds the classifier and its helpers, all sharing `data_dumper`, and
// initializes it for 48 kHz input. The FFT is created with the result of the
// runtime SSE2 check.
// Note that `down_sampler_` and `noise_spectrum_estimator_` are initialized
// from the member `data_dumper_`, which the class declares before them.
SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper)
: data_dumper_(data_dumper),
down_sampler_(data_dumper_),
noise_spectrum_estimator_(data_dumper_),
ooura_fft_(IsSse2Available()) {
Initialize(48000);
}
SignalClassifier::~SignalClassifier() {}
void SignalClassifier::Initialize(int sample_rate_hz) {
down_sampler_.Initialize(sample_rate_hz);
noise_spectrum_estimator_.Initialize();
frame_extender_.reset(new FrameExtender(80, 128));
sample_rate_hz_ = sample_rate_hz;
initialization_frames_left_ = 2;
consistent_classification_counter_ = 3;
last_signal_type_ = SignalClassifier::SignalType::kNonStationary;
}
// Classifies the 10 ms frame `signal` (its size is DCHECKed against the
// configured sample rate) as stationary or non-stationary and updates the
// internal noise spectrum estimate. A classification is only reported once it
// has been repeated on consecutive frames; until then kNonStationary is
// returned.
SignalClassifier::SignalType SignalClassifier::Analyze(
rtc::ArrayView<const float> signal) {
RTC_DCHECK_EQ(signal.size(), sample_rate_hz_ / 100);
// Compute the signal power spectrum.
// The frame is down-sampled to 80 samples, extended to 128 samples with the
// previous samples and freed from its DC level before the spectrum is taken.
float downsampled_frame[80];
down_sampler_.DownSample(signal, downsampled_frame);
float extended_frame[128];
frame_extender_->ExtendFrame(downsampled_frame, extended_frame);
RemoveDcLevel(extended_frame);
float signal_spectrum[65];
PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum);
// Classify the signal based on the estimate of the noise spectrum and the
// signal spectrum estimate.
// This uses the noise spectrum from before the update below.
const SignalType signal_type = ClassifySignal(
signal_spectrum, noise_spectrum_estimator_.GetNoiseSpectrum(),
data_dumper_);
// Update the noise spectrum based on the signal spectrum.
noise_spectrum_estimator_.Update(signal_spectrum,
initialization_frames_left_ > 0);
// Update the number of frames until a reliable signal spectrum is achieved.
initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1);
// Count down while the classification stays the same; restart the countdown
// whenever it changes.
if (last_signal_type_ == signal_type) {
consistent_classification_counter_ =
std::max(0, consistent_classification_counter_ - 1);
} else {
last_signal_type_ = signal_type;
consistent_classification_counter_ = 3;
}
// Report non-stationary until the countdown has reached zero.
if (consistent_classification_counter_ > 0) {
return SignalClassifier::SignalType::kNonStationary;
}
return signal_type;
}
} // namespace webrtc

View File

@ -1,73 +0,0 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_SIGNAL_CLASSIFIER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_SIGNAL_CLASSIFIER_H_
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
#include "modules/audio_processing/agc2/down_sampler.h"
#include "modules/audio_processing/agc2/noise_spectrum_estimator.h"
namespace webrtc {
class ApmDataDumper;
class AudioBuffer;
// Classifies 10 ms mono frames as stationary or non-stationary by comparing
// the frame power spectrum with a running noise spectrum estimate.
// Not copyable.
class SignalClassifier {
public:
enum class SignalType { kNonStationary, kStationary };
// `data_dumper` is stored as a raw pointer and shared with the helpers.
explicit SignalClassifier(ApmDataDumper* data_dumper);
SignalClassifier() = delete;
SignalClassifier(const SignalClassifier&) = delete;
SignalClassifier& operator=(const SignalClassifier&) = delete;
~SignalClassifier();
// Resets the classifier state for input at `sample_rate_hz`.
void Initialize(int sample_rate_hz);
// Analyzes one frame and returns its classification.
SignalType Analyze(rtc::ArrayView<const float> signal);
private:
// Prepends the most recent samples of the previous frames to a new frame so
// that a longer analysis window is obtained.
class FrameExtender {
public:
FrameExtender(size_t frame_size, size_t extended_frame_size);
FrameExtender() = delete;
FrameExtender(const FrameExtender&) = delete;
FrameExtender& operator=(const FrameExtender&) = delete;
~FrameExtender();
// Fills `x_extended` with the stored history followed by `x`.
void ExtendFrame(rtc::ArrayView<const float> x,
rtc::ArrayView<float> x_extended);
private:
// History samples carried over between calls to `ExtendFrame()`.
std::vector<float> x_old_;
};
ApmDataDumper* const data_dumper_;
DownSampler down_sampler_;
std::unique_ptr<FrameExtender> frame_extender_;
NoiseSpectrumEstimator noise_spectrum_estimator_;
int sample_rate_hz_;
// Number of upcoming frames that still initialize the noise spectrum.
int initialization_frames_left_;
// Countdown that must reach zero before a classification is reported.
int consistent_classification_counter_;
SignalType last_signal_type_;
const OouraFft ooura_fft_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_SIGNAL_CLASSIFIER_H_

View File

@ -1,86 +0,0 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/signal_classifier.h"
#include <array>
#include <functional>
#include <limits>
#include "api/function_view.h"
#include "modules/audio_processing/agc2/agc2_testing_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"
#include "rtc_base/random.h"
namespace webrtc {
namespace {
constexpr int kNumIterations = 100;
// Runs the signal classifier on audio generated by 'sample_generator'
// for kNumIterations. Returns the number of frames classified as noise.
// Feeds `kNumIterations` 10 ms frames produced by `sample_generator` to a
// `SignalClassifier` initialized for `sample_rate_hz` and returns how many of
// them were classified as stationary. The count is returned as an `int`, the
// type in which it is accumulated and in which the callers store it.
int RunClassifier(rtc::FunctionView<float()> sample_generator,
                  int sample_rate_hz) {
  ApmDataDumper data_dumper(0);
  SignalClassifier classifier(&data_dumper);
  // Large enough for a 10 ms frame at 48 kHz, the highest tested rate.
  std::array<float, 480> signal;
  classifier.Initialize(sample_rate_hz);
  const size_t samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
  int number_of_noise_frames = 0;
  for (int i = 0; i < kNumIterations; ++i) {
    for (size_t j = 0; j < samples_per_channel; ++j) {
      signal[j] = sample_generator();
    }
    // The comparison yields a bool, which adds 1 per stationary frame.
    number_of_noise_frames +=
        classifier.Analyze({&signal[0], samples_per_channel}) ==
        SignalClassifier::SignalType::kStationary;
  }
  return number_of_noise_frames;
}
// Test fixture parametrized by the sample rate in Hz.
class SignalClassifierParametrization : public ::testing::TestWithParam<int> {
protected:
// Returns the sample rate under test.
int sample_rate_hz() const { return GetParam(); }
};
// White random noise is stationary, but does not trigger the detector
// every frame due to the randomness.
TEST_P(SignalClassifierParametrization, WhiteNoise) {
  test::WhiteNoiseGenerator noise_generator(/*min_amplitude=*/test::kMinS16,
                                            /*max_amplitude=*/test::kMaxS16);
  const int num_stationary_frames =
      RunClassifier(noise_generator, sample_rate_hz());
  // More than half of the frames must be detected as stationary.
  EXPECT_GT(num_stationary_frames, kNumIterations / 2);
}
// Sine curves are (very) stationary. They trigger the detector all
// the time. Except for a few initial frames.
TEST_P(SignalClassifierParametrization, SineTone) {
  test::SineGenerator sine_generator(/*amplitude=*/test::kMaxS16,
                                     /*frequency_hz=*/600.0f,
                                     sample_rate_hz());
  const int num_stationary_frames =
      RunClassifier(sine_generator, sample_rate_hz());
  // At most five frames may be missed.
  EXPECT_GE(num_stationary_frames, kNumIterations - 5);
}
// Pulses are transient if they are far enough apart. They shouldn't
// trigger the noise detector.
TEST_P(SignalClassifierParametrization, PulseTone) {
  test::PulseGenerator pulse_generator(/*pulse_amplitude=*/test::kMaxS16,
                                       /*no_pulse_amplitude=*/10.0f,
                                       /*frequency_hz=*/20.0f,
                                       sample_rate_hz());
  const int num_stationary_frames =
      RunClassifier(pulse_generator, sample_rate_hz());
  // No frame may be detected as stationary.
  EXPECT_EQ(num_stationary_frames, 0);
}
INSTANTIATE_TEST_SUITE_P(GainController2SignalClassifier,
SignalClassifierParametrization,
::testing::Values(8000, 16000, 32000, 48000));
} // namespace
} // namespace webrtc

View File

@ -3109,11 +3109,6 @@ TEST(AudioProcessing, GainController2ConfigEqual) {
b_adaptive.dry_run = a_adaptive.dry_run;
EXPECT_EQ(a, b);
a_adaptive.noise_estimator = AudioProcessing::Config::GainController2::
NoiseEstimator::kStationaryNoise;
b_adaptive.noise_estimator = a_adaptive.noise_estimator;
EXPECT_EQ(a, b);
a_adaptive.vad_reset_period_ms++;
b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms;
EXPECT_EQ(a, b);
@ -3171,11 +3166,6 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) {
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
a_adaptive.noise_estimator = AudioProcessing::Config::GainController2::
NoiseEstimator::kStationaryNoise;
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
a_adaptive.vad_reset_period_ms++;
EXPECT_NE(a, b);
a_adaptive = b_adaptive;

View File

@ -46,17 +46,6 @@ std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
RTC_CHECK_NOTREACHED();
}
// Returns the name of the AGC2 noise estimator `type` as used in the string
// representation of the config. Values that match no case label reach
// `RTC_CHECK_NOTREACHED()`.
std::string GainController2NoiseEstimatorToString(
const Agc2Config::NoiseEstimator& type) {
switch (type) {
case Agc2Config::NoiseEstimator::kStationaryNoise:
return "StationaryNoise";
case Agc2Config::NoiseEstimator::kNoiseFloor:
return "NoiseFloor";
}
RTC_CHECK_NOTREACHED();
}
} // namespace
constexpr int AudioProcessing::kNativeSampleRatesHz[];
@ -99,7 +88,6 @@ bool Agc1Config::operator==(const Agc1Config& rhs) const {
bool Agc2Config::AdaptiveDigital::operator==(
const Agc2Config::AdaptiveDigital& rhs) const {
return enabled == rhs.enabled && dry_run == rhs.dry_run &&
noise_estimator == rhs.noise_estimator &&
vad_reset_period_ms == rhs.vad_reset_period_ms &&
adjacent_speech_frames_threshold ==
rhs.adjacent_speech_frames_threshold &&
@ -204,9 +192,6 @@ std::string AudioProcessing::Config::ToString() const {
<< " }, adaptive_digital: { enabled: "
<< gain_controller2.adaptive_digital.enabled
<< ", dry_run: " << gain_controller2.adaptive_digital.dry_run
<< ", noise_estimator: "
<< GainController2NoiseEstimatorToString(
gain_controller2.adaptive_digital.noise_estimator)
<< ", vad_reset_period_ms: "
<< gain_controller2.adaptive_digital.vad_reset_period_ms
<< ", adjacent_speech_frames_threshold: "

View File

@ -402,7 +402,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
bool enabled = false;
// Run the adaptive digital controller but the signal is not modified.
bool dry_run = false;
NoiseEstimator noise_estimator = kNoiseFloor;
int vad_reset_period_ms = 1500;
int adjacent_speech_frames_threshold = 12;
float max_gain_change_db_per_second = 3.0f;
@ -411,6 +410,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
bool avx2_allowed = true;
bool neon_allowed = true;
// TODO(crbug.com/webrtc/7494): Remove deprecated settings below.
NoiseEstimator noise_estimator = kNoiseFloor;
float vad_probability_attack = 1.0f;
LevelEstimator level_estimator = kRms;
int level_estimator_adjacent_speech_frames_threshold = 12;