Add new variance update option and unittests for intelligibility

- New option for computing variance that is more adaptive and has lower complexity.
- Fixed related off-by-one errors.
- Added intelligibility unittests.
- Do not enhance if experiencing variance underflow.

R=andrew@webrtc.org, henrik.lundin@webrtc.org

Review URL: https://codereview.webrtc.org/1207353002

Cr-Commit-Position: refs/heads/master@{#9567}
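For reference, the new kStepBlockBasedMovingAverage option keeps per-block running sums plus a circular buffer of block means, so the windowed variance is updated by swapping out the oldest block instead of recomputing the whole window. The following is a minimal, self-contained sketch of that idea for a single real-valued signal; the class name BlockMovingVariance and its members are illustrative only and are simplified relative to intelligibility::VarianceArray in the diff below (which tracks complex spectra across many frequency bins).

// Minimal sketch (not the WebRTC implementation): block-based moving-average
// variance over the last |window_size| blocks of |block_size| samples each.
#include <cstddef>
#include <vector>

class BlockMovingVariance {
 public:
  BlockMovingVariance(size_t block_size, size_t window_size)
      : block_size_(block_size),
        block_means_(window_size, 0.f),
        block_mean_sqs_(window_size, 0.f) {}

  // Feeds one sample; variance() is refreshed once per completed block.
  void Step(float x) {
    sum_ += x;
    sum_sq_ += x * x;
    if (++count_ == block_size_) {
      const float scale = 1.f / block_size_;
      // Replace the oldest block's contribution with the newest block's.
      running_mean_ += sum_ * scale - block_means_[cursor_];
      running_mean_sq_ += sum_sq_ * scale - block_mean_sqs_[cursor_];
      block_means_[cursor_] = sum_ * scale;
      block_mean_sqs_[cursor_] = sum_sq_ * scale;
      sum_ = sum_sq_ = 0.f;
      count_ = 0;
      if (++cursor_ == block_means_.size()) {
        cursor_ = 0;
        full_ = true;
      }
      const size_t blocks = full_ ? block_means_.size() : cursor_;
      const float inv = 1.f / blocks;
      const float mean = running_mean_ * inv;
      variance_ = running_mean_sq_ * inv - mean * mean;  // E[x^2] - E[x]^2
    }
  }

  float variance() const { return variance_; }

 private:
  const size_t block_size_;
  std::vector<float> block_means_;
  std::vector<float> block_mean_sqs_;
  size_t cursor_ = 0;
  size_t count_ = 0;
  bool full_ = false;
  float sum_ = 0.f;
  float sum_sq_ = 0.f;
  float running_mean_ = 0.f;
  float running_mean_sq_ = 0.f;
  float variance_ = 0.f;
};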
@@ -70,7 +70,7 @@
'<(webrtc_root)/test/test.gyp:test_support',
],
'sources': [
'intelligibility/intelligibility_proc.cc',
'intelligibility/test/intelligibility_proc.cc',
],
}, # intelligibility_proc
],

@@ -17,8 +17,8 @@
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

#include <cmath>
#include <cstdlib>
#include <math.h>
#include <stdlib.h>

#include <algorithm>
#include <numeric>
@@ -27,26 +27,24 @@
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/common_audio/window_generator.h"

namespace webrtc {

namespace {

const int kErbResolution = 2;
const int kWindowSizeMs = 2;
const int kChunkSizeMs = 10;  // Size provided by APM.
const float kClipFreq = 200.0f;
const float kConfigRho = 0.02f;  // Default production and interpretation SNR.
const float kKbdAlpha = 1.5f;
const float kLambdaBot = -1.0f;  // Extreme values in bisection
const float kLambdaTop = -10e-18f;  // search for lamda.

}  // namespace

using std::complex;
using std::max;
using std::min;

namespace webrtc {

const int IntelligibilityEnhancer::kErbResolution = 2;
const int IntelligibilityEnhancer::kWindowSizeMs = 2;
const int IntelligibilityEnhancer::kChunkSizeMs = 10;  // Size provided by APM.
const int IntelligibilityEnhancer::kAnalyzeRate = 800;
const int IntelligibilityEnhancer::kVarianceRate = 2;
const float IntelligibilityEnhancer::kClipFreq = 200.0f;
const float IntelligibilityEnhancer::kConfigRho = 0.02f;
const float IntelligibilityEnhancer::kKbdAlpha = 1.5f;

// To disable gain update smoothing, set gain limit to be VERY high.
// TODO(ekmeyerson): Add option to disable gain smoothing altogether
// to avoid the extra computation.
const float IntelligibilityEnhancer::kGainChangeLimit = 0.0125f;

using VarianceType = intelligibility::VarianceArray::StepType;

IntelligibilityEnhancer::TransformCallback::TransformCallback(
@@ -93,7 +91,7 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int erb_resolution,
noise_variance_(freqs_, VarianceType::kStepInfinite, 475, 0.01f),
filtered_clear_var_(new float[bank_size_]),
filtered_noise_var_(new float[bank_size_]),
filter_bank_(nullptr),
filter_bank_(bank_size_),
center_freqs_(new float[bank_size_]),
rho_(new float[bank_size_]),
gains_eq_(new float[bank_size_]),
@@ -149,7 +147,7 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int erb_resolution,
IntelligibilityEnhancer::~IntelligibilityEnhancer() {
WebRtcVad_Free(vad_low_);
WebRtcVad_Free(vad_high_);
free(filter_bank_);
free(temp_out_buffer_);
}

void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio) {
@@ -203,8 +201,6 @@ void IntelligibilityEnhancer::DispatchAudio(

void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block,
complex<float>* out_block) {
float power_target;

if (block_count_ < 2) {
memset(out_block, 0, freqs_ * sizeof(*out_block));
++block_count_;
@@ -216,8 +212,8 @@ void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block,
// based on experiments with different cutoffs.
if (has_voice_low_ || true) {
clear_variance_.Step(in_block, false);
power_target = std::accumulate(clear_variance_.variance(),
clear_variance_.variance() + freqs_, 0.0f);
const float power_target = std::accumulate(
clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.0f);

if (block_count_ % analysis_rate_ == analysis_rate_ - 1) {
AnalyzeClearBlock(power_target);
@@ -239,35 +235,46 @@ void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {
FilterVariance(clear_variance_.variance(), filtered_clear_var_.get());
FilterVariance(noise_variance_.variance(), filtered_noise_var_.get());

// Bisection search for optimal |lambda|

float lambda_bot = -1.0f, lambda_top = -10e-18f, lambda;
float power_bot, power_top, power;
SolveForGainsGivenLambda(lambda_top, start_freq_, gains_eq_.get());
power_top =
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
const float power_top =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
SolveForGainsGivenLambda(lambda_bot, start_freq_, gains_eq_.get());
power_bot =
SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
const float power_bot =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
DCHECK(power_target >= power_bot && power_target <= power_top);
if (power_target >= power_bot && power_target <= power_top) {
SolveForLambda(power_target, power_bot, power_top);
UpdateErbGains();
}  // Else experiencing variance underflow, so do nothing.
}

float power_ratio = 2.0f;  // Ratio of achieved power to target power.
void IntelligibilityEnhancer::SolveForLambda(float power_target,
float power_bot,
float power_top) {
const float kConvergeThresh = 0.001f;  // TODO(ekmeyerson): Find best values
const int kMaxIters = 100;  // for these, based on experiments.

const float reciprocal_power_target = 1.f / power_target;
float lambda_bot = kLambdaBot;
float lambda_top = kLambdaTop;
float power_ratio = 2.0f;  // Ratio of achieved power to target power.
int iters = 0;
while (fabs(power_ratio - 1.0f) > kConvergeThresh && iters <= kMaxIters) {
lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;
while (std::fabs(power_ratio - 1.0f) > kConvergeThresh &&
iters <= kMaxIters) {
const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;
SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
power = DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
const float power =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
if (power < power_target) {
lambda_bot = lambda;
} else {
lambda_top = lambda;
}
power_ratio = fabs(power / power_target);
power_ratio = std::fabs(power * reciprocal_power_target);
++iters;
}
}

void IntelligibilityEnhancer::UpdateErbGains() {
// (ERB gain) = filterbank' * (freq gain)
float* gains = gain_applier_.target();
for (int i = 0; i < freqs_; ++i) {
@@ -303,12 +310,8 @@ void IntelligibilityEnhancer::CreateErbBank() {
center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
}

filter_bank_ = static_cast<float**>(
malloc(sizeof(*filter_bank_) * bank_size_ +
sizeof(**filter_bank_) * freqs_ * bank_size_));
for (int i = 0; i < bank_size_; ++i) {
filter_bank_[i] =
reinterpret_cast<float*>(filter_bank_ + bank_size_) + freqs_ * i;
filter_bank_[i].resize(freqs_);
}

for (int i = 1; i <= bank_size_; ++i) {
@@ -388,7 +391,7 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,

void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {
for (int i = 0; i < bank_size_; ++i) {
result[i] = DotProduct(filter_bank_[i], var, freqs_);
result[i] = DotProduct(filter_bank_[i].data(), var, freqs_);
}
}
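The AnalyzeClearBlock/SolveForLambda changes above replace the DCHECK with a guard, so enhancement is skipped whenever the target power falls outside the bracket reached at the lambda extremes (variance underflow). A rough standalone sketch of that bisection-plus-guard pattern follows; achieved_power is a hypothetical stand-in for SolveForGainsGivenLambda followed by the dot product, assumed monotonically increasing in lambda.

#include <cmath>
#include <functional>

// Sketch only (not the production code): bisect on lambda until the achieved
// power matches |power_target|, skipped when the target lies outside
// [power(lambda_bot), power(lambda_top)], e.g. on variance underflow.
float SolveForLambdaSketch(const std::function<float(float)>& achieved_power,
                           float power_target) {
  const float kLambdaBot = -1.0f;  // Extreme values bracketing the search.
  const float kLambdaTop = -10e-18f;
  const float kConvergeThresh = 0.001f;
  const int kMaxIters = 100;

  float lambda_bot = kLambdaBot;
  float lambda_top = kLambdaTop;
  if (power_target < achieved_power(lambda_bot) ||
      power_target > achieved_power(lambda_top)) {
    return lambda_top;  // Variance underflow: caller should not enhance.
  }
  float lambda = lambda_top;
  float power_ratio = 2.0f;  // Ratio of achieved power to target power.
  for (int iters = 0;
       std::fabs(power_ratio - 1.0f) > kConvergeThresh && iters <= kMaxIters;
       ++iters) {
    lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;
    const float power = achieved_power(lambda);
    if (power < power_target) {
      lambda_bot = lambda;
    } else {
      lambda_top = lambda;
    }
    power_ratio = std::fabs(power / power_target);
  }
  return lambda;
}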
@@ -16,6 +16,7 @@
#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

#include <complex>
#include <vector>

#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/lapped_transform.h"
@@ -83,6 +84,8 @@ class IntelligibilityEnhancer {
AudioSource source_;
};
friend class TransformCallback;
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);

// Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.
void DispatchAudio(AudioSource source,
@@ -97,6 +100,12 @@ class IntelligibilityEnhancer {
// Computes and sets modified gains.
void AnalyzeClearBlock(float power_target);

// Bisection search for optimal |lambda|.
void SolveForLambda(float power_target, float power_bot, float power_top);

// Transforms freq gains to ERB gains.
void UpdateErbGains();

// Updates variance calculation for noise input with |in_block|.
void ProcessNoiseBlock(const std::complex<float>* in_block,
std::complex<float>* out_block);
@@ -118,16 +127,6 @@ class IntelligibilityEnhancer {
// Returns dot product of vectors specified by size |length| arrays |a|,|b|.
static float DotProduct(const float* a, const float* b, int length);

static const int kErbResolution;
static const int kWindowSizeMs;
static const int kChunkSizeMs;
static const int kAnalyzeRate;  // Default for |analysis_rate_|.
static const int kVarianceRate;  // Default for |variance_rate_|.
static const float kClipFreq;
static const float kConfigRho;  // Default production and interpretation SNR.
static const float kKbdAlpha;
static const float kGainChangeLimit;

const int freqs_;  // Num frequencies in frequency domain.
const int window_size_;  // Window size in samples; also the block size.
const int chunk_length_;  // Chunk size in samples.
@@ -142,7 +141,7 @@ class IntelligibilityEnhancer {
intelligibility::VarianceArray noise_variance_;
rtc::scoped_ptr<float[]> filtered_clear_var_;
rtc::scoped_ptr<float[]> filtered_noise_var_;
float** filter_bank_;  // TODO(ekmeyerson): Switch to using ChannelBuffer.
std::vector<std::vector<float>> filter_bank_;
rtc::scoped_ptr<float[]> center_freqs_;
int start_freq_;
rtc::scoped_ptr<float[]> rho_;  // Production and interpretation SNR.

@@ -0,0 +1,205 @@
/*
 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

//
// Unit tests for intelligibility enhancer.
//

#include <math.h>
#include <stdlib.h>
#include <algorithm>
#include <vector>

#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/arraysize.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"

namespace webrtc {

namespace {

// Target output for ERB create test. Generated with matlab.
const float kTestCenterFreqs[] = {
13.169f, 26.965f, 41.423f, 56.577f, 72.461f, 89.113f, 106.57f, 124.88f,
144.08f, 164.21f, 185.34f, 207.5f, 230.75f, 255.16f, 280.77f, 307.66f,
335.9f, 365.56f, 396.71f, 429.44f, 463.84f, 500.f};
const float kTestFilterBank[][2] = {{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.f},
{0.055556f, 0.2f},
{0, 0.2f},
{0, 0.2f},
{0, 0.2f},
{0, 0.2f}};
static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank),
"Test filterbank badly initialized.");

// Target output for gain solving test. Generated with matlab.
const int kTestStartFreq = 12;  // Lowest integral frequency for ERBs.
const float kTestZeroVar[] = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f,
1.f, 1.f, 1.f, 0.f, 0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestZeroVar),
"Variance test data badly initialized.");
const float kTestNonZeroVarLambdaTop[] = {
1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f,
1.f, 1.f, 1.f, 0.f, 0.f, 0.0351f, 0.0636f, 0.0863f,
0.1037f, 0.1162f, 0.1236f, 0.1251f, 0.1189f, 0.0993f};
static_assert(arraysize(kTestCenterFreqs) ==
arraysize(kTestNonZeroVarLambdaTop),
"Variance test data badly initialized.");
const float kMaxTestError = 0.005f;

// Enhancer initialization parameters.
const int kSamples = 2000;
const int kErbResolution = 2;
const int kSampleRate = 1000;
const int kFragmentSize = kSampleRate / 100;
const int kNumChannels = 1;
const float kDecayRate = 0.9f;
const int kWindowSize = 800;
const int kAnalyzeRate = 800;
const int kVarianceRate = 2;
const float kGainLimit = 0.1f;

}  // namespace

using std::vector;
using intelligibility::VarianceArray;

class IntelligibilityEnhancerTest : public ::testing::Test {
 protected:
IntelligibilityEnhancerTest()
: enh_(kErbResolution,
kSampleRate,
kNumChannels,
VarianceArray::kStepInfinite,
kDecayRate,
kWindowSize,
kAnalyzeRate,
kVarianceRate,
kGainLimit),
clear_data_(kSamples),
noise_data_(kSamples),
orig_data_(kSamples) {}

bool CheckUpdate(VarianceArray::StepType step_type) {
IntelligibilityEnhancer enh(kErbResolution, kSampleRate, kNumChannels,
step_type, kDecayRate, kWindowSize,
kAnalyzeRate, kVarianceRate, kGainLimit);
float* clear_cursor = &clear_data_[0];
float* noise_cursor = &noise_data_[0];
for (int i = 0; i < kSamples; i += kFragmentSize) {
enh.ProcessCaptureAudio(&noise_cursor);
enh.ProcessRenderAudio(&clear_cursor);
clear_cursor += kFragmentSize;
noise_cursor += kFragmentSize;
}
for (int i = 0; i < kSamples; i++) {
if (std::fabs(clear_data_[i] - orig_data_[i]) > kMaxTestError) {
return true;
}
}
return false;
}

IntelligibilityEnhancer enh_;
vector<float> clear_data_;
vector<float> noise_data_;
vector<float> orig_data_;
};

// For each class of generated data, tests that render stream is
// updated when it should be for each variance update method.
TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
vector<VarianceArray::StepType> step_types;
step_types.push_back(VarianceArray::kStepInfinite);
step_types.push_back(VarianceArray::kStepDecaying);
step_types.push_back(VarianceArray::kStepWindowed);
step_types.push_back(VarianceArray::kStepBlocked);
step_types.push_back(VarianceArray::kStepBlockBasedMovingAverage);
std::fill(noise_data_.begin(), noise_data_.end(), 0.0f);
std::fill(orig_data_.begin(), orig_data_.end(), 0.0f);
for (auto step_type : step_types) {
std::fill(clear_data_.begin(), clear_data_.end(), 0.0f);
EXPECT_FALSE(CheckUpdate(step_type));
}
std::srand(1);
auto float_rand = []() { return std::rand() * 2.f / RAND_MAX - 1; };
std::generate(noise_data_.begin(), noise_data_.end(), float_rand);
for (auto step_type : step_types) {
EXPECT_FALSE(CheckUpdate(step_type));
}
for (auto step_type : step_types) {
std::generate(clear_data_.begin(), clear_data_.end(), float_rand);
orig_data_ = clear_data_;
EXPECT_TRUE(CheckUpdate(step_type));
}
}

// Tests ERB bank creation, comparing against matlab output.
TEST_F(IntelligibilityEnhancerTest, TestErbCreation) {
ASSERT_EQ(static_cast<int>(arraysize(kTestCenterFreqs)), enh_.bank_size_);
for (int i = 0; i < enh_.bank_size_; ++i) {
EXPECT_NEAR(kTestCenterFreqs[i], enh_.center_freqs_[i], kMaxTestError);
ASSERT_EQ(static_cast<int>(arraysize(kTestFilterBank[0])), enh_.freqs_);
for (int j = 0; j < enh_.freqs_; ++j) {
EXPECT_NEAR(kTestFilterBank[i][j], enh_.filter_bank_[i][j],
kMaxTestError);
}
}
}

// Tests analytic solution for optimal gains, comparing
// against matlab output.
TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
ASSERT_EQ(kTestStartFreq, enh_.start_freq_);
vector<float> sols(enh_.bank_size_);
float lambda = -0.001f;
for (int i = 0; i < enh_.bank_size_; i++) {
enh_.filtered_clear_var_[i] = 0.0f;
enh_.filtered_noise_var_[i] = 0.0f;
enh_.rho_[i] = 0.02f;
}
enh_.SolveForGainsGivenLambda(lambda, enh_.start_freq_, &sols[0]);
for (int i = 0; i < enh_.bank_size_; i++) {
EXPECT_NEAR(kTestZeroVar[i], sols[i], kMaxTestError);
}
for (int i = 0; i < enh_.bank_size_; i++) {
enh_.filtered_clear_var_[i] = static_cast<float>(i + 1);
enh_.filtered_noise_var_[i] = static_cast<float>(enh_.bank_size_ - i);
}
enh_.SolveForGainsGivenLambda(lambda, enh_.start_freq_, &sols[0]);
for (int i = 0; i < enh_.bank_size_; i++) {
EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
}
lambda = -1.0;
enh_.SolveForGainsGivenLambda(lambda, enh_.start_freq_, &sols[0]);
for (int i = 0; i < enh_.bank_size_; i++) {
EXPECT_NEAR(kTestZeroVar[i], sols[i], kMaxTestError);
}
}

}  // namespace webrtc

@@ -14,36 +14,32 @@
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

#include <math.h>
#include <string.h>
#include <algorithm>
#include <cmath>
#include <cstring>

using std::complex;
using std::min;

namespace {
namespace webrtc {

// Return |current| changed towards |target|, with the change being at most
// |limit|.
inline float UpdateFactor(float target, float current, float limit) {
namespace intelligibility {

float UpdateFactor(float target, float current, float limit) {
float delta = fabsf(target - current);
float sign = copysign(1.0f, target - current);
return current + sign * fminf(delta, limit);
}

// std::isfinite for complex numbers.
inline bool cplxfinite(complex<float> c) {
bool cplxfinite(complex<float> c) {
return std::isfinite(c.real()) && std::isfinite(c.imag());
}

// std::isnormal for complex numbers.
inline bool cplxnormal(complex<float> c) {
bool cplxnormal(complex<float> c) {
return std::isnormal(c.real()) && std::isnormal(c.imag());
}

// Apply a small fudge to degenerate complex values. The numbers in the array
// were chosen randomly, so that even a series of all zeroes has some small
// variability.
inline complex<float> zerofudge(complex<float> c) {
complex<float> zerofudge(complex<float> c) {
const static complex<float> fudge[7] = {{0.001f, 0.002f},
{0.008f, 0.001f},
{0.003f, 0.008f},
@@ -59,25 +55,14 @@ inline complex<float> zerofudge(complex<float> c) {
return c;
}

// Incremental mean computation. Return the mean of the series with the
// mean |mean| with added |data|.
inline complex<float> NewMean(complex<float> mean,
complex<float> data,
int count) {
complex<float> NewMean(complex<float> mean, complex<float> data, int count) {
return mean + (data - mean) / static_cast<float>(count);
}

inline void AddToMean(complex<float> data, int count, complex<float>* mean) {
void AddToMean(complex<float> data, int count, complex<float>* mean) {
(*mean) = NewMean(*mean, data, count);
}
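NewMean above is the standard incremental mean update, mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n. A quick standalone sanity check of that identity (the values match the TestMeanUpdate data in the unit test further down):

#include <cassert>
#include <complex>

int main() {
  std::complex<float> mean(3.f, 8.f);             // Mean after first sample {3, 8}.
  const std::complex<float> x(7.f, 6.f);          // Second sample.
  mean += (x - mean) / 2.f;                       // Incremental update with count = 2.
  assert(mean == std::complex<float>(5.f, 7.f));  // Matches the expected mean {5, 7}.
  return 0;
}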
}  // namespace

using std::min;

namespace webrtc {

namespace intelligibility {

static const int kWindowBlockSize = 10;

@@ -96,7 +81,8 @@ VarianceArray::VarianceArray(int freqs,
decay_(decay),
history_cursor_(0),
count_(0),
array_mean_(0.0f) {
array_mean_(0.0f),
buffer_full_(false) {
history_.reset(new rtc::scoped_ptr<complex<float>[]>[freqs_]());
for (int i = 0; i < freqs_; ++i) {
history_[i].reset(new complex<float>[window_size_]());
@@ -122,6 +108,9 @@ VarianceArray::VarianceArray(int freqs,
case kStepBlocked:
step_func_ = &VarianceArray::BlockedStep;
break;
case kStepBlockBasedMovingAverage:
step_func_ = &VarianceArray::BlockBasedMovingAverage;
break;
}
}

@@ -223,7 +212,7 @@ void VarianceArray::WindowedStep(const complex<float>* data, bool /*dummy*/) {
// history window and a new block is started. The variances for the window
// are recomputed from scratch at each of these transitions.
void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
int blocks = min(window_size_, history_cursor_);
int blocks = min(window_size_, history_cursor_ + 1);
for (int i = 0; i < freqs_; ++i) {
AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);
AddToMean(data[i] * std::conj(data[i]), count_ + 1,
@@ -242,8 +231,8 @@ void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
running_mean_[i] = complex<float>(0.0f, 0.0f);
running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
for (int j = 0; j < min(window_size_, history_cursor_); ++j) {
AddToMean(subhistory_[i][j], j, &running_mean_[i]);
AddToMean(subhistory_sq_[i][j], j, &running_mean_sq_[i]);
AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);
AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);
}
++history_cursor_;
}
@@ -254,6 +243,51 @@ void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
}
}

// Recomputes variances for each window from scratch based on previous window.
void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data,
bool /*dummy*/) {
// TODO(ekmeyerson) To mitigate potential divergence, add counter so that
// after every so often sums are computed scratch by summing over all
// elements instead of subtracting oldest and adding newest.
for (int i = 0; i < freqs_; ++i) {
sub_running_mean_[i] += data[i];
sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);
}
++count_;

// TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow
// experimentation with different block size,window size pairs.
if (count_ >= kWindowBlockSize) {
count_ = 0;

for (int i = 0; i < freqs_; ++i) {
running_mean_[i] -= subhistory_[i][history_cursor_];
running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];

float scale = 1.f / kWindowBlockSize;
subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale;
subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale;

sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f);
sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f);

running_mean_[i] += subhistory_[i][history_cursor_];
running_mean_sq_[i] += subhistory_sq_[i][history_cursor_];

scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1);
variance_[i] = std::real(running_mean_sq_[i] * scale -
running_mean_[i] * scale *
std::conj(running_mean_[i]) * scale);
}

++history_cursor_;
if (history_cursor_ >= window_size_) {
buffer_full_ = true;
history_cursor_ = 0;
}
}
}

void VarianceArray::Clear() {
memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * freqs_);
memset(running_mean_sq_.get(), 0, sizeof(*running_mean_sq_.get()) * freqs_);

@@ -23,6 +23,30 @@ namespace webrtc {

namespace intelligibility {

// Return |current| changed towards |target|, with the change being at most
// |limit|.
float UpdateFactor(float target, float current, float limit);

// std::isfinite for complex numbers.
bool cplxfinite(std::complex<float> c);

// std::isnormal for complex numbers.
bool cplxnormal(std::complex<float> c);

// Apply a small fudge to degenerate complex values. The numbers in the array
// were chosen randomly, so that even a series of all zeroes has some small
// variability.
std::complex<float> zerofudge(std::complex<float> c);

// Incremental mean computation. Return the mean of the series with the
// mean |mean| with added |data|.
std::complex<float> NewMean(std::complex<float> mean,
std::complex<float> data,
int count);

// Updates |mean| with added |data|;
void AddToMean(std::complex<float> data, int count, std::complex<float>* mean);

// Internal helper for computing the variances of a stream of arrays.
// The result is an array of variances per position: the i-th variance
// is the variance of the stream of data on the i-th positions in the
@@ -43,7 +67,8 @@ class VarianceArray {
kStepInfinite = 0,
kStepDecaying,
kStepWindowed,
kStepBlocked
kStepBlocked,
kStepBlockBasedMovingAverage
};

// Construct an instance for the given input array length (|freqs|) and
@@ -77,6 +102,7 @@ class VarianceArray {
void DecayStep(const std::complex<float>* data, bool dummy);
void WindowedStep(const std::complex<float>* data, bool dummy);
void BlockedStep(const std::complex<float>* data, bool dummy);
void BlockBasedMovingAverage(const std::complex<float>* data, bool dummy);

// TODO(ekmeyerson): Switch the following running means
// and histories from rtc::scoped_ptr to std::vector.
@@ -105,6 +131,7 @@ class VarianceArray {
int history_cursor_;
int count_;
float array_mean_;
bool buffer_full_;
void (VarianceArray::*step_func_)(const std::complex<float>*, bool);
};

@@ -0,0 +1,188 @@
/*
 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

//
// Unit tests for intelligibility utils.
//

#include <math.h>
#include <complex>
#include <iostream>
#include <vector>

#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/arraysize.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"

using std::complex;
using std::vector;

namespace webrtc {

namespace intelligibility {

vector<vector<complex<float>>> GenerateTestData(int freqs, int samples) {
vector<vector<complex<float>>> data(samples);
for (int i = 0; i < samples; i++) {
for (int j = 0; j < freqs; j++) {
const float val = 0.99f / ((i + 1) * (j + 1));
data[i].push_back(complex<float>(val, val));
}
}
return data;
}

// Tests UpdateFactor.
TEST(IntelligibilityUtilsTest, TestUpdateFactor) {
EXPECT_EQ(0, intelligibility::UpdateFactor(0, 0, 0));
EXPECT_EQ(4, intelligibility::UpdateFactor(4, 2, 3));
EXPECT_EQ(3, intelligibility::UpdateFactor(4, 2, 1));
EXPECT_EQ(2, intelligibility::UpdateFactor(2, 4, 3));
EXPECT_EQ(3, intelligibility::UpdateFactor(2, 4, 1));
}

// Tests cplxfinite, cplxnormal, and zerofudge.
TEST(IntelligibilityUtilsTest, TestCplx) {
complex<float> t0(1.f, 0.f);
EXPECT_TRUE(intelligibility::cplxfinite(t0));
EXPECT_FALSE(intelligibility::cplxnormal(t0));
t0 = intelligibility::zerofudge(t0);
EXPECT_NE(t0.imag(), 0.f);
EXPECT_NE(t0.real(), 0.f);
const complex<float> t1(1.f, std::sqrt(-1.f));
EXPECT_FALSE(intelligibility::cplxfinite(t1));
EXPECT_FALSE(intelligibility::cplxnormal(t1));
const complex<float> t2(1.f, 1.f);
EXPECT_TRUE(intelligibility::cplxfinite(t2));
EXPECT_TRUE(intelligibility::cplxnormal(t2));
}

// Tests NewMean and AddToMean.
TEST(IntelligibilityUtilsTest, TestMeanUpdate) {
const complex<float> data[] = {{3, 8}, {7, 6}, {2, 1}, {8, 9}, {0, 6}};
const complex<float> means[] = {{3, 8}, {5, 7}, {4, 5}, {5, 6}, {4, 6}};
complex<float> mean(3, 8);
for (size_t i = 0; i < arraysize(data); i++) {
EXPECT_EQ(means[i], NewMean(mean, data[i], i + 1));
AddToMean(data[i], i + 1, &mean);
EXPECT_EQ(means[i], mean);
}
}

// Tests VarianceArray, for all variance step types.
TEST(IntelligibilityUtilsTest, TestVarianceArray) {
const int kFreqs = 10;
const int kSamples = 100;
const int kWindowSize = 10;  // Should pass for all kWindowSize > 1.
const float kDecay = 0.5f;
vector<VarianceArray::StepType> step_types;
step_types.push_back(VarianceArray::kStepInfinite);
step_types.push_back(VarianceArray::kStepDecaying);
step_types.push_back(VarianceArray::kStepWindowed);
step_types.push_back(VarianceArray::kStepBlocked);
step_types.push_back(VarianceArray::kStepBlockBasedMovingAverage);
const vector<vector<complex<float>>> test_data(
GenerateTestData(kFreqs, kSamples));
for (auto step_type : step_types) {
VarianceArray variance_array(kFreqs, step_type, kWindowSize, kDecay);
EXPECT_EQ(0, variance_array.variance()[0]);
EXPECT_EQ(0, variance_array.array_mean());
variance_array.ApplyScale(2.0f);
EXPECT_EQ(0, variance_array.variance()[0]);
EXPECT_EQ(0, variance_array.array_mean());

// Makes sure Step is doing something.
variance_array.Step(&test_data[0][0]);
for (int i = 1; i < kSamples; i++) {
variance_array.Step(&test_data[i][0]);
EXPECT_GE(variance_array.array_mean(), 0.0f);
EXPECT_LE(variance_array.array_mean(), 1.0f);
for (int j = 0; j < kFreqs; j++) {
EXPECT_GE(variance_array.variance()[j], 0.0f);
EXPECT_LE(variance_array.variance()[j], 1.0f);
}
}
variance_array.Clear();
EXPECT_EQ(0, variance_array.variance()[0]);
EXPECT_EQ(0, variance_array.array_mean());
}
}

// Tests exact computation on synthetic data.
TEST(IntelligibilityUtilsTest, TestMovingBlockAverage) {
// Exact, not unbiased estimates.
const float kTestVarianceBufferNotFull = 16.5f;
const float kTestVarianceBufferFull1 = 66.5f;
const float kTestVarianceBufferFull2 = 333.375f;
const int kFreqs = 2;
const int kSamples = 50;
const int kWindowSize = 2;
const float kDecay = 0.5f;
const float kMaxError = 0.0001f;

VarianceArray variance_array(
kFreqs, VarianceArray::kStepBlockBasedMovingAverage, kWindowSize, kDecay);

vector<vector<complex<float>>> test_data(kSamples);
for (int i = 0; i < kSamples; i++) {
for (int j = 0; j < kFreqs; j++) {
if (i < 30) {
test_data[i].push_back(complex<float>(static_cast<float>(kSamples - i),
static_cast<float>(i + 1)));
} else {
test_data[i].push_back(complex<float>(0.f, 0.f));
}
}
}

for (int i = 0; i < kSamples; i++) {
variance_array.Step(&test_data[i][0]);
for (int j = 0; j < kFreqs; j++) {
if (i < 9) {  // In utils, kWindowBlockSize = 10.
EXPECT_EQ(0, variance_array.variance()[j]);
} else if (i < 19) {
EXPECT_NEAR(kTestVarianceBufferNotFull, variance_array.variance()[j],
kMaxError);
} else if (i < 39) {
EXPECT_NEAR(kTestVarianceBufferFull1, variance_array.variance()[j],
kMaxError);
} else if (i < 49) {
EXPECT_NEAR(kTestVarianceBufferFull2, variance_array.variance()[j],
kMaxError);
} else {
EXPECT_EQ(0, variance_array.variance()[j]);
}
}
}
}

// Tests gain applier.
TEST(IntelligibilityUtilsTest, TestGainApplier) {
const int kFreqs = 10;
const int kSamples = 100;
const float kChangeLimit = 0.1f;
GainApplier gain_applier(kFreqs, kChangeLimit);
const vector<vector<complex<float>>> in_data(
GenerateTestData(kFreqs, kSamples));
vector<vector<complex<float>>> out_data(GenerateTestData(kFreqs, kSamples));
for (int i = 0; i < kSamples; i++) {
gain_applier.Apply(&in_data[i][0], &out_data[i][0]);
for (int j = 0; j < kFreqs; j++) {
EXPECT_GT(out_data[i][j].real(), 0.0f);
EXPECT_LT(out_data[i][j].real(), 1.0f);
EXPECT_GT(out_data[i][j].imag(), 0.0f);
EXPECT_LT(out_data[i][j].imag(), 1.0f);
}
}
}

}  // namespace intelligibility

}  // namespace webrtc

@@ -16,9 +16,9 @@
#include <stdint.h>
#include <stdlib.h>
#include <string>
#include <sys/stat.h>
#include <sys/types.h>
#include <string>

#include "gflags/gflags.h"
#include "testing/gtest/include/gtest/gtest.h"
@@ -171,6 +171,8 @@
'audio_processing/beamformer/mock_nonlinear_beamformer.cc',
'audio_processing/beamformer/mock_nonlinear_beamformer.h',
'audio_processing/echo_cancellation_impl_unittest.cc',
'audio_processing/intelligibility/intelligibility_enhancer_unittest.cc',
'audio_processing/intelligibility/intelligibility_utils_unittest.cc',
'audio_processing/splitting_filter_unittest.cc',
'audio_processing/transient/dyadic_decimator_unittest.cc',
'audio_processing/transient/file_utils.cc',