RNN VAD: FC layer isolated into rnn_fc.h/.cc

This refactoring makes it easier and cleaner to add SIMD optimizations and
removes `FullyConnectedLayer` from the RNN VAD API.

Minor improvements (readability, API):
- `FullyConnectedLayer` now takes an `ActivationFunction` enum value instead
  of a function view
- SSE2 optimization moved into `FullyConnectedLayer::ComputeOutputSse2`
- layer name added for improved logs

Bug: webrtc:10480
Change-Id: Ida4903a67655e19ef0464f378c433c1f6e96dca7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/195444
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32766}
This commit is contained in:
Alessio Bazzica
2020-12-03 19:33:52 +01:00
committed by Commit Bot
parent a760bca072
commit 31d3b217d3
7 changed files with 364 additions and 245 deletions

View File

@ -24,6 +24,7 @@ rtc_library("rnn_vad") {
deps = [
":rnn_vad_common",
":rnn_vad_layers",
":rnn_vad_lp_residual",
":rnn_vad_pitch",
":rnn_vad_sequence_buffer",
@ -78,6 +79,24 @@ rtc_library("rnn_vad_lp_residual") {
]
}
# Neural network layers used by the RNN VAD. At this point the target only
# contains the fully connected layer (rnn_fc.h/.cc).
rtc_source_set("rnn_vad_layers") {
sources = [
"rnn_fc.cc",
"rnn_fc.h",
]
# WebRTC targets plus the third-party rnnoise target; rnn_fc.cc includes
# headers from third_party/rnnoise for the weights and activation functions.
deps = [
":rnn_vad_common",
"..:cpu_features",
"../../../../api:array_view",
"../../../../api:function_view",
"../../../../rtc_base:checks",
"../../../../rtc_base:safe_conversions",
"../../../../rtc_base/system:arch",
"//third_party/rnnoise:rnn_vad",
]
# rnn_fc.h uses absl::string_view for the layer name.
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
}
rtc_source_set("vector_math") {
sources = [ "vector_math.h" ]
deps = [
@ -221,6 +240,7 @@ if (rtc_include_tests) {
"pitch_search_internal_unittest.cc",
"pitch_search_unittest.cc",
"ring_buffer_unittest.cc",
"rnn_fc_unittest.cc",
"rnn_unittest.cc",
"rnn_vad_unittest.cc",
"sequence_buffer_unittest.cc",
@ -233,6 +253,7 @@ if (rtc_include_tests) {
":rnn_vad",
":rnn_vad_auto_correlation",
":rnn_vad_common",
":rnn_vad_layers",
":rnn_vad_lp_residual",
":rnn_vad_pitch",
":rnn_vad_ring_buffer",

View File

@ -60,37 +60,6 @@ inline float RectifiedLinearUnit(float x) {
return x < 0.f ? 0.f : x;
}
std::vector<float> GetScaledParams(rtc::ArrayView<const int8_t> params) {
std::vector<float> scaled_params(params.size());
std::transform(params.begin(), params.end(), scaled_params.begin(),
[](int8_t x) -> float {
return rnnoise::kWeightsScale * static_cast<float>(x);
});
return scaled_params;
}
// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this
// function to improve setup time.
// Casts and scales |weights| and re-arranges the layout.
std::vector<float> GetPreprocessedFcWeights(
rtc::ArrayView<const int8_t> weights,
int output_size) {
if (output_size == 1) {
return GetScaledParams(weights);
}
// Transpose, scale and cast.
const int input_size = rtc::CheckedDivExact(
rtc::dchecked_cast<int>(weights.size()), output_size);
std::vector<float> w(weights.size());
for (int o = 0; o < output_size; ++o) {
for (int i = 0; i < input_size; ++i) {
w[o * input_size + i] = rnnoise::kWeightsScale *
static_cast<float>(weights[i * output_size + o]);
}
}
return w;
}
constexpr int kNumGruGates = 3; // Update, reset, output.
// TODO(bugs.chromium.org/10480): Hard-coded optimized layout and remove this
@ -202,106 +171,8 @@ void ComputeGruLayerOutput(int input_size,
}
}
// Fully connected layer un-optimized implementation.
void ComputeFullyConnectedLayerOutput(
int input_size,
int output_size,
rtc::ArrayView<const float> input,
rtc::ArrayView<const float> bias,
rtc::ArrayView<const float> weights,
rtc::FunctionView<float(float)> activation_function,
rtc::ArrayView<float> output) {
RTC_DCHECK_EQ(input.size(), input_size);
RTC_DCHECK_EQ(bias.size(), output_size);
RTC_DCHECK_EQ(weights.size(), input_size * output_size);
for (int o = 0; o < output_size; ++o) {
output[o] = bias[o];
// TODO(bugs.chromium.org/9076): Benchmark how different layouts for
// |weights_| change the performance across different platforms.
for (int i = 0; i < input_size; ++i) {
output[o] += input[i] * weights[o * input_size + i];
}
output[o] = activation_function(output[o]);
}
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Fully connected layer SSE2 implementation.
void ComputeFullyConnectedLayerOutputSse2(
int input_size,
int output_size,
rtc::ArrayView<const float> input,
rtc::ArrayView<const float> bias,
rtc::ArrayView<const float> weights,
rtc::FunctionView<float(float)> activation_function,
rtc::ArrayView<float> output) {
RTC_DCHECK_EQ(input.size(), input_size);
RTC_DCHECK_EQ(bias.size(), output_size);
RTC_DCHECK_EQ(weights.size(), input_size * output_size);
const int input_size_by_4 = input_size >> 2;
const int offset = input_size & ~3;
__m128 sum_wx_128;
const float* v = reinterpret_cast<const float*>(&sum_wx_128);
for (int o = 0; o < output_size; ++o) {
// Perform 128 bit vector operations.
sum_wx_128 = _mm_set1_ps(0);
const float* x_p = input.data();
const float* w_p = weights.data() + o * input_size;
for (int i = 0; i < input_size_by_4; ++i, x_p += 4, w_p += 4) {
sum_wx_128 = _mm_add_ps(sum_wx_128,
_mm_mul_ps(_mm_loadu_ps(x_p), _mm_loadu_ps(w_p)));
}
// Perform non-vector operations for any remaining items, sum up bias term
// and results from the vectorized code, and apply the activation function.
output[o] = activation_function(
std::inner_product(input.begin() + offset, input.end(),
weights.begin() + o * input_size + offset,
bias[o] + v[0] + v[1] + v[2] + v[3]));
}
}
#endif
} // namespace
FullyConnectedLayer::FullyConnectedLayer(
const int input_size,
const int output_size,
const rtc::ArrayView<const int8_t> bias,
const rtc::ArrayView<const int8_t> weights,
rtc::FunctionView<float(float)> activation_function,
const AvailableCpuFeatures& cpu_features)
: input_size_(input_size),
output_size_(output_size),
bias_(GetScaledParams(bias)),
weights_(GetPreprocessedFcWeights(weights, output_size)),
activation_function_(activation_function),
cpu_features_(cpu_features) {
RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits)
<< "Static over-allocation of fully-connected layers output vectors is "
"not sufficient.";
RTC_DCHECK_EQ(output_size_, bias_.size())
<< "Mismatching output size and bias terms array size.";
RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size())
<< "Mismatching input-output size and weight coefficients array size.";
}
FullyConnectedLayer::~FullyConnectedLayer() = default;
void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
// TODO(bugs.chromium.org/10480): Add AVX2.
if (cpu_features_.sse2) {
ComputeFullyConnectedLayerOutputSse2(input_size_, output_size_, input,
bias_, weights_, activation_function_,
output_);
return;
}
#endif
// TODO(bugs.chromium.org/10480): Add Neon.
ComputeFullyConnectedLayerOutput(input_size_, output_size_, input, bias_,
weights_, activation_function_, output_);
}
GatedRecurrentLayer::GatedRecurrentLayer(
const int input_size,
const int output_size,
@ -346,8 +217,9 @@ RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
kInputLayerOutputSize,
kInputDenseBias,
kInputDenseWeights,
TansigApproximated,
cpu_features),
ActivationFunction::kTansigApproximated,
cpu_features,
/*layer_name=*/"FC1"),
hidden_(kInputLayerOutputSize,
kHiddenLayerOutputSize,
kHiddenGruBias,
@ -357,8 +229,9 @@ RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
kOutputLayerOutputSize,
kOutputDenseBias,
kOutputDenseWeights,
SigmoidApproximated,
cpu_features) {
ActivationFunction::kSigmoidApproximated,
cpu_features,
/*layer_name=*/"FC2") {
// Input-output chaining size checks.
RTC_DCHECK_EQ(input_.size(), hidden_.input_size())
<< "The input and the hidden layers sizes do not match.";

View File

@ -21,54 +21,15 @@
#include "api/function_view.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/agc2/rnn_vad/common.h"
#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace rnn_vad {
// Maximum number of units for an FC layer.
constexpr int kFullyConnectedLayerMaxUnits = 24;
// Maximum number of units for a GRU layer.
constexpr int kGruLayerMaxUnits = 24;
// Fully-connected layer with a custom activation function which owns the output
// buffer.
class FullyConnectedLayer {
public:
// Ctor. `output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`.
FullyConnectedLayer(int input_size,
int output_size,
rtc::ArrayView<const int8_t> bias,
rtc::ArrayView<const int8_t> weights,
rtc::FunctionView<float(float)> activation_function,
const AvailableCpuFeatures& cpu_features);
FullyConnectedLayer(const FullyConnectedLayer&) = delete;
FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete;
~FullyConnectedLayer();
// Returns the size of the input vector.
int input_size() const { return input_size_; }
// Returns the pointer to the first element of the output buffer.
const float* data() const { return output_.data(); }
// Returns the size of the output buffer.
int size() const { return output_size_; }
// Computes the fully-connected layer output.
void ComputeOutput(rtc::ArrayView<const float> input);
private:
const int input_size_;
const int output_size_;
const std::vector<float> bias_;
const std::vector<float> weights_;
rtc::FunctionView<float(float)> activation_function_;
// The output vector of a recurrent layer has length equal to |output_size_|.
// However, for efficiency, over-allocation is used.
std::array<float, kFullyConnectedLayerMaxUnits> output_;
const AvailableCpuFeatures cpu_features_;
};
// Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as
// activation functions for the update/reset and output gates respectively. It
// owns the output buffer.

View File

@ -0,0 +1,151 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <numeric>
#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "third_party/rnnoise/src/rnn_activations.h"
#include "third_party/rnnoise/src/rnn_vad_weights.h"
namespace webrtc {
namespace rnn_vad {
namespace {
// Converts the quantized 8-bit `params` to floats, multiplying each item by
// `::rnnoise::kWeightsScale`. The returned vector has as many items as
// `params`, in the same order.
std::vector<float> GetScaledParams(rtc::ArrayView<const int8_t> params) {
  std::vector<float> result;
  result.reserve(params.size());
  for (const int8_t quantized : params) {
    result.push_back(::rnnoise::kWeightsScale * static_cast<float>(quantized));
  }
  return result;
}
// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this
// function to improve setup time.
// Returns the quantized `weights` as floats scaled by
// `::rnnoise::kWeightsScale`. The source item at `i * output_size + o` is
// written at `o * input_size + i`, i.e., the matrix is transposed so that the
// weights of each output unit are contiguous. `weights.size()` must be a
// multiple of `output_size`.
std::vector<float> PreprocessWeights(rtc::ArrayView<const int8_t> weights,
                                     int output_size) {
  if (output_size == 1) {
    // With a single output unit the two layouts coincide; scaling is enough.
    return GetScaledParams(weights);
  }
  const int num_weights = rtc::dchecked_cast<int>(weights.size());
  const int input_size = rtc::CheckedDivExact(num_weights, output_size);
  std::vector<float> transposed(weights.size());
  // `dst` walks the destination sequentially, which is equivalent to indexing
  // it with `out * input_size + in`.
  int dst = 0;
  for (int out = 0; out < output_size; ++out) {
    for (int in = 0; in < input_size; ++in, ++dst) {
      const int8_t quantized = weights[in * output_size + out];
      transposed[dst] =
          ::rnnoise::kWeightsScale * static_cast<float>(quantized);
    }
  }
  return transposed;
}
// Maps `activation_function` to the rnnoise function that implements it.
// Crashes if `activation_function` holds a value that does not correspond to
// any `ActivationFunction` enumerator.
rtc::FunctionView<float(float)> GetActivationFunction(
    ActivationFunction activation_function) {
  // No `default` label on purpose: this lets the compiler flag an enumerator
  // that is added to `ActivationFunction` but not handled here.
  switch (activation_function) {
    case ActivationFunction::kTansigApproximated:
      return ::rnnoise::TansigApproximated;
    case ActivationFunction::kSigmoidApproximated:
      return ::rnnoise::SigmoidApproximated;
  }
  // Only reachable with an out-of-range enum value. Fail loudly instead of
  // flowing off the end of a non-void function, which is undefined behavior.
  RTC_CHECK(false) << "Unknown activation function.";
  return ::rnnoise::TansigApproximated;
}
} // namespace
// Builds a fully connected layer with `input_size` inputs and `output_size`
// units. `bias` and `weights` are quantized (int8) parameters: both are
// converted to scaled floats, and `weights` additionally goes through
// `PreprocessWeights()`, which re-arranges the layout when `output_size` is
// greater than one. `activation_function` is resolved once here to the
// function that `ComputeOutput()` applies to every unit. `layer_name` is only
// used to make the check failure messages below more informative.
FullyConnectedLayer::FullyConnectedLayer(
const int input_size,
const int output_size,
const rtc::ArrayView<const int8_t> bias,
const rtc::ArrayView<const int8_t> weights,
ActivationFunction activation_function,
const AvailableCpuFeatures& cpu_features,
absl::string_view layer_name)
// The initializers are listed in the same order in which the members are
// declared in rnn_fc.h. `output_` is not listed, hence its content is not
// initialized here.
: input_size_(input_size),
output_size_(output_size),
bias_(GetScaledParams(bias)),
weights_(PreprocessWeights(weights, output_size)),
cpu_features_(cpu_features),
activation_function_(GetActivationFunction(activation_function)) {
// `output_` has a fixed capacity of `kFullyConnectedLayerMaxUnits` items.
RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits)
<< "Insufficient FC layer over-allocation (" << layer_name << ").";
// One bias term per output unit.
RTC_DCHECK_EQ(output_size_, bias_.size())
<< "Mismatching output size and bias terms array size (" << layer_name
<< ").";
// One weight for each (input, output unit) pair.
RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size())
<< "Mismatching input-output size and weight coefficients array size ("
<< layer_name << ").";
}
// Nothing to release explicitly: the members clean up after themselves.
FullyConnectedLayer::~FullyConnectedLayer() = default;
// Computes, for each output unit, the activation function applied to the
// bias plus the dot product between `input` and the weights of that unit. The
// result is written into the first `output_size_` items of `output_`.
// `input` must have `input_size_` items.
void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
  RTC_DCHECK_EQ(input.size(), input_size_);
#if defined(WEBRTC_ARCH_X86_FAMILY)
  // TODO(bugs.chromium.org/10480): Add AVX2.
  if (cpu_features_.sse2) {
    ComputeOutputSse2(input);
    return;
  }
#endif
  // TODO(bugs.chromium.org/10480): Add Neon.
  // Un-optimized implementation.
  // TODO(bugs.chromium.org/9076): Benchmark how different layouts for
  // |weights_| change the performance across different platforms.
  // `weights_row` points to the `input_size_` weights of the current unit.
  const float* weights_row = weights_.data();
  for (int unit = 0; unit < output_size_; ++unit, weights_row += input_size_) {
    // Accumulate directly in the output buffer.
    float& activation = output_[unit];
    activation = bias_[unit];
    for (int k = 0; k < input_size_; ++k) {
      activation += input[k] * weights_row[k];
    }
    activation = activation_function_(activation);
  }
}
#if defined(WEBRTC_ARCH_X86_FAMILY)
// SSE2 implementation of `ComputeOutput()`. For each output unit, the dot
// product between `input` and the weights of that unit is accumulated four
// items at a time; the remaining `input_size_ % 4` items are handled with
// scalar code. `input` must have `input_size_` items.
void FullyConnectedLayer::ComputeOutputSse2(rtc::ArrayView<const float> input) {
  RTC_DCHECK_EQ(input.size(), input_size_);
  // Number of full groups of 4 floats and index of the first left-over item.
  const int input_size_by_4 = input_size_ >> 2;
  const int offset = input_size_ & ~3;
  for (int o = 0; o < output_size_; ++o) {
    // Perform 128 bit vector operations.
    __m128 sum_wx_128 = _mm_setzero_ps();
    // The weights of unit `o` are stored contiguously; use `input_size_` as
    // stride, consistently with the trip count and `offset` computed above.
    const float* const w_row = weights_.data() + o * input_size_;
    const float* x_p = input.data();
    const float* w_p = w_row;
    for (int i = 0; i < input_size_by_4; ++i, x_p += 4, w_p += 4) {
      sum_wx_128 = _mm_add_ps(sum_wx_128,
                              _mm_mul_ps(_mm_loadu_ps(x_p), _mm_loadu_ps(w_p)));
    }
    // Copy the four partial sums into a plain float array instead of reading
    // the `__m128` object through a reinterpret_cast'ed `float*`.
    float v[4];
    _mm_storeu_ps(v, sum_wx_128);
    // Perform non-vector operations for any remaining items, sum up bias term
    // and results from the vectorized code, and apply the activation function.
    output_[o] = activation_function_(
        std::inner_product(input.begin() + offset, input.end(), w_row + offset,
                           bias_[o] + v[0] + v[1] + v[2] + v[3]));
  }
}
#endif // defined(WEBRTC_ARCH_X86_FAMILY)
} // namespace rnn_vad
} // namespace webrtc

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_
#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_
#include <array>
#include <vector>
#include "absl/strings/string_view.h"
#include "api/array_view.h"
#include "api/function_view.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace rnn_vad {
// Activation function for a neural network cell. Each enumerator selects the
// rnnoise function with the same name (`TansigApproximated`,
// `SigmoidApproximated`); see `GetActivationFunction()` in rnn_fc.cc.
enum class ActivationFunction { kTansigApproximated, kSigmoidApproximated };
// Maximum number of units for an FC layer. It is the capacity of the output
// buffer statically allocated by `FullyConnectedLayer`.
constexpr int kFullyConnectedLayerMaxUnits = 24;
// Fully-connected layer with a custom activation function which owns the output
// buffer. The parameters are given as quantized (int8) values and are converted
// to floats at construction time. The class is neither copyable nor
// copy-assignable.
class FullyConnectedLayer {
public:
// Ctor. `output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`.
// `bias` must have `output_size` items and `weights` must have
// `input_size * output_size` items. `cpu_features` selects the implementation
// used by `ComputeOutput()`. `layer_name` is only used in the messages of the
// checks performed by the ctor.
FullyConnectedLayer(int input_size,
int output_size,
rtc::ArrayView<const int8_t> bias,
rtc::ArrayView<const int8_t> weights,
ActivationFunction activation_function,
const AvailableCpuFeatures& cpu_features,
absl::string_view layer_name);
FullyConnectedLayer(const FullyConnectedLayer&) = delete;
FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete;
~FullyConnectedLayer();
// Returns the size of the input vector.
int input_size() const { return input_size_; }
// Returns the pointer to the first element of the output buffer. The buffer
// is filled by `ComputeOutput()`; the ctor does not initialize it.
const float* data() const { return output_.data(); }
// Returns the number of output units, i.e., the number of valid items
// starting at `data()` after `ComputeOutput()` has been called.
int size() const { return output_size_; }
// Computes the fully-connected layer output. `input` must have `input_size()`
// items. The result can be read via `data()` and `size()`.
void ComputeOutput(rtc::ArrayView<const float> input);
private:
#if defined(WEBRTC_ARCH_X86_FAMILY)
// SSE2 implementation, called by `ComputeOutput()` when
// `cpu_features_.sse2` is set.
void ComputeOutputSse2(rtc::ArrayView<const float> input);
#endif
// NOTE: The ctor initializes the members below in declaration order.
const int input_size_;
const int output_size_;
// Bias terms converted to scaled floats; one per output unit.
const std::vector<float> bias_;
// Weights converted to scaled floats; the item for input `i` and output unit
// `o` is at `o * input_size_ + i`.
const std::vector<float> weights_;
const AvailableCpuFeatures cpu_features_;
// Function applied to each output unit after the weighted sum.
rtc::FunctionView<float(float)> activation_function_;
// Output buffer. It is over-allocated: its capacity is
// `kFullyConnectedLayerMaxUnits`, but only the first `output_size_` items are
// written by `ComputeOutput()`.
std::array<float, kFullyConnectedLayerMaxUnits> output_;
};
} // namespace rnn_vad
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_

View File

@ -0,0 +1,109 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h"
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
#include "modules/audio_processing/test/performance_timer.h"
#include "rtc_base/logging.h"
#include "rtc_base/system/arch.h"
#include "test/gtest.h"
#include "third_party/rnnoise/src/rnn_vad_weights.h"
namespace webrtc {
namespace rnn_vad {
namespace test {
namespace {
using ::rnnoise::kInputDenseBias;
using ::rnnoise::kInputDenseWeights;
using ::rnnoise::kInputLayerInputSize;
using ::rnnoise::kInputLayerOutputSize;
// Fully connected layer test data.
constexpr std::array<float, 42> kFullyConnectedInputVector = {
-1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f,
-0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f,
-0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f,
-0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f,
1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f,
-0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f,
0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f};
constexpr std::array<float, 24> kFullyConnectedExpectedOutput = {
-0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f,
-0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f,
0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f,
0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f};
// Test fixture parametrized on the CPU features made available to the layer
// under test, so that the same tests run on each implementation.
class RnnParametrization
: public ::testing::TestWithParam<AvailableCpuFeatures> {};
// Feeds the test input vector to a fully connected layer built with the
// rnnoise input layer parameters and verifies that each output item is within
// an absolute tolerance of the expected one.
TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) {
  const AvailableCpuFeatures cpu_features = GetParam();
  FullyConnectedLayer layer(kInputLayerInputSize, kInputLayerOutputSize,
                            kInputDenseBias, kInputDenseWeights,
                            ActivationFunction::kTansigApproximated,
                            cpu_features,
                            /*layer_name=*/"FC");
  layer.ComputeOutput(kFullyConnectedInputVector);
  ExpectNearAbsolute(kFullyConnectedExpectedOutput, layer, 1e-5f);
}
// Benchmark for `FullyConnectedLayer::ComputeOutput()`. Not run by default
// because of the `DISABLED_` prefix. It makes no assertions; the measured
// average and standard deviation are only logged.
TEST_P(RnnParametrization, DISABLED_BenchmarkFullyConnectedLayer) {
const AvailableCpuFeatures cpu_features = GetParam();
FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize,
kInputDenseBias, kInputDenseWeights,
ActivationFunction::kTansigApproximated, cpu_features,
/*layer_name=*/"FC");
constexpr int kNumTests = 10000;
::webrtc::test::PerformanceTimer perf_timer(kNumTests);
// Each call is timed individually so that only `ComputeOutput()` is measured.
for (int k = 0; k < kNumTests; ++k) {
perf_timer.StartTimer();
fc.ComputeOutput(kFullyConnectedInputVector);
perf_timer.StopTimer();
}
// NOTE(review): presumably `PerformanceTimer` reports microseconds, hence the
// division by 1000 to log milliseconds - confirm.
RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | "
<< (perf_timer.GetDurationAverage() / 1000) << " +/- "
<< (perf_timer.GetDurationStandardDeviation() / 1000)
<< " ms";
}
// Finds the relevant CPU features combinations to test.
std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
std::vector<AvailableCpuFeatures> v;
v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false});
AvailableCpuFeatures available = GetAvailableCpuFeatures();
if (available.sse2) {
AvailableCpuFeatures features(
{/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
v.push_back(features);
}
return v;
}
// Instantiates the `RnnParametrization` tests once for each CPU features
// combination returned by `GetCpuFeaturesToTest()`. The last argument names
// each instance after the string representation of its parameter.
INSTANTIATE_TEST_SUITE_P(
RnnVadTest,
RnnParametrization,
::testing::ValuesIn(GetCpuFeaturesToTest()),
[](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
return info.param.ToString();
});
} // namespace
} // namespace test
} // namespace rnn_vad
} // namespace webrtc

View File

@ -20,9 +20,7 @@
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "rtc_base/system/arch.h"
#include "test/gtest.h"
#include "third_party/rnnoise/src/rnn_activations.h"
#include "third_party/rnnoise/src/rnn_vad_weights.h"
namespace webrtc {
@ -67,21 +65,6 @@ void TestGatedRecurrentLayer(
}
}
// Fully connected layer test data.
constexpr std::array<float, 42> kFullyConnectedInputVector = {
-1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f,
-0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f,
-0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f,
-0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f,
1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f,
-0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f,
0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f};
constexpr std::array<float, 24> kFullyConnectedExpectedOutput = {
-0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f,
-0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f,
0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f,
0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f};
// Gated recurrent units layer test data.
constexpr int kGruInputSize = 5;
constexpr int kGruOutputSize = 4;
@ -170,61 +153,6 @@ TEST(RnnVadTest, DISABLED_BenchmarkGatedRecurrentLayer) {
<< " ms";
}
class RnnParametrization
: public ::testing::TestWithParam<AvailableCpuFeatures> {};
// Checks that the output of a fully connected layer is within tolerance given
// test input data.
TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) {
FullyConnectedLayer fc(
rnnoise::kInputLayerInputSize, rnnoise::kInputLayerOutputSize,
rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
rnnoise::TansigApproximated, /*cpu_features=*/GetParam());
fc.ComputeOutput(kFullyConnectedInputVector);
ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f);
}
TEST_P(RnnParametrization, DISABLED_BenchmarkFullyConnectedLayer) {
const AvailableCpuFeatures cpu_features = GetParam();
FullyConnectedLayer fc(rnnoise::kInputLayerInputSize,
rnnoise::kInputLayerOutputSize,
rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
rnnoise::TansigApproximated, cpu_features);
constexpr int kNumTests = 10000;
::webrtc::test::PerformanceTimer perf_timer(kNumTests);
for (int k = 0; k < kNumTests; ++k) {
perf_timer.StartTimer();
fc.ComputeOutput(kFullyConnectedInputVector);
perf_timer.StopTimer();
}
RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | "
<< (perf_timer.GetDurationAverage() / 1000) << " +/- "
<< (perf_timer.GetDurationStandardDeviation() / 1000)
<< " ms";
}
// Finds the relevant CPU features combinations to test.
std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
std::vector<AvailableCpuFeatures> v;
v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false});
AvailableCpuFeatures available = GetAvailableCpuFeatures();
if (available.sse2) {
AvailableCpuFeatures features(
{/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
v.push_back(features);
}
return v;
}
INSTANTIATE_TEST_SUITE_P(
RnnVadTest,
RnnParametrization,
::testing::ValuesIn(GetCpuFeaturesToTest()),
[](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
return info.param.ToString();
});
// Checks that the speech probability is zero with silence.
TEST(RnnVadTest, CheckZeroProbabilityWithSilence) {
RnnVad rnn_vad(GetAvailableCpuFeatures());