RNN VAD: FC and GRU layers implicit conversion to ArrayView

Plus a few minor code readability improvements.
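
For readers unfamiliar with the pattern: rtc::ArrayView has a templated
constructor that accepts any object exposing data() and size() accessors, so a
layer that provides those two methods converts implicitly to
rtc::ArrayView<const float>, and explicit GetOutput() getters become
redundant. A minimal sketch of the mechanism (FakeLayer, SumView and Example
are hypothetical, not part of this change):

#include "api/array_view.h"

// Hypothetical stand-in for a layer: it only needs data() and size() for
// rtc::ArrayView's templated constructor to accept it.
class FakeLayer {
 public:
  const float* data() const { return buffer_; }
  int size() const { return 2; }

 private:
  const float buffer_[2] = {0.5f, 0.25f};
};

float SumView(rtc::ArrayView<const float> view) {
  float sum = 0.f;
  for (float v : view) {
    sum += v;
  }
  return sum;
}

void Example() {
  FakeLayer layer;
  // The layer converts implicitly; no GetOutput() call is needed.
  float sum = SumView(layer);  // sum == 0.75f.
  (void)sum;
}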

Bug: webrtc:10480
Change-Id: I590d8e203b1d05959a8c15373841e37abe83237e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/195334
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32764}
Alessio Bazzica authored on 2020-12-03 16:54:38 +01:00, committed by Commit Bot
parent 40c3ea5c71, commit 812dc072c6
9 changed files with 98 additions and 103 deletions


@@ -84,6 +84,7 @@ rtc_source_set("vector_math") {
     "..:cpu_features",
     "../../../../api:array_view",
     "../../../../rtc_base:checks",
+    "../../../../rtc_base:safe_conversions",
     "../../../../rtc_base/system:arch",
   ]
 }
@@ -103,6 +104,7 @@ if (current_cpu == "x86" || current_cpu == "x64") {
       ":vector_math",
       "../../../../api:array_view",
       "../../../../rtc_base:checks",
+      "../../../../rtc_base:safe_conversions",
     ]
   }
 }


@@ -40,21 +40,18 @@ static_assert(kFeatureVectorSize == kInputLayerInputSize, "");
 using rnnoise::kInputDenseBias;
 using rnnoise::kInputDenseWeights;
 using rnnoise::kInputLayerOutputSize;
-static_assert(kInputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
-              "Increase kFullyConnectedLayersMaxUnits.");
+static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
 using rnnoise::kHiddenGruBias;
 using rnnoise::kHiddenGruRecurrentWeights;
 using rnnoise::kHiddenGruWeights;
 using rnnoise::kHiddenLayerOutputSize;
-static_assert(kHiddenLayerOutputSize <= kRecurrentLayersMaxUnits,
-              "Increase kRecurrentLayersMaxUnits.");
+static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, "");
 using rnnoise::kOutputDenseBias;
 using rnnoise::kOutputDenseWeights;
 using rnnoise::kOutputLayerOutputSize;
-static_assert(kOutputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
-              "Increase kFullyConnectedLayersMaxUnits.");
+static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
 using rnnoise::SigmoidApproximated;
 using rnnoise::TansigApproximated;
@@ -178,21 +175,21 @@ void ComputeGruLayerOutput(int input_size,
   const int stride_out = output_size * output_size;
   // Update gate.
-  std::array<float, kRecurrentLayersMaxUnits> update;
+  std::array<float, kGruLayerMaxUnits> update;
   ComputeGruUpdateResetGates(
       input_size, output_size, weights.subview(0, stride_in),
       recurrent_weights.subview(0, stride_out), bias.subview(0, output_size),
       input, state, update);
   // Reset gate.
-  std::array<float, kRecurrentLayersMaxUnits> reset;
+  std::array<float, kGruLayerMaxUnits> reset;
   ComputeGruUpdateResetGates(
       input_size, output_size, weights.subview(stride_in, stride_in),
       recurrent_weights.subview(stride_out, stride_out),
       bias.subview(output_size, output_size), input, state, reset);
   // Output gate.
-  std::array<float, kRecurrentLayersMaxUnits> output;
+  std::array<float, kGruLayerMaxUnits> output;
   ComputeGruOutputGate(
       input_size, output_size, weights.subview(2 * stride_in, stride_in),
       recurrent_weights.subview(2 * stride_out, stride_out),
@@ -279,7 +276,7 @@ FullyConnectedLayer::FullyConnectedLayer(
       weights_(GetPreprocessedFcWeights(weights, output_size)),
       activation_function_(activation_function),
       cpu_features_(cpu_features) {
-  RTC_DCHECK_LE(output_size_, kFullyConnectedLayersMaxUnits)
+  RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits)
       << "Static over-allocation of fully-connected layers output vectors is "
          "not sufficient.";
   RTC_DCHECK_EQ(output_size_, bias_.size())
@@ -290,10 +287,6 @@ FullyConnectedLayer::FullyConnectedLayer(
 FullyConnectedLayer::~FullyConnectedLayer() = default;
-rtc::ArrayView<const float> FullyConnectedLayer::GetOutput() const {
-  return rtc::ArrayView<const float>(output_.data(), output_size_);
-}
 void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
 #if defined(WEBRTC_ARCH_X86_FAMILY)
   // TODO(bugs.chromium.org/10480): Add AVX2.
@@ -321,7 +314,7 @@ GatedRecurrentLayer::GatedRecurrentLayer(
       weights_(GetPreprocessedGruTensor(weights, output_size)),
       recurrent_weights_(
           GetPreprocessedGruTensor(recurrent_weights, output_size)) {
-  RTC_DCHECK_LE(output_size_, kRecurrentLayersMaxUnits)
+  RTC_DCHECK_LE(output_size_, kGruLayerMaxUnits)
       << "Static over-allocation of recurrent layers state vectors is not "
          "sufficient.";
   RTC_DCHECK_EQ(kNumGruGates * output_size_, bias_.size())
@@ -337,10 +330,6 @@ GatedRecurrentLayer::GatedRecurrentLayer(
 GatedRecurrentLayer::~GatedRecurrentLayer() = default;
-rtc::ArrayView<const float> GatedRecurrentLayer::GetOutput() const {
-  return rtc::ArrayView<const float>(state_.data(), output_size_);
-}
 void GatedRecurrentLayer::Reset() {
   state_.fill(0.f);
 }
@@ -352,49 +341,49 @@ void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
       recurrent_weights_, bias_, state_);
 }
-RnnBasedVad::RnnBasedVad(const AvailableCpuFeatures& cpu_features)
-    : input_layer_(kInputLayerInputSize,
+RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
+    : input_(kInputLayerInputSize,
              kInputLayerOutputSize,
             kInputDenseBias,
             kInputDenseWeights,
             TansigApproximated,
             cpu_features),
-      hidden_layer_(kInputLayerOutputSize,
+      hidden_(kInputLayerOutputSize,
              kHiddenLayerOutputSize,
              kHiddenGruBias,
              kHiddenGruWeights,
              kHiddenGruRecurrentWeights),
-      output_layer_(kHiddenLayerOutputSize,
+      output_(kHiddenLayerOutputSize,
              kOutputLayerOutputSize,
              kOutputDenseBias,
             kOutputDenseWeights,
             SigmoidApproximated,
             cpu_features) {
   // Input-output chaining size checks.
-  RTC_DCHECK_EQ(input_layer_.output_size(), hidden_layer_.input_size())
+  RTC_DCHECK_EQ(input_.size(), hidden_.input_size())
       << "The input and the hidden layers sizes do not match.";
-  RTC_DCHECK_EQ(hidden_layer_.output_size(), output_layer_.input_size())
+  RTC_DCHECK_EQ(hidden_.size(), output_.input_size())
       << "The hidden and the output layers sizes do not match.";
 }
-RnnBasedVad::~RnnBasedVad() = default;
+RnnVad::~RnnVad() = default;
-void RnnBasedVad::Reset() {
-  hidden_layer_.Reset();
+void RnnVad::Reset() {
+  hidden_.Reset();
 }
-float RnnBasedVad::ComputeVadProbability(
+float RnnVad::ComputeVadProbability(
     rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
     bool is_silence) {
   if (is_silence) {
     Reset();
     return 0.f;
   }
-  input_layer_.ComputeOutput(feature_vector);
-  hidden_layer_.ComputeOutput(input_layer_.GetOutput());
-  output_layer_.ComputeOutput(hidden_layer_.GetOutput());
-  const auto vad_output = output_layer_.GetOutput();
-  return vad_output[0];
+  input_.ComputeOutput(feature_vector);
+  hidden_.ComputeOutput(input_);
+  output_.ComputeOutput(hidden_);
+  RTC_DCHECK_EQ(output_.size(), 1);
+  return output_.data()[0];
 }
 }  // namespace rnn_vad


@@ -26,21 +26,17 @@
 namespace webrtc {
 namespace rnn_vad {
-// Maximum number of units for a fully-connected layer. This value is used to
-// over-allocate space for fully-connected layers output vectors (implemented as
-// std::array). The value should equal the number of units of the largest
-// fully-connected layer.
-constexpr int kFullyConnectedLayersMaxUnits = 24;
+// Maximum number of units for an FC layer.
+constexpr int kFullyConnectedLayerMaxUnits = 24;
-// Maximum number of units for a recurrent layer. This value is used to
-// over-allocate space for recurrent layers state vectors (implemented as
-// std::array). The value should equal the number of units of the largest
-// recurrent layer.
-constexpr int kRecurrentLayersMaxUnits = 24;
+// Maximum number of units for a GRU layer.
+constexpr int kGruLayerMaxUnits = 24;
-// Fully-connected layer.
+// Fully-connected layer with a custom activation function which owns the output
+// buffer.
 class FullyConnectedLayer {
  public:
+  // Ctor. `output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`.
   FullyConnectedLayer(int input_size,
                       int output_size,
                       rtc::ArrayView<const int8_t> bias,
@@ -50,9 +46,14 @@ class FullyConnectedLayer {
   FullyConnectedLayer(const FullyConnectedLayer&) = delete;
   FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete;
   ~FullyConnectedLayer();
+  // Returns the size of the input vector.
   int input_size() const { return input_size_; }
-  int output_size() const { return output_size_; }
-  rtc::ArrayView<const float> GetOutput() const;
+  // Returns the pointer to the first element of the output buffer.
+  const float* data() const { return output_.data(); }
+  // Returns the size of the output buffer.
+  int size() const { return output_size_; }
   // Computes the fully-connected layer output.
   void ComputeOutput(rtc::ArrayView<const float> input);
@@ -64,14 +65,16 @@ class FullyConnectedLayer {
   rtc::FunctionView<float(float)> activation_function_;
   // The output vector of a recurrent layer has length equal to |output_size_|.
   // However, for efficiency, over-allocation is used.
-  std::array<float, kFullyConnectedLayersMaxUnits> output_;
+  std::array<float, kFullyConnectedLayerMaxUnits> output_;
   const AvailableCpuFeatures cpu_features_;
 };
 // Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as
-// activation functions for the update/reset and output gates respectively.
+// activation functions for the update/reset and output gates respectively. It
+// owns the output buffer.
 class GatedRecurrentLayer {
  public:
+  // Ctor. `output_size` cannot be greater than `kGruLayerMaxUnits`.
   GatedRecurrentLayer(int input_size,
                       int output_size,
                       rtc::ArrayView<const int8_t> bias,
@@ -80,9 +83,15 @@ class GatedRecurrentLayer {
   GatedRecurrentLayer(const GatedRecurrentLayer&) = delete;
   GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete;
   ~GatedRecurrentLayer();
+  // Returns the size of the input vector.
   int input_size() const { return input_size_; }
-  int output_size() const { return output_size_; }
-  rtc::ArrayView<const float> GetOutput() const;
+  // Returns the pointer to the first element of the output buffer.
+  const float* data() const { return state_.data(); }
+  // Returns the size of the output buffer.
+  int size() const { return output_size_; }
+  // Resets the GRU state.
   void Reset();
   // Computes the recurrent layer output and updates the status.
   void ComputeOutput(rtc::ArrayView<const float> input);
@@ -95,26 +104,28 @@ class GatedRecurrentLayer {
   const std::vector<float> recurrent_weights_;
   // The state vector of a recurrent layer has length equal to |output_size_|.
   // However, to avoid dynamic allocation, over-allocation is used.
-  std::array<float, kRecurrentLayersMaxUnits> state_;
+  std::array<float, kGruLayerMaxUnits> state_;
 };
-// Recurrent network based VAD.
-class RnnBasedVad {
+// Recurrent network with hard-coded architecture and weights for voice activity
+// detection.
+class RnnVad {
  public:
-  explicit RnnBasedVad(const AvailableCpuFeatures& cpu_features);
-  RnnBasedVad(const RnnBasedVad&) = delete;
-  RnnBasedVad& operator=(const RnnBasedVad&) = delete;
-  ~RnnBasedVad();
+  explicit RnnVad(const AvailableCpuFeatures& cpu_features);
+  RnnVad(const RnnVad&) = delete;
+  RnnVad& operator=(const RnnVad&) = delete;
+  ~RnnVad();
   void Reset();
-  // Compute and returns the probability of voice (range: [0.0, 1.0]).
+  // Observes `feature_vector` and `is_silence`, updates the RNN and returns the
+  // current voice probability.
   float ComputeVadProbability(
       rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
       bool is_silence);
  private:
-  FullyConnectedLayer input_layer_;
-  GatedRecurrentLayer hidden_layer_;
-  FullyConnectedLayer output_layer_;
+  FullyConnectedLayer input_;
+  GatedRecurrentLayer hidden_;
+  FullyConnectedLayer output_;
 };
 }  // namespace rnn_vad


@@ -39,30 +39,20 @@ constexpr std::array<float, kFeatureVectorSize> kFeatures = {
     -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f,
     0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f};
-void WarmUpRnnVad(RnnBasedVad& rnn_vad) {
+void WarmUpRnnVad(RnnVad& rnn_vad) {
   for (int i = 0; i < 10; ++i) {
     rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false);
   }
 }
-void TestFullyConnectedLayer(FullyConnectedLayer* fc,
-                             rtc::ArrayView<const float> input_vector,
-                             rtc::ArrayView<const float> expected_output) {
-  RTC_CHECK(fc);
-  fc->ComputeOutput(input_vector);
-  ExpectNearAbsolute(expected_output, fc->GetOutput(), 1e-5f);
-}
 void TestGatedRecurrentLayer(
     GatedRecurrentLayer& gru,
     rtc::ArrayView<const float> input_sequence,
     rtc::ArrayView<const float> expected_output_sequence) {
-  auto gru_output_view = gru.GetOutput();
   const int input_sequence_length = rtc::CheckedDivExact(
       rtc::dchecked_cast<int>(input_sequence.size()), gru.input_size());
   const int output_sequence_length = rtc::CheckedDivExact(
-      rtc::dchecked_cast<int>(expected_output_sequence.size()),
-      gru.output_size());
+      rtc::dchecked_cast<int>(expected_output_sequence.size()), gru.size());
   ASSERT_EQ(input_sequence_length, output_sequence_length)
       << "The test data length is invalid.";
   // Feed the GRU layer and check the output at every step.
@@ -71,9 +61,9 @@ void TestGatedRecurrentLayer(
     SCOPED_TRACE(i);
     gru.ComputeOutput(
         input_sequence.subview(i * gru.input_size(), gru.input_size()));
-    const auto expected_output = expected_output_sequence.subview(
-        i * gru.output_size(), gru.output_size());
-    ExpectNearAbsolute(expected_output, gru_output_view, 3e-6f);
+    const auto expected_output =
+        expected_output_sequence.subview(i * gru.size(), gru.size());
+    ExpectNearAbsolute(expected_output, gru, 3e-6f);
   }
 }
@@ -190,8 +180,8 @@ TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) {
       rnnoise::kInputLayerInputSize, rnnoise::kInputLayerOutputSize,
       rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights,
       rnnoise::TansigApproximated, /*cpu_features=*/GetParam());
-  TestFullyConnectedLayer(&fc, kFullyConnectedInputVector,
-                          kFullyConnectedExpectedOutput);
+  fc.ComputeOutput(kFullyConnectedInputVector);
+  ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f);
 }
 TEST_P(RnnParametrization, DISABLED_BenchmarkFullyConnectedLayer) {
@@ -237,7 +227,7 @@ INSTANTIATE_TEST_SUITE_P(
 // Checks that the speech probability is zero with silence.
 TEST(RnnVadTest, CheckZeroProbabilityWithSilence) {
-  RnnBasedVad rnn_vad(GetAvailableCpuFeatures());
+  RnnVad rnn_vad(GetAvailableCpuFeatures());
   WarmUpRnnVad(rnn_vad);
   EXPECT_EQ(rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/true), 0.f);
 }
@@ -245,7 +235,7 @@ TEST(RnnVadTest, CheckZeroProbabilityWithSilence) {
 // Checks that the same output is produced after reset given the same input
 // sequence.
 TEST(RnnVadTest, CheckRnnVadReset) {
-  RnnBasedVad rnn_vad(GetAvailableCpuFeatures());
+  RnnVad rnn_vad(GetAvailableCpuFeatures());
   WarmUpRnnVad(rnn_vad);
   float pre = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false);
   rnn_vad.Reset();
@@ -257,7 +247,7 @@ TEST(RnnVadTest, CheckRnnVadReset) {
 // Checks that the same output is produced after silence is observed given the
 // same input sequence.
 TEST(RnnVadTest, CheckRnnVadSilence) {
-  RnnBasedVad rnn_vad(GetAvailableCpuFeatures());
+  RnnVad rnn_vad(GetAvailableCpuFeatures());
   WarmUpRnnVad(rnn_vad);
   float pre = rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/false);
   rnn_vad.ComputeVadProbability(kFeatures, /*is_silence=*/true);


@@ -67,7 +67,7 @@ int main(int argc, char* argv[]) {
   const AvailableCpuFeatures cpu_features = GetAvailableCpuFeatures();
   FeaturesExtractor features_extractor(cpu_features);
   std::array<float, kFeatureVectorSize> feature_vector;
-  RnnBasedVad rnn_vad(cpu_features);
+  RnnVad rnn_vad(cpu_features);
   // Compute VAD probabilities.
   while (true) {


@@ -68,7 +68,7 @@ TEST_P(RnnVadProbabilityParametrization, RnnVadProbabilityWithinTolerance) {
   PushSincResampler decimator(kFrameSize10ms48kHz, kFrameSize10ms24kHz);
   const AvailableCpuFeatures cpu_features = GetParam();
   FeaturesExtractor features_extractor(cpu_features);
-  RnnBasedVad rnn_vad(cpu_features);
+  RnnVad rnn_vad(cpu_features);
   // Init input samples and expected output readers.
   auto samples_reader = CreatePcmSamplesReader(kFrameSize10ms48kHz);
@@ -135,7 +135,7 @@ TEST_P(RnnVadProbabilityParametrization, DISABLED_RnnVadPerformance) {
   const AvailableCpuFeatures cpu_features = GetParam();
   FeaturesExtractor features_extractor(cpu_features);
   std::array<float, kFeatureVectorSize> feature_vector;
-  RnnBasedVad rnn_vad(cpu_features);
+  RnnVad rnn_vad(cpu_features);
   constexpr int number_of_tests = 100;
   ::webrtc::test::PerformanceTimer perf_timer(number_of_tests);
   for (int k = 0; k < number_of_tests; ++k) {


@@ -23,6 +23,7 @@
 #include "api/array_view.h"
 #include "modules/audio_processing/agc2/cpu_features.h"
 #include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
 #include "rtc_base/system/arch.h"
 namespace webrtc {
@@ -63,8 +64,8 @@ class VectorMath {
       accumulator = _mm_add_ps(accumulator, high);
       float dot_product = _mm_cvtss_f32(accumulator);
       // Add the result for the last block if incomplete.
-      for (int i = incomplete_block_index; static_cast<size_t>(i) < x.size();
-           ++i) {
+      for (int i = incomplete_block_index;
+           i < rtc::dchecked_cast<int>(x.size()); ++i) {
         dot_product += x[i] * y[i];
       }
       return dot_product;
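
Aside on the loop-bound change above (mirrored in the AVX2 variant below): the
old code cast the signed index up to size_t on every comparison, while the new
code converts x.size() to int via rtc::dchecked_cast, which DCHECKs that the
value fits in the destination type, so the comparison involves no implicit
signed/unsigned conversion. A small self-contained sketch of that idea (the
Sum helper and Example are hypothetical):

#include <vector>

#include "api/array_view.h"
#include "rtc_base/numerics/safe_conversions.h"

// Hypothetical helper: sums the tail of `x` starting at `first`, using a
// signed index against a checked signed bound, as in the patched loops.
float Sum(rtc::ArrayView<const float> x, int first) {
  float total = 0.f;
  // dchecked_cast<int> DCHECKs that x.size() fits in an int.
  for (int i = first; i < rtc::dchecked_cast<int>(x.size()); ++i) {
    total += x[i];
  }
  return total;
}

void Example() {
  const std::vector<float> values = {1.f, 2.f, 3.f, 4.f};
  float tail = Sum(values, /*first=*/2);  // 3.f + 4.f == 7.f.
  (void)tail;
}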


@@ -14,6 +14,7 @@
 #include "api/array_view.h"
 #include "rtc_base/checks.h"
+#include "rtc_base/numerics/safe_conversions.h"
 namespace webrtc {
 namespace rnn_vad {
@@ -43,7 +44,8 @@ float VectorMath::DotProductAvx2(rtc::ArrayView<const float> x,
   low = _mm_add_ss(high, low);
   float dot_product = _mm_cvtss_f32(low);
   // Add the result for the last block if incomplete.
-  for (int i = incomplete_block_index; static_cast<size_t>(i) < x.size(); ++i) {
+  for (int i = incomplete_block_index; i < rtc::dchecked_cast<int>(x.size());
+       ++i) {
     dot_product += x[i] * y[i];
   }
   return dot_product;


@@ -60,7 +60,7 @@ class Vad : public VoiceActivityDetector {
  private:
   PushResampler<float> resampler_;
   rnn_vad::FeaturesExtractor features_extractor_;
-  rnn_vad::RnnBasedVad rnn_vad_;
+  rnn_vad::RnnVad rnn_vad_;
 };
 // Returns an updated version of `p_old` by using instant decay and the given