Revert "RNN VAD: pitch search optimizations (part 3)"

This reverts commit ea89f2a447c514b73da2ed6189fe4b8485f123c6.

Reason for revert: bug in ancestor CL https://webrtc-review.googlesource.com/c/src/+/191320

Original change's description:
> RNN VAD: pitch search optimizations (part 3)
>
> `ComputeSlidingFrameSquareEnergies()` which computes the energy of a
> sliding 20 ms frame in the pitch buffer has been switched from backward
> to forward.
>
> The benchmark has shown a slight improvement (on average about +6x in the speed factor reported in the results below).
>
> This change is not bit exact but all the tolerance tests still pass
> except for a single case in `RnnVadTest.PitchSearchWithinTolerance`
> for which the tolerance has been slightly increased. Note that the pitch
> estimation is still bit-exact.
>
> Benchmarked as follows:
> ```
> out/release/modules_unittests \
>   --gtest_filter=*RnnVadTest.DISABLED_RnnVadPerformance* \
>   --gtest_also_run_disabled_tests --logs
> ```
>
> Results:
>
>       | baseline             | this CL
> ------+----------------------+------------------------
> run 1 | 22.8319 +/- 1.46554  | 22.087 +/- 0.552932
>       | 389.367x             | 402.499x
> ------+----------------------+------------------------
> run 2 | 22.4286 +/- 0.726449 | 22.216 +/- 0.916222
>       | 396.369x             | 400.162x
> ------+----------------------+------------------------
> run 3 | 22.5688 +/- 0.831341 | 22.4902 +/- 1.04881
>       | 393.906x             | 395.283x
>
> Bug: webrtc:10480
> Change-Id: I1fd54077a32e25e46196c8e18f003cd0ffd503e1
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/191703
> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
> Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#32572}

TBR=alessiob@webrtc.org,kwiberg@webrtc.org

Change-Id: I57a8f937ade0a35e1ccf0e229c391cc3a10e7c48
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: webrtc:10480
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/192621
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32578}
This commit is contained in:
Alessio Bazzica
2020-11-10 18:43:29 +00:00
committed by Commit Bot
parent c161adc086
commit 57e68ee1b9
5 changed files with 21 additions and 27 deletions

View File

@ -169,7 +169,7 @@ int ComputePitchPeriod24kHz(
// Auto-correlation energy normalized by frame energy. // Auto-correlation energy normalized by frame energy.
const float numerator = const float numerator =
auto_correlation[inverted_lag] * auto_correlation[inverted_lag]; auto_correlation[inverted_lag] * auto_correlation[inverted_lag];
const float denominator = y_energy[inverted_lag]; const float denominator = y_energy[kMaxPitch24kHz - inverted_lag];
// Compare numerator/denominator ratios without using divisions. // Compare numerator/denominator ratios without using divisions.
if (numerator * best_denominator > best_numerator * denominator) { if (numerator * best_denominator > best_numerator * denominator) {
best_inverted_lag = inverted_lag; best_inverted_lag = inverted_lag;
@ -253,19 +253,19 @@ void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src,
void ComputeSlidingFrameSquareEnergies24kHz( void ComputeSlidingFrameSquareEnergies24kHz(
rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
rtc::ArrayView<float, kRefineNumLags24kHz> y_energy) { rtc::ArrayView<float, kRefineNumLags24kHz> yy_values) {
float yy = std::inner_product(pitch_buffer.begin(), float yy = ComputeAutoCorrelation(kMaxPitch24kHz, pitch_buffer);
pitch_buffer.begin() + kFrameSize20ms24kHz, yy_values[0] = yy;
pitch_buffer.begin(), 0.f); static_assert(kMaxPitch24kHz - (kRefineNumLags24kHz - 1) >= 0, "");
y_energy[0] = yy;
static_assert(kMaxPitch24kHz - 1 + kFrameSize20ms24kHz < kBufSize24kHz, ""); static_assert(kMaxPitch24kHz - 1 + kFrameSize20ms24kHz < kBufSize24kHz, "");
static_assert(kMaxPitch24kHz < kRefineNumLags24kHz, ""); for (int lag = 1; lag < kRefineNumLags24kHz; ++lag) {
for (int inverted_lag = 0; inverted_lag < kMaxPitch24kHz; ++inverted_lag) { const int inverted_lag = kMaxPitch24kHz - lag;
yy -= pitch_buffer[inverted_lag] * pitch_buffer[inverted_lag]; const float y_old = pitch_buffer[inverted_lag + kFrameSize20ms24kHz];
yy += pitch_buffer[inverted_lag + kFrameSize20ms24kHz] * const float y_new = pitch_buffer[inverted_lag];
pitch_buffer[inverted_lag + kFrameSize20ms24kHz]; yy -= y_old * y_old;
yy = std::max(1.f, yy); yy += y_new * y_new;
y_energy[inverted_lag + 1] = yy; yy = std::max(0.f, yy);
yy_values[lag] = yy;
} }
} }
@ -375,7 +375,7 @@ PitchInfo ComputeExtendedPitchPeriod48kHz(
float y_energy; // Energy of the sliding frame `y`. float y_energy; // Energy of the sliding frame `y`.
}; };
const float x_energy = y_energy[kMaxPitch24kHz]; const float x_energy = y_energy[0];
const auto pitch_strength = [x_energy](float xy, float y_energy) { const auto pitch_strength = [x_energy](float xy, float y_energy) {
RTC_DCHECK_GE(x_energy * y_energy, 0.f); RTC_DCHECK_GE(x_energy * y_energy, 0.f);
return xy / std::sqrt(1.f + x_energy * y_energy); return xy / std::sqrt(1.f + x_energy * y_energy);
@ -387,7 +387,7 @@ PitchInfo ComputeExtendedPitchPeriod48kHz(
std::min(initial_pitch_period_48kHz / 2, kMaxPitch24kHz - 1); std::min(initial_pitch_period_48kHz / 2, kMaxPitch24kHz - 1);
best_pitch.xy = best_pitch.xy =
ComputeAutoCorrelation(kMaxPitch24kHz - best_pitch.period, pitch_buffer); ComputeAutoCorrelation(kMaxPitch24kHz - best_pitch.period, pitch_buffer);
best_pitch.y_energy = y_energy[kMaxPitch24kHz - best_pitch.period]; best_pitch.y_energy = y_energy[best_pitch.period];
best_pitch.strength = pitch_strength(best_pitch.xy, best_pitch.y_energy); best_pitch.strength = pitch_strength(best_pitch.xy, best_pitch.y_energy);
// Keep a copy of the initial pitch candidate. // Keep a copy of the initial pitch candidate.
const PitchInfo initial_pitch{best_pitch.period, best_pitch.strength}; const PitchInfo initial_pitch{best_pitch.period, best_pitch.strength};
@ -428,9 +428,8 @@ PitchInfo ComputeExtendedPitchPeriod48kHz(
const float xy_secondary_period = ComputeAutoCorrelation( const float xy_secondary_period = ComputeAutoCorrelation(
kMaxPitch24kHz - dual_alternative_period, pitch_buffer); kMaxPitch24kHz - dual_alternative_period, pitch_buffer);
const float xy = 0.5f * (xy_primary_period + xy_secondary_period); const float xy = 0.5f * (xy_primary_period + xy_secondary_period);
const float yy = const float yy = 0.5f * (y_energy[alternative_pitch.period] +
0.5f * (y_energy[kMaxPitch24kHz - alternative_pitch.period] + y_energy[dual_alternative_period]);
y_energy[kMaxPitch24kHz - dual_alternative_period]);
alternative_pitch.strength = pitch_strength(xy, yy); alternative_pitch.strength = pitch_strength(xy, yy);
// Maybe update best period. // Maybe update best period.

View File

@ -62,10 +62,10 @@ void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src,
// corresponding pitch period. // corresponding pitch period.
// Computes the sum of squared samples for every sliding frame `y` in the pitch // Computes the sum of squared samples for every sliding frame `y` in the pitch
// buffer. The indexes of `y_energy` are inverted lags. // buffer. The indexes of `yy_values` are lags.
void ComputeSlidingFrameSquareEnergies24kHz( void ComputeSlidingFrameSquareEnergies24kHz(
rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer, rtc::ArrayView<const float, kBufSize24kHz> pitch_buffer,
rtc::ArrayView<float, kRefineNumLags24kHz> y_energy); rtc::ArrayView<float, kRefineNumLags24kHz> yy_values);
// Top-2 pitch period candidates. Unit: number of samples - i.e., inverted lags. // Top-2 pitch period candidates. Unit: number of samples - i.e., inverted lags.
struct CandidatePitchPeriods { struct CandidatePitchPeriods {

View File

@ -42,7 +42,7 @@ TEST(RnnVadTest, ComputeSlidingFrameSquareEnergies24kHzWithinTolerance) {
computed_output); computed_output);
auto square_energies_view = test_data.GetPitchBufSquareEnergiesView(); auto square_energies_view = test_data.GetPitchBufSquareEnergiesView();
ExpectNearAbsolute({square_energies_view.data(), square_energies_view.size()}, ExpectNearAbsolute({square_energies_view.data(), square_energies_view.size()},
computed_output, 1e-3f); computed_output, 3e-2f);
} }
// Checks that the estimated pitch period is bit-exact given test input data. // Checks that the estimated pitch period is bit-exact given test input data.

View File

@ -42,7 +42,7 @@ TEST(RnnVadTest, PitchSearchWithinTolerance) {
pitch_estimator.Estimate({lp_residual.data(), kBufSize24kHz}); pitch_estimator.Estimate({lp_residual.data(), kBufSize24kHz});
EXPECT_EQ(expected_pitch_period, pitch_period); EXPECT_EQ(expected_pitch_period, pitch_period);
EXPECT_NEAR(expected_pitch_strength, EXPECT_NEAR(expected_pitch_strength,
pitch_estimator.GetLastPitchStrengthForTesting(), 15e-6f); pitch_estimator.GetLastPitchStrengthForTesting(), 1e-5f);
} }
} }
} }

View File

@ -10,7 +10,6 @@
#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" #include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
#include <algorithm>
#include <memory> #include <memory>
#include "rtc_base/checks.h" #include "rtc_base/checks.h"
@ -87,10 +86,6 @@ PitchTestData::PitchTestData() {
ResourcePath("audio_processing/agc2/rnn_vad/pitch_search_int", "dat"), ResourcePath("audio_processing/agc2/rnn_vad/pitch_search_int", "dat"),
1396); 1396);
test_data_reader.ReadChunk(test_data_); test_data_reader.ReadChunk(test_data_);
// Reverse the order of the squared energy values.
// Required after the WebRTC CL 191703 which switched to forward computation.
std::reverse(test_data_.begin() + kBufSize24kHz,
test_data_.begin() + kBufSize24kHz + kNumPitchBufSquareEnergies);
} }
PitchTestData::~PitchTestData() = default; PitchTestData::~PitchTestData() = default;