diff --git a/resources/audio_coding/neteq4_network_stats_android.dat.sha1 b/resources/audio_coding/neteq4_network_stats_android.dat.sha1 index 56907e15ee..165a80efab 100644 --- a/resources/audio_coding/neteq4_network_stats_android.dat.sha1 +++ b/resources/audio_coding/neteq4_network_stats_android.dat.sha1 @@ -1 +1 @@ -2853ab577fe571adfc7b18f77bbe58f1253d2019 \ No newline at end of file +01be67dc4c3b8e74743a45cbd8684c0535dec9ad \ No newline at end of file diff --git a/resources/audio_coding/neteq4_opus_network_stats.dat.sha1 b/resources/audio_coding/neteq4_opus_network_stats.dat.sha1 index e19ee80b9c..fc8fbea1cd 100644 --- a/resources/audio_coding/neteq4_opus_network_stats.dat.sha1 +++ b/resources/audio_coding/neteq4_opus_network_stats.dat.sha1 @@ -1 +1 @@ -dc2d9f584efb0111ebcd71a2c86f1fb09cd8c2bb \ No newline at end of file +9f09f7a5be8e85ddee80ffbe20d75a6b86ecaaa2 \ No newline at end of file diff --git a/resources/audio_coding/neteq4_opus_ref.pcm.sha1 b/resources/audio_coding/neteq4_opus_ref.pcm.sha1 index 3c4e897a3f..88a97111f1 100644 --- a/resources/audio_coding/neteq4_opus_ref.pcm.sha1 +++ b/resources/audio_coding/neteq4_opus_ref.pcm.sha1 @@ -1 +1 @@ -c23004d91ffbe5e7a1f24620fc89b58c0426040f \ No newline at end of file +9e490b6f347fd700395b70907d944684c4a8e595 \ No newline at end of file diff --git a/resources/audio_coding/neteq4_opus_ref_win_32.pcm.sha1 b/resources/audio_coding/neteq4_opus_ref_win_32.pcm.sha1 index 3c4e897a3f..88a97111f1 100644 --- a/resources/audio_coding/neteq4_opus_ref_win_32.pcm.sha1 +++ b/resources/audio_coding/neteq4_opus_ref_win_32.pcm.sha1 @@ -1 +1 @@ -c23004d91ffbe5e7a1f24620fc89b58c0426040f \ No newline at end of file +9e490b6f347fd700395b70907d944684c4a8e595 \ No newline at end of file diff --git a/resources/audio_coding/neteq4_opus_ref_win_64.pcm.sha1 b/resources/audio_coding/neteq4_opus_ref_win_64.pcm.sha1 index 3c4e897a3f..88a97111f1 100644 --- a/resources/audio_coding/neteq4_opus_ref_win_64.pcm.sha1 +++ b/resources/audio_coding/neteq4_opus_ref_win_64.pcm.sha1 @@ -1 +1 @@ -c23004d91ffbe5e7a1f24620fc89b58c0426040f \ No newline at end of file +9e490b6f347fd700395b70907d944684c4a8e595 \ No newline at end of file diff --git a/resources/audio_coding/neteq4_universal_ref.pcm.sha1 b/resources/audio_coding/neteq4_universal_ref.pcm.sha1 index 6643d54ea1..f2ded1f048 100644 --- a/resources/audio_coding/neteq4_universal_ref.pcm.sha1 +++ b/resources/audio_coding/neteq4_universal_ref.pcm.sha1 @@ -1 +1 @@ -f587883b7c371ee8d87dbf1b0f07525af7d959b8 \ No newline at end of file +037b597ada7d8b287ec3aea3e72671459455f445 \ No newline at end of file diff --git a/resources/audio_coding/neteq4_universal_ref_android.pcm.sha1 b/resources/audio_coding/neteq4_universal_ref_android.pcm.sha1 index 2a4b9385a5..3b26699cdc 100644 --- a/resources/audio_coding/neteq4_universal_ref_android.pcm.sha1 +++ b/resources/audio_coding/neteq4_universal_ref_android.pcm.sha1 @@ -1 +1 @@ -a349bd71dba548029b05d1d2a6dc7caafab9a856 \ No newline at end of file +d2a6b6ff54b340cf9f961c7f07768d86b3761073 \ No newline at end of file diff --git a/resources/audio_coding/neteq4_universal_ref_win_32.pcm.sha1 b/resources/audio_coding/neteq4_universal_ref_win_32.pcm.sha1 index 6643d54ea1..f2ded1f048 100644 --- a/resources/audio_coding/neteq4_universal_ref_win_32.pcm.sha1 +++ b/resources/audio_coding/neteq4_universal_ref_win_32.pcm.sha1 @@ -1 +1 @@ -f587883b7c371ee8d87dbf1b0f07525af7d959b8 \ No newline at end of file +037b597ada7d8b287ec3aea3e72671459455f445 \ No newline at end of file diff --git a/resources/audio_coding/neteq4_universal_ref_win_64.pcm.sha1 b/resources/audio_coding/neteq4_universal_ref_win_64.pcm.sha1 index 3c73ee929a..2a9a08f44f 100644 --- a/resources/audio_coding/neteq4_universal_ref_win_64.pcm.sha1 +++ b/resources/audio_coding/neteq4_universal_ref_win_64.pcm.sha1 @@ -1 +1 @@ -08266b198e7686b3cd9330813e0d2cd72fc8fdc2 \ No newline at end of file +bcd29d03325ad4db54399a89149db764b4c75287 \ No newline at end of file diff --git a/webrtc/common_audio/signal_processing/cross_correlation.c b/webrtc/common_audio/signal_processing/cross_correlation.c index d7c9f2b9af..c4e3678faf 100644 --- a/webrtc/common_audio/signal_processing/cross_correlation.c +++ b/webrtc/common_audio/signal_processing/cross_correlation.c @@ -10,6 +10,8 @@ #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include + /* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */ void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, const int16_t* seq1, @@ -23,7 +25,7 @@ void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, for (i = 0; i < dim_cross_correlation; i++) { int32_t corr = 0; for (j = 0; j < dim_seq; j++) - corr += (seq1[j] * seq2[j]) >> right_shifts; + corr += seq1[j] * seq2[j] >> right_shifts; seq2 += step_seq2; *cross_correlation++ = corr; } diff --git a/webrtc/modules/audio_coding/BUILD.gn b/webrtc/modules/audio_coding/BUILD.gn index 37978181a5..1b41a01dde 100644 --- a/webrtc/modules/audio_coding/BUILD.gn +++ b/webrtc/modules/audio_coding/BUILD.gn @@ -760,6 +760,8 @@ source_set("neteq") { "neteq/buffer_level_filter.h", "neteq/comfort_noise.cc", "neteq/comfort_noise.h", + "neteq/cross_correlation.cc", + "neteq/cross_correlation.h", "neteq/decision_logic.cc", "neteq/decision_logic.h", "neteq/decision_logic_fax.cc", diff --git a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest_oldapi.cc b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest_oldapi.cc index 503acdd65d..5b067e0f47 100644 --- a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest_oldapi.cc +++ b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest_oldapi.cc @@ -939,34 +939,34 @@ class AcmReceiverBitExactnessOldApi : public ::testing::Test { #if (defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)) && \ defined(WEBRTC_CODEC_ILBC) && defined(WEBRTC_CODEC_G722) TEST_F(AcmReceiverBitExactnessOldApi, 8kHzOutput) { - Run(8000, PlatformChecksum("f34e5c0e4dd4cd6c82b23f6ed006dad0", - "67a1471049dc87e7498bc19bf130dd35", - "efb5a07480bad8afb184c4150f4b3f3a", - "51717ab374871cbfa2c6977ea2aa40f3"), + Run(8000, PlatformChecksum("39ef9b20cd8b58cad7e0a3d111635827", + "09ef9ae4e8890a9a2f7850f7326836a0", + "a607f7d0ba98683c9c236217f86aaa6b", + "4a54f6ec712bda58484a388e1a332b42"), std::vector()); } TEST_F(AcmReceiverBitExactnessOldApi, 16kHzOutput) { - Run(16000, PlatformChecksum("5066b412805f3050f65154d676006964", - "887905a40d37f213b76f64296871473e", - "f580bfd4e5e29f0399b61b7512d4e3b4", - "5b2ae32c590b41d0c601179e14eaae96"), + Run(16000, PlatformChecksum("53763516a209220a209e2287f54ecae6", + "37a61e657affd80c2b8f07cea9879411", + "cdc3d88f6d8e497d4e00c62c0e6dbb3c", + "83edb67c157d0e3a0fb9f7d7b1ce5dc7"), std::vector()); } TEST_F(AcmReceiverBitExactnessOldApi, 32kHzOutput) { - Run(32000, PlatformChecksum("2cb4784af507c45b9121e2315def36f2", - "d2392b3247095d894a49b74a1106f281", - "fdf5166b98c43235978685e40e28fea6", - "7f620312f2fa74a10048bbb7739d4bf3"), + Run(32000, PlatformChecksum("ef1a373c9d03e1832bf4eaf389b57fb7", + "c7f7372441ca69858cd56b9ef4d5dcf2", + "c4a0e0b2e031d62c693af2a9ff4337ac", + "4cbfc6ab4d704f5d9b4f10406437fda9"), std::vector()); } TEST_F(AcmReceiverBitExactnessOldApi, 48kHzOutput) { - Run(48000, PlatformChecksum("ce63f874a198621fa35398e412640fcf", - "2cf0b8fe9784e8c96db307e125beb723", - "71f89e87ee1bad594f529d6c036289ad", - "b64c891e99eccc9ff45541ef67c9e9bf"), + Run(48000, PlatformChecksum("fa4bd7c1d040e710d8b93c1a1fdbd2bb", + "41856a46d0604ad307df749598b6cdce", + "22242dd832824046d48db9ea8a01f84c", + "c7f46bf165400b266d9b57aee02d2747"), std::vector()); } @@ -1021,10 +1021,10 @@ TEST_F(AcmReceiverBitExactnessOldApi, 48kHzOutputExternalDecoder) { std::vector external_decoders; external_decoders.push_back(ed); - Run(48000, PlatformChecksum("ce63f874a198621fa35398e412640fcf", - "2cf0b8fe9784e8c96db307e125beb723", - "71f89e87ee1bad594f529d6c036289ad", - "b64c891e99eccc9ff45541ef67c9e9bf"), + Run(48000, PlatformChecksum("fa4bd7c1d040e710d8b93c1a1fdbd2bb", + "41856a46d0604ad307df749598b6cdce", + "22242dd832824046d48db9ea8a01f84c", + "c7f46bf165400b266d9b57aee02d2747"), external_decoders); EXPECT_CALL(mock_decoder, Die()); diff --git a/webrtc/modules/audio_coding/neteq/background_noise.cc b/webrtc/modules/audio_coding/neteq/background_noise.cc index 7e7a6325e9..c86045eed9 100644 --- a/webrtc/modules/audio_coding/neteq/background_noise.cc +++ b/webrtc/modules/audio_coding/neteq/background_noise.cc @@ -17,6 +17,7 @@ #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq/cross_correlation.h" #include "webrtc/modules/audio_coding/neteq/post_decode_vad.h" namespace webrtc { @@ -169,15 +170,10 @@ int16_t BackgroundNoise::ScaleShift(size_t channel) const { int32_t BackgroundNoise::CalculateAutoCorrelation( const int16_t* signal, size_t length, int32_t* auto_correlation) const { - int16_t signal_max = WebRtcSpl_MaxAbsValueW16(signal, length); - int correlation_scale = kLogVecLen - - WebRtcSpl_NormW32(signal_max * signal_max); - correlation_scale = std::max(0, correlation_scale); - static const int kCorrelationStep = -1; - WebRtcSpl_CrossCorrelation(auto_correlation, signal, signal, length, - kMaxLpcOrder + 1, correlation_scale, - kCorrelationStep); + const int correlation_scale = + CrossCorrelationWithAutoShift(signal, signal, length, kMaxLpcOrder + 1, + kCorrelationStep, auto_correlation); // Number of shifts to normalize energy to energy/sample. int energy_sample_shift = kLogVecLen - correlation_scale; diff --git a/webrtc/modules/audio_coding/neteq/cross_correlation.cc b/webrtc/modules/audio_coding/neteq/cross_correlation.cc new file mode 100644 index 0000000000..ad89ab8a13 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq/cross_correlation.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq/cross_correlation.h" + +#include +#include + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +// This function decides the overflow-protecting scaling and calls +// WebRtcSpl_CrossCorrelation. +int CrossCorrelationWithAutoShift(const int16_t* sequence_1, + const int16_t* sequence_2, + size_t sequence_1_length, + size_t cross_correlation_length, + int cross_correlation_step, + int32_t* cross_correlation) { + // Find the maximum absolute value of sequence_1 and 2. + const int16_t max_1 = WebRtcSpl_MaxAbsValueW16(sequence_1, sequence_1_length); + const int sequence_2_shift = + cross_correlation_step * (static_cast(cross_correlation_length) - 1); + const int16_t* sequence_2_start = + sequence_2_shift >= 0 ? sequence_2 : sequence_2 + sequence_2_shift; + const size_t sequence_2_length = + sequence_1_length + std::abs(sequence_2_shift); + const int16_t max_2 = + WebRtcSpl_MaxAbsValueW16(sequence_2_start, sequence_2_length); + + // In order to avoid overflow when computing the sum we should scale the + // samples so that (in_vector_length * max_1 * max_2) will not overflow. + // Expected scaling fulfills + // 1) sufficient: + // sequence_1_length * (max_1 * max_2 >> scaling) <= 0x7fffffff; + // 2) necessary: + // if (scaling > 0) + // sequence_1_length * (max_1 * max_2 >> (scaling - 1)) > 0x7fffffff; + // The following calculation fulfills 1) and almost fulfills 2). + // There are some corner cases that 2) is not satisfied, e.g., + // max_1 = 17, max_2 = 30848, sequence_1_length = 4095, in such case, + // optimal scaling is 0, while the following calculation results in 1. + const int32_t factor = (max_1 * max_2) / (std::numeric_limits::max() + / static_cast(sequence_1_length)); + const int scaling = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor); + + WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2, + sequence_1_length, cross_correlation_length, + scaling, cross_correlation_step); + + return scaling; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq/cross_correlation.h b/webrtc/modules/audio_coding/neteq/cross_correlation.h new file mode 100644 index 0000000000..db14141027 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq/cross_correlation.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_ + +#include "webrtc/common_types.h" + +namespace webrtc { + +// The function calculates the cross-correlation between two sequences +// |sequence_1| and |sequence_2|. |sequence_1| is taken as reference, with +// |sequence_1_length| as its length. |sequence_2| slides for the calculation of +// cross-correlation. The result will be saved in |cross_correlation|. +// |cross_correlation_length| correlation points are calculated. +// The corresponding lag starts from 0, and increases with a step of +// |cross_correlation_step|. The result is without normalization. To avoid +// overflow, the result will be right shifted. The amount of shifts will be +// returned. +// +// Input: +// - sequence_1 : First sequence (reference). +// - sequence_2 : Second sequence (sliding during calculation). +// - sequence_1_length : Length of |sequence_1|. +// - cross_correlation_length : Number of cross-correlations to calculate. +// - cross_correlation_step : Step in the lag for the cross-correlation. +// +// Output: +// - cross_correlation : The cross-correlation in Q(-right_shifts) +// +// Return: +// Number of right shifts in cross_correlation. + +int CrossCorrelationWithAutoShift(const int16_t* sequence_1, + const int16_t* sequence_2, + size_t sequence_1_length, + size_t cross_correlation_length, + int cross_correlation_step, + int32_t* cross_correlation); + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_ diff --git a/webrtc/modules/audio_coding/neteq/expand.cc b/webrtc/modules/audio_coding/neteq/expand.cc index ef7af46597..94f6a8fdfe 100644 --- a/webrtc/modules/audio_coding/neteq/expand.cc +++ b/webrtc/modules/audio_coding/neteq/expand.cc @@ -19,6 +19,7 @@ #include "webrtc/base/safe_conversions.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/modules/audio_coding/neteq/background_noise.h" +#include "webrtc/modules/audio_coding/neteq/cross_correlation.h" #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" #include "webrtc/modules/audio_coding/neteq/random_vector.h" #include "webrtc/modules/audio_coding/neteq/statistics_calculator.h" @@ -379,12 +380,10 @@ void Expand::AnalyzeSignal(int16_t* random_vector) { InitializeForAnExpandPeriod(); // Calculate correlation in downsampled domain (4 kHz sample rate). - int correlation_scale; size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness. // If it is decided to break bit-exactness |correlation_length| should be // initialized to the return value of Correlation(). - Correlation(audio_history, signal_length, correlation_vector, - &correlation_scale); + Correlation(audio_history, signal_length, correlation_vector); // Find peaks in correlation vector. DspHelper::PeakDetection(correlation_vector, correlation_length, @@ -450,21 +449,12 @@ void Expand::AnalyzeSignal(int16_t* random_vector) { for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { ChannelParameters& parameters = channel_parameters_[channel_ix]; - // Calculate suitable scaling. - int16_t signal_max = WebRtcSpl_MaxAbsValueW16( - &audio_history[signal_length - correlation_length - start_index - - correlation_lags], - correlation_length + start_index + correlation_lags - 1); - correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) + - (31 - WebRtcSpl_NormW32(static_cast(correlation_length))) - 31; - correlation_scale = std::max(0, correlation_scale); // Calculate the correlation, store in |correlation_vector2|. - WebRtcSpl_CrossCorrelation( - correlation_vector2, + int correlation_scale = CrossCorrelationWithAutoShift( &(audio_history[signal_length - correlation_length]), &(audio_history[signal_length - correlation_length - start_index]), - correlation_length, correlation_lags, correlation_scale, -1); + correlation_length, correlation_lags, -1, correlation_vector2); // Find maximizing index. best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags); @@ -582,13 +572,6 @@ void Expand::AnalyzeSignal(int16_t* random_vector) { } // Calculate the LPC and the gain of the filters. - // Calculate scale value needed for auto-correlation. - correlation_scale = WebRtcSpl_MaxAbsValueW16( - &(audio_history[signal_length - fs_mult_lpc_analysis_len]), - fs_mult_lpc_analysis_len); - - correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0); - correlation_scale = std::max(correlation_scale * 2 + 7, 0); // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function. size_t temp_index = signal_length - fs_mult_lpc_analysis_len - @@ -601,11 +584,9 @@ void Expand::AnalyzeSignal(int16_t* random_vector) { memcpy(&temp_signal[kUnvoicedLpcOrder], &audio_history[temp_index + kUnvoicedLpcOrder], sizeof(int16_t) * fs_mult_lpc_analysis_len); - WebRtcSpl_CrossCorrelation(auto_correlation, - &temp_signal[kUnvoicedLpcOrder], - &temp_signal[kUnvoicedLpcOrder], - fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, - correlation_scale, -1); + correlation_scale = CrossCorrelationWithAutoShift( + &temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder], + fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation); delete [] temp_signal; // Verify that variance is positive. @@ -766,8 +747,7 @@ Expand::ChannelParameters::ChannelParameters() void Expand::Correlation(const int16_t* input, size_t input_length, - int16_t* output, - int* output_scale) const { + int16_t* output) const { // Set parameters depending on sample rate. const int16_t* filter_coefficients; size_t num_coefficients; @@ -814,13 +794,11 @@ void Expand::Correlation(const int16_t* input, downsampled_input, norm_shift); int32_t correlation[kNumCorrelationLags]; - static const int kCorrelationShift = 6; - WebRtcSpl_CrossCorrelation( - correlation, + CrossCorrelationWithAutoShift( &downsampled_input[kDownsampledLength - kCorrelationLength], &downsampled_input[kDownsampledLength - kCorrelationLength - kCorrelationStartLag], - kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1); + kCorrelationLength, kNumCorrelationLags, -1, correlation); // Normalize and move data from 32-bit to 16-bit vector. int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation, @@ -829,8 +807,6 @@ void Expand::Correlation(const int16_t* input, std::max(18 - WebRtcSpl_NormW32(max_correlation), 0)); WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation, norm_shift2); - // Total scale factor (right shifts) of correlation value. - *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2; } void Expand::UpdateLagIndex() { diff --git a/webrtc/modules/audio_coding/neteq/expand.h b/webrtc/modules/audio_coding/neteq/expand.h index 7f61bf3b18..44ced0ab6c 100644 --- a/webrtc/modules/audio_coding/neteq/expand.h +++ b/webrtc/modules/audio_coding/neteq/expand.h @@ -120,12 +120,10 @@ class Expand { // Calculate the auto-correlation of |input|, with length |input_length| // samples. The correlation is calculated from a downsampled version of - // |input|, and is written to |output|. The scale factor is written to - // |output_scale|. + // |input|, and is written to |output|. void Correlation(const int16_t* input, size_t input_length, - int16_t* output, - int* output_scale) const; + int16_t* output) const; void UpdateLagIndex(); diff --git a/webrtc/modules/audio_coding/neteq/merge.cc b/webrtc/modules/audio_coding/neteq/merge.cc index 9aed91f788..b62df61367 100644 --- a/webrtc/modules/audio_coding/neteq/merge.cc +++ b/webrtc/modules/audio_coding/neteq/merge.cc @@ -18,6 +18,7 @@ #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq/cross_correlation.h" #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" #include "webrtc/modules/audio_coding/neteq/expand.h" #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" @@ -63,10 +64,8 @@ size_t Merge::Process(int16_t* input, size_t input_length, for (size_t channel = 0; channel < num_channels_; ++channel) { int16_t* input_channel = &input_vector[channel][0]; int16_t* expanded_channel = &expanded_[channel][0]; - int16_t expanded_max, input_max; int16_t new_mute_factor = SignalScaling( - input_channel, input_length_per_channel, expanded_channel, - &expanded_max, &input_max); + input_channel, input_length_per_channel, expanded_channel); // Adjust muting factor (product of "main" muting factor and expand muting // factor). @@ -89,8 +88,7 @@ size_t Merge::Process(int16_t* input, size_t input_length, // Calculate the lag of the strongest correlation period. best_correlation_index = CorrelateAndPeakSearch( - expanded_max, input_max, old_length, - input_length_per_channel, expand_period); + old_length, input_length_per_channel, expand_period); } static const int kTempDataSize = 3600; @@ -204,19 +202,19 @@ size_t Merge::GetExpandedSignal(size_t* old_length, size_t* expand_period) { } int16_t Merge::SignalScaling(const int16_t* input, size_t input_length, - const int16_t* expanded_signal, - int16_t* expanded_max, int16_t* input_max) const { + const int16_t* expanded_signal) const { // Adjust muting factor if new vector is more or less of the BGN energy. const size_t mod_input_length = std::min(static_cast(64 * fs_mult_), input_length); - *expanded_max = WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length); - *input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length); + const int16_t expanded_max = + WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length); + const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length); // Calculate energy of expanded signal. // |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz. int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_); int expanded_shift = 6 + log_fs_mult - - WebRtcSpl_NormW32(*expanded_max * *expanded_max); + - WebRtcSpl_NormW32(expanded_max * expanded_max); expanded_shift = std::max(expanded_shift, 0); int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal, expanded_signal, @@ -224,8 +222,7 @@ int16_t Merge::SignalScaling(const int16_t* input, size_t input_length, expanded_shift); // Calculate energy of input signal. - int input_shift = 6 + log_fs_mult - - WebRtcSpl_NormW32(*input_max * *input_max); + int input_shift = 6 + log_fs_mult - WebRtcSpl_NormW32(input_max * input_max); input_shift = std::max(input_shift, 0); int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input, mod_input_length, @@ -307,22 +304,17 @@ void Merge::Downsample(const int16_t* input, size_t input_length, } } -size_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max, - size_t start_position, size_t input_length, +size_t Merge::CorrelateAndPeakSearch(size_t start_position, size_t input_length, size_t expand_period) const { // Calculate correlation without any normalization. const size_t max_corr_length = kMaxCorrelationLength; size_t stop_position_downsamp = std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1); - int correlation_shift = 0; - if (expanded_max * input_max > 26843546) { - correlation_shift = 3; - } int32_t correlation[kMaxCorrelationLength]; - WebRtcSpl_CrossCorrelation(correlation, input_downsampled_, - expanded_downsampled_, kInputDownsampLength, - stop_position_downsamp, correlation_shift, 1); + CrossCorrelationWithAutoShift(input_downsampled_, expanded_downsampled_, + kInputDownsampLength, stop_position_downsamp, 1, + correlation); // Normalize correlation to 14 bits and copy to a 16-bit array. const size_t pad_length = expand_->overlap_length() - 1; diff --git a/webrtc/modules/audio_coding/neteq/merge.h b/webrtc/modules/audio_coding/neteq/merge.h index a168502c27..95dea5a885 100644 --- a/webrtc/modules/audio_coding/neteq/merge.h +++ b/webrtc/modules/audio_coding/neteq/merge.h @@ -69,11 +69,10 @@ class Merge { // of samples that were taken from the |sync_buffer_|. size_t GetExpandedSignal(size_t* old_length, size_t* expand_period); - // Analyzes |input| and |expanded_signal| to find maximum values. Returns - // a muting factor (Q14) to be used on the new data. + // Analyzes |input| and |expanded_signal| and returns muting factor (Q14) to + // be used on the new data. int16_t SignalScaling(const int16_t* input, size_t input_length, - const int16_t* expanded_signal, - int16_t* expanded_max, int16_t* input_max) const; + const int16_t* expanded_signal) const; // Downsamples |input| (|input_length| samples) and |expanded_signal| to // 4 kHz sample rate. The downsampled signals are written to @@ -84,8 +83,7 @@ class Merge { // Calculates cross-correlation between |input_downsampled_| and // |expanded_downsampled_|, and finds the correlation maximum. The maximizing // lag is returned. - size_t CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max, - size_t start_position, size_t input_length, + size_t CorrelateAndPeakSearch(size_t start_position, size_t input_length, size_t expand_period) const; const int fs_mult_; // fs_hz_ / 8000. diff --git a/webrtc/modules/audio_coding/neteq/neteq.gypi b/webrtc/modules/audio_coding/neteq/neteq.gypi index fee51dfe2b..509dda0c6e 100644 --- a/webrtc/modules/audio_coding/neteq/neteq.gypi +++ b/webrtc/modules/audio_coding/neteq/neteq.gypi @@ -73,6 +73,8 @@ 'buffer_level_filter.h', 'comfort_noise.cc', 'comfort_noise.h', + 'cross_correlation.cc', + 'cross_correlation.h', 'decision_logic.cc', 'decision_logic.h', 'decision_logic_fax.cc', diff --git a/webrtc/modules/audio_coding/neteq/time_stretch.cc b/webrtc/modules/audio_coding/neteq/time_stretch.cc index 6a91ea487b..880b1f82ea 100644 --- a/webrtc/modules/audio_coding/neteq/time_stretch.cc +++ b/webrtc/modules/audio_coding/neteq/time_stretch.cc @@ -16,6 +16,7 @@ #include "webrtc/base/safe_conversions.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/modules/audio_coding/neteq/background_noise.h" +#include "webrtc/modules/audio_coding/neteq/cross_correlation.h" #include "webrtc/modules/audio_coding/neteq/dsp_helper.h" namespace webrtc { @@ -158,20 +159,15 @@ TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input, } void TimeStretch::AutoCorrelation() { - // Set scaling factor for cross correlation to protect against overflow. - int scaling = kLogCorrelationLen - WebRtcSpl_NormW32( - max_input_value_ * max_input_value_); - scaling = std::max(0, scaling); - // Calculate correlation from lag kMinLag to lag kMaxLag in 4 kHz domain. int32_t auto_corr[kCorrelationLen]; - WebRtcSpl_CrossCorrelation(auto_corr, &downsampled_input_[kMaxLag], - &downsampled_input_[kMaxLag - kMinLag], - kCorrelationLen, kMaxLag - kMinLag, scaling, -1); + CrossCorrelationWithAutoShift( + &downsampled_input_[kMaxLag], &downsampled_input_[kMaxLag - kMinLag], + kCorrelationLen, kMaxLag - kMinLag, -1, auto_corr); // Normalize correlation to 14 bits and write to |auto_correlation_|. int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen); - scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr)); + int scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr)); WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen, auto_corr, scaling); }