Avoiding overflow in cross correlation in NetEq.
BUG= Review-Url: https://codereview.webrtc.org/1908623002 Cr-Commit-Position: refs/heads/master@{#12538}
This commit is contained in:
@ -1 +1 @@
|
||||
2853ab577fe571adfc7b18f77bbe58f1253d2019
|
||||
01be67dc4c3b8e74743a45cbd8684c0535dec9ad
|
@ -1 +1 @@
|
||||
dc2d9f584efb0111ebcd71a2c86f1fb09cd8c2bb
|
||||
9f09f7a5be8e85ddee80ffbe20d75a6b86ecaaa2
|
@ -1 +1 @@
|
||||
c23004d91ffbe5e7a1f24620fc89b58c0426040f
|
||||
9e490b6f347fd700395b70907d944684c4a8e595
|
@ -1 +1 @@
|
||||
c23004d91ffbe5e7a1f24620fc89b58c0426040f
|
||||
9e490b6f347fd700395b70907d944684c4a8e595
|
@ -1 +1 @@
|
||||
c23004d91ffbe5e7a1f24620fc89b58c0426040f
|
||||
9e490b6f347fd700395b70907d944684c4a8e595
|
@ -1 +1 @@
|
||||
f587883b7c371ee8d87dbf1b0f07525af7d959b8
|
||||
037b597ada7d8b287ec3aea3e72671459455f445
|
@ -1 +1 @@
|
||||
a349bd71dba548029b05d1d2a6dc7caafab9a856
|
||||
d2a6b6ff54b340cf9f961c7f07768d86b3761073
|
@ -1 +1 @@
|
||||
f587883b7c371ee8d87dbf1b0f07525af7d959b8
|
||||
037b597ada7d8b287ec3aea3e72671459455f445
|
@ -1 +1 @@
|
||||
08266b198e7686b3cd9330813e0d2cd72fc8fdc2
|
||||
bcd29d03325ad4db54399a89149db764b4c75287
|
@ -10,6 +10,8 @@
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */
|
||||
void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
|
||||
const int16_t* seq1,
|
||||
@ -23,7 +25,7 @@ void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
|
||||
for (i = 0; i < dim_cross_correlation; i++) {
|
||||
int32_t corr = 0;
|
||||
for (j = 0; j < dim_seq; j++)
|
||||
corr += (seq1[j] * seq2[j]) >> right_shifts;
|
||||
corr += seq1[j] * seq2[j] >> right_shifts;
|
||||
seq2 += step_seq2;
|
||||
*cross_correlation++ = corr;
|
||||
}
|
||||
|
@ -760,6 +760,8 @@ source_set("neteq") {
|
||||
"neteq/buffer_level_filter.h",
|
||||
"neteq/comfort_noise.cc",
|
||||
"neteq/comfort_noise.h",
|
||||
"neteq/cross_correlation.cc",
|
||||
"neteq/cross_correlation.h",
|
||||
"neteq/decision_logic.cc",
|
||||
"neteq/decision_logic.h",
|
||||
"neteq/decision_logic_fax.cc",
|
||||
|
@ -939,34 +939,34 @@ class AcmReceiverBitExactnessOldApi : public ::testing::Test {
|
||||
#if (defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)) && \
|
||||
defined(WEBRTC_CODEC_ILBC) && defined(WEBRTC_CODEC_G722)
|
||||
TEST_F(AcmReceiverBitExactnessOldApi, 8kHzOutput) {
|
||||
Run(8000, PlatformChecksum("f34e5c0e4dd4cd6c82b23f6ed006dad0",
|
||||
"67a1471049dc87e7498bc19bf130dd35",
|
||||
"efb5a07480bad8afb184c4150f4b3f3a",
|
||||
"51717ab374871cbfa2c6977ea2aa40f3"),
|
||||
Run(8000, PlatformChecksum("39ef9b20cd8b58cad7e0a3d111635827",
|
||||
"09ef9ae4e8890a9a2f7850f7326836a0",
|
||||
"a607f7d0ba98683c9c236217f86aaa6b",
|
||||
"4a54f6ec712bda58484a388e1a332b42"),
|
||||
std::vector<ExternalDecoder>());
|
||||
}
|
||||
|
||||
TEST_F(AcmReceiverBitExactnessOldApi, 16kHzOutput) {
|
||||
Run(16000, PlatformChecksum("5066b412805f3050f65154d676006964",
|
||||
"887905a40d37f213b76f64296871473e",
|
||||
"f580bfd4e5e29f0399b61b7512d4e3b4",
|
||||
"5b2ae32c590b41d0c601179e14eaae96"),
|
||||
Run(16000, PlatformChecksum("53763516a209220a209e2287f54ecae6",
|
||||
"37a61e657affd80c2b8f07cea9879411",
|
||||
"cdc3d88f6d8e497d4e00c62c0e6dbb3c",
|
||||
"83edb67c157d0e3a0fb9f7d7b1ce5dc7"),
|
||||
std::vector<ExternalDecoder>());
|
||||
}
|
||||
|
||||
TEST_F(AcmReceiverBitExactnessOldApi, 32kHzOutput) {
|
||||
Run(32000, PlatformChecksum("2cb4784af507c45b9121e2315def36f2",
|
||||
"d2392b3247095d894a49b74a1106f281",
|
||||
"fdf5166b98c43235978685e40e28fea6",
|
||||
"7f620312f2fa74a10048bbb7739d4bf3"),
|
||||
Run(32000, PlatformChecksum("ef1a373c9d03e1832bf4eaf389b57fb7",
|
||||
"c7f7372441ca69858cd56b9ef4d5dcf2",
|
||||
"c4a0e0b2e031d62c693af2a9ff4337ac",
|
||||
"4cbfc6ab4d704f5d9b4f10406437fda9"),
|
||||
std::vector<ExternalDecoder>());
|
||||
}
|
||||
|
||||
TEST_F(AcmReceiverBitExactnessOldApi, 48kHzOutput) {
|
||||
Run(48000, PlatformChecksum("ce63f874a198621fa35398e412640fcf",
|
||||
"2cf0b8fe9784e8c96db307e125beb723",
|
||||
"71f89e87ee1bad594f529d6c036289ad",
|
||||
"b64c891e99eccc9ff45541ef67c9e9bf"),
|
||||
Run(48000, PlatformChecksum("fa4bd7c1d040e710d8b93c1a1fdbd2bb",
|
||||
"41856a46d0604ad307df749598b6cdce",
|
||||
"22242dd832824046d48db9ea8a01f84c",
|
||||
"c7f46bf165400b266d9b57aee02d2747"),
|
||||
std::vector<ExternalDecoder>());
|
||||
}
|
||||
|
||||
@ -1021,10 +1021,10 @@ TEST_F(AcmReceiverBitExactnessOldApi, 48kHzOutputExternalDecoder) {
|
||||
std::vector<ExternalDecoder> external_decoders;
|
||||
external_decoders.push_back(ed);
|
||||
|
||||
Run(48000, PlatformChecksum("ce63f874a198621fa35398e412640fcf",
|
||||
"2cf0b8fe9784e8c96db307e125beb723",
|
||||
"71f89e87ee1bad594f529d6c036289ad",
|
||||
"b64c891e99eccc9ff45541ef67c9e9bf"),
|
||||
Run(48000, PlatformChecksum("fa4bd7c1d040e710d8b93c1a1fdbd2bb",
|
||||
"41856a46d0604ad307df749598b6cdce",
|
||||
"22242dd832824046d48db9ea8a01f84c",
|
||||
"c7f46bf165400b266d9b57aee02d2747"),
|
||||
external_decoders);
|
||||
|
||||
EXPECT_CALL(mock_decoder, Die());
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/post_decode_vad.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -169,15 +170,10 @@ int16_t BackgroundNoise::ScaleShift(size_t channel) const {
|
||||
|
||||
int32_t BackgroundNoise::CalculateAutoCorrelation(
|
||||
const int16_t* signal, size_t length, int32_t* auto_correlation) const {
|
||||
int16_t signal_max = WebRtcSpl_MaxAbsValueW16(signal, length);
|
||||
int correlation_scale = kLogVecLen -
|
||||
WebRtcSpl_NormW32(signal_max * signal_max);
|
||||
correlation_scale = std::max(0, correlation_scale);
|
||||
|
||||
static const int kCorrelationStep = -1;
|
||||
WebRtcSpl_CrossCorrelation(auto_correlation, signal, signal, length,
|
||||
kMaxLpcOrder + 1, correlation_scale,
|
||||
kCorrelationStep);
|
||||
const int correlation_scale =
|
||||
CrossCorrelationWithAutoShift(signal, signal, length, kMaxLpcOrder + 1,
|
||||
kCorrelationStep, auto_correlation);
|
||||
|
||||
// Number of shifts to normalize energy to energy/sample.
|
||||
int energy_sample_shift = kLogVecLen - correlation_scale;
|
||||
|
62
webrtc/modules/audio_coding/neteq/cross_correlation.cc
Normal file
62
webrtc/modules/audio_coding/neteq/cross_correlation.cc
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <limits>
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This function decides the overflow-protecting scaling and calls
|
||||
// WebRtcSpl_CrossCorrelation.
|
||||
int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
|
||||
const int16_t* sequence_2,
|
||||
size_t sequence_1_length,
|
||||
size_t cross_correlation_length,
|
||||
int cross_correlation_step,
|
||||
int32_t* cross_correlation) {
|
||||
// Find the maximum absolute value of sequence_1 and 2.
|
||||
const int16_t max_1 = WebRtcSpl_MaxAbsValueW16(sequence_1, sequence_1_length);
|
||||
const int sequence_2_shift =
|
||||
cross_correlation_step * (static_cast<int>(cross_correlation_length) - 1);
|
||||
const int16_t* sequence_2_start =
|
||||
sequence_2_shift >= 0 ? sequence_2 : sequence_2 + sequence_2_shift;
|
||||
const size_t sequence_2_length =
|
||||
sequence_1_length + std::abs(sequence_2_shift);
|
||||
const int16_t max_2 =
|
||||
WebRtcSpl_MaxAbsValueW16(sequence_2_start, sequence_2_length);
|
||||
|
||||
// In order to avoid overflow when computing the sum we should scale the
|
||||
// samples so that (in_vector_length * max_1 * max_2) will not overflow.
|
||||
// Expected scaling fulfills
|
||||
// 1) sufficient:
|
||||
// sequence_1_length * (max_1 * max_2 >> scaling) <= 0x7fffffff;
|
||||
// 2) necessary:
|
||||
// if (scaling > 0)
|
||||
// sequence_1_length * (max_1 * max_2 >> (scaling - 1)) > 0x7fffffff;
|
||||
// The following calculation fulfills 1) and almost fulfills 2).
|
||||
// There are some corner cases that 2) is not satisfied, e.g.,
|
||||
// max_1 = 17, max_2 = 30848, sequence_1_length = 4095, in such case,
|
||||
// optimal scaling is 0, while the following calculation results in 1.
|
||||
const int32_t factor = (max_1 * max_2) / (std::numeric_limits<int32_t>::max()
|
||||
/ static_cast<int32_t>(sequence_1_length));
|
||||
const int scaling = factor == 0 ? 0 : 31 - WebRtcSpl_NormW32(factor);
|
||||
|
||||
WebRtcSpl_CrossCorrelation(cross_correlation, sequence_1, sequence_2,
|
||||
sequence_1_length, cross_correlation_length,
|
||||
scaling, cross_correlation_step);
|
||||
|
||||
return scaling;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
50
webrtc/modules/audio_coding/neteq/cross_correlation.h
Normal file
50
webrtc/modules/audio_coding/neteq/cross_correlation.h
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
|
||||
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
|
||||
|
||||
#include "webrtc/common_types.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// The function calculates the cross-correlation between two sequences
|
||||
// |sequence_1| and |sequence_2|. |sequence_1| is taken as reference, with
|
||||
// |sequence_1_length| as its length. |sequence_2| slides for the calculation of
|
||||
// cross-correlation. The result will be saved in |cross_correlation|.
|
||||
// |cross_correlation_length| correlation points are calculated.
|
||||
// The corresponding lag starts from 0, and increases with a step of
|
||||
// |cross_correlation_step|. The result is without normalization. To avoid
|
||||
// overflow, the result will be right shifted. The amount of shifts will be
|
||||
// returned.
|
||||
//
|
||||
// Input:
|
||||
// - sequence_1 : First sequence (reference).
|
||||
// - sequence_2 : Second sequence (sliding during calculation).
|
||||
// - sequence_1_length : Length of |sequence_1|.
|
||||
// - cross_correlation_length : Number of cross-correlations to calculate.
|
||||
// - cross_correlation_step : Step in the lag for the cross-correlation.
|
||||
//
|
||||
// Output:
|
||||
// - cross_correlation : The cross-correlation in Q(-right_shifts), where right_shifts is the value returned by this function.
|
||||
//
|
||||
// Return:
|
||||
// Number of right shifts in cross_correlation.
|
||||
|
||||
int CrossCorrelationWithAutoShift(const int16_t* sequence_1,
|
||||
const int16_t* sequence_2,
|
||||
size_t sequence_1_length,
|
||||
size_t cross_correlation_length,
|
||||
int cross_correlation_step,
|
||||
int32_t* cross_correlation);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_CROSS_CORRELATION_H_
|
@ -19,6 +19,7 @@
|
||||
#include "webrtc/base/safe_conversions.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/random_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
|
||||
@ -379,12 +380,10 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
|
||||
InitializeForAnExpandPeriod();
|
||||
|
||||
// Calculate correlation in downsampled domain (4 kHz sample rate).
|
||||
int correlation_scale;
|
||||
size_t correlation_length = 51; // TODO(hlundin): Legacy bit-exactness.
|
||||
// If it is decided to break bit-exactness |correlation_length| should be
|
||||
// initialized to the return value of Correlation().
|
||||
Correlation(audio_history, signal_length, correlation_vector,
|
||||
&correlation_scale);
|
||||
Correlation(audio_history, signal_length, correlation_vector);
|
||||
|
||||
// Find peaks in correlation vector.
|
||||
DspHelper::PeakDetection(correlation_vector, correlation_length,
|
||||
@ -450,21 +449,12 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
|
||||
|
||||
for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
|
||||
ChannelParameters& parameters = channel_parameters_[channel_ix];
|
||||
// Calculate suitable scaling.
|
||||
int16_t signal_max = WebRtcSpl_MaxAbsValueW16(
|
||||
&audio_history[signal_length - correlation_length - start_index
|
||||
- correlation_lags],
|
||||
correlation_length + start_index + correlation_lags - 1);
|
||||
correlation_scale = (31 - WebRtcSpl_NormW32(signal_max * signal_max)) +
|
||||
(31 - WebRtcSpl_NormW32(static_cast<int32_t>(correlation_length))) - 31;
|
||||
correlation_scale = std::max(0, correlation_scale);
|
||||
|
||||
// Calculate the correlation, store in |correlation_vector2|.
|
||||
WebRtcSpl_CrossCorrelation(
|
||||
correlation_vector2,
|
||||
int correlation_scale = CrossCorrelationWithAutoShift(
|
||||
&(audio_history[signal_length - correlation_length]),
|
||||
&(audio_history[signal_length - correlation_length - start_index]),
|
||||
correlation_length, correlation_lags, correlation_scale, -1);
|
||||
correlation_length, correlation_lags, -1, correlation_vector2);
|
||||
|
||||
// Find maximizing index.
|
||||
best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
|
||||
@ -582,13 +572,6 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
|
||||
}
|
||||
|
||||
// Calculate the LPC and the gain of the filters.
|
||||
// Calculate scale value needed for auto-correlation.
|
||||
correlation_scale = WebRtcSpl_MaxAbsValueW16(
|
||||
&(audio_history[signal_length - fs_mult_lpc_analysis_len]),
|
||||
fs_mult_lpc_analysis_len);
|
||||
|
||||
correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0);
|
||||
correlation_scale = std::max(correlation_scale * 2 + 7, 0);
|
||||
|
||||
// Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function.
|
||||
size_t temp_index = signal_length - fs_mult_lpc_analysis_len -
|
||||
@ -601,11 +584,9 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
|
||||
memcpy(&temp_signal[kUnvoicedLpcOrder],
|
||||
&audio_history[temp_index + kUnvoicedLpcOrder],
|
||||
sizeof(int16_t) * fs_mult_lpc_analysis_len);
|
||||
WebRtcSpl_CrossCorrelation(auto_correlation,
|
||||
&temp_signal[kUnvoicedLpcOrder],
|
||||
&temp_signal[kUnvoicedLpcOrder],
|
||||
fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1,
|
||||
correlation_scale, -1);
|
||||
correlation_scale = CrossCorrelationWithAutoShift(
|
||||
&temp_signal[kUnvoicedLpcOrder], &temp_signal[kUnvoicedLpcOrder],
|
||||
fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, -1, auto_correlation);
|
||||
delete [] temp_signal;
|
||||
|
||||
// Verify that variance is positive.
|
||||
@ -766,8 +747,7 @@ Expand::ChannelParameters::ChannelParameters()
|
||||
|
||||
void Expand::Correlation(const int16_t* input,
|
||||
size_t input_length,
|
||||
int16_t* output,
|
||||
int* output_scale) const {
|
||||
int16_t* output) const {
|
||||
// Set parameters depending on sample rate.
|
||||
const int16_t* filter_coefficients;
|
||||
size_t num_coefficients;
|
||||
@ -814,13 +794,11 @@ void Expand::Correlation(const int16_t* input,
|
||||
downsampled_input, norm_shift);
|
||||
|
||||
int32_t correlation[kNumCorrelationLags];
|
||||
static const int kCorrelationShift = 6;
|
||||
WebRtcSpl_CrossCorrelation(
|
||||
correlation,
|
||||
CrossCorrelationWithAutoShift(
|
||||
&downsampled_input[kDownsampledLength - kCorrelationLength],
|
||||
&downsampled_input[kDownsampledLength - kCorrelationLength
|
||||
- kCorrelationStartLag],
|
||||
kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1);
|
||||
kCorrelationLength, kNumCorrelationLags, -1, correlation);
|
||||
|
||||
// Normalize and move data from 32-bit to 16-bit vector.
|
||||
int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
|
||||
@ -829,8 +807,6 @@ void Expand::Correlation(const int16_t* input,
|
||||
std::max(18 - WebRtcSpl_NormW32(max_correlation), 0));
|
||||
WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation,
|
||||
norm_shift2);
|
||||
// Total scale factor (right shifts) of correlation value.
|
||||
*output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2;
|
||||
}
|
||||
|
||||
void Expand::UpdateLagIndex() {
|
||||
|
@ -120,12 +120,10 @@ class Expand {
|
||||
|
||||
// Calculate the auto-correlation of |input|, with length |input_length|
|
||||
// samples. The correlation is calculated from a downsampled version of
|
||||
// |input|, and is written to |output|. The scale factor is written to
|
||||
// |output_scale|.
|
||||
// |input|, and is written to |output|.
|
||||
void Correlation(const int16_t* input,
|
||||
size_t input_length,
|
||||
int16_t* output,
|
||||
int* output_scale) const;
|
||||
int16_t* output) const;
|
||||
|
||||
void UpdateLagIndex();
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/expand.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/sync_buffer.h"
|
||||
@ -63,10 +64,8 @@ size_t Merge::Process(int16_t* input, size_t input_length,
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
int16_t* input_channel = &input_vector[channel][0];
|
||||
int16_t* expanded_channel = &expanded_[channel][0];
|
||||
int16_t expanded_max, input_max;
|
||||
int16_t new_mute_factor = SignalScaling(
|
||||
input_channel, input_length_per_channel, expanded_channel,
|
||||
&expanded_max, &input_max);
|
||||
input_channel, input_length_per_channel, expanded_channel);
|
||||
|
||||
// Adjust muting factor (product of "main" muting factor and expand muting
|
||||
// factor).
|
||||
@ -89,8 +88,7 @@ size_t Merge::Process(int16_t* input, size_t input_length,
|
||||
|
||||
// Calculate the lag of the strongest correlation period.
|
||||
best_correlation_index = CorrelateAndPeakSearch(
|
||||
expanded_max, input_max, old_length,
|
||||
input_length_per_channel, expand_period);
|
||||
old_length, input_length_per_channel, expand_period);
|
||||
}
|
||||
|
||||
static const int kTempDataSize = 3600;
|
||||
@ -204,19 +202,19 @@ size_t Merge::GetExpandedSignal(size_t* old_length, size_t* expand_period) {
|
||||
}
|
||||
|
||||
int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,
|
||||
const int16_t* expanded_signal,
|
||||
int16_t* expanded_max, int16_t* input_max) const {
|
||||
const int16_t* expanded_signal) const {
|
||||
// Adjust muting factor if new vector is more or less of the BGN energy.
|
||||
const size_t mod_input_length =
|
||||
std::min(static_cast<size_t>(64 * fs_mult_), input_length);
|
||||
*expanded_max = WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);
|
||||
*input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);
|
||||
const int16_t expanded_max =
|
||||
WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length);
|
||||
const int16_t input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length);
|
||||
|
||||
// Calculate energy of expanded signal.
|
||||
// |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz.
|
||||
int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_);
|
||||
int expanded_shift = 6 + log_fs_mult
|
||||
- WebRtcSpl_NormW32(*expanded_max * *expanded_max);
|
||||
- WebRtcSpl_NormW32(expanded_max * expanded_max);
|
||||
expanded_shift = std::max(expanded_shift, 0);
|
||||
int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal,
|
||||
expanded_signal,
|
||||
@ -224,8 +222,7 @@ int16_t Merge::SignalScaling(const int16_t* input, size_t input_length,
|
||||
expanded_shift);
|
||||
|
||||
// Calculate energy of input signal.
|
||||
int input_shift = 6 + log_fs_mult -
|
||||
WebRtcSpl_NormW32(*input_max * *input_max);
|
||||
int input_shift = 6 + log_fs_mult - WebRtcSpl_NormW32(input_max * input_max);
|
||||
input_shift = std::max(input_shift, 0);
|
||||
int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input,
|
||||
mod_input_length,
|
||||
@ -307,22 +304,17 @@ void Merge::Downsample(const int16_t* input, size_t input_length,
|
||||
}
|
||||
}
|
||||
|
||||
size_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
|
||||
size_t start_position, size_t input_length,
|
||||
size_t Merge::CorrelateAndPeakSearch(size_t start_position, size_t input_length,
|
||||
size_t expand_period) const {
|
||||
// Calculate correlation without any normalization.
|
||||
const size_t max_corr_length = kMaxCorrelationLength;
|
||||
size_t stop_position_downsamp =
|
||||
std::min(max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1);
|
||||
int correlation_shift = 0;
|
||||
if (expanded_max * input_max > 26843546) {
|
||||
correlation_shift = 3;
|
||||
}
|
||||
|
||||
int32_t correlation[kMaxCorrelationLength];
|
||||
WebRtcSpl_CrossCorrelation(correlation, input_downsampled_,
|
||||
expanded_downsampled_, kInputDownsampLength,
|
||||
stop_position_downsamp, correlation_shift, 1);
|
||||
CrossCorrelationWithAutoShift(input_downsampled_, expanded_downsampled_,
|
||||
kInputDownsampLength, stop_position_downsamp, 1,
|
||||
correlation);
|
||||
|
||||
// Normalize correlation to 14 bits and copy to a 16-bit array.
|
||||
const size_t pad_length = expand_->overlap_length() - 1;
|
||||
|
@ -69,11 +69,10 @@ class Merge {
|
||||
// of samples that were taken from the |sync_buffer_|.
|
||||
size_t GetExpandedSignal(size_t* old_length, size_t* expand_period);
|
||||
|
||||
// Analyzes |input| and |expanded_signal| to find maximum values. Returns
|
||||
// a muting factor (Q14) to be used on the new data.
|
||||
// Analyzes |input| and |expanded_signal| and returns muting factor (Q14) to
|
||||
// be used on the new data.
|
||||
int16_t SignalScaling(const int16_t* input, size_t input_length,
|
||||
const int16_t* expanded_signal,
|
||||
int16_t* expanded_max, int16_t* input_max) const;
|
||||
const int16_t* expanded_signal) const;
|
||||
|
||||
// Downsamples |input| (|input_length| samples) and |expanded_signal| to
|
||||
// 4 kHz sample rate. The downsampled signals are written to
|
||||
@ -84,8 +83,7 @@ class Merge {
|
||||
// Calculates cross-correlation between |input_downsampled_| and
|
||||
// |expanded_downsampled_|, and finds the correlation maximum. The maximizing
|
||||
// lag is returned.
|
||||
size_t CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
|
||||
size_t start_position, size_t input_length,
|
||||
size_t CorrelateAndPeakSearch(size_t start_position, size_t input_length,
|
||||
size_t expand_period) const;
|
||||
|
||||
const int fs_mult_; // fs_hz_ / 8000.
|
||||
|
@ -73,6 +73,8 @@
|
||||
'buffer_level_filter.h',
|
||||
'comfort_noise.cc',
|
||||
'comfort_noise.h',
|
||||
'cross_correlation.cc',
|
||||
'cross_correlation.h',
|
||||
'decision_logic.cc',
|
||||
'decision_logic.h',
|
||||
'decision_logic_fax.cc',
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "webrtc/base/safe_conversions.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/background_noise.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/cross_correlation.h"
|
||||
#include "webrtc/modules/audio_coding/neteq/dsp_helper.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -158,20 +159,15 @@ TimeStretch::ReturnCodes TimeStretch::Process(const int16_t* input,
|
||||
}
|
||||
|
||||
void TimeStretch::AutoCorrelation() {
|
||||
// Set scaling factor for cross correlation to protect against overflow.
|
||||
int scaling = kLogCorrelationLen - WebRtcSpl_NormW32(
|
||||
max_input_value_ * max_input_value_);
|
||||
scaling = std::max(0, scaling);
|
||||
|
||||
// Calculate correlation from lag kMinLag to lag kMaxLag in 4 kHz domain.
|
||||
int32_t auto_corr[kCorrelationLen];
|
||||
WebRtcSpl_CrossCorrelation(auto_corr, &downsampled_input_[kMaxLag],
|
||||
&downsampled_input_[kMaxLag - kMinLag],
|
||||
kCorrelationLen, kMaxLag - kMinLag, scaling, -1);
|
||||
CrossCorrelationWithAutoShift(
|
||||
&downsampled_input_[kMaxLag], &downsampled_input_[kMaxLag - kMinLag],
|
||||
kCorrelationLen, kMaxLag - kMinLag, -1, auto_corr);
|
||||
|
||||
// Normalize correlation to 14 bits and write to |auto_correlation_|.
|
||||
int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen);
|
||||
scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr));
|
||||
int scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr));
|
||||
WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen,
|
||||
auto_corr, scaling);
|
||||
}
|
||||
|
Reference in New Issue
Block a user