Moved code into the lowest level of EchoSuppression
to simplify future refactoring and development. In more detail: 1) Moved the updating of eBuf from the EchoSubtraction method to the EchoSuppression method, as it is only used in the latter. 2) Moved the computation of efw and dfw from the SubbandCoherence method to the EchoSuppression method, as those are actually the analysis filterbank computations, which are not directly related to the coherence. 3) As a consequence of 2), three functions (WindowData, StoreAsComplex and PartitionDelay) needed to be replaced by the generic function pointer scheme used in WebRTCAec, as they have optimized versions for SSE2 and NEON (which before were local to each of the aec_core*.c files). Motivation: Apart from making sense from a logical point of view, the changes will a) Allow eBuf to be stored at half the size in the state. b) Allow simpler switching between using the microphone signal and the echo subtractor output in the echo suppressor. c) Allow further refactoring that moves all the changes to eBuf into one method (currently those are happening in at least 4 different methods). Drawbacks: i) dfw is moved to EchoSuppression, which increases the stack usage for that method. This will, however, be improved once further refactoring can be done. The changes have been tested for bitexactness on Linux using a quite extensive dataset. BUG=webrtc:5201 Review URL: https://codereview.webrtc.org/1494563002 Cr-Commit-Position: refs/heads/master@{#10954}
This commit is contained in:
@ -135,6 +135,9 @@ WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
|
||||
WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
|
||||
WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
|
||||
WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
|
||||
WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
|
||||
WebRtcAecPartitionDelay WebRtcAec_PartitionDelay;
|
||||
WebRtcAecWindowData WebRtcAec_WindowData;
|
||||
|
||||
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
|
||||
return aRe * bRe - aIm * bIm;
|
||||
@ -407,31 +410,13 @@ __inline static void StoreAsComplex(const float* data,
|
||||
|
||||
static void SubbandCoherence(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd) {
|
||||
float dfw[2][PART_LEN1];
|
||||
int i;
|
||||
|
||||
if (aec->delayEstCtr == 0)
|
||||
aec->delayIdx = PartitionDelay(aec);
|
||||
|
||||
// Use delayed far.
|
||||
memcpy(xfw,
|
||||
aec->xfwBuf + aec->delayIdx * PART_LEN1,
|
||||
sizeof(xfw[0][0]) * 2 * PART_LEN1);
|
||||
|
||||
// Windowed near fft
|
||||
WindowData(fft, aec->dBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, dfw);
|
||||
|
||||
// Windowed error fft
|
||||
WindowData(fft, aec->eBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, efw);
|
||||
|
||||
SmoothedPSD(aec, efw, dfw, xfw);
|
||||
|
||||
// Subband coherence
|
||||
@ -1011,9 +996,12 @@ static void EchoSubtraction(
|
||||
|
||||
|
||||
static void EchoSuppression(AecCore* aec,
|
||||
float* echo_subtractor_output,
|
||||
float* output,
|
||||
float* const* outputH) {
|
||||
float efw[2][PART_LEN1], xfw[2][PART_LEN1];
|
||||
float efw[2][PART_LEN1];
|
||||
float xfw[2][PART_LEN1];
|
||||
float dfw[2][PART_LEN1];
|
||||
complex_t comfortNoiseHband[PART_LEN1];
|
||||
float fft[PART_LEN2];
|
||||
float scale, dtmp;
|
||||
@ -1040,6 +1028,22 @@ static void EchoSuppression(AecCore* aec,
|
||||
|
||||
float* xfw_ptr = NULL;
|
||||
|
||||
// Update eBuf with echo subtractor output.
|
||||
memcpy(aec->eBuf + PART_LEN,
|
||||
echo_subtractor_output,
|
||||
sizeof(float) * PART_LEN);
|
||||
|
||||
// Analysis filter banks for the echo suppressor.
|
||||
// Windowed near-end ffts.
|
||||
WindowData(fft, aec->dBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, dfw);
|
||||
|
||||
// Windowed echo suppressor output ffts.
|
||||
WindowData(fft, aec->eBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, efw);
|
||||
|
||||
aec->delayEstCtr++;
|
||||
if (aec->delayEstCtr == delayEstInterval) {
|
||||
aec->delayEstCtr = 0;
|
||||
@ -1060,7 +1064,15 @@ static void EchoSuppression(AecCore* aec,
|
||||
// Buffer far.
|
||||
memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
|
||||
|
||||
WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd);
|
||||
if (aec->delayEstCtr == 0)
|
||||
aec->delayIdx = WebRtcAec_PartitionDelay(aec);
|
||||
|
||||
// Use delayed far.
|
||||
memcpy(xfw,
|
||||
aec->xfwBuf + aec->delayIdx * PART_LEN1,
|
||||
sizeof(xfw[0][0]) * 2 * PART_LEN1);
|
||||
|
||||
WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd);
|
||||
|
||||
hNlXdAvg = 0;
|
||||
for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
|
||||
@ -1399,10 +1411,7 @@ static void ProcessBlock(AecCore* aec) {
|
||||
RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN);
|
||||
|
||||
// Perform echo suppression.
|
||||
memcpy(aec->eBuf + PART_LEN,
|
||||
echo_subtractor_output,
|
||||
sizeof(float) * PART_LEN);
|
||||
EchoSuppression(aec, output, outputH_ptr);
|
||||
EchoSuppression(aec, echo_subtractor_output, output, outputH_ptr);
|
||||
|
||||
if (aec->metricsMode == 1) {
|
||||
// Update power levels and echo metrics
|
||||
@ -1511,6 +1520,10 @@ AecCore* WebRtcAec_CreateAec() {
|
||||
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
|
||||
WebRtcAec_ComfortNoise = ComfortNoise;
|
||||
WebRtcAec_SubbandCoherence = SubbandCoherence;
|
||||
WebRtcAec_StoreAsComplex = StoreAsComplex;
|
||||
WebRtcAec_PartitionDelay = PartitionDelay;
|
||||
WebRtcAec_WindowData = WindowData;
|
||||
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
if (WebRtc_GetCPUInfo(kSSE2)) {
|
||||
|
@ -205,10 +205,21 @@ extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
|
||||
|
||||
typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd);
|
||||
extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
|
||||
|
||||
typedef int (*WebRtcAecPartitionDelay)(const AecCore* aec);
|
||||
extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay;
|
||||
|
||||
typedef void (*WebRtcAecStoreAsComplex)(const float* data,
|
||||
float data_complex[2][PART_LEN1]);
|
||||
extern WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
|
||||
|
||||
typedef void (*WebRtcAecWindowData)(float* x_windowed, const float* x);
|
||||
extern WebRtcAecWindowData WebRtcAec_WindowData;
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
|
||||
|
@ -453,7 +453,7 @@ static void OverdriveAndSuppressNEON(AecCore* aec,
|
||||
}
|
||||
}
|
||||
|
||||
static int PartitionDelay(const AecCore* aec) {
|
||||
static int PartitionDelayNEON(const AecCore* aec) {
|
||||
// Measures the energy in each filter partition and returns the partition with
|
||||
// highest energy.
|
||||
// TODO(bjornv): Spread computational cost by computing one partition per
|
||||
@ -638,7 +638,7 @@ static void SmoothedPSD(AecCore* aec,
|
||||
}
|
||||
|
||||
// Window time domain data to be used by the fft.
|
||||
__inline static void WindowData(float* x_windowed, const float* x) {
|
||||
static void WindowDataNEON(float* x_windowed, const float* x) {
|
||||
int i;
|
||||
for (i = 0; i < PART_LEN; i += 4) {
|
||||
const float32x4_t vec_Buf1 = vld1q_f32(&x[i]);
|
||||
@ -659,8 +659,8 @@ __inline static void WindowData(float* x_windowed, const float* x) {
|
||||
}
|
||||
|
||||
// Puts fft output data into a complex valued array.
|
||||
__inline static void StoreAsComplex(const float* data,
|
||||
float data_complex[2][PART_LEN1]) {
|
||||
static void StoreAsComplexNEON(const float* data,
|
||||
float data_complex[2][PART_LEN1]) {
|
||||
int i;
|
||||
for (i = 0; i < PART_LEN; i += 4) {
|
||||
const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]);
|
||||
@ -676,31 +676,13 @@ __inline static void StoreAsComplex(const float* data,
|
||||
|
||||
static void SubbandCoherenceNEON(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd) {
|
||||
float dfw[2][PART_LEN1];
|
||||
int i;
|
||||
|
||||
if (aec->delayEstCtr == 0)
|
||||
aec->delayIdx = PartitionDelay(aec);
|
||||
|
||||
// Use delayed far.
|
||||
memcpy(xfw,
|
||||
aec->xfwBuf + aec->delayIdx * PART_LEN1,
|
||||
sizeof(xfw[0][0]) * 2 * PART_LEN1);
|
||||
|
||||
// Windowed near fft
|
||||
WindowData(fft, aec->dBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, dfw);
|
||||
|
||||
// Windowed error fft
|
||||
WindowData(fft, aec->eBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, efw);
|
||||
|
||||
SmoothedPSD(aec, efw, dfw, xfw);
|
||||
|
||||
{
|
||||
@ -743,4 +725,7 @@ void WebRtcAec_InitAec_neon(void) {
|
||||
WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
|
||||
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
|
||||
WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
|
||||
WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
|
||||
WebRtcAec_PartitionDelay = PartitionDelayNEON;
|
||||
WebRtcAec_WindowData = WindowDataNEON;
|
||||
}
|
||||
|
@ -439,7 +439,8 @@ __inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
|
||||
sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1)));
|
||||
_mm_store_ss(dst, sum);
|
||||
}
|
||||
static int PartitionDelay(const AecCore* aec) {
|
||||
|
||||
static int PartitionDelaySSE2(const AecCore* aec) {
|
||||
// Measures the energy in each filter partition and returns the partition with
|
||||
// highest energy.
|
||||
// TODO(bjornv): Spread computational cost by computing one partition per
|
||||
@ -619,7 +620,7 @@ static void SmoothedPSD(AecCore* aec,
|
||||
}
|
||||
|
||||
// Window time domain data to be used by the fft.
|
||||
__inline static void WindowData(float* x_windowed, const float* x) {
|
||||
static void WindowDataSSE2(float* x_windowed, const float* x) {
|
||||
int i;
|
||||
for (i = 0; i < PART_LEN; i += 4) {
|
||||
const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]);
|
||||
@ -639,8 +640,8 @@ __inline static void WindowData(float* x_windowed, const float* x) {
|
||||
}
|
||||
|
||||
// Puts fft output data into a complex valued array.
|
||||
__inline static void StoreAsComplex(const float* data,
|
||||
float data_complex[2][PART_LEN1]) {
|
||||
static void StoreAsComplexSSE2(const float* data,
|
||||
float data_complex[2][PART_LEN1]) {
|
||||
int i;
|
||||
for (i = 0; i < PART_LEN; i += 4) {
|
||||
const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]);
|
||||
@ -661,31 +662,13 @@ __inline static void StoreAsComplex(const float* data,
|
||||
|
||||
static void SubbandCoherenceSSE2(AecCore* aec,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd) {
|
||||
float dfw[2][PART_LEN1];
|
||||
int i;
|
||||
|
||||
if (aec->delayEstCtr == 0)
|
||||
aec->delayIdx = PartitionDelay(aec);
|
||||
|
||||
// Use delayed far.
|
||||
memcpy(xfw,
|
||||
aec->xfwBuf + aec->delayIdx * PART_LEN1,
|
||||
sizeof(xfw[0][0]) * 2 * PART_LEN1);
|
||||
|
||||
// Windowed near fft
|
||||
WindowData(fft, aec->dBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, dfw);
|
||||
|
||||
// Windowed error fft
|
||||
WindowData(fft, aec->eBuf);
|
||||
aec_rdft_forward_128(fft);
|
||||
StoreAsComplex(fft, efw);
|
||||
|
||||
SmoothedPSD(aec, efw, dfw, xfw);
|
||||
|
||||
{
|
||||
@ -740,4 +723,7 @@ void WebRtcAec_InitAec_SSE2(void) {
|
||||
WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
|
||||
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
|
||||
WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
|
||||
WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;
|
||||
WebRtcAec_PartitionDelay = PartitionDelaySSE2;
|
||||
WebRtcAec_WindowData = WindowDataSSE2;
|
||||
}
|
||||
|
Reference in New Issue
Block a user