First part of the preparatory work before the actual work for solving the ducking problem starts.

This works aims to:
-More clearly separate the functionalities in the AEC.
-Make the inputs and outputs to functions more clear (currently the state struct is often passed as a parameter to the functions and the functions use members of the state both as inputs and outputs, which reduces the readability of the code and makes it difficult to change/refactor.

What is done in this CL:
-Most of what belongs to the echo subtraction functionality has been moved to a separate function.
-The NonLinearProcessing function has been renamed to EchoSuppressor which I think is more appropriate.
-Part of the code was replaced by a call to the TimeToFrequency function (which was also suggested by an existing todo).
-For consistency, a function FrequencyToTime doing the opposite of TimeToFrequency was added and part of the code was moved to that.
-The ScaleErrorSignal function was changed to no longer have the state as an input parameter. This entailed also changing the corresponding assembly optimized files accordingly.

Testing:
-The changes have been tested for bitexactness on Linux using a fairly extensive test.
-All the unittests pass on linux.

BUG=webrtc:5201

Review URL: https://codereview.webrtc.org/1455163006

Cr-Commit-Position: refs/heads/master@{#10764}
This commit is contained in:
peah
2015-11-23 23:05:44 -08:00
committed by Commit bot
parent 70bed7d415
commit d860523112
5 changed files with 131 additions and 99 deletions

View File

@ -175,16 +175,20 @@ static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
}
}
static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled
static void ScaleErrorSignal(int extended_filter_enabled,
float normal_mu,
float normal_error_threshold,
float *x_pow,
float ef[2][PART_LEN1]) {
const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
const float error_threshold = extended_filter_enabled
? kExtendedErrorThreshold
: aec->normal_error_threshold;
: normal_error_threshold;
int i;
float abs_ef;
for (i = 0; i < (PART_LEN1); i++) {
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
ef[0][i] /= (x_pow[i] + 1e-10f);
ef[1][i] /= (x_pow[i] + 1e-10f);
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
if (abs_ef > error_threshold) {
@ -837,6 +841,19 @@ static void UpdateDelayMetrics(AecCore* self) {
return;
}
static void FrequencyToTime(float freq_data[2][PART_LEN1],
float time_data[PART_LEN2]) {
int i;
time_data[0] = freq_data[0][0];
time_data[1] = freq_data[0][PART_LEN];
for (i = 1; i < PART_LEN; i++) {
time_data[2 * i] = freq_data[0][i];
time_data[2 * i + 1] = freq_data[1][i];
}
aec_rdft_inverse_128(time_data);
}
static void TimeToFrequency(float time_data[PART_LEN2],
float freq_data[2][PART_LEN1],
int window) {
@ -942,9 +959,63 @@ static int SignalBasedDelayCorrection(AecCore* self) {
return delay_correction;
}
static void NonLinearProcessing(AecCore* aec,
float* output,
float* const* outputH) {
static void EchoSubtraction(AecCore* aec,
float* nearend_ptr) {
float yf[2][PART_LEN1];
float fft[PART_LEN2];
float y[PART_LEN];
float e[PART_LEN];
float ef[2][PART_LEN1];
float scale;
int i;
memset(yf, 0, sizeof(yf));
// Produce frequency domain echo estimate.
WebRtcAec_FilterFar(aec, yf);
// Inverse fft to obtain echo estimate and error.
FrequencyToTime(yf, fft);
// Extract the output signal and compute the time-domain error.
scale = 2.0f / PART_LEN2;
for (i = 0; i < PART_LEN; ++i) {
y[i] = fft[PART_LEN + i] * scale; // fft scaling.
e[i] = nearend_ptr[i] - y[i];
}
// Error fft
memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
memset(fft, 0, sizeof(float) * PART_LEN);
memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
TimeToFrequency(fft, ef, 0);
RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
&ef[0][0],
sizeof(ef[0][0]) * PART_LEN1 * 2);
if (aec->metricsMode == 1) {
// Note that the first PART_LEN samples in fft (before transformation) are
// zero. Hence, the scaling by two in UpdateLevel() should not be
// performed. That scaling is taken care of in UpdateMetrics() instead.
UpdateLevel(&aec->linoutlevel, ef);
}
// Scale error signal inversely with far power.
WebRtcAec_ScaleErrorSignal(aec->extended_filter_enabled,
aec->normal_mu,
aec->normal_error_threshold,
aec->xPow,
ef);
WebRtcAec_FilterAdaptation(aec, fft, ef);
RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
}
static void EchoSuppression(AecCore* aec,
float* output,
float* const* outputH) {
float efw[2][PART_LEN1], xfw[2][PART_LEN1];
complex_t comfortNoiseHband[PART_LEN1];
float fft[PART_LEN2];
@ -1177,11 +1248,9 @@ static void NonLinearProcessing(AecCore* aec,
static void ProcessBlock(AecCore* aec) {
size_t i;
float y[PART_LEN], e[PART_LEN];
float scale;
float fft[PART_LEN2];
float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
float xf[2][PART_LEN1];
float df[2][PART_LEN1];
float far_spectrum = 0.0f;
float near_spectrum = 0.0f;
@ -1201,12 +1270,12 @@ static void ProcessBlock(AecCore* aec) {
float output[PART_LEN];
float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
float* outputH_ptr[NUM_HIGH_BANDS_MAX];
float* xf_ptr = NULL;
for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
outputH_ptr[i] = outputH[i];
}
float* xf_ptr = NULL;
// Concatenate old and new nearend blocks.
for (i = 0; i < aec->num_bands - 1; ++i) {
WebRtc_ReadBuffer(aec->nearFrBufH[i],
@ -1314,60 +1383,11 @@ static void ProcessBlock(AecCore* aec) {
&xf_ptr[PART_LEN1],
sizeof(float) * PART_LEN1);
memset(yf, 0, sizeof(yf));
// Perform echo subtraction.
EchoSubtraction(aec, nearend_ptr);
// Filter far
WebRtcAec_FilterFar(aec, yf);
// Inverse fft to obtain echo estimate and error.
fft[0] = yf[0][0];
fft[1] = yf[0][PART_LEN];
for (i = 1; i < PART_LEN; i++) {
fft[2 * i] = yf[0][i];
fft[2 * i + 1] = yf[1][i];
}
aec_rdft_inverse_128(fft);
scale = 2.0f / PART_LEN2;
for (i = 0; i < PART_LEN; i++) {
y[i] = fft[PART_LEN + i] * scale; // fft scaling
}
for (i = 0; i < PART_LEN; i++) {
e[i] = nearend_ptr[i] - y[i];
}
// Error fft
memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
memset(fft, 0, sizeof(float) * PART_LEN);
memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
// TODO(bjornv): Change to use TimeToFrequency().
aec_rdft_forward_128(fft);
ef[1][0] = 0;
ef[1][PART_LEN] = 0;
ef[0][0] = fft[0];
ef[0][PART_LEN] = fft[1];
for (i = 1; i < PART_LEN; i++) {
ef[0][i] = fft[2 * i];
ef[1][i] = fft[2 * i + 1];
}
RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
&ef[0][0],
sizeof(ef[0][0]) * PART_LEN1 * 2);
if (aec->metricsMode == 1) {
// Note that the first PART_LEN samples in fft (before transformation) are
// zero. Hence, the scaling by two in UpdateLevel() should not be
// performed. That scaling is taken care of in UpdateMetrics() instead.
UpdateLevel(&aec->linoutlevel, ef);
}
// Scale error signal inversely with far power.
WebRtcAec_ScaleErrorSignal(aec, ef);
WebRtcAec_FilterAdaptation(aec, fft, ef);
NonLinearProcessing(aec, output, outputH_ptr);
// Perform echo suppression.
EchoSuppression(aec, output, outputH_ptr);
if (aec->metricsMode == 1) {
// Update power levels and echo metrics
@ -1383,7 +1403,6 @@ static void ProcessBlock(AecCore* aec) {
WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);
}
RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN);
}

View File

@ -172,7 +172,11 @@ struct AecCore {
typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
extern WebRtcAecFilterFar WebRtcAec_FilterFar;
typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
typedef void (*WebRtcAecScaleErrorSignal)(int extended_filter_enabled,
float normal_mu,
float normal_error_threshold,
float* xPow,
float ef[2][PART_LEN1]);
extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
float* fft,

View File

@ -699,15 +699,18 @@ void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
}
}
void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled
void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled,
float normal_mu,
float normal_error_threshold,
float *x_pow,
float ef[2][PART_LEN1]) {
const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
const float error_threshold = extended_filter_enabled
? kExtendedErrorThreshold
: aec->normal_error_threshold;
: normal_error_threshold;
int len = (PART_LEN1);
float* ef0 = ef[0];
float* ef1 = ef[1];
float* xPow = aec->xPow;
float fac1 = 1e-10f;
float err_th2 = error_threshold * error_threshold;
float f0, f1, f2;
@ -719,7 +722,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[xPow]) \n\t"
"lwc1 %[f0], 0(%[x_pow]) \n\t"
"lwc1 %[f1], 0(%[ef0]) \n\t"
"lwc1 %[f2], 0(%[ef1]) \n\t"
"add.s %[f0], %[f0], %[fac1] \n\t"
@ -747,7 +750,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
"swc1 %[f1], 0(%[ef0]) \n\t"
"swc1 %[f2], 0(%[ef1]) \n\t"
"addiu %[len], %[len], -1 \n\t"
"addiu %[xPow], %[xPow], 4 \n\t"
"addiu %[x_pow], %[x_pow], 4 \n\t"
"addiu %[ef0], %[ef0], 4 \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[ef1], %[ef1], 4 \n\t"
@ -756,7 +759,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
#if !defined(MIPS32_R2_LE)
[f3] "=&f" (f3),
#endif
[xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
[x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
[len] "+r" (len)
: [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
[err_th] "f" (error_threshold)
@ -771,4 +774,3 @@ void WebRtcAec_InitAec_mips(void) {
WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
}

View File

@ -122,20 +122,24 @@ static float32x4_t vsqrtq_f32(float32x4_t s) {
}
#endif // WEBRTC_ARCH_ARM64
static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled ?
kExtendedErrorThreshold : aec->normal_error_threshold;
static void ScaleErrorSignalNEON(int extended_filter_enabled,
float normal_mu,
float normal_error_threshold,
float *x_pow,
float ef[2][PART_LEN1]) {
const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
const float error_threshold = extended_filter_enabled ?
kExtendedErrorThreshold : normal_error_threshold;
const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
const float32x4_t kMu = vmovq_n_f32(mu);
const float32x4_t kThresh = vmovq_n_f32(error_threshold);
int i;
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]);
const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f);
float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
@ -162,8 +166,8 @@ static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
float abs_ef;
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
ef[0][i] /= (x_pow[i] + 1e-10f);
ef[1][i] /= (x_pow[i] + 1e-10f);
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
if (abs_ef > error_threshold) {
@ -733,4 +737,3 @@ void WebRtcAec_InitAec_neon(void) {
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
}

View File

@ -74,22 +74,26 @@ static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
}
}
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
static void ScaleErrorSignalSSE2(int extended_filter_enabled,
float normal_mu,
float normal_error_threshold,
float *x_pow,
float ef[2][PART_LEN1]) {
const __m128 k1e_10f = _mm_set1_ps(1e-10f);
const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
: _mm_set1_ps(aec->normal_mu);
const __m128 kThresh = aec->extended_filter_enabled
const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
: _mm_set1_ps(normal_mu);
const __m128 kThresh = extended_filter_enabled
? _mm_set1_ps(kExtendedErrorThreshold)
: _mm_set1_ps(aec->normal_error_threshold);
: _mm_set1_ps(normal_error_threshold);
int i;
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]);
const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]);
const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);
const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f);
const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f);
__m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);
__m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);
const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
@ -116,14 +120,14 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
// scalar code for the remaining items.
{
const float mu =
aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled
extended_filter_enabled ? kExtendedMu : normal_mu;
const float error_threshold = extended_filter_enabled
? kExtendedErrorThreshold
: aec->normal_error_threshold;
: normal_error_threshold;
for (; i < (PART_LEN1); i++) {
float abs_ef;
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
ef[0][i] /= (x_pow[i] + 1e-10f);
ef[1][i] /= (x_pow[i] + 1e-10f);
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
if (abs_ef > error_threshold) {