First part of the preparatory work before the actual work for solving the ducking problem starts.
This works aims to: -More clearly separate the functionalities in the AEC. -Make the inputs and outputs to functions more clear (currently the state struct is often passed as a parameter to the functions and the functions use members of the state both as inputs and outputs, which reduces the readability of the code and makes it difficult to change/refactor. What is done in this CL: -Most of what belongs to the echo subtraction functionality has been moved to a separate function. -The NonLinearProcessing function has been renamed to EchoSuppressor which I think is more appropriate. -Part of the code was replaced by a call to the TimeToFrequency function (which was also suggested by an existing todo). -For consistency, a function FrequencyToTime doing the opposite of TimeToFrequency was added and part of the code was moved to that. -The ScaleErrorSignal function was changed to no longer have the state as an input parameter. This entailed also changing the corresponding assembly optimized files accordingly. Testing: -The changes have been tested for bitexactness on Linux using a fairly extensive test. -All the unittests pass on linux. BUG=webrtc:5201 Review URL: https://codereview.webrtc.org/1455163006 Cr-Commit-Position: refs/heads/master@{#10764}
This commit is contained in:
@ -175,16 +175,20 @@ static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
|
||||
const float error_threshold = aec->extended_filter_enabled
|
||||
static void ScaleErrorSignal(int extended_filter_enabled,
|
||||
float normal_mu,
|
||||
float normal_error_threshold,
|
||||
float *x_pow,
|
||||
float ef[2][PART_LEN1]) {
|
||||
const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
|
||||
const float error_threshold = extended_filter_enabled
|
||||
? kExtendedErrorThreshold
|
||||
: aec->normal_error_threshold;
|
||||
: normal_error_threshold;
|
||||
int i;
|
||||
float abs_ef;
|
||||
for (i = 0; i < (PART_LEN1); i++) {
|
||||
ef[0][i] /= (aec->xPow[i] + 1e-10f);
|
||||
ef[1][i] /= (aec->xPow[i] + 1e-10f);
|
||||
ef[0][i] /= (x_pow[i] + 1e-10f);
|
||||
ef[1][i] /= (x_pow[i] + 1e-10f);
|
||||
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
|
||||
|
||||
if (abs_ef > error_threshold) {
|
||||
@ -837,6 +841,19 @@ static void UpdateDelayMetrics(AecCore* self) {
|
||||
return;
|
||||
}
|
||||
|
||||
static void FrequencyToTime(float freq_data[2][PART_LEN1],
|
||||
float time_data[PART_LEN2]) {
|
||||
int i;
|
||||
time_data[0] = freq_data[0][0];
|
||||
time_data[1] = freq_data[0][PART_LEN];
|
||||
for (i = 1; i < PART_LEN; i++) {
|
||||
time_data[2 * i] = freq_data[0][i];
|
||||
time_data[2 * i + 1] = freq_data[1][i];
|
||||
}
|
||||
aec_rdft_inverse_128(time_data);
|
||||
}
|
||||
|
||||
|
||||
static void TimeToFrequency(float time_data[PART_LEN2],
|
||||
float freq_data[2][PART_LEN1],
|
||||
int window) {
|
||||
@ -942,9 +959,63 @@ static int SignalBasedDelayCorrection(AecCore* self) {
|
||||
return delay_correction;
|
||||
}
|
||||
|
||||
static void NonLinearProcessing(AecCore* aec,
|
||||
float* output,
|
||||
float* const* outputH) {
|
||||
static void EchoSubtraction(AecCore* aec,
|
||||
float* nearend_ptr) {
|
||||
float yf[2][PART_LEN1];
|
||||
float fft[PART_LEN2];
|
||||
float y[PART_LEN];
|
||||
float e[PART_LEN];
|
||||
float ef[2][PART_LEN1];
|
||||
float scale;
|
||||
int i;
|
||||
memset(yf, 0, sizeof(yf));
|
||||
|
||||
// Produce frequency domain echo estimate.
|
||||
WebRtcAec_FilterFar(aec, yf);
|
||||
|
||||
// Inverse fft to obtain echo estimate and error.
|
||||
FrequencyToTime(yf, fft);
|
||||
|
||||
// Extract the output signal and compute the time-domain error.
|
||||
scale = 2.0f / PART_LEN2;
|
||||
for (i = 0; i < PART_LEN; ++i) {
|
||||
y[i] = fft[PART_LEN + i] * scale; // fft scaling.
|
||||
e[i] = nearend_ptr[i] - y[i];
|
||||
}
|
||||
|
||||
// Error fft
|
||||
memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
|
||||
memset(fft, 0, sizeof(float) * PART_LEN);
|
||||
memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
|
||||
TimeToFrequency(fft, ef, 0);
|
||||
|
||||
RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
|
||||
&ef[0][0],
|
||||
sizeof(ef[0][0]) * PART_LEN1 * 2);
|
||||
|
||||
if (aec->metricsMode == 1) {
|
||||
// Note that the first PART_LEN samples in fft (before transformation) are
|
||||
// zero. Hence, the scaling by two in UpdateLevel() should not be
|
||||
// performed. That scaling is taken care of in UpdateMetrics() instead.
|
||||
UpdateLevel(&aec->linoutlevel, ef);
|
||||
}
|
||||
|
||||
// Scale error signal inversely with far power.
|
||||
WebRtcAec_ScaleErrorSignal(aec->extended_filter_enabled,
|
||||
aec->normal_mu,
|
||||
aec->normal_error_threshold,
|
||||
aec->xPow,
|
||||
ef);
|
||||
WebRtcAec_FilterAdaptation(aec, fft, ef);
|
||||
|
||||
|
||||
RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
|
||||
}
|
||||
|
||||
|
||||
static void EchoSuppression(AecCore* aec,
|
||||
float* output,
|
||||
float* const* outputH) {
|
||||
float efw[2][PART_LEN1], xfw[2][PART_LEN1];
|
||||
complex_t comfortNoiseHband[PART_LEN1];
|
||||
float fft[PART_LEN2];
|
||||
@ -1177,11 +1248,9 @@ static void NonLinearProcessing(AecCore* aec,
|
||||
|
||||
static void ProcessBlock(AecCore* aec) {
|
||||
size_t i;
|
||||
float y[PART_LEN], e[PART_LEN];
|
||||
float scale;
|
||||
|
||||
float fft[PART_LEN2];
|
||||
float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
|
||||
float xf[2][PART_LEN1];
|
||||
float df[2][PART_LEN1];
|
||||
float far_spectrum = 0.0f;
|
||||
float near_spectrum = 0.0f;
|
||||
@ -1201,12 +1270,12 @@ static void ProcessBlock(AecCore* aec) {
|
||||
float output[PART_LEN];
|
||||
float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
|
||||
float* outputH_ptr[NUM_HIGH_BANDS_MAX];
|
||||
float* xf_ptr = NULL;
|
||||
|
||||
for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
|
||||
outputH_ptr[i] = outputH[i];
|
||||
}
|
||||
|
||||
float* xf_ptr = NULL;
|
||||
|
||||
// Concatenate old and new nearend blocks.
|
||||
for (i = 0; i < aec->num_bands - 1; ++i) {
|
||||
WebRtc_ReadBuffer(aec->nearFrBufH[i],
|
||||
@ -1314,60 +1383,11 @@ static void ProcessBlock(AecCore* aec) {
|
||||
&xf_ptr[PART_LEN1],
|
||||
sizeof(float) * PART_LEN1);
|
||||
|
||||
memset(yf, 0, sizeof(yf));
|
||||
// Perform echo subtraction.
|
||||
EchoSubtraction(aec, nearend_ptr);
|
||||
|
||||
// Filter far
|
||||
WebRtcAec_FilterFar(aec, yf);
|
||||
|
||||
// Inverse fft to obtain echo estimate and error.
|
||||
fft[0] = yf[0][0];
|
||||
fft[1] = yf[0][PART_LEN];
|
||||
for (i = 1; i < PART_LEN; i++) {
|
||||
fft[2 * i] = yf[0][i];
|
||||
fft[2 * i + 1] = yf[1][i];
|
||||
}
|
||||
aec_rdft_inverse_128(fft);
|
||||
|
||||
scale = 2.0f / PART_LEN2;
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
y[i] = fft[PART_LEN + i] * scale; // fft scaling
|
||||
}
|
||||
|
||||
for (i = 0; i < PART_LEN; i++) {
|
||||
e[i] = nearend_ptr[i] - y[i];
|
||||
}
|
||||
|
||||
// Error fft
|
||||
memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
|
||||
memset(fft, 0, sizeof(float) * PART_LEN);
|
||||
memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
|
||||
// TODO(bjornv): Change to use TimeToFrequency().
|
||||
aec_rdft_forward_128(fft);
|
||||
|
||||
ef[1][0] = 0;
|
||||
ef[1][PART_LEN] = 0;
|
||||
ef[0][0] = fft[0];
|
||||
ef[0][PART_LEN] = fft[1];
|
||||
for (i = 1; i < PART_LEN; i++) {
|
||||
ef[0][i] = fft[2 * i];
|
||||
ef[1][i] = fft[2 * i + 1];
|
||||
}
|
||||
|
||||
RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
|
||||
&ef[0][0],
|
||||
sizeof(ef[0][0]) * PART_LEN1 * 2);
|
||||
|
||||
if (aec->metricsMode == 1) {
|
||||
// Note that the first PART_LEN samples in fft (before transformation) are
|
||||
// zero. Hence, the scaling by two in UpdateLevel() should not be
|
||||
// performed. That scaling is taken care of in UpdateMetrics() instead.
|
||||
UpdateLevel(&aec->linoutlevel, ef);
|
||||
}
|
||||
|
||||
// Scale error signal inversely with far power.
|
||||
WebRtcAec_ScaleErrorSignal(aec, ef);
|
||||
WebRtcAec_FilterAdaptation(aec, fft, ef);
|
||||
NonLinearProcessing(aec, output, outputH_ptr);
|
||||
// Perform echo suppression.
|
||||
EchoSuppression(aec, output, outputH_ptr);
|
||||
|
||||
if (aec->metricsMode == 1) {
|
||||
// Update power levels and echo metrics
|
||||
@ -1383,7 +1403,6 @@ static void ProcessBlock(AecCore* aec) {
|
||||
WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);
|
||||
}
|
||||
|
||||
RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
|
||||
RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN);
|
||||
}
|
||||
|
||||
|
||||
@ -172,7 +172,11 @@ struct AecCore {
|
||||
|
||||
typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
|
||||
extern WebRtcAecFilterFar WebRtcAec_FilterFar;
|
||||
typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
|
||||
typedef void (*WebRtcAecScaleErrorSignal)(int extended_filter_enabled,
|
||||
float normal_mu,
|
||||
float normal_error_threshold,
|
||||
float* xPow,
|
||||
float ef[2][PART_LEN1]);
|
||||
extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
|
||||
typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
|
||||
float* fft,
|
||||
|
||||
@ -699,15 +699,18 @@ void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
|
||||
}
|
||||
}
|
||||
|
||||
void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
|
||||
const float error_threshold = aec->extended_filter_enabled
|
||||
void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled,
|
||||
float normal_mu,
|
||||
float normal_error_threshold,
|
||||
float *x_pow,
|
||||
float ef[2][PART_LEN1]) {
|
||||
const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
|
||||
const float error_threshold = extended_filter_enabled
|
||||
? kExtendedErrorThreshold
|
||||
: aec->normal_error_threshold;
|
||||
: normal_error_threshold;
|
||||
int len = (PART_LEN1);
|
||||
float* ef0 = ef[0];
|
||||
float* ef1 = ef[1];
|
||||
float* xPow = aec->xPow;
|
||||
float fac1 = 1e-10f;
|
||||
float err_th2 = error_threshold * error_threshold;
|
||||
float f0, f1, f2;
|
||||
@ -719,7 +722,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
".set push \n\t"
|
||||
".set noreorder \n\t"
|
||||
"1: \n\t"
|
||||
"lwc1 %[f0], 0(%[xPow]) \n\t"
|
||||
"lwc1 %[f0], 0(%[x_pow]) \n\t"
|
||||
"lwc1 %[f1], 0(%[ef0]) \n\t"
|
||||
"lwc1 %[f2], 0(%[ef1]) \n\t"
|
||||
"add.s %[f0], %[f0], %[fac1] \n\t"
|
||||
@ -747,7 +750,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
"swc1 %[f1], 0(%[ef0]) \n\t"
|
||||
"swc1 %[f2], 0(%[ef1]) \n\t"
|
||||
"addiu %[len], %[len], -1 \n\t"
|
||||
"addiu %[xPow], %[xPow], 4 \n\t"
|
||||
"addiu %[x_pow], %[x_pow], 4 \n\t"
|
||||
"addiu %[ef0], %[ef0], 4 \n\t"
|
||||
"bgtz %[len], 1b \n\t"
|
||||
" addiu %[ef1], %[ef1], 4 \n\t"
|
||||
@ -756,7 +759,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
#if !defined(MIPS32_R2_LE)
|
||||
[f3] "=&f" (f3),
|
||||
#endif
|
||||
[xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
|
||||
[x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
|
||||
[len] "+r" (len)
|
||||
: [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
|
||||
[err_th] "f" (error_threshold)
|
||||
@ -771,4 +774,3 @@ void WebRtcAec_InitAec_mips(void) {
|
||||
WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
|
||||
WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
|
||||
}
|
||||
|
||||
|
||||
@ -122,20 +122,24 @@ static float32x4_t vsqrtq_f32(float32x4_t s) {
|
||||
}
|
||||
#endif // WEBRTC_ARCH_ARM64
|
||||
|
||||
static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
|
||||
const float error_threshold = aec->extended_filter_enabled ?
|
||||
kExtendedErrorThreshold : aec->normal_error_threshold;
|
||||
static void ScaleErrorSignalNEON(int extended_filter_enabled,
|
||||
float normal_mu,
|
||||
float normal_error_threshold,
|
||||
float *x_pow,
|
||||
float ef[2][PART_LEN1]) {
|
||||
const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
|
||||
const float error_threshold = extended_filter_enabled ?
|
||||
kExtendedErrorThreshold : normal_error_threshold;
|
||||
const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
|
||||
const float32x4_t kMu = vmovq_n_f32(mu);
|
||||
const float32x4_t kThresh = vmovq_n_f32(error_threshold);
|
||||
int i;
|
||||
// vectorized code (four at once)
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
|
||||
const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]);
|
||||
const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
|
||||
const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
|
||||
const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
|
||||
const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f);
|
||||
float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
|
||||
float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
|
||||
const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
|
||||
@ -162,8 +166,8 @@ static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
// scalar code for the remaining items.
|
||||
for (; i < PART_LEN1; i++) {
|
||||
float abs_ef;
|
||||
ef[0][i] /= (aec->xPow[i] + 1e-10f);
|
||||
ef[1][i] /= (aec->xPow[i] + 1e-10f);
|
||||
ef[0][i] /= (x_pow[i] + 1e-10f);
|
||||
ef[1][i] /= (x_pow[i] + 1e-10f);
|
||||
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
|
||||
|
||||
if (abs_ef > error_threshold) {
|
||||
@ -733,4 +737,3 @@ void WebRtcAec_InitAec_neon(void) {
|
||||
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
|
||||
WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
|
||||
}
|
||||
|
||||
|
||||
@ -74,22 +74,26 @@ static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
|
||||
}
|
||||
}
|
||||
|
||||
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
static void ScaleErrorSignalSSE2(int extended_filter_enabled,
|
||||
float normal_mu,
|
||||
float normal_error_threshold,
|
||||
float *x_pow,
|
||||
float ef[2][PART_LEN1]) {
|
||||
const __m128 k1e_10f = _mm_set1_ps(1e-10f);
|
||||
const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
|
||||
: _mm_set1_ps(aec->normal_mu);
|
||||
const __m128 kThresh = aec->extended_filter_enabled
|
||||
const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
|
||||
: _mm_set1_ps(normal_mu);
|
||||
const __m128 kThresh = extended_filter_enabled
|
||||
? _mm_set1_ps(kExtendedErrorThreshold)
|
||||
: _mm_set1_ps(aec->normal_error_threshold);
|
||||
: _mm_set1_ps(normal_error_threshold);
|
||||
|
||||
int i;
|
||||
// vectorized code (four at once)
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]);
|
||||
const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]);
|
||||
const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
|
||||
const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);
|
||||
|
||||
const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f);
|
||||
const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f);
|
||||
__m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);
|
||||
__m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);
|
||||
const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
|
||||
@ -116,14 +120,14 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
|
||||
// scalar code for the remaining items.
|
||||
{
|
||||
const float mu =
|
||||
aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
|
||||
const float error_threshold = aec->extended_filter_enabled
|
||||
extended_filter_enabled ? kExtendedMu : normal_mu;
|
||||
const float error_threshold = extended_filter_enabled
|
||||
? kExtendedErrorThreshold
|
||||
: aec->normal_error_threshold;
|
||||
: normal_error_threshold;
|
||||
for (; i < (PART_LEN1); i++) {
|
||||
float abs_ef;
|
||||
ef[0][i] /= (aec->xPow[i] + 1e-10f);
|
||||
ef[1][i] /= (aec->xPow[i] + 1e-10f);
|
||||
ef[0][i] /= (x_pow[i] + 1e-10f);
|
||||
ef[1][i] /= (x_pow[i] + 1e-10f);
|
||||
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
|
||||
|
||||
if (abs_ef > error_threshold) {
|
||||
|
||||
Reference in New Issue
Block a user