Add lookahead to the delay estimator.

TEST=audioproc_unittest

Review URL: http://webrtc-codereview.appspot.com/279014

git-svn-id: http://webrtc.googlecode.com/svn/trunk@992 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
andrew@webrtc.org
2011-11-22 22:40:27 +00:00
parent 5a529395aa
commit 828af1b4b9
9 changed files with 107 additions and 66 deletions

View File

@ -178,7 +178,8 @@ int WebRtcAec_CreateAec(aec_t **aecInst)
if (WebRtc_CreateDelayEstimator(&aec->delay_estimator, if (WebRtc_CreateDelayEstimator(&aec->delay_estimator,
PART_LEN1, PART_LEN1,
kMaxDelay) == -1) { kMaxDelayBlocks,
kLookaheadBlocks) == -1) {
WebRtcAec_FreeAec(aec); WebRtcAec_FreeAec(aec);
aec = NULL; aec = NULL;
return -1; return -1;
@ -700,7 +701,7 @@ static void ProcessBlock(aec_t *aec, const short *farend,
PART_LEN1, PART_LEN1,
aec->echoState); aec->echoState);
if (delay_estimate >= 0) { if (delay_estimate >= 0) {
// Update delay estimate buffer // Update delay estimate buffer.
aec->delay_histogram[delay_estimate]++; aec->delay_histogram[delay_estimate]++;
} }
} }

View File

@ -30,9 +30,10 @@
#define FAR_BUF_LEN (FILT_LEN2 * 2) #define FAR_BUF_LEN (FILT_LEN2 * 2)
#define PREF_BAND_SIZE 24 #define PREF_BAND_SIZE 24
#define BLOCKL_MAX FRAME_LEN // Delay estimator constants, used for logging.
// Maximum delay in fixed point delay estimator, used for logging enum { kMaxDelayBlocks = 60 };
enum {kMaxDelay = 100}; enum { kLookaheadBlocks = 15 };
enum { kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks };
typedef float complex_t[2]; typedef float complex_t[2];
// For performance reasons, some arrays of complex numbers are replaced by twice // For performance reasons, some arrays of complex numbers are replaced by twice
@ -141,7 +142,7 @@ typedef struct {
int flag_Hband_cn; //for comfort noise int flag_Hband_cn; //for comfort noise
float cn_scale_Hband; //scale for comfort noise in H band float cn_scale_Hband; //scale for comfort noise in H band
int delay_histogram[kMaxDelay]; int delay_histogram[kHistorySizeBlocks];
int delay_logging_enabled; int delay_logging_enabled;
void* delay_estimator; void* delay_estimator;

View File

@ -748,7 +748,7 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) {
} }
// Get number of delay values since last update // Get number of delay values since last update
for (i = 0; i < kMaxDelay; i++) { for (i = 0; i < kHistorySizeBlocks; i++) {
num_delay_values += self->aec->delay_histogram[i]; num_delay_values += self->aec->delay_histogram[i];
} }
if (num_delay_values == 0) { if (num_delay_values == 0) {
@ -760,17 +760,18 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) {
delay_values = num_delay_values >> 1; // Start value for median count down delay_values = num_delay_values >> 1; // Start value for median count down
// Get median of delay values since last update // Get median of delay values since last update
for (i = 0; i < kMaxDelay; i++) { for (i = 0; i < kHistorySizeBlocks; i++) {
delay_values -= self->aec->delay_histogram[i]; delay_values -= self->aec->delay_histogram[i];
if (delay_values < 0) { if (delay_values < 0) {
my_median = i; my_median = i;
break; break;
} }
} }
*median = my_median * kMsPerBlock; // Account for lookahead.
*median = (my_median - kLookaheadBlocks) * kMsPerBlock;
// Calculate the L1 norm, with median value as central moment // Calculate the L1 norm, with median value as central moment
for (i = 0; i < kMaxDelay; i++) { for (i = 0; i < kHistorySizeBlocks; i++) {
l1_norm += (float) (fabs(i - my_median) * self->aec->delay_histogram[i]); l1_norm += (float) (fabs(i - my_median) * self->aec->delay_histogram[i]);
} }
*std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock; *std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock;

View File

@ -211,7 +211,8 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator, if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator,
PART_LEN1, PART_LEN1,
MAX_DELAY) == -1) { MAX_DELAY,
0) == -1) {
WebRtcAecm_FreeCore(aecm); WebRtcAecm_FreeCore(aecm);
aecm = NULL; aecm = NULL;
return -1; return -1;

View File

@ -1144,8 +1144,8 @@ TEST_F(ApmTest, Process) {
webrtc::audioproc::Test::DelayMetrics reference_delay = webrtc::audioproc::Test::DelayMetrics reference_delay =
test->delay_metrics(); test->delay_metrics();
EXPECT_EQ(median, reference_delay.median()); EXPECT_EQ(reference_delay.median(), median);
EXPECT_EQ(std, reference_delay.std()); EXPECT_EQ(reference_delay.std(), std);
EXPECT_EQ(test->rms_level(), rms_level); EXPECT_EQ(test->rms_level(), rms_level);
#endif #endif

View File

@ -65,6 +65,10 @@ int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle) {
free(handle->binary_far_history); free(handle->binary_far_history);
handle->binary_far_history = NULL; handle->binary_far_history = NULL;
} }
if (handle->binary_near_history != NULL) {
free(handle->binary_near_history);
handle->binary_near_history = NULL;
}
if (handle->delay_histogram != NULL) { if (handle->delay_histogram != NULL) {
free(handle->delay_histogram); free(handle->delay_histogram);
handle->delay_histogram = NULL; handle->delay_histogram = NULL;
@ -76,13 +80,22 @@ int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle) {
} }
int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle,
int history_size) { int max_delay,
int lookahead) {
BinaryDelayEstimator_t* self = NULL; BinaryDelayEstimator_t* self = NULL;
int history_size = max_delay + lookahead;
if (handle == NULL) { if (handle == NULL) {
return -1; return -1;
} }
if (history_size < 0) { if (max_delay < 0) {
return -1;
}
if (lookahead < 0) {
return -1;
}
if (history_size < 2) {
// Must be this large for buffer shifting.
return -1; return -1;
} }
@ -97,6 +110,9 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle,
self->binary_far_history = NULL; self->binary_far_history = NULL;
self->delay_histogram = NULL; self->delay_histogram = NULL;
self->history_size = history_size;
self->near_history_size = lookahead + 1;
// Allocate memory for spectrum buffers // Allocate memory for spectrum buffers
self->mean_bit_counts = malloc(history_size * sizeof(int32_t)); self->mean_bit_counts = malloc(history_size * sizeof(int32_t));
if (self->mean_bit_counts == NULL) { if (self->mean_bit_counts == NULL) {
@ -117,6 +133,13 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle,
self = NULL; self = NULL;
return -1; return -1;
} }
self->binary_near_history = malloc(self->near_history_size *
sizeof(uint32_t));
if (self->binary_near_history == NULL) {
WebRtc_FreeBinaryDelayEstimator(self);
self = NULL;
return -1;
}
self->delay_histogram = malloc(history_size * sizeof(int)); self->delay_histogram = malloc(history_size * sizeof(int));
if (self->delay_histogram == NULL) { if (self->delay_histogram == NULL) {
WebRtc_FreeBinaryDelayEstimator(self); WebRtc_FreeBinaryDelayEstimator(self);
@ -124,26 +147,24 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle,
return -1; return -1;
} }
self->history_size = history_size;
return 0; return 0;
} }
int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle) { int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle) {
assert(handle != NULL); assert(handle != NULL);
// Set averaged bit counts to zero
memset(handle->mean_bit_counts, 0, sizeof(int32_t) * handle->history_size); memset(handle->mean_bit_counts, 0, sizeof(int32_t) * handle->history_size);
memset(handle->bit_counts, 0, sizeof(int32_t) * handle->history_size); memset(handle->bit_counts, 0, sizeof(int32_t) * handle->history_size);
// Set far end histories to zero memset(handle->binary_far_history, 0,
memset(handle->binary_far_history,
0,
sizeof(uint32_t) * handle->history_size); sizeof(uint32_t) * handle->history_size);
// Set delay histogram to zero memset(handle->binary_near_history, 0,
sizeof(uint32_t) * handle->near_history_size);
memset(handle->delay_histogram, 0, sizeof(int) * handle->history_size); memset(handle->delay_histogram, 0, sizeof(int) * handle->history_size);
// Set VAD counter to zero
handle->vad_counter = 0; handle->vad_counter = 0;
// Set delay memory to zero
handle->last_delay = 0; // Set to zero delay after compensating for lookahead.
handle->last_delay = handle->near_history_size - 1;
return 0; return 0;
} }
@ -169,6 +190,15 @@ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle,
// Insert new binary spectrum // Insert new binary spectrum
handle->binary_far_history[0] = binary_far_spectrum; handle->binary_far_history[0] = binary_far_spectrum;
if (handle->near_history_size > 1) {
memmove(&(handle->binary_near_history[1]),
&(handle->binary_near_history[0]),
(handle->near_history_size - 1) * sizeof(uint32_t));
handle->binary_near_history[0] = binary_near_spectrum;
binary_near_spectrum =
handle->binary_near_history[handle->near_history_size - 1];
}
// Compare with delayed spectra // Compare with delayed spectra
BitCountComparison(binary_near_spectrum, BitCountComparison(binary_near_spectrum,
handle->binary_far_history, handle->binary_far_history,
@ -198,7 +228,6 @@ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle,
handle->delay_histogram[min_position] += 3; handle->delay_histogram[min_position] += 3;
} }
handle->last_delay = 0;
for (i = 0; i < handle->history_size; i++) { for (i = 0; i < handle->history_size; i++) {
histogram_bin = handle->delay_histogram[i]; histogram_bin = handle->delay_histogram[i];

View File

@ -25,18 +25,22 @@ typedef struct {
// determined at run-time. // determined at run-time.
int32_t* bit_counts; int32_t* bit_counts;
// Binary history variables // Binary history variables.
uint32_t* binary_far_history; uint32_t* binary_far_history;
uint32_t* binary_near_history;
// Delay histogram variables // Delay histogram variables.
int* delay_histogram; int* delay_histogram;
int vad_counter; int vad_counter;
// Delay memory // Delay memory.
int last_delay; int last_delay;
// Buffer size parameters // Buffer size.
int history_size; int history_size;
// Near-end buffer size.
int near_history_size;
} BinaryDelayEstimator_t; } BinaryDelayEstimator_t;
// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...) // Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...)
@ -45,20 +49,10 @@ typedef struct {
// //
int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle); int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle);
// Allocates the memory needed by the binary delay estimation. The memory needs // Refer to WebRtc_CreateDelayEstimator() in delay_estimator_wrapper.h
// to be initialized separately using WebRtc_InitBinaryDelayEstimator(...).
//
// Inputs:
// - handle : Instance that should be created
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
//
// Output:
// - handle : Created instance
//
int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle,
int history_size); int max_delay,
int lookahead);
// Initializes the delay estimation instance created with // Initializes the delay estimation instance created with
// WebRtc_CreateBinaryDelayEstimator(...) // WebRtc_CreateBinaryDelayEstimator(...)
@ -70,12 +64,13 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle,
// //
int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle); int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle);
// Estimates and returns the delay between the binary far end and binary near // Estimates and returns the delay between the binary far-end and binary near-
// end spectra. // end spectra. The value will be offset by the lookahead (i.e. the lookahead
// should be subtracted from the returned value).
// Inputs: // Inputs:
// - handle : Pointer to the delay estimation instance // - handle : Pointer to the delay estimation instance
// - binary_far_spectrum : Far end binary spectrum // - binary_far_spectrum : Far-end binary spectrum
// - binary_near_spectrum : Near end binary spectrum of the current block // - binary_near_spectrum : Near-end binary spectrum of the current block
// - vad_value : The VAD decision of the current block // - vad_value : The VAD decision of the current block
// //
// Output: // Output:
@ -102,14 +97,14 @@ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle,
// //
int WebRtc_binary_last_delay(BinaryDelayEstimator_t* handle); int WebRtc_binary_last_delay(BinaryDelayEstimator_t* handle);
// Returns the history size used in the far end buffers to calculate the delay // Returns the history size used in the far-end buffers to calculate the delay
// over. // over.
// //
// Input: // Input:
// - handle : Pointer to the delay estimation instance // - handle : Pointer to the delay estimation instance
// //
// Return value: // Return value:
// - history_size : > 0 - Far end history size // - history_size : > 0 - Far-end history size
// -1 - Error // -1 - Error
// //
int WebRtc_history_size(BinaryDelayEstimator_t* handle); int WebRtc_history_size(BinaryDelayEstimator_t* handle);

View File

@ -133,7 +133,8 @@ int WebRtc_FreeDelayEstimator(void* handle) {
int WebRtc_CreateDelayEstimator(void** handle, int WebRtc_CreateDelayEstimator(void** handle,
int spectrum_size, int spectrum_size,
int history_size) { int max_delay,
int lookahead) {
DelayEstimator_t *self = NULL; DelayEstimator_t *self = NULL;
// Check if the sub band used in the delay estimation is small enough to // Check if the sub band used in the delay estimation is small enough to
@ -158,7 +159,8 @@ int WebRtc_CreateDelayEstimator(void** handle,
// Create binary delay estimator. // Create binary delay estimator.
if (WebRtc_CreateBinaryDelayEstimator(&self->binary_handle, if (WebRtc_CreateBinaryDelayEstimator(&self->binary_handle,
history_size) != 0) { max_delay,
lookahead) != 0) {
WebRtc_FreeDelayEstimator(self); WebRtc_FreeDelayEstimator(self);
self = NULL; self = NULL;
return -1; return -1;

View File

@ -26,20 +26,29 @@ int WebRtc_FreeDelayEstimator(void* handle);
// initialized separately through WebRtc_InitDelayEstimator(...). // initialized separately through WebRtc_InitDelayEstimator(...).
// //
// Inputs: // Inputs:
// - handle : Instance that should be created // - handle : Instance that should be created.
// - spectrum_size : Size of the spectrum used both in far end and // - spectrum_size : Size of the spectrum used both in far-end and
// near end. Used to allocate memory for spectrum // near-end. Used to allocate memory for spectrum
// specific buffers. // specific buffers.
// - history_size : Size of the far end history used to estimate the // - max_delay : The maximum delay which can be estimated. Needed
// delay from. Used to allocate memory for history // to allocate memory for history buffers.
// specific buffers. // - lookahead : Amount of non-causal lookahead to use. This can detect
// cases in which a near-end signal occurs before the
// corresponding far-end signal. It will delay the
// estimate for the current block by an equal amount,
// and the returned values will be offset by it.
//
// A value of zero is the typical no-lookahead case. The
// lookahead also sets the minimum delay which can be
// estimated, namely -lookahead.
// //
// Output: // Output:
// - handle : Created instance // - handle : Created instance
// //
int WebRtc_CreateDelayEstimator(void** handle, int WebRtc_CreateDelayEstimator(void** handle,
int spectrum_size, int spectrum_size,
int history_size); int max_delay,
int lookahead);
// Initializes the delay estimation instance created with // Initializes the delay estimation instance created with
// WebRtc_CreateDelayEstimator(...) // WebRtc_CreateDelayEstimator(...)
@ -51,15 +60,17 @@ int WebRtc_CreateDelayEstimator(void** handle,
// //
int WebRtc_InitDelayEstimator(void* handle); int WebRtc_InitDelayEstimator(void* handle);
// Estimates and returns the delay between the far end and near end blocks. // Estimates and returns the delay between the far-end and near-end blocks. The
// value will be offset by the lookahead (i.e. the lookahead should be
// subtracted from the returned value).
// Inputs: // Inputs:
// - handle : Pointer to the delay estimation instance // - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data // - far_spectrum : Pointer to the far-end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current // - near_spectrum : Pointer to the near-end spectrum data of the current
// block // block
// - spectrum_size : The size of the data arrays (same for both far and // - spectrum_size : The size of the data arrays (same for both far- and
// near end) // near-end)
// - far_q : The Q-domain of the far end data // - far_q : The Q-domain of the far-end data
// - vad_value : The VAD decision of the current block // - vad_value : The VAD decision of the current block
// //
// Output: // Output: