Add high frequency correction to Beamformer

R=andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/35989004

Cr-Commit-Position: refs/heads/master@{#8278}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8278 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
aluebs@webrtc.org
2015-02-07 01:07:09 +00:00
parent 0c7ec770ff
commit 799e667e9f
2 changed files with 33 additions and 38 deletions

View File

@ -62,11 +62,11 @@ const float kCovUniformGapHalfWidth = 0.001f;
const float kHalfLifeSeconds = 0.05f; const float kHalfLifeSeconds = 0.05f;
// The average mask is computed from masks in this mid-frequency range. // The average mask is computed from masks in this mid-frequency range.
const int kMidFrequnecyLowerBoundHz = 250; const int kLowAverageStartHz = 200;
const int kMidFrequencyUpperBoundHz = 400; const int kLowAverageEndHz = 400;
const int kHighFrequencyLowerBoundHz = 4000; const int kHighAverageStartHz = 6000;
const int kHighFrequencyUpperBoundHz = 7000; const int kHighAverageEndHz = 6500;
// Mask threshold over which the data is considered signal and not interference. // Mask threshold over which the data is considered signal and not interference.
const float kMaskTargetThreshold = 0.3f; const float kMaskTargetThreshold = 0.3f;
@ -141,24 +141,24 @@ void Beamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
sample_rate_hz_ = sample_rate_hz; sample_rate_hz_ = sample_rate_hz;
decay_threshold_ = decay_threshold_ =
pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds)); pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds));
mid_frequency_lower_bin_bound_ = low_average_start_bin_ =
Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_); Round(kLowAverageStartHz * kFftSize / sample_rate_hz_);
mid_frequency_upper_bin_bound_ = low_average_end_bin_ =
Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_); Round(kLowAverageEndHz * kFftSize / sample_rate_hz_);
high_frequency_lower_bin_bound_ = high_average_start_bin_ =
Round(kHighFrequencyLowerBoundHz * kFftSize / sample_rate_hz_); Round(kHighAverageStartHz * kFftSize / sample_rate_hz_);
high_frequency_upper_bin_bound_ = high_average_end_bin_ =
Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_); Round(kHighAverageEndHz * kFftSize / sample_rate_hz_);
current_block_ix_ = 0; current_block_ix_ = 0;
previous_block_ix_ = -1; previous_block_ix_ = -1;
is_target_present_ = false; is_target_present_ = false;
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_; interference_blocks_count_ = hold_target_blocks_;
DCHECK_LE(mid_frequency_upper_bin_bound_, kNumFreqBins); DCHECK_LE(low_average_end_bin_, kNumFreqBins);
DCHECK_LT(mid_frequency_lower_bin_bound_, mid_frequency_upper_bin_bound_); DCHECK_LT(low_average_start_bin_, low_average_end_bin_);
DCHECK_LE(high_frequency_upper_bin_bound_, kNumFreqBins); DCHECK_LE(high_average_end_bin_, kNumFreqBins);
DCHECK_LT(high_frequency_lower_bin_bound_, high_frequency_upper_bin_bound_); DCHECK_LT(high_average_start_bin_, high_average_end_bin_);
lapped_transform_.reset(new LappedTransform(num_input_channels_, lapped_transform_.reset(new LappedTransform(num_input_channels_,
1, 1,
@ -329,7 +329,7 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input,
// Calculating the post-filter masks. Note that we need two for each // Calculating the post-filter masks. Note that we need two for each
// frequency bin to account for the positive and negative interferer // frequency bin to account for the positive and negative interferer
// angle. // angle.
for (int i = 0; i < kNumFreqBins; ++i) { for (int i = low_average_start_bin_; i < high_average_end_bin_; ++i) {
eig_m_.CopyFromColumn(input, i, num_input_channels_); eig_m_.CopyFromColumn(input, i, num_input_channels_);
float eig_m_norm_factor = float eig_m_norm_factor =
std::sqrt(ConjugateDotProduct(eig_m_, eig_m_)).real(); std::sqrt(ConjugateDotProduct(eig_m_, eig_m_)).real();
@ -368,10 +368,7 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input,
} }
ApplyLowFrequencyCorrection(); ApplyLowFrequencyCorrection();
ApplyHighFrequencyCorrection();
if (high_pass_exists_) {
CalculateHighFrequencyMask();
}
ApplyMasks(input, output); ApplyMasks(input, output);
@ -430,31 +427,29 @@ void Beamformer::ApplyDecay() {
void Beamformer::ApplyLowFrequencyCorrection() { void Beamformer::ApplyLowFrequencyCorrection() {
float low_frequency_mask = 0.f; float low_frequency_mask = 0.f;
float* mask_els = postfilter_masks_[current_block_ix_].elements()[0]; float* mask_els = postfilter_masks_[current_block_ix_].elements()[0];
for (int i = mid_frequency_lower_bin_bound_; for (int i = low_average_start_bin_; i < low_average_end_bin_; ++i) {
i <= mid_frequency_upper_bin_bound_;
++i) {
low_frequency_mask += mask_els[i]; low_frequency_mask += mask_els[i];
} }
low_frequency_mask /= low_frequency_mask /= low_average_end_bin_ - low_average_start_bin_;
mid_frequency_upper_bin_bound_ - mid_frequency_lower_bin_bound_ + 1;
for (int i = 0; i < mid_frequency_lower_bin_bound_; ++i) { for (int i = 0; i < low_average_start_bin_; ++i) {
mask_els[i] = low_frequency_mask; mask_els[i] = low_frequency_mask;
} }
} }
void Beamformer::CalculateHighFrequencyMask() { void Beamformer::ApplyHighFrequencyCorrection() {
float high_pass_mask = 0.f; float high_pass_mask = 0.f;
float* mask_els = postfilter_masks_[current_block_ix_].elements()[0]; float* mask_els = postfilter_masks_[current_block_ix_].elements()[0];
for (int i = high_frequency_lower_bin_bound_; for (int i = high_average_start_bin_; i < high_average_end_bin_; ++i) {
i <= high_frequency_upper_bin_bound_;
++i) {
high_pass_mask += mask_els[i]; high_pass_mask += mask_els[i];
} }
high_pass_mask /= high_pass_mask /= high_average_end_bin_ - high_average_start_bin_;
high_frequency_upper_bin_bound_ - high_frequency_lower_bin_bound_ + 1;
for (int i = high_average_end_bin_; i < kNumFreqBins; ++i) {
mask_els[i] = high_pass_mask;
}
high_pass_postfilter_mask_ += high_pass_mask; high_pass_postfilter_mask_ += high_pass_mask;
} }

View File

@ -94,7 +94,7 @@ class Beamformer : public LappedTransform::Callback {
// in the time-domain. Further, we average these block-masks over a chunk, // in the time-domain. Further, we average these block-masks over a chunk,
// resulting in one postfilter mask per audio chunk. This allows us to skip // resulting in one postfilter mask per audio chunk. This allows us to skip
// both transforming and blocking the high-frequency signal. // both transforming and blocking the high-frequency signal.
void CalculateHighFrequencyMask(); void ApplyHighFrequencyCorrection();
// Applies both sets of masks to |input| and stores the result in |output|. // Applies both sets of masks to |input| and stores the result in |output|.
void ApplyMasks(const complex_f* const* input, complex_f* const* output); void ApplyMasks(const complex_f* const* input, complex_f* const* output);
@ -120,10 +120,10 @@ class Beamformer : public LappedTransform::Callback {
// Calculated based on user-input and constants in the .cc file. // Calculated based on user-input and constants in the .cc file.
float decay_threshold_; float decay_threshold_;
int mid_frequency_lower_bin_bound_; int low_average_start_bin_;
int mid_frequency_upper_bin_bound_; int low_average_end_bin_;
int high_frequency_lower_bin_bound_; int high_average_start_bin_;
int high_frequency_upper_bin_bound_; int high_average_end_bin_;
// Indices into |postfilter_masks_|. // Indices into |postfilter_masks_|.
int current_block_ix_; int current_block_ix_;