diff --git a/webrtc/common_audio/lapped_transform.cc b/webrtc/common_audio/lapped_transform.cc
index 5ab1db1b25..006bda0cd1 100644
--- a/webrtc/common_audio/lapped_transform.cc
+++ b/webrtc/common_audio/lapped_transform.cc
@@ -83,7 +83,7 @@ LappedTransform::LappedTransform(size_t num_in_channels,
       cplx_post_(num_out_channels,
                  cplx_length_,
                  RealFourier::kFftBufferAlignment) {
-  RTC_CHECK(num_in_channels_ > 0 && num_out_channels_ > 0);
+  RTC_CHECK(num_in_channels_ > 0);
   RTC_CHECK_GT(block_length_, 0u);
   RTC_CHECK_GT(chunk_length_, 0u);
   RTC_CHECK(block_processor_);
diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn
index 21b7cedeac..9c1674a102 100644
--- a/webrtc/modules/audio_processing/BUILD.gn
+++ b/webrtc/modules/audio_processing/BUILD.gn
@@ -55,7 +55,6 @@ source_set("audio_processing") {
     "audio_processing_impl.h",
     "beamformer/array_util.cc",
     "beamformer/array_util.h",
-    "beamformer/beamformer.h",
     "beamformer/complex_matrix.h",
     "beamformer/covariance_matrix_generator.cc",
     "beamformer/covariance_matrix_generator.h",
diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi
index 90cf0559b6..0a2f4135b4 100644
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@@ -66,7 +66,6 @@
         'audio_processing_impl.h',
         'beamformer/array_util.cc',
         'beamformer/array_util.h',
-        'beamformer/beamformer.h',
        'beamformer/complex_matrix.h',
        'beamformer/covariance_matrix_generator.cc',
        'beamformer/covariance_matrix_generator.h',
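The relaxed RTC_CHECK in lapped_transform.cc is what allows an analysis-only transform: with zero output channels, ProcessChunk() still runs the forward FFT and invokes the callback, but nothing is synthesized. This is exactly how the beamformer's process_transform_ is built later in this patch (with 0u output channels). A minimal sketch of such a callback, assuming the constructor argument order shown in the patch (in channels, out channels, chunk length, window, FFT size, shift, callback); the EnergyAnalyzer class is illustrative and not part of the patch:

#include <complex>
#include <cstddef>

#include "webrtc/common_audio/lapped_transform.h"

namespace {

// Analysis-only callback: reads the frequency-domain blocks, writes nothing.
class EnergyAnalyzer : public webrtc::LappedTransform::Callback {
 public:
  void ProcessAudioBlock(const std::complex<float>* const* input,
                         size_t num_input_channels,
                         size_t num_freq_bins,
                         size_t num_output_channels,
                         std::complex<float>* const* output) override {
    // num_output_channels is 0 for an analysis-only transform, so |output|
    // is never touched.
    energy_ = 0.f;
    for (size_t ch = 0; ch < num_input_channels; ++ch) {
      for (size_t f = 0; f < num_freq_bins; ++f) {
        energy_ += std::norm(input[ch][f]);
      }
    }
  }

  float energy() const { return energy_; }

 private:
  float energy_ = 0.f;
};

}  // namespace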
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index afeebba4e5..2a2e54d5db 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -128,10 +128,10 @@ struct AudioProcessingImpl::ApmPublicSubmodules {
 };
 
 struct AudioProcessingImpl::ApmPrivateSubmodules {
-  explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)
+  explicit ApmPrivateSubmodules(NonlinearBeamformer* beamformer)
       : beamformer(beamformer) {}
   // Accessed internally from capture or during initialization
-  std::unique_ptr<Beamformer<float>> beamformer;
+  std::unique_ptr<NonlinearBeamformer> beamformer;
   std::unique_ptr<AgcManagerDirect> agc_manager;
   std::unique_ptr<LevelController> level_controller;
 };
@@ -146,7 +146,7 @@ AudioProcessing* AudioProcessing::Create(const Config& config) {
 }
 
 AudioProcessing* AudioProcessing::Create(const Config& config,
-                                         Beamformer<float>* beamformer) {
+                                         NonlinearBeamformer* beamformer) {
   AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer);
   if (apm->Initialize() != kNoError) {
     delete apm;
@@ -160,7 +160,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
     : AudioProcessingImpl(config, nullptr) {}
 
 AudioProcessingImpl::AudioProcessingImpl(const Config& config,
-                                         Beamformer<float>* beamformer)
+                                         NonlinearBeamformer* beamformer)
     : public_submodules_(new ApmPublicSubmodules()),
       private_submodules_(new ApmPrivateSubmodules(beamformer)),
       constants_(config.Get<ExperimentalAgc>().startup_min_volume,
@@ -699,8 +699,8 @@ int AudioProcessingImpl::ProcessStreamLocked() {
   }
 
   if (capture_nonlocked_.beamformer_enabled) {
-    private_submodules_->beamformer->ProcessChunk(*ca->split_data_f(),
-                                                  ca->split_data_f());
+    private_submodules_->beamformer->AnalyzeChunk(*ca->split_data_f());
+    // Discards all channels but the leftmost one.
     ca->set_num_channels(1);
   }
@@ -746,6 +746,10 @@ int AudioProcessingImpl::ProcessStreamLocked() {
   RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio(
       ca, stream_delay_ms()));
 
+  if (capture_nonlocked_.beamformer_enabled) {
+    private_submodules_->beamformer->PostFilter(ca->split_data_f());
+  }
+
   public_submodules_->voice_detection->ProcessCaptureAudio(ca);
 
   if (constants_.use_experimental_agc &&
@@ -1223,7 +1227,7 @@ void AudioProcessingImpl::InitializeBeamformer() {
   if (capture_nonlocked_.beamformer_enabled) {
     if (!private_submodules_->beamformer) {
       private_submodules_->beamformer.reset(new NonlinearBeamformer(
-          capture_.array_geometry, capture_.target_direction));
+          capture_.array_geometry, 1u, capture_.target_direction));
     }
     private_submodules_->beamformer->Initialize(kChunkSizeMs,
                                                 capture_nonlocked_.split_rate);
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index a79d0289e3..4b9011dc88 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -36,8 +36,7 @@ namespace webrtc {
 
 class AgcManagerDirect;
 class AudioConverter;
-template<typename T>
-class Beamformer;
+class NonlinearBeamformer;
 
 class AudioProcessingImpl : public AudioProcessing {
  public:
@@ -45,7 +44,7 @@ class AudioProcessingImpl : public AudioProcessing {
   // Acquires both the render and capture locks.
   explicit AudioProcessingImpl(const Config& config);
   // AudioProcessingImpl takes ownership of beamformer.
-  AudioProcessingImpl(const Config& config, Beamformer<float>* beamformer);
+  AudioProcessingImpl(const Config& config, NonlinearBeamformer* beamformer);
   virtual ~AudioProcessingImpl();
   int Initialize() override;
   int Initialize(int input_sample_rate_hz,
diff --git a/webrtc/modules/audio_processing/audio_processing_unittest.cc b/webrtc/modules/audio_processing/audio_processing_unittest.cc
index e5ab3da3b4..23705e793d 100644
--- a/webrtc/modules/audio_processing/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/audio_processing_unittest.cc
@@ -1284,7 +1284,7 @@ TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
   geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
   config.Set<Beamforming>(new Beamforming(true, geometry));
   testing::NiceMock<MockNonlinearBeamformer>* beamformer =
-      new testing::NiceMock<MockNonlinearBeamformer>(geometry);
+      new testing::NiceMock<MockNonlinearBeamformer>(geometry, 1u);
   std::unique_ptr<AudioProcessing> apm(
       AudioProcessing::Create(config, beamformer));
   EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
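Taken together, the audio_processing_impl.cc hunks establish the call pattern that replaces the old single ProcessChunk(): analysis runs on all microphone channels early in ProcessStreamLocked(), the buffer is narrowed to one channel, other capture submodules run, and the mask is applied afterwards. A condensed sketch of that ordering (the free function is illustrative; inside APM the steps are interleaved with the other submodules):

#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

namespace {

void RunBeamformerTwoStep(webrtc::NonlinearBeamformer* beamformer,
                          webrtc::ChannelBuffer<float>* split_data) {
  // 1) Estimate the postfilter mask from all microphone channels.
  beamformer->AnalyzeChunk(*split_data);
  // 2) In APM the AudioBuffer is narrowed here (ca->set_num_channels(1)),
  //    and echo control etc. run on the single remaining channel.
  // 3) Apply the mask in place; PostFilter() only requires that the buffer
  //    carries at least num_postfilter_channels channels.
  beamformer->PostFilter(split_data);
}

}  // namespace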
diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h
deleted file mode 100644
index 6a9ff45d12..0000000000
--- a/webrtc/modules/audio_processing/beamformer/beamformer.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
-
-#include "webrtc/common_audio/channel_buffer.h"
-#include "webrtc/modules/audio_processing/beamformer/array_util.h"
-
-namespace webrtc {
-
-template<typename T>
-class Beamformer {
- public:
-  virtual ~Beamformer() {}
-
-  // Process one time-domain chunk of audio. The audio is expected to be split
-  // into frequency bands inside the ChannelBuffer. The number of frames and
-  // channels must correspond to the constructor parameters. The same
-  // ChannelBuffer can be passed in as |input| and |output|.
-  virtual void ProcessChunk(const ChannelBuffer<T>& input,
-                            ChannelBuffer<T>* output) = 0;
-
-  // Sample rate corresponds to the lower band.
-  // Needs to be called before the Beamformer can be used.
-  virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;
-
-  // Aim the beamformer at a point in space.
-  virtual void AimAt(const SphericalPointf& spherical_point) = 0;
-
-  // Indicates whether a given point is inside of the beam.
-  virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }
-
-  // Returns true if the current data contains the target signal.
-  // Which signals are considered "targets" is implementation dependent.
-  virtual bool is_target_present() = 0;
-};
-
-}  // namespace webrtc
-
-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
diff --git a/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h b/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h
index e2b4417c13..e0a1c6fa71 100644
--- a/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h
@@ -20,12 +20,13 @@ namespace webrtc {
 
 class MockNonlinearBeamformer : public NonlinearBeamformer {
  public:
-  explicit MockNonlinearBeamformer(const std::vector<Point>& array_geometry)
-      : NonlinearBeamformer(array_geometry) {}
+  MockNonlinearBeamformer(const std::vector<Point>& array_geometry,
+                          size_t num_postfilter_channels)
+      : NonlinearBeamformer(array_geometry, num_postfilter_channels) {}
 
   MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
-  MOCK_METHOD2(ProcessChunk, void(const ChannelBuffer<float>& input,
-                                  ChannelBuffer<float>* output));
+  MOCK_METHOD1(AnalyzeChunk, void(const ChannelBuffer<float>& data));
+  MOCK_METHOD1(PostFilter, void(ChannelBuffer<float>* data));
   MOCK_METHOD1(IsInBeam, bool(const SphericalPointf& spherical_point));
   MOCK_METHOD0(is_target_present, bool());
 };
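With AnalyzeChunk() and PostFilter() mocked separately, a test can pin down the relative order of the two calls, which the old single-method mock could not express. An illustrative gmock snippet (the test name, geometry, and the code driven at the end are hypothetical):

#include <vector>

#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/beamformer/mock_nonlinear_beamformer.h"

TEST(MockBeamformerExample, AnalyzeRunsBeforePostFilter) {
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(-0.05f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
  webrtc::MockNonlinearBeamformer beamformer(geometry, 1u);

  // Expect the analysis pass strictly before the mask application.
  testing::InSequence in_sequence;
  EXPECT_CALL(beamformer, AnalyzeChunk(testing::_));
  EXPECT_CALL(beamformer, PostFilter(testing::_));
  // ... exercise the code under test here, e.g. one processed chunk ...
}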
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
index f5bdd6a3c2..5412fb5b1e 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
@@ -122,18 +122,6 @@ size_t Round(float x) {
   return static_cast<size_t>(std::floor(x + 0.5f));
 }
 
-// Calculates the sum of absolute values of a complex matrix.
-float SumAbs(const ComplexMatrix<float>& mat) {
-  float sum_abs = 0.f;
-  const complex<float>* const* mat_els = mat.elements();
-  for (size_t i = 0; i < mat.num_rows(); ++i) {
-    for (size_t j = 0; j < mat.num_columns(); ++j) {
-      sum_abs += std::abs(mat_els[i][j]);
-    }
-  }
-  return sum_abs;
-}
-
 // Calculates the sum of squares of a complex matrix.
 float SumSquares(const ComplexMatrix<float>& mat) {
   float sum_squares = 0.f;
@@ -183,10 +171,46 @@ const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
 // static
 const size_t NonlinearBeamformer::kNumFreqBins;
 
+PostFilterTransform::PostFilterTransform(size_t num_channels,
+                                         size_t chunk_length,
+                                         float* window,
+                                         size_t fft_size)
+    : transform_(num_channels,
+                 num_channels,
+                 chunk_length,
+                 window,
+                 fft_size,
+                 fft_size / 2,
+                 this),
+      num_freq_bins_(fft_size / 2 + 1) {}
+
+void PostFilterTransform::ProcessChunk(float* const* data, float* final_mask) {
+  final_mask_ = final_mask;
+  transform_.ProcessChunk(data, data);
+}
+
+void PostFilterTransform::ProcessAudioBlock(const complex<float>* const* input,
+                                            size_t num_input_channels,
+                                            size_t num_freq_bins,
+                                            size_t num_output_channels,
+                                            complex<float>* const* output) {
+  RTC_DCHECK_EQ(num_freq_bins_, num_freq_bins);
+  RTC_DCHECK_EQ(num_input_channels, num_output_channels);
+
+  for (size_t ch = 0; ch < num_input_channels; ++ch) {
+    for (size_t f_ix = 0; f_ix < num_freq_bins_; ++f_ix) {
+      output[ch][f_ix] =
+          kCompensationGain * final_mask_[f_ix] * input[ch][f_ix];
+    }
+  }
+}
+
 NonlinearBeamformer::NonlinearBeamformer(
     const std::vector<Point>& array_geometry,
+    size_t num_postfilter_channels,
     SphericalPointf target_direction)
     : num_input_channels_(array_geometry.size()),
+      num_postfilter_channels_(num_postfilter_channels),
       array_geometry_(GetCenteredArray(array_geometry)),
       array_normal_(GetArrayNormalIfExists(array_geometry)),
       min_mic_spacing_(GetMinimumSpacing(array_geometry)),
@@ -208,18 +232,21 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
   hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
   interference_blocks_count_ = hold_target_blocks_;
 
-  lapped_transform_.reset(new LappedTransform(num_input_channels_,
-                                              1,
-                                              chunk_length_,
-                                              window_,
-                                              kFftSize,
-                                              kFftSize / 2,
-                                              this));
+  process_transform_.reset(new LappedTransform(num_input_channels_,
+                                               0u,
+                                               chunk_length_,
+                                               window_,
+                                               kFftSize,
+                                               kFftSize / 2,
+                                               this));
+  postfilter_transform_.reset(new PostFilterTransform(
+      num_postfilter_channels_, chunk_length_, window_, kFftSize));
+  const float wave_number_step =
+      (2.f * M_PI * sample_rate_hz_) / (kFftSize * kSpeedOfSoundMeterSeconds);
   for (size_t i = 0; i < kNumFreqBins; ++i) {
     time_smooth_mask_[i] = 1.f;
     final_mask_[i] = 1.f;
-    float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
-    wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
+    wave_numbers_[i] = i * wave_number_step;
   }
 
   InitLowFrequencyCorrectionRanges();
@@ -306,9 +333,6 @@ void NonlinearBeamformer::InitDelaySumMasks() {
     complex_f norm_factor = sqrt(
         ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
     delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
-    normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
-    normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
-        normalized_delay_sum_masks_[f_ix]));
   }
 }
@@ -366,30 +390,49 @@ void NonlinearBeamformer::NormalizeCovMats() {
   }
 }
 
-void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
-                                       ChannelBuffer<float>* output) {
-  RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
-  RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
+void NonlinearBeamformer::AnalyzeChunk(const ChannelBuffer<float>& data) {
+  RTC_DCHECK_EQ(data.num_channels(), num_input_channels_);
+  RTC_DCHECK_EQ(data.num_frames_per_band(), chunk_length_);
 
-  float old_high_pass_mask = high_pass_postfilter_mask_;
-  lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
-  // Ramp up/down for smoothing. 1 mask per 10ms results in audible
-  // discontinuities.
+  old_high_pass_mask_ = high_pass_postfilter_mask_;
+  process_transform_->ProcessChunk(data.channels(0), nullptr);
+}
+
+void NonlinearBeamformer::PostFilter(ChannelBuffer<float>* data) {
+  RTC_DCHECK_EQ(data->num_frames_per_band(), chunk_length_);
+  // TODO(aluebs): Change to RTC_CHECK_EQ once the ChannelBuffer is updated.
+  RTC_DCHECK_GE(data->num_channels(), num_postfilter_channels_);
+
+  postfilter_transform_->ProcessChunk(data->channels(0), final_mask_);
+
+  // Ramp up/down for smoothing is needed in order to avoid discontinuities in
+  // the transitions between 10 ms frames.
   const float ramp_increment =
-      (high_pass_postfilter_mask_ - old_high_pass_mask) /
-      input.num_frames_per_band();
-  // Apply the smoothed high-pass mask to the first channel of each band.
-  // This can be done because the effect of the linear beamformer is negligible
-  // compared to the post-filter.
-  for (size_t i = 1; i < input.num_bands(); ++i) {
-    float smoothed_mask = old_high_pass_mask;
-    for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
+      (high_pass_postfilter_mask_ - old_high_pass_mask_) /
+      data->num_frames_per_band();
+  for (size_t i = 1; i < data->num_bands(); ++i) {
+    float smoothed_mask = old_high_pass_mask_;
+    for (size_t j = 0; j < data->num_frames_per_band(); ++j) {
       smoothed_mask += ramp_increment;
-      output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
+      for (size_t k = 0; k < num_postfilter_channels_; ++k) {
+        data->channels(i)[k][j] *= smoothed_mask;
+      }
     }
   }
 }
 
+void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
+                                       ChannelBuffer<float>* output) {
+  RTC_DCHECK_GT(output->num_channels(), 0u);
+  RTC_DCHECK_EQ(output->num_frames_per_band(), input.num_frames_per_band());
+  AnalyzeChunk(input);
+  for (size_t i = 0u; i < input.num_bands(); ++i) {
+    std::memcpy(output->channels(i)[0], input.channels(i)[0],
+                sizeof(input.channels(0)[0][0]) * input.num_frames_per_band());
+  }
+  PostFilter(output);
+}
+
 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
   target_angle_radians_ = target_direction.azimuth();
   InitHighFrequencyCorrectionRanges();
@@ -414,7 +457,7 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
                                             complex_f* const* output) {
   RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);
   RTC_CHECK_EQ(num_input_channels_, num_input_channels);
-  RTC_CHECK_EQ(1u, num_output_channels);
+  RTC_CHECK_EQ(0u, num_output_channels);
 
   // Calculating the post-filter masks. Note that we need two for each
   // frequency bin to account for the positive and negative interferer
@@ -456,7 +499,6 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
   ApplyLowFrequencyCorrection();
   ApplyHighFrequencyCorrection();
   ApplyMaskFrequencySmoothing();
-  ApplyMasks(input, output);
 }
 
 float NonlinearBeamformer::CalculatePostfilterMask(
@@ -484,22 +526,6 @@ float NonlinearBeamformer::CalculatePostfilterMask(
   return numerator / denominator;
 }
 
-void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
-                                     complex_f* const* output) {
-  complex_f* output_channel = output[0];
-  for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
-    output_channel[f_ix] = complex_f(0.f, 0.f);
-
-    const complex_f* delay_sum_mask_els =
-        normalized_delay_sum_masks_[f_ix].elements()[0];
-    for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
-      output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
-    }
-
-    output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
-  }
-}
-
 // Smooth new_mask_ into time_smooth_mask_.
 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
   for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
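The removed ApplyMasks() combined the input channels through the normalized delay-sum weights and then scaled the single resulting channel. In the new design that linear combination is gone entirely: PostFilterTransform scales every channel by the same real-valued per-bin gain, which is why it can serve an arbitrary number of postfilter channels. A standalone sketch of the per-block operation (the free function is illustrative; kCompensationGain and final_mask_ are the names used in the patch):

#include <complex>
#include <cstddef>

// Per-bin gain application, as in PostFilterTransform::ProcessAudioBlock().
void ApplyPostfilterMask(const std::complex<float>* const* input,
                         std::complex<float>* const* output,
                         const float* mask,        // final_mask_, one per bin.
                         float compensation_gain,  // kCompensationGain.
                         size_t num_channels,
                         size_t num_freq_bins) {
  for (size_t ch = 0; ch < num_channels; ++ch) {
    for (size_t f = 0; f < num_freq_bins; ++f) {
      // The gain is real-valued, so each channel's phase is preserved.
      output[ch][f] = compensation_gain * mask[f] * input[ch][f];
    }
  }
}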
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h
index b8953b0a4f..10ef6e5af6 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h
@@ -21,48 +21,76 @@
 
 #include "webrtc/common_audio/lapped_transform.h"
 #include "webrtc/common_audio/channel_buffer.h"
-#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
+#include "webrtc/modules/audio_processing/beamformer/array_util.h"
 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
 
 namespace webrtc {
 
+class PostFilterTransform : public LappedTransform::Callback {
+ public:
+  PostFilterTransform(size_t num_channels,
+                      size_t chunk_length,
+                      float* window,
+                      size_t fft_size);
+
+  void ProcessChunk(float* const* data, float* final_mask);
+
+ protected:
+  void ProcessAudioBlock(const complex<float>* const* input,
+                         size_t num_input_channels,
+                         size_t num_freq_bins,
+                         size_t num_output_channels,
+                         complex<float>* const* output) override;
+
+ private:
+  LappedTransform transform_;
+  const size_t num_freq_bins_;
+  float* final_mask_;
+};
+
 // Enhances sound sources coming directly in front of a uniform linear array
 // and suppresses sound sources coming from all other directions. Operates on
 // multichannel signals and produces single-channel output.
 //
 // The implemented nonlinear postfilter algorithm is taken from "A Robust
 // Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn.
-class NonlinearBeamformer
-  : public Beamformer<float>,
-    public LappedTransform::Callback {
+class NonlinearBeamformer : public LappedTransform::Callback {
  public:
   static const float kHalfBeamWidthRadians;
 
   explicit NonlinearBeamformer(
       const std::vector<Point>& array_geometry,
+      size_t num_postfilter_channels = 1u,
       SphericalPointf target_direction =
           SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));
 
   // Sample rate corresponds to the lower band.
   // Needs to be called before the NonlinearBeamformer can be used.
-  void Initialize(int chunk_size_ms, int sample_rate_hz) override;
+  virtual void Initialize(int chunk_size_ms, int sample_rate_hz);
 
-  // Process one time-domain chunk of audio. The audio is expected to be split
+  // Analyzes one time-domain chunk of audio. The audio is expected to be split
   // into frequency bands inside the ChannelBuffer. The number of frames and
-  // channels must correspond to the constructor parameters. The same
-  // ChannelBuffer can be passed in as |input| and |output|.
-  void ProcessChunk(const ChannelBuffer<float>& input,
-                    ChannelBuffer<float>* output) override;
+  // channels must correspond to the constructor parameters.
+  virtual void AnalyzeChunk(const ChannelBuffer<float>& data);
 
-  void AimAt(const SphericalPointf& target_direction) override;
+  // Applies the postfilter mask to one chunk of audio. The audio is expected
+  // to be split into frequency bands inside the ChannelBuffer. The number of
+  // frames and channels must correspond to the constructor parameters.
+  virtual void PostFilter(ChannelBuffer<float>* data);
 
-  bool IsInBeam(const SphericalPointf& spherical_point) override;
+  // TODO(aluebs): Remove once the dependencies have moved to the new API.
+  virtual void ProcessChunk(const ChannelBuffer<float>& input,
+                            ChannelBuffer<float>* output);
+
+  virtual void AimAt(const SphericalPointf& target_direction);
+
+  virtual bool IsInBeam(const SphericalPointf& spherical_point);
 
   // After processing each block |is_target_present_| is set to true if the
   // target signal is present and to false otherwise. This method can be called
   // to know if the data is target signal or interference and process it
   // accordingly.
-  bool is_target_present() override { return is_target_present_; }
+  virtual bool is_target_present() { return is_target_present_; }
 
  protected:
   // Process one frequency-domain block of audio. This is where the fun
@@ -116,8 +144,8 @@ class NonlinearBeamformer
   // Compute the means needed for the above frequency correction.
   float MaskRangeMean(size_t start_bin, size_t end_bin);
 
-  // Applies both sets of masks to |input| and store in |output|.
-  void ApplyMasks(const complex_f* const* input, complex_f* const* output);
+  // Applies the post-filter mask to |input| and stores the result in |output|.
+  void ApplyPostFilter(const complex_f* input, complex_f* output);
 
   void EstimateTargetPresence();
@@ -126,11 +154,13 @@ class NonlinearBeamformer
 
   // Deals with the fft transform and blocking.
   size_t chunk_length_;
-  std::unique_ptr<LappedTransform> lapped_transform_;
+  std::unique_ptr<LappedTransform> process_transform_;
+  std::unique_ptr<PostFilterTransform> postfilter_transform_;
   float window_[kFftSize];
 
   // Parameters exposed to the user.
   const size_t num_input_channels_;
+  const size_t num_postfilter_channels_;
   int sample_rate_hz_;
 
   const std::vector<Point> array_geometry_;
@@ -161,7 +191,6 @@ class NonlinearBeamformer
   // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
   ComplexMatrixF delay_sum_masks_[kNumFreqBins];
-  ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
 
   // Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
   // |num_input_channels_|.
@@ -186,6 +215,7 @@ class NonlinearBeamformer
 
   // For processing the high-frequency input signal.
   float high_pass_postfilter_mask_;
+  float old_high_pass_mask_;
 
   // True when the target signal is present.
   bool is_target_present_;
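One behavioral detail worth noting from Initialize() in nonlinear_beamformer.cc above: the per-bin wave numbers are now computed incrementally. The old code evaluated k_i = 2*pi*f_i / c with f_i = (i / kFftSize) * sample_rate; the new code uses k_i = i * wave_number_step with wave_number_step = 2*pi*sample_rate / (kFftSize * c). The two forms are algebraically identical; only the loop-invariant factor is hoisted. A quick numerical check (kSpeedOfSound is an assumed stand-in for kSpeedOfSoundMeterSeconds):

#include <cassert>
#include <cmath>

int main() {
  const float kPi = 3.14159265358979f;
  const float kSpeedOfSound = 343.f;  // Assumed value for this check.
  const int kFftSize = 256;
  const float sample_rate_hz = 16000.f;

  const float wave_number_step =
      (2.f * kPi * sample_rate_hz) / (kFftSize * kSpeedOfSound);
  for (int i = 0; i < kFftSize / 2 + 1; ++i) {
    // Old formulation: bin frequency first, then wave number.
    const float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz;
    const float old_wave_number = 2.f * kPi * freq_hz / kSpeedOfSound;
    assert(std::fabs(old_wave_number - i * wave_number_step) < 1e-3f);
  }
  return 0;
}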
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc
index d187552692..233d406430 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc
@@ -43,14 +43,14 @@ int main(int argc, char* argv[]) {
   google::ParseCommandLineFlags(&argc, &argv, true);
 
   WavReader in_file(FLAGS_i);
-  WavWriter out_file(FLAGS_o, in_file.sample_rate(), 1);
+  WavWriter out_file(FLAGS_o, in_file.sample_rate(), in_file.num_channels());
 
   const size_t num_mics = in_file.num_channels();
   const std::vector<Point> array_geometry =
       ParseArrayGeometry(FLAGS_mic_positions, num_mics);
   RTC_CHECK_EQ(array_geometry.size(), num_mics);
 
-  NonlinearBeamformer bf(array_geometry);
+  NonlinearBeamformer bf(array_geometry, array_geometry.size());
   bf.Initialize(kChunkSizeMs, in_file.sample_rate());
 
   printf("Input file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n",
@@ -58,24 +58,22 @@ int main(int argc, char* argv[]) {
   printf("Output file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n",
          FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate());
 
-  ChannelBuffer<float> in_buf(
+  ChannelBuffer<float> buf(
       rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond),
       in_file.num_channels());
-  ChannelBuffer<float> out_buf(
-      rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond),
-      out_file.num_channels());
 
-  std::vector<float> interleaved(in_buf.size());
+  std::vector<float> interleaved(buf.size());
   while (in_file.ReadSamples(interleaved.size(), &interleaved[0]) ==
          interleaved.size()) {
     FloatS16ToFloat(&interleaved[0], interleaved.size(), &interleaved[0]);
-    Deinterleave(&interleaved[0], in_buf.num_frames(),
-                 in_buf.num_channels(), in_buf.channels());
+    Deinterleave(&interleaved[0], buf.num_frames(),
+                 buf.num_channels(), buf.channels());
 
-    bf.ProcessChunk(in_buf, &out_buf);
+    bf.AnalyzeChunk(buf);
+    bf.PostFilter(&buf);
 
-    Interleave(out_buf.channels(), out_buf.num_frames(),
-               out_buf.num_channels(), &interleaved[0]);
+    Interleave(buf.channels(), buf.num_frames(),
+               buf.num_channels(), &interleaved[0]);
     FloatToFloatS16(&interleaved[0], interleaved.size(), &interleaved[0]);
     out_file.WriteSamples(&interleaved[0], interleaved.size());
   }
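With these changes the offline tool filters in place on a single buffer, writes as many channels as it reads, and post-filters all of them (num_postfilter_channels is set to the array size). A hypothetical invocation, assuming ParseArrayGeometry() takes space-separated x y z triplets, one per microphone:

  ./nonlinear_beamformer_test -i capture_stereo.wav -o enhanced.wav -mic_positions "-0.05 0 0 0.05 0 0"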
diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc
index fbf0ec098f..1ad3ed6c2e 100644
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_unittest.cc
@@ -57,14 +57,14 @@ const size_t kNumFramesToProcess = 1000;
 
 void ProcessOneFrame(int sample_rate_hz,
                      AudioBuffer* capture_audio_buffer,
-                     Beamformer<float>* beamformer) {
+                     NonlinearBeamformer* beamformer) {
   if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
     capture_audio_buffer->SplitIntoFrequencyBands();
   }
 
-  beamformer->ProcessChunk(*capture_audio_buffer->split_data_f(),
-                           capture_audio_buffer->split_data_f());
+  beamformer->AnalyzeChunk(*capture_audio_buffer->split_data_f());
   capture_audio_buffer->set_num_channels(1);
+  beamformer->PostFilter(capture_audio_buffer->split_data_f());
 
   if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
     capture_audio_buffer->MergeFrequencyBands();
@@ -81,7 +81,7 @@ void RunBitExactnessTest(int sample_rate_hz,
                          const std::vector<Point>& array_geometry,
                          const SphericalPointf& target_direction,
                          rtc::ArrayView<const float> output_reference) {
-  NonlinearBeamformer beamformer(array_geometry, target_direction);
+  NonlinearBeamformer beamformer(array_geometry, 1u, target_direction);
   beamformer.Initialize(AudioProcessing::kChunkSizeMs,
                         BeamformerSampleRate(sample_rate_hz));
 
@@ -159,7 +159,7 @@ TEST(NonlinearBeamformerTest, AimingModifiesBeam) {
   std::vector<Point> array_geometry;
   array_geometry.push_back(Point(-0.025f, 0.f, 0.f));
   array_geometry.push_back(Point(0.025f, 0.f, 0.f));
-  NonlinearBeamformer bf(array_geometry);
+  NonlinearBeamformer bf(array_geometry, 1u);
   bf.Initialize(kChunkSizeMs, kSampleRateHz);
   // The default constructor parameter sets the target angle to PI / 2.
   Verify(&bf, static_cast<float>(M_PI) / 2.f);
@@ -176,7 +176,7 @@ TEST(NonlinearBeamformerTest, InterfAnglesTakeAmbiguityIntoAccount) {
     array_geometry.push_back(Point(-0.1f, 0.f, 0.f));
     array_geometry.push_back(Point(0.f, 0.f, 0.f));
     array_geometry.push_back(Point(0.2f, 0.f, 0.f));
-    NonlinearBeamformer bf(array_geometry);
+    NonlinearBeamformer bf(array_geometry, 1u);
     bf.Initialize(kChunkSizeMs, kSampleRateHz);
     EXPECT_EQ(2u, bf.interf_angles_radians_.size());
     EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
@@ -197,7 +197,7 @@ TEST(NonlinearBeamformerTest, InterfAnglesTakeAmbiguityIntoAccount) {
     array_geometry.push_back(Point(0.2f, 0.f, 0.f));
     array_geometry.push_back(Point(0.1f, 0.f, 0.2f));
     array_geometry.push_back(Point(0.f, 0.f, -0.1f));
-    NonlinearBeamformer bf(array_geometry);
+    NonlinearBeamformer bf(array_geometry, 1u);
     bf.Initialize(kChunkSizeMs, kSampleRateHz);
     EXPECT_EQ(2u, bf.interf_angles_radians_.size());
     EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
@@ -216,7 +216,7 @@ TEST(NonlinearBeamformerTest, InterfAnglesTakeAmbiguityIntoAccount) {
     array_geometry.push_back(Point(0.f, 0.f, 0.f));
    array_geometry.push_back(Point(0.2f, 0.f, 0.f));
     array_geometry.push_back(Point(0.f, 0.1f, -0.2f));
-    NonlinearBeamformer bf(array_geometry);
+    NonlinearBeamformer bf(array_geometry, 1u);
     bf.Initialize(kChunkSizeMs, kSampleRateHz);
     EXPECT_EQ(2u, bf.interf_angles_radians_.size());
     EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
@@ -235,7 +235,7 @@ TEST(NonlinearBeamformerTest, InterfAnglesTakeAmbiguityIntoAccount) {
     array_geometry.push_back(Point(0.1f, 0.f, 0.f));
     array_geometry.push_back(Point(0.f, 0.2f, 0.f));
     array_geometry.push_back(Point(0.f, 0.f, 0.3f));
-    NonlinearBeamformer bf(array_geometry);
+    NonlinearBeamformer bf(array_geometry, 1u);
     bf.Initialize(kChunkSizeMs, kSampleRateHz);
     EXPECT_EQ(2u, bf.interf_angles_radians_.size());
     EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
@@ -262,8 +262,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo16kHz_ArrayGeometry1_TargetDirection1) {
-  const float kOutputReference[] = {0.000064f, 0.000211f, 0.000075f,
-                                    0.000064f, 0.000211f, 0.000075f};
+  const float kOutputReference[] = {-0.000077f, -0.000147f, -0.000138f,
+                                    -0.000077f, -0.000147f, -0.000138f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(1),
                       TargetDirection1, kOutputReference);
@@ -271,8 +271,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo32kHz_ArrayGeometry1_TargetDirection1) {
-  const float kOutputReference[] = {0.000183f, 0.000183f, 0.000183f,
-                                    0.000183f, 0.000183f, 0.000183f};
+  const float kOutputReference[] = {-0.000061f, -0.000061f, -0.000061f,
+                                    -0.000061f, -0.000061f, -0.000061f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, CreateArrayGeometry(1),
                       TargetDirection1, kOutputReference);
@@ -280,8 +280,8 @@ TEST(BeamformerBitExactnessTest,
 TEST(BeamformerBitExactnessTest,
      Stereo48kHz_ArrayGeometry1_TargetDirection1) {
-  const float kOutputReference[] = {0.000155f, 0.000152f, 0.000159f,
-                                    0.000155f, 0.000152f, 0.000159f};
+  const float kOutputReference[] = {0.000450f, 0.000436f, 0.000433f,
+                                    0.000450f, 0.000436f, 0.000433f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(1),
                       TargetDirection1, kOutputReference);
@@ -300,8 +300,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo16kHz_ArrayGeometry1_TargetDirection2) {
-  const float kOutputReference[] = {0.001144f, -0.001026f, 0.001074f,
-                                    0.001144f, -0.001026f, 0.001074f};
+  const float kOutputReference[] = {0.000221f, -0.000249f, 0.000140f,
+                                    0.000221f, -0.000249f, 0.000140f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(1),
                       TargetDirection2, kOutputReference);
@@ -309,8 +309,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo32kHz_ArrayGeometry1_TargetDirection2) {
-  const float kOutputReference[] = {0.000732f, -0.000397f, 0.000610f,
-                                    0.000732f, -0.000397f, 0.000610f};
+  const float kOutputReference[] = {0.000763f, -0.000336f, 0.000549f,
+                                    0.000763f, -0.000336f, 0.000549f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, CreateArrayGeometry(1),
                       TargetDirection2, kOutputReference);
@@ -318,8 +318,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo48kHz_ArrayGeometry1_TargetDirection2) {
-  const float kOutputReference[] = {0.000106f, -0.000464f, 0.000188f,
-                                    0.000106f, -0.000464f, 0.000188f};
+  const float kOutputReference[] = {-0.000004f, -0.000494f, 0.000255f,
+                                    -0.000004f, -0.000494f, 0.000255f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(1),
                       TargetDirection2, kOutputReference);
@@ -327,8 +327,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo8kHz_ArrayGeometry2_TargetDirection2) {
-  const float kOutputReference[] = {-0.000649f, 0.000576f, -0.000148f,
-                                    -0.000649f, 0.000576f, -0.000148f};
+  const float kOutputReference[] = {-0.000914f, 0.002170f, -0.002382f,
+                                    -0.000914f, 0.002170f, -0.002382f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate8kHz, CreateArrayGeometry(2),
                       TargetDirection2, kOutputReference);
@@ -336,8 +336,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo16kHz_ArrayGeometry2_TargetDirection2) {
-  const float kOutputReference[] = {0.000808f, -0.000695f, 0.000739f,
-                                    0.000808f, -0.000695f, 0.000739f};
+  const float kOutputReference[] = {0.000179f, -0.000179f, 0.000081f,
+                                    0.000179f, -0.000179f, 0.000081f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate16kHz, CreateArrayGeometry(2),
                       TargetDirection2, kOutputReference);
@@ -345,8 +345,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo32kHz_ArrayGeometry2_TargetDirection2) {
-  const float kOutputReference[] = {0.000580f, -0.000183f, 0.000458f,
-                                    0.000580f, -0.000183f, 0.000458f};
+  const float kOutputReference[] = {0.000549f, -0.000214f, 0.000366f,
+                                    0.000549f, -0.000214f, 0.000366f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate32kHz, CreateArrayGeometry(2),
                       TargetDirection2, kOutputReference);
@@ -354,8 +354,8 @@ TEST(BeamformerBitExactnessTest,
 
 TEST(BeamformerBitExactnessTest,
      Stereo48kHz_ArrayGeometry2_TargetDirection2) {
-  const float kOutputReference[] = {0.000075f, -0.000288f, 0.000156f,
-                                    0.000075f, -0.000288f, 0.000156f};
+  const float kOutputReference[] = {0.000019f, -0.000310f, 0.000182f,
+                                    0.000019f, -0.000310f, 0.000182f};
 
   RunBitExactnessTest(AudioProcessing::kSampleRate48kHz, CreateArrayGeometry(2),
                       TargetDirection2, kOutputReference);
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index d25c2525b3..06bfc9ba7f 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -31,8 +31,7 @@ struct AecCore;
 
 class AudioFrame;
-template<typename T>
-class Beamformer;
+class NonlinearBeamformer;
 class StreamConfig;
 class ProcessingConfig;
 
@@ -275,7 +274,7 @@ class AudioProcessing {
   static AudioProcessing* Create(const Config& config);
   // Only for testing.
   static AudioProcessing* Create(const Config& config,
-                                 Beamformer<float>* beamformer);
+                                 NonlinearBeamformer* beamformer);
   virtual ~AudioProcessing() {}
 
   // Initializes internal states, while retaining all user settings. This
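For reference, the test-only injection path now looks like this end to end: Create() takes a NonlinearBeamformer* directly instead of a Beamformer<float>*. A sketch mirroring ApmTest.AgcOnlyAdaptsWhenTargetSignalIsPresent, with a real NonlinearBeamformer in place of the mock (the helper function is illustrative):

#include <memory>
#include <vector>

#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"

std::unique_ptr<webrtc::AudioProcessing> CreateApmWithBeamformer() {
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(-0.05f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));

  webrtc::Config config;
  config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));

  // APM takes ownership of the injected beamformer (one postfilter channel).
  webrtc::NonlinearBeamformer* beamformer =
      new webrtc::NonlinearBeamformer(geometry, 1u);
  return std::unique_ptr<webrtc::AudioProcessing>(
      webrtc::AudioProcessing::Create(config, beamformer));
}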