Run fullband processing at output rate on ARM
The audio processing in the band-split domain on ARM platforms operates at a sampling frequency of 32 kHz. This CL upsamples the signal to fullband before the "fullband processing" if an output rate of 48 kHz is chosen. Change-Id: I268acd33aff1fcfa4f75ba8c0fb3e16abb9f74e8 Bug: b/130016532 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155640 Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org> Reviewed-by: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#29415}
This commit is contained in:

committed by
Commit Bot

parent
1d3008bfc6
commit
422b9e0982
@ -65,9 +65,8 @@ AudioBuffer::AudioBuffer(size_t input_num_frames,
|
|||||||
num_channels_(buffer_num_channels),
|
num_channels_(buffer_num_channels),
|
||||||
num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)),
|
num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)),
|
||||||
num_split_frames_(rtc::CheckedDivExact(buffer_num_frames_, num_bands_)),
|
num_split_frames_(rtc::CheckedDivExact(buffer_num_frames_, num_bands_)),
|
||||||
data_(new ChannelBuffer<float>(buffer_num_frames_, buffer_num_channels_)),
|
data_(
|
||||||
output_buffer_(
|
new ChannelBuffer<float>(buffer_num_frames_, buffer_num_channels_)) {
|
||||||
new ChannelBuffer<float>(output_num_frames_, num_channels_)) {
|
|
||||||
RTC_DCHECK_GT(input_num_frames_, 0);
|
RTC_DCHECK_GT(input_num_frames_, 0);
|
||||||
RTC_DCHECK_GT(buffer_num_frames_, 0);
|
RTC_DCHECK_GT(buffer_num_frames_, 0);
|
||||||
RTC_DCHECK_GT(output_num_frames_, 0);
|
RTC_DCHECK_GT(output_num_frames_, 0);
|
||||||
@ -185,6 +184,29 @@ void AudioBuffer::CopyTo(const StreamConfig& stream_config,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AudioBuffer::CopyTo(AudioBuffer* buffer) const {
|
||||||
|
RTC_DCHECK_EQ(buffer->num_frames(), output_num_frames_);
|
||||||
|
|
||||||
|
const bool resampling_needed = output_num_frames_ != buffer_num_frames_;
|
||||||
|
if (resampling_needed) {
|
||||||
|
for (size_t i = 0; i < num_channels_; ++i) {
|
||||||
|
output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_,
|
||||||
|
buffer->channels()[i],
|
||||||
|
buffer->num_frames());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (size_t i = 0; i < num_channels_; ++i) {
|
||||||
|
memcpy(buffer->channels()[i], data_->channels()[i],
|
||||||
|
buffer_num_frames_ * sizeof(**buffer->channels()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = num_channels_; i < buffer->num_channels(); ++i) {
|
||||||
|
memcpy(buffer->channels()[i], buffer->channels()[0],
|
||||||
|
output_num_frames_ * sizeof(**buffer->channels()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void AudioBuffer::RestoreNumChannels() {
|
void AudioBuffer::RestoreNumChannels() {
|
||||||
num_channels_ = buffer_num_channels_;
|
num_channels_ = buffer_num_channels_;
|
||||||
data_->set_num_channels(buffer_num_channels_);
|
data_->set_num_channels(buffer_num_channels_);
|
||||||
|
@ -115,6 +115,7 @@ class AudioBuffer {
|
|||||||
// Copies data from the buffer.
|
// Copies data from the buffer.
|
||||||
void CopyTo(AudioFrame* frame) const;
|
void CopyTo(AudioFrame* frame) const;
|
||||||
void CopyTo(const StreamConfig& stream_config, float* const* data);
|
void CopyTo(const StreamConfig& stream_config, float* const* data);
|
||||||
|
void CopyTo(AudioBuffer* buffer) const;
|
||||||
|
|
||||||
// Splits the buffer data into frequency bands.
|
// Splits the buffer data into frequency bands.
|
||||||
void SplitIntoFrequencyBands();
|
void SplitIntoFrequencyBands();
|
||||||
@ -165,7 +166,6 @@ class AudioBuffer {
|
|||||||
std::unique_ptr<ChannelBuffer<float>> data_;
|
std::unique_ptr<ChannelBuffer<float>> data_;
|
||||||
std::unique_ptr<ChannelBuffer<float>> split_data_;
|
std::unique_ptr<ChannelBuffer<float>> split_data_;
|
||||||
std::unique_ptr<SplittingFilter> splitting_filter_;
|
std::unique_ptr<SplittingFilter> splitting_filter_;
|
||||||
std::unique_ptr<ChannelBuffer<float>> output_buffer_;
|
|
||||||
std::vector<std::unique_ptr<PushSincResampler>> input_resamplers_;
|
std::vector<std::unique_ptr<PushSincResampler>> input_resamplers_;
|
||||||
std::vector<std::unique_ptr<PushSincResampler>> output_resamplers_;
|
std::vector<std::unique_ptr<PushSincResampler>> output_resamplers_;
|
||||||
bool downmix_by_averaging_ = true;
|
bool downmix_by_averaging_ = true;
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
|
|
||||||
#include "modules/audio_processing/audio_buffer.h"
|
#include "modules/audio_processing/audio_buffer.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
#include "test/gtest.h"
|
#include "test/gtest.h"
|
||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
@ -44,4 +45,47 @@ TEST(AudioBufferTest, SetNumChannelsDeathTest) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
TEST(AudioBufferTest, CopyWithoutResampling) {
|
||||||
|
AudioBuffer ab1(32000, 2, 32000, 2, 32000, 2);
|
||||||
|
AudioBuffer ab2(32000, 2, 32000, 2, 32000, 2);
|
||||||
|
// Fill first buffer.
|
||||||
|
for (size_t ch = 0; ch < ab1.num_channels(); ++ch) {
|
||||||
|
for (size_t i = 0; i < ab1.num_frames(); ++i) {
|
||||||
|
ab1.channels()[ch][i] = i + ch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Copy to second buffer.
|
||||||
|
ab1.CopyTo(&ab2);
|
||||||
|
// Verify content of second buffer.
|
||||||
|
for (size_t ch = 0; ch < ab2.num_channels(); ++ch) {
|
||||||
|
for (size_t i = 0; i < ab2.num_frames(); ++i) {
|
||||||
|
EXPECT_EQ(ab2.channels()[ch][i], i + ch);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(AudioBufferTest, CopyWithResampling) {
|
||||||
|
AudioBuffer ab1(32000, 2, 32000, 2, 48000, 2);
|
||||||
|
AudioBuffer ab2(48000, 2, 48000, 2, 48000, 2);
|
||||||
|
float energy_ab1 = 0.f;
|
||||||
|
float energy_ab2 = 0.f;
|
||||||
|
const float pi = std::acos(-1.f);
|
||||||
|
// Put a sine and compute energy of first buffer.
|
||||||
|
for (size_t ch = 0; ch < ab1.num_channels(); ++ch) {
|
||||||
|
for (size_t i = 0; i < ab1.num_frames(); ++i) {
|
||||||
|
ab1.channels()[ch][i] = std::sin(2 * pi * 100.f / 32000.f * i);
|
||||||
|
energy_ab1 += ab1.channels()[ch][i] * ab1.channels()[ch][i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Copy to second buffer.
|
||||||
|
ab1.CopyTo(&ab2);
|
||||||
|
// Compute energy of second buffer.
|
||||||
|
for (size_t ch = 0; ch < ab2.num_channels(); ++ch) {
|
||||||
|
for (size_t i = 0; i < ab2.num_frames(); ++i) {
|
||||||
|
energy_ab2 += ab2.channels()[ch][i] * ab2.channels()[ch][i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Verify that energies match.
|
||||||
|
EXPECT_NEAR(energy_ab1, energy_ab2 * 32000.f / 48000.f, .01f * energy_ab1);
|
||||||
|
}
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
@ -525,6 +525,20 @@ int AudioProcessingImpl::InitializeLocked() {
|
|||||||
formats_.api_format.output_stream().sample_rate_hz(),
|
formats_.api_format.output_stream().sample_rate_hz(),
|
||||||
formats_.api_format.output_stream().num_channels()));
|
formats_.api_format.output_stream().num_channels()));
|
||||||
|
|
||||||
|
if (capture_nonlocked_.capture_processing_format.sample_rate_hz() <
|
||||||
|
formats_.api_format.output_stream().sample_rate_hz() &&
|
||||||
|
formats_.api_format.output_stream().sample_rate_hz() == 48000) {
|
||||||
|
capture_.capture_fullband_audio.reset(
|
||||||
|
new AudioBuffer(formats_.api_format.input_stream().sample_rate_hz(),
|
||||||
|
formats_.api_format.input_stream().num_channels(),
|
||||||
|
formats_.api_format.output_stream().sample_rate_hz(),
|
||||||
|
formats_.api_format.output_stream().num_channels(),
|
||||||
|
formats_.api_format.output_stream().sample_rate_hz(),
|
||||||
|
formats_.api_format.output_stream().num_channels()));
|
||||||
|
} else {
|
||||||
|
capture_.capture_fullband_audio.reset();
|
||||||
|
}
|
||||||
|
|
||||||
AllocateRenderQueue();
|
AllocateRenderQueue();
|
||||||
|
|
||||||
public_submodules_->gain_control->Initialize(num_proc_channels(),
|
public_submodules_->gain_control->Initialize(num_proc_channels(),
|
||||||
@ -803,6 +817,12 @@ int AudioProcessingImpl::proc_sample_rate_hz() const {
|
|||||||
return capture_nonlocked_.capture_processing_format.sample_rate_hz();
|
return capture_nonlocked_.capture_processing_format.sample_rate_hz();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int AudioProcessingImpl::proc_fullband_sample_rate_hz() const {
|
||||||
|
return capture_.capture_fullband_audio
|
||||||
|
? capture_.capture_fullband_audio->num_frames() * 100
|
||||||
|
: capture_nonlocked_.capture_processing_format.sample_rate_hz();
|
||||||
|
}
|
||||||
|
|
||||||
int AudioProcessingImpl::proc_split_sample_rate_hz() const {
|
int AudioProcessingImpl::proc_split_sample_rate_hz() const {
|
||||||
// Used as callback from submodules, hence locking is not allowed.
|
// Used as callback from submodules, hence locking is not allowed.
|
||||||
return capture_nonlocked_.split_rate;
|
return capture_nonlocked_.split_rate;
|
||||||
@ -968,7 +988,12 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
|
|||||||
capture_.keyboard_info.Extract(src, formats_.api_format.input_stream());
|
capture_.keyboard_info.Extract(src, formats_.api_format.input_stream());
|
||||||
capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
|
capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
|
||||||
RETURN_ON_ERR(ProcessCaptureStreamLocked());
|
RETURN_ON_ERR(ProcessCaptureStreamLocked());
|
||||||
|
if (capture_.capture_fullband_audio) {
|
||||||
|
capture_.capture_fullband_audio->CopyTo(formats_.api_format.output_stream(),
|
||||||
|
dest);
|
||||||
|
} else {
|
||||||
capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
|
capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
|
||||||
|
}
|
||||||
|
|
||||||
if (aec_dump_) {
|
if (aec_dump_) {
|
||||||
RecordProcessedCaptureStream(dest);
|
RecordProcessedCaptureStream(dest);
|
||||||
@ -1264,8 +1289,12 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
|
|||||||
RETURN_ON_ERR(ProcessCaptureStreamLocked());
|
RETURN_ON_ERR(ProcessCaptureStreamLocked());
|
||||||
if (submodule_states_.CaptureMultiBandProcessingActive() ||
|
if (submodule_states_.CaptureMultiBandProcessingActive() ||
|
||||||
submodule_states_.CaptureFullBandProcessingActive()) {
|
submodule_states_.CaptureFullBandProcessingActive()) {
|
||||||
|
if (capture_.capture_fullband_audio) {
|
||||||
|
capture_.capture_fullband_audio->CopyTo(frame);
|
||||||
|
} else {
|
||||||
capture_.capture_audio->CopyTo(frame);
|
capture_.capture_audio->CopyTo(frame);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (capture_.stats.voice_detected) {
|
if (capture_.stats.voice_detected) {
|
||||||
frame->vad_activity_ = *capture_.stats.voice_detected
|
frame->vad_activity_ = *capture_.stats.voice_detected
|
||||||
? AudioFrame::kVadActive
|
? AudioFrame::kVadActive
|
||||||
@ -1446,6 +1475,11 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
|||||||
capture_buffer->MergeFrequencyBands();
|
capture_buffer->MergeFrequencyBands();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (capture_.capture_fullband_audio) {
|
||||||
|
capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
|
||||||
|
capture_buffer = capture_.capture_fullband_audio.get();
|
||||||
|
}
|
||||||
|
|
||||||
if (config_.residual_echo_detector.enabled) {
|
if (config_.residual_echo_detector.enabled) {
|
||||||
RTC_DCHECK(private_submodules_->echo_detector);
|
RTC_DCHECK(private_submodules_->echo_detector);
|
||||||
private_submodules_->echo_detector->AnalyzeCaptureAudio(
|
private_submodules_->echo_detector->AnalyzeCaptureAudio(
|
||||||
@ -1830,8 +1864,8 @@ void AudioProcessingImpl::InitializeTransient() {
|
|||||||
public_submodules_->transient_suppressor.reset(new TransientSuppressor());
|
public_submodules_->transient_suppressor.reset(new TransientSuppressor());
|
||||||
}
|
}
|
||||||
public_submodules_->transient_suppressor->Initialize(
|
public_submodules_->transient_suppressor->Initialize(
|
||||||
capture_nonlocked_.capture_processing_format.sample_rate_hz(),
|
proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
|
||||||
capture_nonlocked_.split_rate, num_proc_channels());
|
num_proc_channels());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1956,7 +1990,8 @@ void AudioProcessingImpl::InitializeEchoController() {
|
|||||||
|
|
||||||
void AudioProcessingImpl::InitializeGainController2() {
|
void AudioProcessingImpl::InitializeGainController2() {
|
||||||
if (config_.gain_controller2.enabled) {
|
if (config_.gain_controller2.enabled) {
|
||||||
private_submodules_->gain_controller2->Initialize(proc_sample_rate_hz());
|
private_submodules_->gain_controller2->Initialize(
|
||||||
|
proc_fullband_sample_rate_hz());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1972,21 +2007,21 @@ void AudioProcessingImpl::InitializePreAmplifier() {
|
|||||||
void AudioProcessingImpl::InitializeResidualEchoDetector() {
|
void AudioProcessingImpl::InitializeResidualEchoDetector() {
|
||||||
RTC_DCHECK(private_submodules_->echo_detector);
|
RTC_DCHECK(private_submodules_->echo_detector);
|
||||||
private_submodules_->echo_detector->Initialize(
|
private_submodules_->echo_detector->Initialize(
|
||||||
proc_sample_rate_hz(), 1,
|
proc_fullband_sample_rate_hz(), 1,
|
||||||
formats_.render_processing_format.sample_rate_hz(), 1);
|
formats_.render_processing_format.sample_rate_hz(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioProcessingImpl::InitializeAnalyzer() {
|
void AudioProcessingImpl::InitializeAnalyzer() {
|
||||||
if (private_submodules_->capture_analyzer) {
|
if (private_submodules_->capture_analyzer) {
|
||||||
private_submodules_->capture_analyzer->Initialize(proc_sample_rate_hz(),
|
private_submodules_->capture_analyzer->Initialize(
|
||||||
num_proc_channels());
|
proc_fullband_sample_rate_hz(), num_proc_channels());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioProcessingImpl::InitializePostProcessor() {
|
void AudioProcessingImpl::InitializePostProcessor() {
|
||||||
if (private_submodules_->capture_post_processor) {
|
if (private_submodules_->capture_post_processor) {
|
||||||
private_submodules_->capture_post_processor->Initialize(
|
private_submodules_->capture_post_processor->Initialize(
|
||||||
proc_sample_rate_hz(), num_proc_channels());
|
proc_fullband_sample_rate_hz(), num_proc_channels());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -245,6 +245,10 @@ class AudioProcessingImpl : public AudioProcessing {
|
|||||||
void InitializeAnalyzer() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
void InitializeAnalyzer() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
void InitializePreProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
void InitializePreProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
|
||||||
|
|
||||||
|
// Sample rate used for the fullband processing.
|
||||||
|
int proc_fullband_sample_rate_hz() const
|
||||||
|
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
|
|
||||||
// Empties and handles the respective RuntimeSetting queues.
|
// Empties and handles the respective RuntimeSetting queues.
|
||||||
void HandleCaptureRuntimeSettings()
|
void HandleCaptureRuntimeSettings()
|
||||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||||
@ -387,6 +391,7 @@ class AudioProcessingImpl : public AudioProcessing {
|
|||||||
bool key_pressed;
|
bool key_pressed;
|
||||||
bool transient_suppressor_enabled;
|
bool transient_suppressor_enabled;
|
||||||
std::unique_ptr<AudioBuffer> capture_audio;
|
std::unique_ptr<AudioBuffer> capture_audio;
|
||||||
|
std::unique_ptr<AudioBuffer> capture_fullband_audio;
|
||||||
// Only the rate and samples fields of capture_processing_format_ are used
|
// Only the rate and samples fields of capture_processing_format_ are used
|
||||||
// because the capture processing number of channels is mutable and is
|
// because the capture processing number of channels is mutable and is
|
||||||
// tracked by the capture_audio_.
|
// tracked by the capture_audio_.
|
||||||
|
Reference in New Issue
Block a user