Stop using the beamformer inside APM

Removes the usage of an injected/enabled beamformer in APM, and marks
the API parts as deprecated.
Initialization and process calls are removed, and all enabled/disabled
flags are replaced by assuming no beamforming. Additionally, an AGC test
relying on the beamformer as a VAD is removed.

Bug: webrtc:9402
Change-Id: I0d3d0b9773da083ce43c28045db9a77278f59f95
Reviewed-on: https://webrtc-review.googlesource.com/83341
Reviewed-by: Minyue Li <minyue@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23643}
This commit is contained in:
Sam Zackrisson
2018-06-14 10:11:35 +02:00
committed by Commit Bot
parent 431abd989b
commit 9394f6fda1
5 changed files with 12 additions and 171 deletions

View File

@ -166,7 +166,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
bool residual_echo_detector_enabled,
bool noise_suppressor_enabled,
bool intelligibility_enhancer_enabled,
bool beamformer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
@ -184,7 +183,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
changed |=
(intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_);
changed |= (beamformer_enabled != beamformer_enabled_);
changed |=
(adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
changed |=
@ -202,7 +200,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
residual_echo_detector_enabled_ = residual_echo_detector_enabled;
noise_suppressor_enabled_ = noise_suppressor_enabled;
intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled;
beamformer_enabled_ = beamformer_enabled;
adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
gain_controller2_enabled_ = gain_controller2_enabled;
pre_amplifier_enabled_ = pre_amplifier_enabled;
@ -231,8 +228,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
const {
return low_cut_filter_enabled_ || echo_canceller_enabled_ ||
mobile_echo_controller_enabled_ || noise_suppressor_enabled_ ||
beamformer_enabled_ || adaptive_gain_controller_enabled_ ||
echo_controller_enabled_;
adaptive_gain_controller_enabled_ || echo_controller_enabled_;
}
bool AudioProcessingImpl::ApmSubmoduleStates::CaptureFullBandProcessingActive()
@ -388,14 +384,11 @@ AudioProcessingImpl::AudioProcessingImpl(
config.Get<ExperimentalAgc>().enabled),
#endif
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
capture_(false,
capture_(false),
#else
capture_(config.Get<ExperimentalNs>().enabled,
capture_(config.Get<ExperimentalNs>().enabled),
#endif
config.Get<Beamforming>().array_geometry,
config.Get<Beamforming>().target_direction),
capture_nonlocked_(config.Get<Beamforming>().enabled,
config.Get<Intelligibility>().enabled) {
capture_nonlocked_(config.Get<Intelligibility>().enabled) {
{
rtc::CritScope cs_render(&crit_render_);
rtc::CritScope cs_capture(&crit_capture_);
@ -509,11 +502,6 @@ int AudioProcessingImpl::MaybeInitialize(
int AudioProcessingImpl::InitializeLocked() {
UpdateActiveSubmoduleStates();
const int capture_audiobuffer_num_channels =
capture_nonlocked_.beamformer_enabled
? formats_.api_format.input_stream().num_channels()
: formats_.api_format.output_stream().num_channels();
const int render_audiobuffer_num_output_frames =
formats_.api_format.reverse_output_stream().num_frames() == 0
? formats_.render_processing_format.num_frames()
@ -544,7 +532,7 @@ int AudioProcessingImpl::InitializeLocked() {
new AudioBuffer(formats_.api_format.input_stream().num_frames(),
formats_.api_format.input_stream().num_channels(),
capture_nonlocked_.capture_processing_format.num_frames(),
capture_audiobuffer_num_channels,
formats_.api_format.output_stream().num_channels(),
formats_.api_format.output_stream().num_frames()));
public_submodules_->echo_cancellation->Initialize(
@ -575,7 +563,6 @@ int AudioProcessingImpl::InitializeLocked() {
public_submodules_->gain_control_for_experimental_agc->Initialize();
}
InitializeTransient();
InitializeBeamformer();
#if WEBRTC_INTELLIGIBILITY_ENHANCER
InitializeIntelligibility();
#endif
@ -615,11 +602,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
return kBadNumberChannelsError;
}
if (capture_nonlocked_.beamformer_enabled &&
num_in_channels != capture_.array_geometry.size()) {
return kBadNumberChannelsError;
}
formats_.api_format = config;
int capture_processing_rate = FindNativeProcessRateToUse(
@ -735,18 +717,6 @@ void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) {
InitializeIntelligibility();
}
#endif
#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
if (capture_nonlocked_.beamformer_enabled !=
config.Get<Beamforming>().enabled) {
capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;
if (config.Get<Beamforming>().array_geometry.size() > 1) {
capture_.array_geometry = config.Get<Beamforming>().array_geometry;
}
capture_.target_direction = config.Get<Beamforming>().target_direction;
InitializeBeamformer();
}
#endif // WEBRTC_ANDROID_PLATFORM_BUILD
}
int AudioProcessingImpl::proc_sample_rate_hz() const {
@ -771,10 +741,7 @@ size_t AudioProcessingImpl::num_input_channels() const {
size_t AudioProcessingImpl::num_proc_channels() const {
// Used as callback from submodules, hence locking is not allowed.
return (capture_nonlocked_.beamformer_enabled ||
capture_nonlocked_.echo_controller_enabled)
? 1
: num_output_channels();
return capture_nonlocked_.echo_controller_enabled ? 1 : num_output_channels();
}
size_t AudioProcessingImpl::num_output_channels() const {
@ -1265,13 +1232,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
capture_buffer->set_num_channels(1);
}
if (capture_nonlocked_.beamformer_enabled) {
private_submodules_->beamformer->AnalyzeChunk(
*capture_buffer->split_data_f());
// Discards all channels but the leftmost one.
capture_buffer->set_num_channels(1);
}
// TODO(peah): Move the AEC3 low-cut filter to this place.
if (private_submodules_->low_cut_filter &&
!private_submodules_->echo_controller) {
@ -1334,16 +1294,10 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
capture_buffer, stream_delay_ms()));
}
if (capture_nonlocked_.beamformer_enabled) {
private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f());
}
public_submodules_->voice_detection->ProcessCaptureAudio(capture_buffer);
if (constants_.use_experimental_agc &&
public_submodules_->gain_control->is_enabled() &&
(!capture_nonlocked_.beamformer_enabled ||
private_submodules_->beamformer->is_target_present())) {
public_submodules_->gain_control->is_enabled()) {
private_submodules_->agc_manager->Process(
capture_buffer->split_bands_const(0)[kBand0To8kHz],
capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate);
@ -1811,7 +1765,6 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
config_.residual_echo_detector.enabled,
public_submodules_->noise_suppression->is_enabled(),
capture_nonlocked_.intelligibility_enabled,
capture_nonlocked_.beamformer_enabled,
public_submodules_->gain_control->is_enabled(),
config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
capture_nonlocked_.echo_controller_enabled,
@ -1832,17 +1785,6 @@ void AudioProcessingImpl::InitializeTransient() {
}
}
// Prepares the beamformer submodule for use. Does nothing unless
// capture_nonlocked_.beamformer_enabled is set.
void AudioProcessingImpl::InitializeBeamformer() {
if (capture_nonlocked_.beamformer_enabled) {
// Only construct a NonlinearBeamformer when private_submodules_ does not
// already hold one; an existing instance is kept and just re-initialized.
if (!private_submodules_->beamformer) {
// Built from the array geometry and target direction stored in capture_.
// NOTE(review): the meaning of the literal 1u is not visible here — confirm
// against the NonlinearBeamformer constructor.
private_submodules_->beamformer.reset(new NonlinearBeamformer(
capture_.array_geometry, 1u, capture_.target_direction));
}
// Runs on every call while enabled, using the chunk size constant and the
// current split rate.
private_submodules_->beamformer->Initialize(kChunkSizeMs,
capture_nonlocked_.split_rate);
}
}
void AudioProcessingImpl::InitializeIntelligibility() {
#if WEBRTC_INTELLIGIBILITY_ENHANCER
if (capture_nonlocked_.intelligibility_enabled) {
@ -2102,9 +2044,7 @@ void AudioProcessingImpl::RecordAudioProcessingState() {
}
AudioProcessingImpl::ApmCaptureState::ApmCaptureState(
bool transient_suppressor_enabled,
const std::vector<Point>& array_geometry,
SphericalPointf target_direction)
bool transient_suppressor_enabled)
: aec_system_delay_jumps(-1),
delay_offset_ms(0),
was_stream_delay_set(false),
@ -2114,8 +2054,6 @@ AudioProcessingImpl::ApmCaptureState::ApmCaptureState(
output_will_be_muted(false),
key_pressed(false),
transient_suppressor_enabled(transient_suppressor_enabled),
array_geometry(array_geometry),
target_direction(target_direction),
capture_processing_format(kSampleRate16kHz),
split_rate(kSampleRate16kHz),
echo_path_gain_change(false) {}

View File

@ -185,7 +185,6 @@ class AudioProcessingImpl : public AudioProcessing {
bool residual_echo_detector_enabled,
bool noise_suppressor_enabled,
bool intelligibility_enhancer_enabled,
bool beamformer_enabled,
bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled,
bool pre_amplifier_enabled,
@ -209,7 +208,6 @@ class AudioProcessingImpl : public AudioProcessing {
bool residual_echo_detector_enabled_ = false;
bool noise_suppressor_enabled_ = false;
bool intelligibility_enhancer_enabled_ = false;
bool beamformer_enabled_ = false;
bool adaptive_gain_controller_enabled_ = false;
bool gain_controller2_enabled_ = false;
bool pre_amplifier_enabled_ = false;
@ -370,9 +368,7 @@ class AudioProcessingImpl : public AudioProcessing {
} constants_;
struct ApmCaptureState {
ApmCaptureState(bool transient_suppressor_enabled,
const std::vector<Point>& array_geometry,
SphericalPointf target_direction);
ApmCaptureState(bool transient_suppressor_enabled);
~ApmCaptureState();
int aec_system_delay_jumps;
int delay_offset_ms;
@ -383,8 +379,6 @@ class AudioProcessingImpl : public AudioProcessing {
bool output_will_be_muted;
bool key_pressed;
bool transient_suppressor_enabled;
std::vector<Point> array_geometry;
SphericalPointf target_direction;
std::unique_ptr<AudioBuffer> capture_audio;
// Only the rate and samples fields of capture_processing_format_ are used
// because the capture processing number of channels is mutable and is
@ -395,12 +389,10 @@ class AudioProcessingImpl : public AudioProcessing {
} capture_ RTC_GUARDED_BY(crit_capture_);
struct ApmCaptureNonLockedState {
ApmCaptureNonLockedState(bool beamformer_enabled,
bool intelligibility_enabled)
ApmCaptureNonLockedState(bool intelligibility_enabled)
: capture_processing_format(kSampleRate16kHz),
split_rate(kSampleRate16kHz),
stream_delay_ms(0),
beamformer_enabled(beamformer_enabled),
intelligibility_enabled(intelligibility_enabled) {}
// Only the rate and samples fields of capture_processing_format_ are used
// because the forward processing number of channels is mutable and is
@ -408,7 +400,6 @@ class AudioProcessingImpl : public AudioProcessing {
StreamConfig capture_processing_format;
int split_rate;
int stream_delay_ms;
bool beamformer_enabled;
bool intelligibility_enabled;
bool echo_controller_enabled = false;
} capture_nonlocked_;

View File

@ -1300,95 +1300,6 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) {
}
}
#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS)
// Checks that gain control only adapts while the injected beamformer reports
// that the target is present: the same audio is processed twice, first with
// is_target_present() returning false (analog level and compression gain must
// stay at their defaults), then with it returning true (both must increase).
TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
const int kSampleRateHz = 16000;
// Samples per channel in one processing chunk at kSampleRateHz.
const size_t kSamplesPerChannel =
static_cast<size_t>(AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000);
const size_t kNumInputChannels = 2;
const size_t kNumOutputChannels = 1;
// Number of chunks processed in each of the two passes.
const size_t kNumChunks = 700;
// Every input sample is multiplied by this factor before processing.
const float kScaleFactor = 0.25f;
Config config;
// Two-point array geometry, the points 0.05 apart along the first coordinate.
std::vector<webrtc::Point> geometry;
geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
config.Set<Beamforming>(new Beamforming(true, geometry));
// The raw pointer is kept so expectations can be set on the mock after it has
// been handed to the builder inside a unique_ptr.
testing::NiceMock<MockNonlinearBeamformer>* beamformer =
new testing::NiceMock<MockNonlinearBeamformer>(geometry, 1u);
std::unique_ptr<AudioProcessing> apm(
AudioProcessingBuilder()
.SetNonlinearBeamformer(
std::unique_ptr<webrtc::NonlinearBeamformer>(beamformer))
.Create(config));
EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels);
ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels);
// Scratch buffers sized for the larger of the input/output channel counts.
const size_t max_length = kSamplesPerChannel * std::max(kNumInputChannels,
kNumOutputChannels);
std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]);
std::unique_ptr<float[]> float_data(new float[max_length]);
std::string filename = ResourceFilePath("far", kSampleRateHz);
FILE* far_file = fopen(filename.c_str(), "rb");
ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n";
// Baseline values captured before any audio is processed.
const int kDefaultVolume = apm->gain_control()->stream_analog_level();
const int kDefaultCompressionGain =
apm->gain_control()->compression_gain_db();
// The mock reads is_target through a pointer on every call, so flipping the
// variable later changes what is_target_present() returns.
bool is_target = false;
EXPECT_CALL(*beamformer, is_target_present())
.WillRepeatedly(testing::ReturnPointee(&is_target));
// First pass: target reported as absent.
for (size_t i = 0; i < kNumChunks; ++i) {
// NOTE(review): ReadChunk is defined elsewhere; presumably it fills src_buf
// with the next chunk from the file — confirm.
ASSERT_TRUE(ReadChunk(far_file,
int_data.get(),
float_data.get(),
&src_buf));
for (size_t j = 0; j < kNumInputChannels; ++j) {
for (size_t k = 0; k < kSamplesPerChannel; ++k) {
src_buf.channels()[j][k] *= kScaleFactor;
}
}
EXPECT_EQ(kNoErr,
apm->ProcessStream(src_buf.channels(),
src_buf.num_frames(),
kSampleRateHz,
LayoutFromChannels(src_buf.num_channels()),
kSampleRateHz,
LayoutFromChannels(dest_buf.num_channels()),
dest_buf.channels()));
}
// With the target absent, neither gain setting may have moved.
EXPECT_EQ(kDefaultVolume,
apm->gain_control()->stream_analog_level());
EXPECT_EQ(kDefaultCompressionGain,
apm->gain_control()->compression_gain_db());
// Second pass over the same file from the start, now with the target present.
rewind(far_file);
is_target = true;
for (size_t i = 0; i < kNumChunks; ++i) {
ASSERT_TRUE(ReadChunk(far_file,
int_data.get(),
float_data.get(),
&src_buf));
for (size_t j = 0; j < kNumInputChannels; ++j) {
for (size_t k = 0; k < kSamplesPerChannel; ++k) {
src_buf.channels()[j][k] *= kScaleFactor;
}
}
EXPECT_EQ(kNoErr,
apm->ProcessStream(src_buf.channels(),
src_buf.num_frames(),
kSampleRateHz,
LayoutFromChannels(src_buf.num_channels()),
kSampleRateHz,
LayoutFromChannels(dest_buf.num_channels()),
dest_buf.channels()));
}
// With the target present, both gain settings must have risen above baseline.
EXPECT_LT(kDefaultVolume,
apm->gain_control()->stream_analog_level());
EXPECT_LT(kDefaultCompressionGain,
apm->gain_control()->compression_gain_db());
ASSERT_EQ(0, fclose(far_file));
}
#endif
TEST_F(ApmTest, NoiseSuppression) {
// Test valid suppression levels.
NoiseSuppression::Level level[] = {

View File

@ -674,6 +674,7 @@ class AudioProcessingBuilder {
AudioProcessingBuilder& SetRenderPreProcessing(
std::unique_ptr<CustomProcessing> render_pre_processing);
// The AudioProcessingBuilder takes ownership of the nonlinear beamformer.
RTC_DEPRECATED
AudioProcessingBuilder& SetNonlinearBeamformer(
std::unique_ptr<NonlinearBeamformer> nonlinear_beamformer);
// The AudioProcessingBuilder takes ownership of the echo_detector.

View File

@ -30,7 +30,7 @@ enum class ConfigOptionID {
kDelayAgnostic,
kExperimentalAgc,
kExperimentalNs,
kBeamforming,
kBeamforming, // Deprecated
kIntelligibility,
kEchoCanceller3, // Deprecated
kAecRefinedAdaptiveFilter,