Make the echo detector injectable.

This adds a generic interface for an echo detector, and makes it possible to inject one into the audio processing module.

Bug: webrtc:8732
Change-Id: I30d97aeb829307b2ae9c4dbeb9a3e15ab7ec0912
Reviewed-on: https://webrtc-review.googlesource.com/38900
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#21588}
This commit is contained in:
Ivo Creusen
2018-01-11 16:08:54 +01:00
committed by Commit Bot
parent 02879f9990
commit 09fa4b04dd
6 changed files with 104 additions and 44 deletions

View File

@ -302,8 +302,10 @@ struct AudioProcessingImpl::ApmPublicSubmodules {
struct AudioProcessingImpl::ApmPrivateSubmodules { struct AudioProcessingImpl::ApmPrivateSubmodules {
ApmPrivateSubmodules(NonlinearBeamformer* beamformer, ApmPrivateSubmodules(NonlinearBeamformer* beamformer,
std::unique_ptr<CustomProcessing> capture_post_processor, std::unique_ptr<CustomProcessing> capture_post_processor,
std::unique_ptr<CustomProcessing> render_pre_processor) std::unique_ptr<CustomProcessing> render_pre_processor,
std::unique_ptr<EchoDetector> echo_detector)
: beamformer(beamformer), : beamformer(beamformer),
echo_detector(std::move(echo_detector)),
capture_post_processor(std::move(capture_post_processor)), capture_post_processor(std::move(capture_post_processor)),
render_pre_processor(std::move(render_pre_processor)) {} render_pre_processor(std::move(render_pre_processor)) {}
// Accessed internally from capture or during initialization // Accessed internally from capture or during initialization
@ -312,7 +314,7 @@ struct AudioProcessingImpl::ApmPrivateSubmodules {
std::unique_ptr<GainController2> gain_controller2; std::unique_ptr<GainController2> gain_controller2;
std::unique_ptr<LowCutFilter> low_cut_filter; std::unique_ptr<LowCutFilter> low_cut_filter;
std::unique_ptr<LevelController> level_controller; std::unique_ptr<LevelController> level_controller;
std::unique_ptr<ResidualEchoDetector> residual_echo_detector; std::unique_ptr<EchoDetector> echo_detector;
std::unique_ptr<EchoControl> echo_controller; std::unique_ptr<EchoControl> echo_controller;
std::unique_ptr<CustomProcessing> capture_post_processor; std::unique_ptr<CustomProcessing> capture_post_processor;
std::unique_ptr<CustomProcessing> render_pre_processor; std::unique_ptr<CustomProcessing> render_pre_processor;
@ -345,16 +347,27 @@ AudioProcessingBuilder& AudioProcessingBuilder::SetNonlinearBeamformer(
return *this; return *this;
} }
AudioProcessingBuilder& AudioProcessingBuilder::SetEchoDetector(
std::unique_ptr<EchoDetector> echo_detector) {
echo_detector_ = std::move(echo_detector);
return *this;
}
AudioProcessing* AudioProcessingBuilder::Create() { AudioProcessing* AudioProcessingBuilder::Create() {
webrtc::Config config; webrtc::Config config;
return Create(config); return Create(config);
} }
AudioProcessing* AudioProcessingBuilder::Create(const webrtc::Config& config) { AudioProcessing* AudioProcessingBuilder::Create(const webrtc::Config& config) {
return AudioProcessing::Create(config, std::move(capture_post_processing_), AudioProcessingImpl* apm = new rtc::RefCountedObject<AudioProcessingImpl>(
std::move(render_pre_processing_), config, std::move(capture_post_processing_),
std::move(echo_control_factory_), std::move(render_pre_processing_), std::move(echo_control_factory_),
nonlinear_beamformer_.release()); std::move(echo_detector_), nonlinear_beamformer_.release());
if (apm->Initialize() != AudioProcessing::kNoError) {
delete apm;
apm = nullptr;
}
return apm;
} }
AudioProcessing* AudioProcessing::Create() { AudioProcessing* AudioProcessing::Create() {
@ -388,7 +401,7 @@ AudioProcessing* AudioProcessing::Create(
NonlinearBeamformer* beamformer) { NonlinearBeamformer* beamformer) {
AudioProcessingImpl* apm = new rtc::RefCountedObject<AudioProcessingImpl>( AudioProcessingImpl* apm = new rtc::RefCountedObject<AudioProcessingImpl>(
config, std::move(capture_post_processor), config, std::move(capture_post_processor),
std::move(render_pre_processor), std::move(echo_control_factory), std::move(render_pre_processor), std::move(echo_control_factory), nullptr,
beamformer); beamformer);
if (apm->Initialize() != kNoError) { if (apm->Initialize() != kNoError) {
delete apm; delete apm;
@ -399,13 +412,15 @@ AudioProcessing* AudioProcessing::Create(
} }
AudioProcessingImpl::AudioProcessingImpl(const webrtc::Config& config) AudioProcessingImpl::AudioProcessingImpl(const webrtc::Config& config)
: AudioProcessingImpl(config, nullptr, nullptr, nullptr, nullptr) {} : AudioProcessingImpl(config, nullptr, nullptr, nullptr, nullptr, nullptr) {
}
AudioProcessingImpl::AudioProcessingImpl( AudioProcessingImpl::AudioProcessingImpl(
const webrtc::Config& config, const webrtc::Config& config,
std::unique_ptr<CustomProcessing> capture_post_processor, std::unique_ptr<CustomProcessing> capture_post_processor,
std::unique_ptr<CustomProcessing> render_pre_processor, std::unique_ptr<CustomProcessing> render_pre_processor,
std::unique_ptr<EchoControlFactory> echo_control_factory, std::unique_ptr<EchoControlFactory> echo_control_factory,
std::unique_ptr<EchoDetector> echo_detector,
NonlinearBeamformer* beamformer) NonlinearBeamformer* beamformer)
: high_pass_filter_impl_(new HighPassFilterImpl(this)), : high_pass_filter_impl_(new HighPassFilterImpl(this)),
echo_control_factory_(std::move(echo_control_factory)), echo_control_factory_(std::move(echo_control_factory)),
@ -414,7 +429,8 @@ AudioProcessingImpl::AudioProcessingImpl(
private_submodules_( private_submodules_(
new ApmPrivateSubmodules(beamformer, new ApmPrivateSubmodules(beamformer,
std::move(capture_post_processor), std::move(capture_post_processor),
std::move(render_pre_processor))), std::move(render_pre_processor),
std::move(echo_detector))),
constants_(config.Get<ExperimentalAgc>().startup_min_volume, constants_(config.Get<ExperimentalAgc>().startup_min_volume,
config.Get<ExperimentalAgc>().clipped_level_min, config.Get<ExperimentalAgc>().clipped_level_min,
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
@ -454,8 +470,11 @@ AudioProcessingImpl::AudioProcessingImpl(
public_submodules_->gain_control_for_experimental_agc.reset( public_submodules_->gain_control_for_experimental_agc.reset(
new GainControlForExperimentalAgc( new GainControlForExperimentalAgc(
public_submodules_->gain_control.get(), &crit_capture_)); public_submodules_->gain_control.get(), &crit_capture_));
private_submodules_->residual_echo_detector.reset(
new ResidualEchoDetector()); // If no echo detector is injected, use the ResidualEchoDetector.
if (!private_submodules_->echo_detector) {
private_submodules_->echo_detector.reset(new ResidualEchoDetector());
}
// TODO(peah): Move this creation to happen only when the level controller // TODO(peah): Move this creation to happen only when the level controller
// is enabled. // is enabled.
@ -1121,7 +1140,8 @@ void AudioProcessingImpl::EmptyQueuedRenderAudio() {
} }
while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) { while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) {
private_submodules_->residual_echo_detector->AnalyzeRenderAudio( RTC_DCHECK(private_submodules_->echo_detector);
private_submodules_->echo_detector->AnalyzeRenderAudio(
red_capture_queue_buffer_); red_capture_queue_buffer_);
} }
} }
@ -1337,7 +1357,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
} }
if (config_.residual_echo_detector.enabled) { if (config_.residual_echo_detector.enabled) {
private_submodules_->residual_echo_detector->AnalyzeCaptureAudio( RTC_DCHECK(private_submodules_->echo_detector);
private_submodules_->echo_detector->AnalyzeCaptureAudio(
rtc::ArrayView<const float>(capture_buffer->channels_f()[0], rtc::ArrayView<const float>(capture_buffer->channels_f()[0],
capture_buffer->num_frames())); capture_buffer->num_frames()));
} }
@ -1664,11 +1685,11 @@ AudioProcessing::AudioProcessingStatistics AudioProcessingImpl::GetStatistics()
} }
{ {
rtc::CritScope cs_capture(&crit_capture_); rtc::CritScope cs_capture(&crit_capture_);
stats.residual_echo_likelihood = RTC_DCHECK(private_submodules_->echo_detector);
private_submodules_->residual_echo_detector->echo_likelihood(); auto ed_metrics = private_submodules_->echo_detector->GetMetrics();
stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
stats.residual_echo_likelihood_recent_max = stats.residual_echo_likelihood_recent_max =
private_submodules_->residual_echo_detector ed_metrics.echo_likelihood_recent_max;
->echo_likelihood_recent_max();
} }
public_submodules_->echo_cancellation->GetDelayMetrics( public_submodules_->echo_cancellation->GetDelayMetrics(
&stats.delay_median, &stats.delay_standard_deviation, &stats.delay_median, &stats.delay_standard_deviation,
@ -1705,11 +1726,11 @@ AudioProcessingStats AudioProcessingImpl::GetStatistics(
} }
if (config_.residual_echo_detector.enabled) { if (config_.residual_echo_detector.enabled) {
rtc::CritScope cs_capture(&crit_capture_); rtc::CritScope cs_capture(&crit_capture_);
stats.residual_echo_likelihood = rtc::Optional<double>( RTC_DCHECK(private_submodules_->echo_detector);
private_submodules_->residual_echo_detector->echo_likelihood()); auto ed_metrics = private_submodules_->echo_detector->GetMetrics();
stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
stats.residual_echo_likelihood_recent_max = stats.residual_echo_likelihood_recent_max =
rtc::Optional<double>(private_submodules_->residual_echo_detector ed_metrics.echo_likelihood_recent_max;
->echo_likelihood_recent_max());
} }
int delay_median, delay_std; int delay_median, delay_std;
float fraction_poor_delays; float fraction_poor_delays;
@ -1854,7 +1875,9 @@ void AudioProcessingImpl::InitializeLevelController() {
} }
void AudioProcessingImpl::InitializeResidualEchoDetector() { void AudioProcessingImpl::InitializeResidualEchoDetector() {
private_submodules_->residual_echo_detector->Initialize(); RTC_DCHECK(private_submodules_->echo_detector);
private_submodules_->echo_detector->Initialize(proc_sample_rate_hz(),
num_proc_channels());
} }
void AudioProcessingImpl::InitializePostProcessor() { void AudioProcessingImpl::InitializePostProcessor() {

View File

@ -45,6 +45,7 @@ class AudioProcessingImpl : public AudioProcessing {
std::unique_ptr<CustomProcessing> capture_post_processor, std::unique_ptr<CustomProcessing> capture_post_processor,
std::unique_ptr<CustomProcessing> render_pre_processor, std::unique_ptr<CustomProcessing> render_pre_processor,
std::unique_ptr<EchoControlFactory> echo_control_factory, std::unique_ptr<EchoControlFactory> echo_control_factory,
std::unique_ptr<EchoDetector> echo_detector,
NonlinearBeamformer* beamformer); NonlinearBeamformer* beamformer);
~AudioProcessingImpl() override; ~AudioProcessingImpl() override;
int Initialize() override; int Initialize() override;

View File

@ -49,6 +49,7 @@ class ProcessingConfig;
class EchoCancellation; class EchoCancellation;
class EchoControlMobile; class EchoControlMobile;
class EchoControlFactory; class EchoControlFactory;
class EchoDetector;
class GainControl; class GainControl;
class HighPassFilter; class HighPassFilter;
class LevelEstimator; class LevelEstimator;
@ -665,6 +666,9 @@ class AudioProcessingBuilder {
// The AudioProcessingBuilder takes ownership of the nonlinear beamformer. // The AudioProcessingBuilder takes ownership of the nonlinear beamformer.
AudioProcessingBuilder& SetNonlinearBeamformer( AudioProcessingBuilder& SetNonlinearBeamformer(
std::unique_ptr<NonlinearBeamformer> nonlinear_beamformer); std::unique_ptr<NonlinearBeamformer> nonlinear_beamformer);
// The AudioProcessingBuilder takes ownership of the echo_detector.
AudioProcessingBuilder& SetEchoDetector(
std::unique_ptr<EchoDetector> echo_detector);
// This creates an APM instance using the previously set components. Calling // This creates an APM instance using the previously set components. Calling
// the Create function resets the AudioProcessingBuilder to its initial state. // the Create function resets the AudioProcessingBuilder to its initial state.
AudioProcessing* Create(); AudioProcessing* Create();
@ -675,6 +679,7 @@ class AudioProcessingBuilder {
std::unique_ptr<CustomProcessing> capture_post_processing_; std::unique_ptr<CustomProcessing> capture_post_processing_;
std::unique_ptr<CustomProcessing> render_pre_processing_; std::unique_ptr<CustomProcessing> render_pre_processing_;
std::unique_ptr<NonlinearBeamformer> nonlinear_beamformer_; std::unique_ptr<NonlinearBeamformer> nonlinear_beamformer_;
std::unique_ptr<EchoDetector> echo_detector_;
RTC_DISALLOW_COPY_AND_ASSIGN(AudioProcessingBuilder); RTC_DISALLOW_COPY_AND_ASSIGN(AudioProcessingBuilder);
}; };
@ -1147,6 +1152,34 @@ class CustomProcessing {
virtual ~CustomProcessing() {} virtual ~CustomProcessing() {}
}; };
// Interface for an echo detector submodule.
class EchoDetector {
public:
// (Re-)Initializes the submodule.
virtual void Initialize(int sample_rate_hz, int num_channels) = 0;
// Analysis (not changing) of the render signal.
virtual void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) = 0;
// Analysis (not changing) of the capture signal.
virtual void AnalyzeCaptureAudio(
rtc::ArrayView<const float> capture_audio) = 0;
// Pack an AudioBuffer into a vector<float>.
static void PackRenderAudioBuffer(AudioBuffer* audio,
std::vector<float>* packed_buffer);
struct Metrics {
double echo_likelihood;
double echo_likelihood_recent_max;
};
// Collect current metrics from the echo detector.
virtual Metrics GetMetrics() const = 0;
virtual ~EchoDetector() {}
};
// The voice activity detection (VAD) component analyzes the stream to // The voice activity detection (VAD) component analyzes the stream to
// determine if voice is present. A facility is also provided to pass in an // determine if voice is present. A facility is also provided to pass in an
// external VAD decision. // external VAD decision.

View File

@ -177,7 +177,8 @@ void ResidualEchoDetector::AnalyzeCaptureAudio(
: 0; : 0;
} }
void ResidualEchoDetector::Initialize() { void ResidualEchoDetector::Initialize(int /*sample_rate_hz*/,
int /*num_channels*/) {
render_buffer_.Clear(); render_buffer_.Clear();
std::fill(render_power_.begin(), render_power_.end(), 0.f); std::fill(render_power_.begin(), render_power_.end(), 0.f);
std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f); std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f);
@ -193,12 +194,17 @@ void ResidualEchoDetector::Initialize() {
reliability_ = 0.f; reliability_ = 0.f;
} }
void ResidualEchoDetector::PackRenderAudioBuffer( void EchoDetector::PackRenderAudioBuffer(AudioBuffer* audio,
AudioBuffer* audio, std::vector<float>* packed_buffer) {
std::vector<float>* packed_buffer) {
packed_buffer->clear(); packed_buffer->clear();
packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0], packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0],
audio->channels_f()[0] + audio->num_frames()); audio->channels_f()[0] + audio->num_frames());
} }
EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const {
EchoDetector::Metrics metrics;
metrics.echo_likelihood = echo_likelihood_;
metrics.echo_likelihood_recent_max = recent_likelihood_max_.max();
return metrics;
}
} // namespace webrtc } // namespace webrtc

View File

@ -18,39 +18,32 @@
#include "modules/audio_processing/echo_detector/mean_variance_estimator.h" #include "modules/audio_processing/echo_detector/mean_variance_estimator.h"
#include "modules/audio_processing/echo_detector/moving_max.h" #include "modules/audio_processing/echo_detector/moving_max.h"
#include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h" #include "modules/audio_processing/echo_detector/normalized_covariance_estimator.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc { namespace webrtc {
class ApmDataDumper; class ApmDataDumper;
class AudioBuffer; class AudioBuffer;
class EchoDetector;
class ResidualEchoDetector { class ResidualEchoDetector : public EchoDetector {
public: public:
ResidualEchoDetector(); ResidualEchoDetector();
~ResidualEchoDetector(); ~ResidualEchoDetector() override;
// This function should be called while holding the render lock. // This function should be called while holding the render lock.
void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio); void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) override;
// This function should be called while holding the capture lock. // This function should be called while holding the capture lock.
void AnalyzeCaptureAudio(rtc::ArrayView<const float> capture_audio); void AnalyzeCaptureAudio(rtc::ArrayView<const float> capture_audio) override;
// This function should be called while holding the capture lock. // This function should be called while holding the capture lock.
void Initialize(); void Initialize(int sample_rate_hz, int num_channels) override;
// This function is for testing purposes only. // This function is for testing purposes only.
void SetReliabilityForTest(float value) { reliability_ = value; } void SetReliabilityForTest(float value) { reliability_ = value; }
static void PackRenderAudioBuffer(AudioBuffer* audio,
std::vector<float>* packed_buffer);
// This function should be called while holding the capture lock. // This function should be called while holding the capture lock.
float echo_likelihood() const { return echo_likelihood_; } EchoDetector::Metrics GetMetrics() const override;
float echo_likelihood_recent_max() const {
return recent_likelihood_max_.max();
}
private: private:
static int instance_count_; static int instance_count_;

View File

@ -37,7 +37,8 @@ TEST(ResidualEchoDetectorTests, Echo) {
} }
} }
// We expect to detect echo with near certain likelihood. // We expect to detect echo with near certain likelihood.
EXPECT_NEAR(1.f, echo_detector.echo_likelihood(), 0.01f); auto ed_metrics = echo_detector.GetMetrics();
EXPECT_NEAR(1.f, ed_metrics.echo_likelihood, 0.01f);
} }
TEST(ResidualEchoDetectorTests, NoEcho) { TEST(ResidualEchoDetectorTests, NoEcho) {
@ -57,7 +58,8 @@ TEST(ResidualEchoDetectorTests, NoEcho) {
echo_detector.AnalyzeCaptureAudio(zeros); echo_detector.AnalyzeCaptureAudio(zeros);
} }
// We expect to not detect any echo. // We expect to not detect any echo.
EXPECT_NEAR(0.f, echo_detector.echo_likelihood(), 0.01f); auto ed_metrics = echo_detector.GetMetrics();
EXPECT_NEAR(0.f, ed_metrics.echo_likelihood, 0.01f);
} }
TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) { TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) {
@ -92,7 +94,8 @@ TEST(ResidualEchoDetectorTests, EchoWithRenderClockDrift) {
// A growing buffer can be caused by jitter or clock drift and it's not // A growing buffer can be caused by jitter or clock drift and it's not
// possible to make this decision right away. For this reason we only expect // possible to make this decision right away. For this reason we only expect
// an echo likelihood of 75% in this test. // an echo likelihood of 75% in this test.
EXPECT_GT(echo_detector.echo_likelihood(), 0.75f); auto ed_metrics = echo_detector.GetMetrics();
EXPECT_GT(ed_metrics.echo_likelihood, 0.75f);
} }
TEST(ResidualEchoDetectorTests, EchoWithCaptureClockDrift) { TEST(ResidualEchoDetectorTests, EchoWithCaptureClockDrift) {
@ -122,7 +125,8 @@ TEST(ResidualEchoDetectorTests, EchoWithCaptureClockDrift) {
} }
} }
// We expect to detect echo with near certain likelihood. // We expect to detect echo with near certain likelihood.
EXPECT_NEAR(1.f, echo_detector.echo_likelihood(), 0.01f); auto ed_metrics = echo_detector.GetMetrics();
EXPECT_NEAR(1.f, ed_metrics.echo_likelihood, 0.01f);
} }
} // namespace webrtc } // namespace webrtc