Only adapt AGC when the desired signal is present

Take the 50% quantile of the mask and compare it to a certain threshold to determine if the desired signal is present. A hold is applied to avoid fast switching between states.
is_signal_present_ has been plotted and looks as expected. The AGC adaptation sounds promising, especially for the cases when the speaker fades in and out of the beam direction.

R=andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/28329005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@8078 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
aluebs@webrtc.org
2015-01-15 18:07:21 +00:00
parent 3e42a8a56a
commit d82f55d2a7
10 changed files with 310 additions and 115 deletions

View File

@ -18,6 +18,7 @@
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"
#include "webrtc/modules/audio_processing/common.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/test/test_utils.h"
@ -278,6 +279,35 @@ void OpenFileAndReadMessage(const std::string filename,
fclose(file);
}
// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
// stereo) file, converts to deinterleaved float (optionally downmixing) and
// returns the result in |cb|. Returns false if the file ended (or on error) and
// true otherwise.
//
// |int_data| and |float_data| are just temporary space that must be
// sufficiently large to hold the 10 ms chunk.
bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
               ChannelBuffer<float>* cb) {
  // The files always contain stereo audio.
  const size_t num_samples = 2 * cb->samples_per_channel();
  if (fread(int_data, sizeof(int16_t), num_samples, file) != num_samples) {
    // A short read is only acceptable at end-of-file.
    assert(feof(file));
    return false;  // This is expected.
  }
  S16ToFloat(int_data, num_samples, float_data);
  if (cb->num_channels() != 1) {
    Deinterleave(float_data, cb->samples_per_channel(), 2, cb->channels());
  } else {
    MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
  }
  return true;
}
class ApmTest : public ::testing::Test {
protected:
ApmTest();
@ -1164,6 +1194,87 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) {
}
}
#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS)
// Verifies that the AGC adapts its analog level and compression gain only
// while the beamformer reports that the target signal is present.
TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
  const int kSampleRateHz = 16000;
  const int kSamplesPerChannel =
      AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000;
  const int kNumInputChannels = 2;
  const int kNumOutputChannels = 1;
  const int kNumChunks = 700;
  // Attenuate the input so the AGC has headroom to adapt upwards.
  const float kScaleFactor = 0.25f;
  Config config;
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
  config.Set<Beamforming>(new Beamforming(true, geometry));
  // NiceMock suppresses warnings for uninteresting calls; APM takes
  // ownership of the beamformer.
  testing::NiceMock<MockBeamformer>* beamformer =
      new testing::NiceMock<MockBeamformer>(geometry);
  scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(config, beamformer));
  EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
  ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels);
  ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels);
  const int max_length = kSamplesPerChannel * std::max(kNumInputChannels,
                                                       kNumOutputChannels);
  scoped_ptr<int16_t[]> int_data(new int16_t[max_length]);
  scoped_ptr<float[]> float_data(new float[max_length]);
  std::string filename = ResourceFilePath("far", kSampleRateHz);
  FILE* far_file = fopen(filename.c_str(), "rb");
  ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n";
  const int kDefaultVolume = apm->gain_control()->stream_analog_level();
  const int kDefaultCompressionGain =
      apm->gain_control()->compression_gain_db();
  // The mock reads |is_target| through a pointer so each phase below can
  // toggle target presence without reconfiguring the expectation.
  bool is_target = false;
  EXPECT_CALL(*beamformer, is_target_present())
      .WillRepeatedly(testing::ReturnPointee(&is_target));
  // Reads |kNumChunks| chunks from |far_file|, scales them down and pushes
  // them through ProcessStream. Shared by both phases; fatal failures inside
  // the lambda are propagated via ASSERT_NO_FATAL_FAILURE at the call sites.
  auto process_chunks = [&]() {
    for (int i = 0; i < kNumChunks; ++i) {
      ASSERT_TRUE(ReadChunk(far_file,
                            int_data.get(),
                            float_data.get(),
                            &src_buf));
      for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) {
        src_buf.data()[j] *= kScaleFactor;
      }
      EXPECT_EQ(kNoErr,
                apm->ProcessStream(src_buf.channels(),
                                   src_buf.samples_per_channel(),
                                   kSampleRateHz,
                                   LayoutFromChannels(src_buf.num_channels()),
                                   kSampleRateHz,
                                   LayoutFromChannels(dest_buf.num_channels()),
                                   dest_buf.channels()));
    }
  };
  // Phase 1: target absent. The AGC must not adapt.
  ASSERT_NO_FATAL_FAILURE(process_chunks());
  EXPECT_EQ(kDefaultVolume,
            apm->gain_control()->stream_analog_level());
  EXPECT_EQ(kDefaultCompressionGain,
            apm->gain_control()->compression_gain_db());
  // Phase 2: same audio with the target flagged present. The AGC should now
  // raise both the analog level and the compression gain.
  rewind(far_file);
  is_target = true;
  ASSERT_NO_FATAL_FAILURE(process_chunks());
  EXPECT_LT(kDefaultVolume,
            apm->gain_control()->stream_analog_level());
  EXPECT_LT(kDefaultCompressionGain,
            apm->gain_control()->compression_gain_db());
  ASSERT_EQ(0, fclose(far_file));
}
#endif
TEST_F(ApmTest, NoiseSuppression) {
// Test valid suppression levels.
NoiseSuppression::Level level[] = {
@ -2031,35 +2142,6 @@ TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
}
}
// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
// stereo) file, converts to deinterleaved float (optionally downmixing) and
// returns the result in |cb|. Returns false if the file ended (or on error) and
// true otherwise.
//
// |int_data| and |float_data| are just temporary space that must be
// sufficiently large to hold the 10 ms chunk.
bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
               ChannelBuffer<float>* cb) {
  // The files always contain stereo audio.
  size_t frame_size = cb->samples_per_channel() * 2;
  size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
  if (read_count != frame_size) {
    // Check that the file really ended. A short read for any other reason
    // (I/O error, truncated file) is a test-setup bug.
    assert(feof(file));
    return false;  // This is expected.
  }
  // Convert the full interleaved stereo chunk to float before any
  // deinterleaving or downmix.
  S16ToFloat(int_data, frame_size, float_data);
  if (cb->num_channels() == 1) {
    // Mono destination: average the two file channels into |cb|.
    MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
  } else {
    // Stereo destination: split the interleaved samples into per-channel
    // buffers.
    Deinterleave(float_data, cb->samples_per_channel(), 2,
                 cb->channels());
  }
  return true;
}
// Compares the reference and test arrays over a region around the expected
// delay. Finds the highest SNR in that region and adds the variance and squared
// error results to the supplied accumulators.