Use array geometry in Beamformer

R=andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/35559004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@8000 4adac7df-926f-26a2-2b94-8c16560cd09d
aluebs@webrtc.org
2015-01-05 21:58:58 +00:00
parent a37bf2c4fe
commit fb7a039e9d
6 changed files with 51 additions and 16 deletions

View File

@@ -185,7 +185,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
       use_new_agc_(config.Get<ExperimentalAgc>().enabled),
 #endif
       transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled),
-      beamformer_enabled_(config.Get<Beamforming>().enabled) {
+      beamformer_enabled_(config.Get<Beamforming>().enabled),
+      array_geometry_(config.Get<Beamforming>().array_geometry) {
   echo_cancellation_ = new EchoCancellationImpl(this, crit_);
   component_list_.push_back(echo_cancellation_);
@@ -400,7 +401,8 @@ int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz,
     return kNoError;
   }
   if (beamformer_enabled_ &&
-      (num_input_channels < 2 || num_output_channels > 1)) {
+      (static_cast<size_t>(num_input_channels) != array_geometry_.size() ||
+       num_output_channels > 1)) {
     return kBadNumberChannelsError;
   }
   return InitializeLocked(input_sample_rate_hz,
@@ -995,11 +997,9 @@ int AudioProcessingImpl::InitializeTransient() {
 void AudioProcessingImpl::InitializeBeamformer() {
   if (beamformer_enabled_) {
 #ifdef WEBRTC_BEAMFORMER
-    // TODO(aluebs): Don't use a hard-coded microphone spacing.
     beamformer_.reset(new Beamformer(kChunkSizeMs,
                                      split_rate_,
-                                     fwd_in_format_.num_channels(),
-                                     0.05f));
+                                     array_geometry_));
 #else
     assert(false);
 #endif

View File

@@ -219,6 +219,7 @@ class AudioProcessingImpl : public AudioProcessing {
   scoped_ptr<TransientSuppressor> transient_suppressor_;
   const bool beamformer_enabled_;
   scoped_ptr<Beamformer> beamformer_;
+  const std::vector<Point> array_geometry_;
 };

 }  // namespace webrtc

View File

@@ -128,13 +128,12 @@ int Round(float x) {
 Beamformer::Beamformer(int chunk_size_ms,
                        int sample_rate_hz,
-                       int num_input_channels,
-                       float mic_spacing)
+                       const std::vector<Point>& array_geometry)
     : chunk_length_(sample_rate_hz / (1000.f / chunk_size_ms)),
       window_(new float[kFftSize]),
-      num_input_channels_(num_input_channels),
+      num_input_channels_(array_geometry.size()),
       sample_rate_hz_(sample_rate_hz),
-      mic_spacing_(mic_spacing),
+      mic_spacing_(MicSpacingFromGeometry(array_geometry)),
       decay_threshold_(
           pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds))),
       mid_frequency_lower_bin_bound_(
@@ -477,4 +476,18 @@ void Beamformer::CalculateHighFrequencyMask() {
   high_pass_postfilter_mask_ += high_pass_mask;
 }

+// This method CHECKs for a uniform linear array.
+float Beamformer::MicSpacingFromGeometry(const std::vector<Point>& geometry) {
+  CHECK_GE(geometry.size(), 2u);
+  float mic_spacing = 0.f;
+  for (size_t i = 0u; i < 3u; ++i) {
+    float difference = geometry[1].c[i] - geometry[0].c[i];
+    for (size_t j = 2u; j < geometry.size(); ++j) {
+      CHECK_LT(geometry[j].c[i] - geometry[j - 1].c[i] - difference, 1e-6);
+    }
+    mic_spacing += difference * difference;
+  }
+  return sqrt(mic_spacing);
+}
+
 }  // namespace webrtc
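To make the new geometry handling concrete: MicSpacingFromGeometry() CHECKs that every pair of adjacent microphones is offset by the same per-axis difference (a uniform linear array) and returns the Euclidean distance between neighbours. Below is a standalone sketch of the same computation, not taken from the patch: asserts stand in for the CHECK macros, an absolute tolerance is used, and the three-microphone geometry is an arbitrary example.

#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

// Local stand-in for webrtc::Point from audio_processing.h.
struct Point {
  Point(float x, float y, float z) { c[0] = x; c[1] = y; c[2] = z; }
  float c[3];
};

// Mirrors Beamformer::MicSpacingFromGeometry(): verify a uniform linear array
// and return the distance between adjacent microphones.
float MicSpacingFromGeometry(const std::vector<Point>& geometry) {
  assert(geometry.size() >= 2u);
  float squared_spacing = 0.f;
  for (size_t i = 0u; i < 3u; ++i) {
    const float difference = geometry[1].c[i] - geometry[0].c[i];
    for (size_t j = 2u; j < geometry.size(); ++j) {
      // Every adjacent pair must have the same offset along this axis.
      assert(std::fabs(geometry[j].c[i] - geometry[j - 1].c[i] - difference) <
             1e-6f);
    }
    squared_spacing += difference * difference;
  }
  return std::sqrt(squared_spacing);
}

int main() {
  std::vector<Point> geometry;
  geometry.push_back(Point(0.f, 0.f, 0.f));
  geometry.push_back(Point(0.05f, 0.f, 0.f));
  geometry.push_back(Point(0.10f, 0.f, 0.f));
  printf("mic spacing: %.2f m\n", MicSpacingFromGeometry(geometry));  // 0.05 m
  return 0;
}

Note that the sketch's absolute tolerance is marginally stricter than the signed CHECK_LT used in the patch.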

View File

@@ -13,6 +13,7 @@
 #include "webrtc/common_audio/lapped_transform.h"
 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"

 namespace webrtc {
@@ -26,12 +27,12 @@ namespace webrtc {
 // TODO: Target angle assumed to be 0. Parameterize target angle.
 class Beamformer : public LappedTransform::Callback {
  public:
+  // At the moment it only accepts uniform linear microphone arrays. Using the
+  // first microphone as a reference position [0, 0, 0] is a natural choice.
   Beamformer(int chunk_size_ms,
              // Sample rate corresponds to the lower band.
              int sample_rate_hz,
-             int num_input_channels,
-             // Microphone spacing in meters.
-             float mic_spacing);
+             const std::vector<Point>& array_geometry);

   // Process one time-domain chunk of audio. The audio can be separated into
   // two signals by frequency, with the higher half passed in as the second
@@ -91,6 +92,8 @@ class Beamformer : public LappedTransform::Callback {
   // Applies both sets of masks to |input| and store in |output|.
   void ApplyMasks(const complex_f* const* input, complex_f* const* output);

+  float MicSpacingFromGeometry(const std::vector<Point>& array_geometry);
+
   // Deals with the fft transform and blocking.
   const int chunk_length_;
   scoped_ptr<LappedTransform> lapped_transform_;

View File

@@ -54,10 +54,14 @@ int main(int argc, char* argv[]) {
   fseek(read_file, 44, SEEK_SET);
   FILE* write_file = fopen(FLAGS_output_file_path.c_str(), "wb");

+  std::vector<webrtc::Point> array_geometry;
+  for (int i = 0; i < FLAGS_num_input_channels; ++i) {
+    array_geometry.push_back(webrtc::Point(i * FLAGS_mic_spacing, 0.f, 0.f));
+  }
   webrtc::Beamformer bf(kChunkTimeMilliseconds,
                         FLAGS_sample_rate,
-                        FLAGS_num_input_channels,
-                        FLAGS_mic_spacing);
+                        array_geometry);
   while (true) {
     size_t samples_read = webrtc::PcmReadToFloat(read_file,
                                                  kInputSamplesPerChunk,
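As an illustration of the loop above (the constants stand in for the command-line flags and are made up for the example), four input channels at a 0.05 m spacing yield a uniform linear array along the x axis:

#include <cstdio>
#include <vector>

#include "webrtc/modules/audio_processing/include/audio_processing.h"

int main() {
  const int kNumInputChannels = 4;  // Stand-in for FLAGS_num_input_channels.
  const float kMicSpacing = 0.05f;  // Stand-in for FLAGS_mic_spacing.
  std::vector<webrtc::Point> array_geometry;
  for (int i = 0; i < kNumInputChannels; ++i) {
    array_geometry.push_back(webrtc::Point(i * kMicSpacing, 0.f, 0.f));
  }
  // Prints x positions 0.00, 0.05, 0.10, 0.15; Beamformer::MicSpacingFromGeometry()
  // maps this geometry back to a 0.05 m spacing.
  for (size_t i = 0; i < array_geometry.size(); ++i) {
    printf("mic %d: (%.2f, 0.00, 0.00)\n", static_cast<int>(i),
           array_geometry[i].c[0]);
  }
  return 0;
}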

View File

@@ -13,6 +13,7 @@
 #include <stddef.h>  // size_t
 #include <stdio.h>  // FILE
+#include <vector>

 #include "webrtc/base/platform_file.h"
 #include "webrtc/common.h"
@@ -82,12 +83,25 @@ struct ExperimentalNs {
   bool enabled;
 };

+// Coordinates in meters.
+struct Point {
+  Point(float x, float y, float z) {
+    c[0] = x;
+    c[1] = y;
+    c[2] = z;
+  }
+  float c[3];
+};
+
 // Use to enable beamforming. Must be provided through the constructor. It will
 // have no impact if used with AudioProcessing::SetExtraOptions().
 struct Beamforming {
   Beamforming() : enabled(false) {}
-  explicit Beamforming(bool enabled) : enabled(enabled) {}
-  bool enabled;
+  Beamforming(bool enabled, const std::vector<Point>& array_geometry)
+      : enabled(enabled),
+        array_geometry(array_geometry) {}
+  const bool enabled;
+  const std::vector<Point> array_geometry;
 };

 static const int kAudioProcMaxNativeSampleRateHz = 32000;
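Taken together with the new Point struct, a minimal usage sketch follows. It assumes the pre-existing webrtc::Config::Set() and AudioProcessing::Create(config) entry points; the two-microphone 5 cm geometry and the helper name are arbitrary examples, not part of the patch.

#include <vector>

#include "webrtc/common.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"

// Sketch: create an AudioProcessing instance with beamforming enabled for an
// explicit two-microphone linear array.
webrtc::AudioProcessing* CreateBeamformingApm() {
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));    // Reference microphone.
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));  // 5 cm along the x axis.

  webrtc::Config config;
  config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));

  // Beamforming only takes effect when passed at construction time;
  // SetExtraOptions() ignores it, as the struct comment above notes.
  return webrtc::AudioProcessing::Create(config);
}

With this config, AudioProcessingImpl::MaybeInitializeLocked() accepts only streams with exactly geometry.size() input channels and a single output channel.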