Make the nonlinear beamformer steerable

Depends on this CL: https://codereview.webrtc.org/1395453004/

R=andrew@webrtc.org

Review URL: https://codereview.webrtc.org/1394103003 .

Cr-Commit-Position: refs/heads/master@{#10458}
This commit is contained in:
Alejandro Luebs
2015-10-29 18:21:34 -07:00
parent 7367463acc
commit cb3f9bd9c0
12 changed files with 613 additions and 98 deletions

View File

@ -225,6 +225,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
beamformer_enabled_(config.Get<Beamforming>().enabled), beamformer_enabled_(config.Get<Beamforming>().enabled),
beamformer_(beamformer), beamformer_(beamformer),
array_geometry_(config.Get<Beamforming>().array_geometry), array_geometry_(config.Get<Beamforming>().array_geometry),
target_direction_(config.Get<Beamforming>().target_direction),
intelligibility_enabled_(config.Get<Intelligibility>().enabled) { intelligibility_enabled_(config.Get<Intelligibility>().enabled) {
echo_cancellation_ = new EchoCancellationImpl(this, crit_); echo_cancellation_ = new EchoCancellationImpl(this, crit_);
component_list_.push_back(echo_cancellation_); component_list_.push_back(echo_cancellation_);
@ -1099,7 +1100,8 @@ void AudioProcessingImpl::InitializeTransient() {
void AudioProcessingImpl::InitializeBeamformer() { void AudioProcessingImpl::InitializeBeamformer() {
if (beamformer_enabled_) { if (beamformer_enabled_) {
if (!beamformer_) { if (!beamformer_) {
beamformer_.reset(new NonlinearBeamformer(array_geometry_)); beamformer_.reset(
new NonlinearBeamformer(array_geometry_, target_direction_));
} }
beamformer_->Initialize(kChunkSizeMs, split_rate_); beamformer_->Initialize(kChunkSizeMs, split_rate_);
} }

View File

@ -208,6 +208,7 @@ class AudioProcessingImpl : public AudioProcessing {
const bool beamformer_enabled_; const bool beamformer_enabled_;
rtc::scoped_ptr<Beamformer<float>> beamformer_; rtc::scoped_ptr<Beamformer<float>> beamformer_;
const std::vector<Point> array_geometry_; const std::vector<Point> array_geometry_;
const SphericalPointf target_direction_;
bool intelligibility_enabled_; bool intelligibility_enabled_;
rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer_; rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer_;

View File

@ -16,6 +16,11 @@
#include "webrtc/base/checks.h" #include "webrtc/base/checks.h"
namespace webrtc { namespace webrtc {
namespace {
const float kMaxDotProduct = 1e-6f;
} // namespace
float GetMinimumSpacing(const std::vector<Point>& array_geometry) { float GetMinimumSpacing(const std::vector<Point>& array_geometry) {
RTC_CHECK_GT(array_geometry.size(), 1u); RTC_CHECK_GT(array_geometry.size(), 1u);
@ -29,4 +34,85 @@ float GetMinimumSpacing(const std::vector<Point>& array_geometry) {
return mic_spacing; return mic_spacing;
} }
// Returns the displacement that leads from |a| to |b|, i.e. the
// component-wise difference b - a. The result is not normalized.
Point PairDirection(const Point& a, const Point& b) {
  const float delta_x = b.x() - a.x();
  const float delta_y = b.y() - a.y();
  const float delta_z = b.z() - a.z();
  return Point(delta_x, delta_y, delta_z);
}
// Returns the scalar (inner) product of |a| and |b|, accumulated in x, y, z
// order.
float DotProduct(const Point& a, const Point& b) {
  float accumulated = a.x() * b.x();
  accumulated += a.y() * b.y();
  accumulated += a.z() * b.z();
  return accumulated;
}
// Returns the vector (cross) product a x b. The result is perpendicular to
// both inputs and is not normalized.
Point CrossProduct(const Point& a, const Point& b) {
  const float cross_x = a.y() * b.z() - a.z() * b.y();
  const float cross_y = a.z() * b.x() - a.x() * b.z();
  const float cross_z = a.x() * b.y() - a.y() * b.x();
  return Point(cross_x, cross_y, cross_z);
}
// Returns true if |a| and |b| lie along the same line, pointing either the
// same or the opposite way. A zero-length vector is treated as parallel to
// every vector.
//
// The check is |a x b|^2 <= kMaxDotProduct * |a|^2 * |b|^2, which bounds the
// squared sine of the angle between the two vectors. Scaling the tolerance by
// the squared lengths makes the result independent of the magnitude of the
// inputs. With an absolute bound on |a x b|^2, any two short vectors pass the
// test regardless of their angle: for two perpendicular 2 cm microphone pairs
// |a x b|^2 is 1.6e-7, which is below 1e-6. For unit-length inputs the scaled
// bound is the same as the absolute one.
bool AreParallel(const Point& a, const Point& b) {
  const Point cross_product = CrossProduct(a, b);
  const float squared_length_product = DotProduct(a, a) * DotProduct(b, b);
  // "<=" rather than "<" so that a zero vector (both sides equal to zero)
  // still counts as parallel.
  return DotProduct(cross_product, cross_product) <=
         kMaxDotProduct * squared_length_product;
}
// Returns true if the dot product of |a| and |b| lies strictly inside
// (-kMaxDotProduct, kMaxDotProduct). The bound is absolute, i.e. it is not
// scaled by the lengths of the inputs, so a zero-length vector is
// perpendicular to everything.
bool ArePerpendicular(const Point& a, const Point& b) {
  const float projection = DotProduct(a, b);
  return -kMaxDotProduct < projection && projection < kMaxDotProduct;
}
// Returns the (unnormalized) direction from the first to the second point if
// every consecutive pair of points in |array_geometry| is parallel to that
// first pair, and an empty Maybe otherwise. Requires at least two points.
rtc::Maybe<Point> GetDirectionIfLinear(
    const std::vector<Point>& array_geometry) {
  RTC_DCHECK_GT(array_geometry.size(), 1u);
  const Point reference_direction =
      PairDirection(array_geometry[0], array_geometry[1]);
  size_t mic = 2u;
  while (mic < array_geometry.size()) {
    const Point step = PairDirection(array_geometry[mic - 1],
                                     array_geometry[mic]);
    if (!AreParallel(reference_direction, step)) {
      // One consecutive pair leaves the line: the array is not linear.
      return rtc::Maybe<Point>();
    }
    ++mic;
  }
  return reference_direction;
}
// Returns the (unnormalized) normal of the plane that contains every point of
// |array_geometry|. Returns an empty Maybe when the points are all on one
// line (no unique plane) or when they do not share a plane. Requires at least
// two points.
rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry) {
  RTC_DCHECK_GT(array_geometry.size(), 1u);
  const Point reference_direction =
      PairDirection(array_geometry[0], array_geometry[1]);

  // Phase 1: walk the consecutive pairs until one is found that is not
  // parallel to the first pair. Together they span the candidate plane.
  Point spanning_direction(0.f, 0.f, 0.f);
  bool found_spanning_direction = false;
  size_t next = 2u;
  while (next < array_geometry.size() && !found_spanning_direction) {
    spanning_direction =
        PairDirection(array_geometry[next - 1], array_geometry[next]);
    found_spanning_direction =
        !AreParallel(reference_direction, spanning_direction);
    ++next;
  }
  if (!found_spanning_direction) {
    // All pairs are parallel: the geometry is linear and has no single plane.
    return rtc::Maybe<Point>();
  }

  // Phase 2: every remaining consecutive pair has to stay in that plane, i.e.
  // be perpendicular to its normal.
  const Point plane_normal =
      CrossProduct(reference_direction, spanning_direction);
  for (; next < array_geometry.size(); ++next) {
    const Point step =
        PairDirection(array_geometry[next - 1], array_geometry[next]);
    if (!ArePerpendicular(plane_normal, step)) {
      return rtc::Maybe<Point>();
    }
  }
  return plane_normal;
}
// Returns a normal of the array that lies in the xy-plane, or an empty Maybe
// if the array has no such normal.
//
// - Linear array: the array direction is rotated by -90 degrees within the
//   xy-plane, (x, y, z) -> (y, -x, 0). The z component of the direction is
//   dropped.
// - Planar array: the plane normal is returned only if its z component is
//   within kMaxDotProduct of zero. The normal is unnormalized, so this bound
//   applies to its raw z component.
// - Anything else: empty.
rtc::Maybe<Point> GetArrayNormalIfExists(
    const std::vector<Point>& array_geometry) {
  const rtc::Maybe<Point> direction = GetDirectionIfLinear(array_geometry);
  if (direction) {
    return Point(direction->y(), -direction->x(), 0.f);
  }
  const rtc::Maybe<Point> normal = GetNormalIfPlanar(array_geometry);
  // The sign of the normal depends on the order in which the microphones are
  // listed (it is a cross product), so the magnitude of z has to be compared.
  // Without the absolute value a normal such as (0, 0, -1) would be accepted
  // as lying in the xy-plane.
  if (normal && std::abs(normal->z()) < kMaxDotProduct) {
    return normal;
  }
  return rtc::Maybe<Point>();
}
// Maps |azimuth| (radians) to the point (cos(azimuth), sin(azimuth), 0), i.e.
// the unit vector in the xy-plane at that angle.
Point AzimuthToPoint(float azimuth) {
  const float x = std::cos(azimuth);
  const float y = std::sin(azimuth);
  return Point(x, y, 0.f);
}
} // namespace webrtc } // namespace webrtc

View File

@ -14,11 +14,23 @@
#include <cmath> #include <cmath>
#include <vector> #include <vector>
#include "webrtc/base/maybe.h"
namespace webrtc { namespace webrtc {
// Coordinates in meters. // Coordinates in meters. The convention used is:
// x: the horizontal dimension, with positive to the right from the camera's
// perspective.
// y: the depth dimension, with positive forward from the camera's
// perspective.
// z: the vertical dimension, with positive upwards.
template<typename T> template<typename T>
struct CartesianPoint { struct CartesianPoint {
// Default constructor: sets all three coordinates to zero.
CartesianPoint() {
  for (int axis = 0; axis < 3; ++axis) {
    c[axis] = 0;
  }
}
CartesianPoint(T x, T y, T z) { CartesianPoint(T x, T y, T z) {
c[0] = x; c[0] = x;
c[1] = y; c[1] = y;
@ -32,10 +44,35 @@ struct CartesianPoint {
using Point = CartesianPoint<float>; using Point = CartesianPoint<float>;
// Calculates the direction from a to b.
Point PairDirection(const Point& a, const Point& b);
float DotProduct(const Point& a, const Point& b);
Point CrossProduct(const Point& a, const Point& b);
bool AreParallel(const Point& a, const Point& b);
bool ArePerpendicular(const Point& a, const Point& b);
// Returns the minimum distance between any two Points in the given // Returns the minimum distance between any two Points in the given
// |array_geometry|. // |array_geometry|.
float GetMinimumSpacing(const std::vector<Point>& array_geometry); float GetMinimumSpacing(const std::vector<Point>& array_geometry);
// If the given array geometry is linear it returns the direction without
// normalizing.
rtc::Maybe<Point> GetDirectionIfLinear(
const std::vector<Point>& array_geometry);
// If the given array geometry is planar it returns the normal without
// normalizing.
rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry);
// Returns the normal of an array if it has one and it is in the xy-plane.
rtc::Maybe<Point> GetArrayNormalIfExists(
const std::vector<Point>& array_geometry);
// The resulting Point will be in the xy-plane.
Point AzimuthToPoint(float azimuth);
template<typename T> template<typename T>
float Distance(CartesianPoint<T> a, CartesianPoint<T> b) { float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) + return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) +
@ -43,6 +80,11 @@ float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
(a.z() - b.z()) * (a.z() - b.z())); (a.z() - b.z()) * (a.z() - b.z()));
} }
// The convention used:
// azimuth: zero is to the right from the camera's perspective, with positive
// angles in radians counter-clockwise.
// elevation: zero is horizontal, with positive angles in radians upwards.
// radius: distance from the camera in meters.
template <typename T> template <typename T>
struct SphericalPoint { struct SphericalPoint {
SphericalPoint(T azimuth, T elevation, T radius) { SphericalPoint(T azimuth, T elevation, T radius) {
@ -58,6 +100,17 @@ struct SphericalPoint {
using SphericalPointf = SphericalPoint<float>; using SphericalPointf = SphericalPoint<float>;
// Helper functions to transform degrees to radians and the inverse.
// Converts |angle_degrees| to radians. M_PI is a double, so for float and
// integral T the arithmetic is done in double precision and the result is
// converted back to T on return (truncating for integral T).
template <typename T>
T DegreesToRadians(T angle_degrees) {
  const auto angle_radians = M_PI * angle_degrees / 180;
  return static_cast<T>(angle_radians);
}
// Converts |angle_radians| to degrees. The division by M_PI (a double) is
// done in double precision for float and integral T, and the result is
// converted back to T on return (truncating for integral T).
template <typename T>
T RadiansToDegrees(T angle_radians) {
  const auto angle_degrees = 180 * angle_radians / M_PI;
  return static_cast<T>(angle_degrees);
}
} // namespace webrtc } // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_ #endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_

View File

@ -8,25 +8,178 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/beamformer/array_util.h" #include "webrtc/modules/audio_processing/beamformer/array_util.h"
#include <math.h>
#include <vector> #include <vector>
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
namespace webrtc { namespace webrtc {
// Exact, component-wise equality of two Points. Used by the EXPECT_EQ checks
// in this file.
bool operator==(const Point& lhs, const Point& rhs) {
  if (lhs.x() != rhs.x()) {
    return false;
  }
  if (lhs.y() != rhs.y()) {
    return false;
  }
  return lhs.z() == rhs.z();
}
// PairDirection(a, b) must equal b - a component-wise. Covers the forward and
// reversed direction from the origin, two identical points (zero vector) and
// points with mixed-sign coordinates. EXPECT_EQ compares with the exact
// operator== defined above.
TEST(ArrayUtilTest, PairDirection) {
EXPECT_EQ(Point(1.f, 2.f, 3.f),
PairDirection(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f)));
EXPECT_EQ(Point(-1.f, -2.f, -3.f),
PairDirection(Point(1.f, 2.f, 3.f), Point(0.f, 0.f, 0.f)));
EXPECT_EQ(Point(0.f, 0.f, 0.f),
PairDirection(Point(1.f, 0.f, 0.f), Point(1.f, 0.f, 0.f)));
EXPECT_EQ(Point(-1.f, 2.f, 0.f),
PairDirection(Point(1.f, 0.f, 0.f), Point(0.f, 2.f, 0.f)));
EXPECT_EQ(Point(-4.f, 4.f, -4.f),
PairDirection(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f)));
}
// DotProduct must be zero when one operand is the zero vector or when the
// operands are orthogonal, and must give the signed product for collinear
// and general vectors.
TEST(ArrayUtilTest, DotProduct) {
EXPECT_FLOAT_EQ(0.f, DotProduct(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f)));
EXPECT_FLOAT_EQ(0.f, DotProduct(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f)));
EXPECT_FLOAT_EQ(0.f, DotProduct(Point(1.f, 1.f, 0.f), Point(1.f, -1.f, 0.f)));
EXPECT_FLOAT_EQ(2.f, DotProduct(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f)));
EXPECT_FLOAT_EQ(-6.f,
DotProduct(Point(-2.f, 0.f, 0.f), Point(3.f, 0.f, 0.f)));
EXPECT_FLOAT_EQ(-10.f,
DotProduct(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f)));
}
// CrossProduct must be zero when one operand is the zero vector, must map the
// coordinate axes onto each other with the right-handed sign convention
// (x cross y = z, y cross z = x, x cross z = -y), and must give the expected
// result for a general pair.
TEST(ArrayUtilTest, CrossProduct) {
EXPECT_EQ(Point(0.f, 0.f, 0.f),
CrossProduct(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f)));
EXPECT_EQ(Point(0.f, 0.f, 1.f),
CrossProduct(Point(1.f, 0.f, 0.f), Point(0.f, 1.f, 0.f)));
EXPECT_EQ(Point(1.f, 0.f, 0.f),
CrossProduct(Point(0.f, 1.f, 0.f), Point(0.f, 0.f, 1.f)));
EXPECT_EQ(Point(0.f, -1.f, 0.f),
CrossProduct(Point(1.f, 0.f, 0.f), Point(0.f, 0.f, 1.f)));
EXPECT_EQ(Point(-4.f, -8.f, -4.f),
CrossProduct(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f)));
}
// AreParallel must accept the zero vector paired with any vector, as well as
// same-direction and opposite-direction multiples, and must reject
// orthogonal and general non-collinear pairs.
TEST(ArrayUtilTest, AreParallel) {
EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f)));
EXPECT_FALSE(AreParallel(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f)));
EXPECT_FALSE(AreParallel(Point(1.f, 2.f, 0.f), Point(1.f, -0.5f, 0.f)));
EXPECT_FALSE(AreParallel(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f)));
EXPECT_TRUE(AreParallel(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f)));
EXPECT_TRUE(AreParallel(Point(1.f, 2.f, 3.f), Point(-2.f, -4.f, -6.f)));
}
// ArePerpendicular must accept the zero vector paired with any vector and
// pairs whose dot product is zero, and must reject collinear and general
// non-orthogonal pairs. The vector pairs are the same ones used in the
// AreParallel test.
TEST(ArrayUtilTest, ArePerpendicular) {
EXPECT_TRUE(ArePerpendicular(Point(0.f, 0.f, 0.f), Point(1.f, 2.f, 3.f)));
EXPECT_TRUE(ArePerpendicular(Point(1.f, 0.f, 2.f), Point(0.f, 3.f, 0.f)));
EXPECT_TRUE(ArePerpendicular(Point(1.f, 2.f, 0.f), Point(1.f, -0.5f, 0.f)));
EXPECT_FALSE(ArePerpendicular(Point(1.f, -2.f, 3.f), Point(-3.f, 2.f, -1.f)));
EXPECT_FALSE(ArePerpendicular(Point(1.f, 0.f, 0.f), Point(2.f, 0.f, 0.f)));
EXPECT_FALSE(ArePerpendicular(Point(1.f, 2.f, 3.f), Point(-2.f, -4.f, -6.f)));
}
TEST(ArrayUtilTest, GetMinimumSpacing) { TEST(ArrayUtilTest, GetMinimumSpacing) {
std::vector<Point> array_geometry; std::vector<Point> geometry;
array_geometry.push_back(Point(0.f, 0.f, 0.f)); geometry.push_back(Point(0.f, 0.f, 0.f));
array_geometry.push_back(Point(0.1f, 0.f, 0.f)); geometry.push_back(Point(0.1f, 0.f, 0.f));
EXPECT_FLOAT_EQ(0.1f, GetMinimumSpacing(array_geometry)); EXPECT_FLOAT_EQ(0.1f, GetMinimumSpacing(geometry));
array_geometry.push_back(Point(0.f, 0.05f, 0.f)); geometry.push_back(Point(0.f, 0.05f, 0.f));
EXPECT_FLOAT_EQ(0.05f, GetMinimumSpacing(array_geometry)); EXPECT_FLOAT_EQ(0.05f, GetMinimumSpacing(geometry));
array_geometry.push_back(Point(0.f, 0.f, 0.02f)); geometry.push_back(Point(0.f, 0.f, 0.02f));
EXPECT_FLOAT_EQ(0.02f, GetMinimumSpacing(array_geometry)); EXPECT_FLOAT_EQ(0.02f, GetMinimumSpacing(geometry));
array_geometry.push_back(Point(-0.003f, -0.004f, 0.02f)); geometry.push_back(Point(-0.003f, -0.004f, 0.02f));
EXPECT_FLOAT_EQ(0.005f, GetMinimumSpacing(array_geometry)); EXPECT_FLOAT_EQ(0.005f, GetMinimumSpacing(geometry));
}
// Grows a geometry one microphone at a time and re-checks the result after
// every addition.
TEST(ArrayUtilTest, GetDirectionIfLinear) {
std::vector<Point> geometry;
// While every point is on the x-axis (including points that are out of order
// or at negative x) the reported direction must be parallel to the x-axis.
geometry.push_back(Point(0.f, 0.f, 0.f));
geometry.push_back(Point(0.1f, 0.f, 0.f));
EXPECT_TRUE(
AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry)));
geometry.push_back(Point(0.15f, 0.f, 0.f));
EXPECT_TRUE(
AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry)));
geometry.push_back(Point(-0.2f, 0.f, 0.f));
EXPECT_TRUE(
AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry)));
geometry.push_back(Point(0.05f, 0.f, 0.f));
EXPECT_TRUE(
AreParallel(Point(1.f, 0.f, 0.f), *GetDirectionIfLinear(geometry)));
// Once a point leaves the x-axis the geometry is no longer linear, and it
// stays non-linear when further points are added.
geometry.push_back(Point(0.1f, 0.1f, 0.f));
EXPECT_FALSE(GetDirectionIfLinear(geometry));
geometry.push_back(Point(0.f, 0.f, -0.2f));
EXPECT_FALSE(GetDirectionIfLinear(geometry));
}
// Grows a geometry one microphone at a time and re-checks the result after
// every addition.
TEST(ArrayUtilTest, GetNormalIfPlanar) {
std::vector<Point> geometry;
// Points that are all on the x-axis form a linear array, for which no normal
// is reported.
geometry.push_back(Point(0.f, 0.f, 0.f));
geometry.push_back(Point(0.1f, 0.f, 0.f));
EXPECT_FALSE(GetNormalIfPlanar(geometry));
geometry.push_back(Point(0.15f, 0.f, 0.f));
EXPECT_FALSE(GetNormalIfPlanar(geometry));
// Points with z == 0 that are not collinear lie in the xy-plane, so the
// normal must be parallel to the z-axis.
geometry.push_back(Point(0.1f, 0.2f, 0.f));
EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 1.f), *GetNormalIfPlanar(geometry)));
geometry.push_back(Point(0.f, -0.15f, 0.f));
EXPECT_TRUE(AreParallel(Point(0.f, 0.f, 1.f), *GetNormalIfPlanar(geometry)));
// A point with z != 0 breaks planarity; adding more points, even ones back
// in the xy-plane, does not restore it.
geometry.push_back(Point(0.f, 0.1f, 0.2f));
EXPECT_FALSE(GetNormalIfPlanar(geometry));
geometry.push_back(Point(0.f, 0.f, -0.15f));
EXPECT_FALSE(GetNormalIfPlanar(geometry));
geometry.push_back(Point(0.1f, 0.2f, 0.f));
EXPECT_FALSE(GetNormalIfPlanar(geometry));
}
// Grows a geometry one microphone at a time and re-checks the result after
// every addition.
TEST(ArrayUtilTest, GetArrayNormalIfExists) {
std::vector<Point> geometry;
// A linear array along the x-axis has its normal along the y-axis.
geometry.push_back(Point(0.f, 0.f, 0.f));
geometry.push_back(Point(0.1f, 0.f, 0.f));
EXPECT_TRUE(
AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry)));
geometry.push_back(Point(0.15f, 0.f, 0.f));
EXPECT_TRUE(
AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry)));
// Points with y == 0 but different z make a planar array in the xz-plane;
// its normal is still along the y-axis and lies in the xy-plane.
geometry.push_back(Point(0.1f, 0.f, 0.2f));
EXPECT_TRUE(
AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry)));
geometry.push_back(Point(0.f, 0.f, -0.1f));
EXPECT_TRUE(
AreParallel(Point(0.f, 1.f, 0.f), *GetArrayNormalIfExists(geometry)));
// A point with y != 0 takes the array out of the xz-plane; from here on no
// normal is reported.
geometry.push_back(Point(0.1f, 0.2f, 0.3f));
EXPECT_FALSE(GetArrayNormalIfExists(geometry));
geometry.push_back(Point(0.f, -0.1f, 0.f));
EXPECT_FALSE(GetArrayNormalIfExists(geometry));
geometry.push_back(Point(1.f, 0.f, -0.2f));
EXPECT_FALSE(GetArrayNormalIfExists(geometry));
}
// Checks DegreesToRadians on float inputs for zero and for positive and
// negative angles between -180 and 150 degrees. M_PI is cast to float so that
// the expected values are computed in single precision.
TEST(ArrayUtilTest, DegreesToRadians) {
EXPECT_FLOAT_EQ(0.f, DegreesToRadians(0.f));
EXPECT_FLOAT_EQ(static_cast<float>(M_PI) / 6.f, DegreesToRadians(30.f));
EXPECT_FLOAT_EQ(-static_cast<float>(M_PI) / 4.f, DegreesToRadians(-45.f));
EXPECT_FLOAT_EQ(static_cast<float>(M_PI) / 3.f, DegreesToRadians(60.f));
EXPECT_FLOAT_EQ(-static_cast<float>(M_PI) / 2.f, DegreesToRadians(-90.f));
EXPECT_FLOAT_EQ(2.f * static_cast<float>(M_PI) / 3.f,
DegreesToRadians(120.f));
EXPECT_FLOAT_EQ(-3.f * static_cast<float>(M_PI) / 4.f,
DegreesToRadians(-135.f));
EXPECT_FLOAT_EQ(5.f * static_cast<float>(M_PI) / 6.f,
DegreesToRadians(150.f));
EXPECT_FLOAT_EQ(-static_cast<float>(M_PI), DegreesToRadians(-180.f));
}
TEST(ArrayUtilTest, RadiansToDegrees) {
EXPECT_FLOAT_EQ(0.f, RadiansToDegrees(0.f));
EXPECT_FLOAT_EQ(30.f, RadiansToDegrees(M_PI / 6.f));
EXPECT_FLOAT_EQ(-45.f, RadiansToDegrees(-M_PI / 4.f));
EXPECT_FLOAT_EQ(60.f, RadiansToDegrees(M_PI / 3.f));
EXPECT_FLOAT_EQ(-90.f, RadiansToDegrees(-M_PI / 2.f));
EXPECT_FLOAT_EQ(120.f, RadiansToDegrees(2.f * M_PI / 3.f));
EXPECT_FLOAT_EQ(-135.f, RadiansToDegrees(-3.f * M_PI / 4.f));
EXPECT_FLOAT_EQ(150.f, RadiansToDegrees(5.f * M_PI / 6.f));
EXPECT_FLOAT_EQ(-180.f, RadiansToDegrees(-M_PI));
} }
} // namespace webrtc } // namespace webrtc

View File

@ -32,6 +32,9 @@ class Beamformer {
// Needs to be called before the Beamformer can be used. // Needs to be called before the Beamformer can be used.
virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0; virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;
// Aim the beamformer at a point in space.
virtual void AimAt(const SphericalPointf& spherical_point) = 0;
// Indicates whether a given point is inside of the beam. // Indicates whether a given point is inside of the beam.
virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; } virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }

View File

@ -29,13 +29,6 @@ const float kKbdAlpha = 1.5f;
const float kSpeedOfSoundMeterSeconds = 343; const float kSpeedOfSoundMeterSeconds = 343;
// For both target and interference angles, PI / 2 is perpendicular to the
// microphone array, facing forwards. The positive direction goes
// counterclockwise.
// The angle at which we amplify sound.
// TODO(aluebs): Make the target angle dynamically settable.
const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;
// The minimum separation in radians between the target direction and an // The minimum separation in radians between the target direction and an
// interferer scenario. // interferer scenario.
const float kMinAwayRadians = 0.2f; const float kMinAwayRadians = 0.2f;
@ -50,8 +43,6 @@ const float kAwaySlope = 0.008f;
// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
const float kBalance = 0.95f; const float kBalance = 0.95f;
const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;
// Alpha coefficients for mask smoothing. // Alpha coefficients for mask smoothing.
const float kMaskTimeSmoothAlpha = 0.2f; const float kMaskTimeSmoothAlpha = 0.2f;
const float kMaskFrequencySmoothAlpha = 0.6f; const float kMaskFrequencySmoothAlpha = 0.6f;
@ -187,14 +178,23 @@ std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
} // namespace } // namespace
const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
// static // static
const size_t NonlinearBeamformer::kNumFreqBins; const size_t NonlinearBeamformer::kNumFreqBins;
NonlinearBeamformer::NonlinearBeamformer( NonlinearBeamformer::NonlinearBeamformer(
const std::vector<Point>& array_geometry) const std::vector<Point>& array_geometry,
SphericalPointf target_direction)
: num_input_channels_(array_geometry.size()), : num_input_channels_(array_geometry.size()),
array_geometry_(GetCenteredArray(array_geometry)), array_geometry_(GetCenteredArray(array_geometry)),
min_mic_spacing_(GetMinimumSpacing(array_geometry)) { array_normal_(GetArrayNormalIfExists(array_geometry)),
min_mic_spacing_(GetMinimumSpacing(array_geometry)),
target_angle_radians_(target_direction.azimuth()),
away_radians_(std::min(
static_cast<float>(M_PI),
std::max(kMinAwayRadians,
kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
} }
@ -202,7 +202,6 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
chunk_length_ = chunk_length_ =
static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
sample_rate_hz_ = sample_rate_hz; sample_rate_hz_ = sample_rate_hz;
InitFrequencyCorrectionRanges();
high_pass_postfilter_mask_ = 1.f; high_pass_postfilter_mask_ = 1.f;
is_target_present_ = false; is_target_present_ = false;
@ -223,75 +222,86 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
} }
// Initialize all nonadaptive values before looping through the frames. InitLowFrequencyCorrectionRanges();
InitInterfAngles(); InitDiffuseCovMats();
InitDelaySumMasks(); AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
InitTargetCovMats();
InitInterfCovMats();
for (size_t i = 0; i < kNumFreqBins; ++i) {
rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
rpsiws_[i].clear();
for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
}
}
} }
void NonlinearBeamformer::InitFrequencyCorrectionRanges() { // These bin indexes determine the regions over which a mean is taken. This is
// applied as a constant value over the adjacent end "frequency correction"
// regions.
//
// low_mean_start_bin_ high_mean_start_bin_
// v v constant
// |----------------|--------|----------------|-------|----------------|
// constant ^ ^
// low_mean_end_bin_ high_mean_end_bin_
//
void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() {
// Convert the low-mean band edges from Hz to FFT bin indexes at the current
// |sample_rate_hz_|. Neither value depends on the target angle.
low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
// The low-mean region must start above bin 0 and must not be empty.
RTC_DCHECK_GT(low_mean_start_bin_, 0U);
RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
}
void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
const float kAliasingFreqHz = const float kAliasingFreqHz =
kSpeedOfSoundMeterSeconds / kSpeedOfSoundMeterSeconds /
(min_mic_spacing_ * (1.f + std::abs(std::cos(kTargetAngleRadians)))); (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz, const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,
sample_rate_hz_ / 2.f); sample_rate_hz_ / 2.f);
const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz, const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,
sample_rate_hz_ / 2.f); sample_rate_hz_ / 2.f);
low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
// These bin indexes determine the regions over which a mean is taken. This
// is applied as a constant value over the adjacent end "frequency correction"
// regions.
//
// low_mean_start_bin_ high_mean_start_bin_
// v v constant
// |----------------|--------|----------------|-------|----------------|
// constant ^ ^
// low_mean_end_bin_ high_mean_end_bin_
//
RTC_DCHECK_GT(low_mean_start_bin_, 0U);
RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
} }
void NonlinearBeamformer::InitInterfAngles() { void NonlinearBeamformer::InitInterfAngles() {
const float kAwayRadians =
std::min(static_cast<float>(M_PI),
std::max(kMinAwayRadians, kAwaySlope * static_cast<float>(M_PI) /
min_mic_spacing_));
interf_angles_radians_.clear(); interf_angles_radians_.clear();
// TODO(aluebs): When the target angle is settable, make sure the interferer const Point target_direction = AzimuthToPoint(target_angle_radians_);
// scenarios aren't reflected over the target one for linear geometries. const Point clockwise_interf_direction =
interf_angles_radians_.push_back(kTargetAngleRadians - kAwayRadians); AzimuthToPoint(target_angle_radians_ - away_radians_);
interf_angles_radians_.push_back(kTargetAngleRadians + kAwayRadians); if (!array_normal_ ||
DotProduct(*array_normal_, target_direction) *
DotProduct(*array_normal_, clockwise_interf_direction) >=
0.f) {
// The target and clockwise interferer are in the same half-plane defined
// by the array.
interf_angles_radians_.push_back(target_angle_radians_ - away_radians_);
} else {
// Otherwise, the interferer will begin reflecting back at the target.
// Instead rotate it away 180 degrees.
interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ +
M_PI);
}
const Point counterclock_interf_direction =
AzimuthToPoint(target_angle_radians_ + away_radians_);
if (!array_normal_ ||
DotProduct(*array_normal_, target_direction) *
DotProduct(*array_normal_, counterclock_interf_direction) >=
0.f) {
// The target and counter-clockwise interferer are in the same half-plane
// defined by the array.
interf_angles_radians_.push_back(target_angle_radians_ + away_radians_);
} else {
// Otherwise, the interferer will begin reflecting back at the target.
// Instead rotate it away 180 degrees.
interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ -
M_PI);
}
} }
void NonlinearBeamformer::InitDelaySumMasks() { void NonlinearBeamformer::InitDelaySumMasks() {
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
delay_sum_masks_[f_ix].Resize(1, num_input_channels_); delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, CovarianceMatrixGenerator::PhaseAlignmentMasks(
kFftSize, f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
sample_rate_hz_, array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);
kSpeedOfSoundMeterSeconds,
array_geometry_,
kTargetAngleRadians,
&delay_sum_masks_[f_ix]);
complex_f norm_factor = sqrt( complex_f norm_factor = sqrt(
ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
@ -309,15 +319,19 @@ void NonlinearBeamformer::InitTargetCovMats() {
} }
} }
void NonlinearBeamformer::InitDiffuseCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_);
CovarianceMatrixGenerator::UniformCovarianceMatrix(
wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0];
uniform_cov_mat_[i].Scale(1.f / normalization_factor);
uniform_cov_mat_[i].Scale(1 - kBalance);
}
}
void NonlinearBeamformer::InitInterfCovMats() { void NonlinearBeamformer::InitInterfCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) { for (size_t i = 0; i < kNumFreqBins; ++i) {
ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
array_geometry_,
&uniform_cov_mat);
complex_f normalization_factor = uniform_cov_mat.elements()[0][0];
uniform_cov_mat.Scale(1.f / normalization_factor);
uniform_cov_mat.Scale(1 - kBalance);
interf_cov_mats_[i].clear(); interf_cov_mats_[i].clear();
for (size_t j = 0; j < interf_angles_radians_.size(); ++j) { for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_, interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,
@ -333,11 +347,21 @@ void NonlinearBeamformer::InitInterfCovMats() {
array_geometry_, array_geometry_,
&angled_cov_mat); &angled_cov_mat);
// Normalize matrices before averaging them. // Normalize matrices before averaging them.
normalization_factor = angled_cov_mat.elements()[0][0]; complex_f normalization_factor = angled_cov_mat.elements()[0][0];
angled_cov_mat.Scale(1.f / normalization_factor); angled_cov_mat.Scale(1.f / normalization_factor);
// Weighted average of matrices. // Weighted average of matrices.
angled_cov_mat.Scale(kBalance); angled_cov_mat.Scale(kBalance);
interf_cov_mats_[i][j]->Add(uniform_cov_mat, angled_cov_mat); interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat);
}
}
}
void NonlinearBeamformer::NormalizeCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
rpsiws_[i].clear();
for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
} }
} }
} }
@ -354,28 +378,32 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
const float ramp_increment = const float ramp_increment =
(high_pass_postfilter_mask_ - old_high_pass_mask) / (high_pass_postfilter_mask_ - old_high_pass_mask) /
input.num_frames_per_band(); input.num_frames_per_band();
// Apply delay and sum and post-filter in the time domain. WARNING: only works // Apply the smoothed high-pass mask to the first channel of each band.
// because delay-and-sum is not frequency dependent. // This can be done because the effect of the linear beamformer is negligible
// compared to the post-filter.
for (size_t i = 1; i < input.num_bands(); ++i) { for (size_t i = 1; i < input.num_bands(); ++i) {
float smoothed_mask = old_high_pass_mask; float smoothed_mask = old_high_pass_mask;
for (size_t j = 0; j < input.num_frames_per_band(); ++j) { for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
smoothed_mask += ramp_increment; smoothed_mask += ramp_increment;
output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
// Applying the delay and sum (at zero degrees, this is equivalent to
// averaging).
float sum = 0.f;
for (int k = 0; k < input.num_channels(); ++k) {
sum += input.channels(i)[k][j];
}
output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
} }
} }
} }
// Steers the beam towards |target_direction| and recomputes everything that
// is derived from the target angle. Only the azimuth of |target_direction| is
// read; its elevation and radius are not used here.
// NOTE(review): InitHighFrequencyCorrectionRanges() checks against
// |low_mean_end_bin_| and InitInterfCovMats() reads |uniform_cov_mat_|, both
// of which are set up by Initialize(), so this appears to require that
// Initialize() has already run -- confirm against callers.
void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
target_angle_radians_ = target_direction.azimuth();
// The order of the calls below matters: InitInterfCovMats() iterates over the
// angles produced by InitInterfAngles(), and NormalizeCovMats() reads the
// delay-and-sum masks and the target and interferer covariance matrices, so
// it has to come last.
InitHighFrequencyCorrectionRanges();
InitInterfAngles();
InitDelaySumMasks();
InitTargetCovMats();
InitInterfCovMats();
NormalizeCovMats();
}
bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
// If more than half-beamwidth degrees away from the beam's center, // If more than half-beamwidth degrees away from the beam's center,
// you are out of the beam. // you are out of the beam.
return fabs(spherical_point.azimuth() - kTargetAngleRadians) < return fabs(spherical_point.azimuth() - target_angle_radians_) <
kHalfBeamWidthRadians; kHalfBeamWidthRadians;
} }

View File

@ -11,6 +11,10 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include <math.h>
#include <vector> #include <vector>
#include "webrtc/common_audio/lapped_transform.h" #include "webrtc/common_audio/lapped_transform.h"
@ -31,7 +35,12 @@ class NonlinearBeamformer
: public Beamformer<float>, : public Beamformer<float>,
public LappedTransform::Callback { public LappedTransform::Callback {
public: public:
explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); static const float kHalfBeamWidthRadians;
explicit NonlinearBeamformer(
const std::vector<Point>& array_geometry,
SphericalPointf target_direction =
SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));
// Sample rate corresponds to the lower band. // Sample rate corresponds to the lower band.
// Needs to be called before the NonlinearBeamformer can be used. // Needs to be called before the NonlinearBeamformer can be used.
@ -44,6 +53,8 @@ class NonlinearBeamformer
void ProcessChunk(const ChannelBuffer<float>& input, void ProcessChunk(const ChannelBuffer<float>& input,
ChannelBuffer<float>* output) override; ChannelBuffer<float>* output) override;
void AimAt(const SphericalPointf& target_direction) override;
bool IsInBeam(const SphericalPointf& spherical_point) override; bool IsInBeam(const SphericalPointf& spherical_point) override;
// After processing each block |is_target_present_| is set to true if the // After processing each block |is_target_present_| is set to true if the
@ -62,15 +73,21 @@ class NonlinearBeamformer
complex<float>* const* output) override; complex<float>* const* output) override;
private: private:
FRIEND_TEST_ALL_PREFIXES(NonlinearBeamformerTest,
InterfAnglesTakeAmbiguityIntoAccount);
typedef Matrix<float> MatrixF; typedef Matrix<float> MatrixF;
typedef ComplexMatrix<float> ComplexMatrixF; typedef ComplexMatrix<float> ComplexMatrixF;
typedef complex<float> complex_f; typedef complex<float> complex_f;
void InitFrequencyCorrectionRanges(); void InitLowFrequencyCorrectionRanges();
void InitHighFrequencyCorrectionRanges();
void InitInterfAngles(); void InitInterfAngles();
void InitDelaySumMasks(); void InitDelaySumMasks();
void InitTargetCovMats(); void InitTargetCovMats();
void InitDiffuseCovMats();
void InitInterfCovMats(); void InitInterfCovMats();
void NormalizeCovMats();
// Calculates postfilter masks that minimize the mean squared error of our // Calculates postfilter masks that minimize the mean squared error of our
// estimation of the desired signal. // estimation of the desired signal.
@ -116,6 +133,8 @@ class NonlinearBeamformer
int sample_rate_hz_; int sample_rate_hz_;
const std::vector<Point> array_geometry_; const std::vector<Point> array_geometry_;
// The normal direction of the array if it has one and it is in the xy-plane.
const rtc::Maybe<Point> array_normal_;
// Minimum spacing between microphone pairs. // Minimum spacing between microphone pairs.
const float min_mic_spacing_; const float min_mic_spacing_;
@ -133,17 +152,20 @@ class NonlinearBeamformer
// Time and frequency smoothed mask. // Time and frequency smoothed mask.
float final_mask_[kNumFreqBins]; float final_mask_[kNumFreqBins];
float target_angle_radians_;
// Angles of the interferer scenarios. // Angles of the interferer scenarios.
std::vector<float> interf_angles_radians_; std::vector<float> interf_angles_radians_;
// The angle between the target and the interferer scenarios.
const float away_radians_;
// Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
ComplexMatrixF delay_sum_masks_[kNumFreqBins]; ComplexMatrixF delay_sum_masks_[kNumFreqBins];
ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x // Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|. // |num_input_channels_|.
ComplexMatrixF target_cov_mats_[kNumFreqBins]; ComplexMatrixF target_cov_mats_[kNumFreqBins];
ComplexMatrixF uniform_cov_mat_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|. ScopedVector has a size equal to the number of // |num_input_channels_|. ScopedVector has a size equal to the number of
// interferer scenarios. // interferer scenarios.

View File

@ -0,0 +1,147 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
#include <math.h>
#include "testing/gtest/include/gtest/gtest.h"
namespace webrtc {
namespace {
// Arguments handed to NonlinearBeamformer::Initialize() by every test in this
// file: the chunk size (milliseconds) and the sample rate (Hz).
const int kChunkSizeMs = 10;
const int kSampleRateHz = 16000;
// Builds the SphericalPointf used by the tests for a given azimuth. The two
// remaining constructor arguments are always 0 and 1.
// NOTE(review): presumably those are elevation and radius -- confirm against
// the SphericalPointf declaration.
SphericalPointf AzimuthToSphericalPoint(float azimuth_radians) {
  const float second_component = 0.f;
  const float third_component = 1.f;
  return SphericalPointf(azimuth_radians, second_component, third_component);
}
// Checks that the beam of |bf| is centered on |target_azimuth_radians| and is
// NonlinearBeamformer::kHalfBeamWidthRadians wide on either side: the center
// and the points 0.001 rad inside each edge must be reported as in the beam,
// the points 0.001 rad outside each edge must not.
void Verify(NonlinearBeamformer* bf, float target_azimuth_radians) {
// Beam center.
EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint(target_azimuth_radians)));
// Just inside the lower edge.
EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint(
target_azimuth_radians - NonlinearBeamformer::kHalfBeamWidthRadians +
0.001f)));
// Just inside the upper edge.
EXPECT_TRUE(bf->IsInBeam(AzimuthToSphericalPoint(
target_azimuth_radians + NonlinearBeamformer::kHalfBeamWidthRadians -
0.001f)));
// Just outside the lower edge.
EXPECT_FALSE(bf->IsInBeam(AzimuthToSphericalPoint(
target_azimuth_radians - NonlinearBeamformer::kHalfBeamWidthRadians -
0.001f)));
// Just outside the upper edge.
EXPECT_FALSE(bf->IsInBeam(AzimuthToSphericalPoint(
target_azimuth_radians + NonlinearBeamformer::kHalfBeamWidthRadians +
0.001f)));
}
// Steers |bf| to |target_azimuth_radians| via AimAt() and then runs Verify()
// against that same azimuth.
void AimAndVerify(NonlinearBeamformer* bf, float target_azimuth_radians) {
  const SphericalPointf new_target =
      AzimuthToSphericalPoint(target_azimuth_radians);
  bf->AimAt(new_target);
  Verify(bf, target_azimuth_radians);
}
} // namespace
// Checks that the beam starts at the constructor's default direction and that
// it follows each subsequent AimAt() call.
TEST(NonlinearBeamformerTest, AimingModifiesBeam) {
  const float pi = static_cast<float>(M_PI);

  // Two-element array: points at -0.025 and +0.025 on the first coordinate.
  std::vector<Point> array_geometry;
  array_geometry.push_back(Point(-0.025f, 0.f, 0.f));
  array_geometry.push_back(Point(0.025f, 0.f, 0.f));

  NonlinearBeamformer bf(array_geometry);
  bf.Initialize(kChunkSizeMs, kSampleRateHz);

  // The default constructor parameter sets the target angle to PI / 2.
  Verify(&bf, pi / 2.f);

  // Azimuths the beam is steered to, in order.
  const float targets[] = {pi / 3.f, 3.f * pi / 4.f, pi / 6.f, pi};
  const size_t num_targets = sizeof(targets) / sizeof(targets[0]);
  for (size_t i = 0; i < num_targets; ++i) {
    AimAndVerify(&bf, targets[i]);
  }
}
// Checks |interf_angles_radians_| for four array geometries. Each scope first
// checks the angles with the default target, where the expectations are
// PI / 2 -/+ |away_radians_|, and then re-aims at |away_radians_| / 2 and
// checks again. After re-aiming, the second expected angle is always
// target + away (3 * away / 2). The first is target - away (-away / 2) in the
// "no ambiguity" cases and PI - away / 2 in the "ambiguity" cases.
// NOTE(review): PI - away / 2 looks like the -away / 2 angle mirrored to
// account for the array's front/back ambiguity -- confirm against
// InitInterfAngles().
// The test reads private members of NonlinearBeamformer; it is declared a
// friend through FRIEND_TEST_ALL_PREFIXES in the class definition.
TEST(NonlinearBeamformerTest, InterfAnglesTakeAmbiguityIntoAccount) {
{
// For linear arrays there is ambiguity.
std::vector<Point> array_geometry;
array_geometry.push_back(Point(-0.1f, 0.f, 0.f));
array_geometry.push_back(Point(0.f, 0.f, 0.f));
array_geometry.push_back(Point(0.2f, 0.f, 0.f));
NonlinearBeamformer bf(array_geometry);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
// Default target (PI / 2): two interferer angles, one on each side.
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
bf.interf_angles_radians_[0]);
EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_,
bf.interf_angles_radians_[1]);
// Re-aim so that target - away would be negative.
bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f));
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI - bf.away_radians_ / 2.f,
bf.interf_angles_radians_[0]);
EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]);
}
{
// For planar arrays with normal in the xy-plane there is ambiguity.
std::vector<Point> array_geometry;
array_geometry.push_back(Point(-0.1f, 0.f, 0.f));
array_geometry.push_back(Point(0.f, 0.f, 0.f));
array_geometry.push_back(Point(0.2f, 0.f, 0.f));
array_geometry.push_back(Point(0.1f, 0.f, 0.2f));
array_geometry.push_back(Point(0.f, 0.f, -0.1f));
NonlinearBeamformer bf(array_geometry);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
// Default target (PI / 2).
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
bf.interf_angles_radians_[0]);
EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_,
bf.interf_angles_radians_[1]);
// Same expectations as the linear case after re-aiming.
bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f));
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI - bf.away_radians_ / 2.f,
bf.interf_angles_radians_[0]);
EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]);
}
{
// For planar arrays with normal not in the xy-plane there is no ambiguity.
std::vector<Point> array_geometry;
array_geometry.push_back(Point(0.f, 0.f, 0.f));
array_geometry.push_back(Point(0.2f, 0.f, 0.f));
array_geometry.push_back(Point(0.f, 0.1f, -0.2f));
NonlinearBeamformer bf(array_geometry);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
// Default target (PI / 2).
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
bf.interf_angles_radians_[0]);
EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_,
bf.interf_angles_radians_[1]);
// Without ambiguity the first angle stays at target - away, i.e. negative.
bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f));
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(-bf.away_radians_ / 2.f, bf.interf_angles_radians_[0]);
EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]);
}
{
// For arrays which are not linear or planar there is no ambiguity.
std::vector<Point> array_geometry;
array_geometry.push_back(Point(0.f, 0.f, 0.f));
array_geometry.push_back(Point(0.1f, 0.f, 0.f));
array_geometry.push_back(Point(0.f, 0.2f, 0.f));
array_geometry.push_back(Point(0.f, 0.f, 0.3f));
NonlinearBeamformer bf(array_geometry);
bf.Initialize(kChunkSizeMs, kSampleRateHz);
// Default target (PI / 2).
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(M_PI / 2.f - bf.away_radians_,
bf.interf_angles_radians_[0]);
EXPECT_FLOAT_EQ(M_PI / 2.f + bf.away_radians_,
bf.interf_angles_radians_[1]);
// Same expectations as the previous no-ambiguity case after re-aiming.
bf.AimAt(AzimuthToSphericalPoint(bf.away_radians_ / 2.f));
EXPECT_EQ(2u, bf.interf_angles_radians_.size());
EXPECT_FLOAT_EQ(-bf.away_radians_ / 2.f, bf.interf_angles_radians_[0]);
EXPECT_FLOAT_EQ(3.f * bf.away_radians_ / 2.f, bf.interf_angles_radians_[1]);
}
}
} // namespace webrtc

View File

@ -11,6 +11,10 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include <math.h>
#include <stddef.h> // size_t #include <stddef.h> // size_t
#include <stdio.h> // FILE #include <stdio.h> // FILE
#include <vector> #include <vector>
@ -109,12 +113,23 @@ struct ExperimentalNs {
struct Beamforming { struct Beamforming {
Beamforming() Beamforming()
: enabled(false), : enabled(false),
array_geometry() {} array_geometry(),
target_direction(
SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f)) {}
Beamforming(bool enabled, const std::vector<Point>& array_geometry) Beamforming(bool enabled, const std::vector<Point>& array_geometry)
: Beamforming(enabled,
array_geometry,
SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f)) {
}
Beamforming(bool enabled,
const std::vector<Point>& array_geometry,
SphericalPointf target_direction)
: enabled(enabled), : enabled(enabled),
array_geometry(array_geometry) {} array_geometry(array_geometry),
target_direction(target_direction) {}
const bool enabled; const bool enabled;
const std::vector<Point> array_geometry; const std::vector<Point> array_geometry;
const SphericalPointf target_direction;
}; };
// Use to enable intelligibility enhancer in audio processing. Must be provided // Use to enable intelligibility enhancer in audio processing. Must be provided

View File

@ -37,6 +37,7 @@ DEFINE_string(mic_positions, "",
"Space delimited cartesian coordinates of microphones in meters. " "Space delimited cartesian coordinates of microphones in meters. "
"The coordinates of each point are contiguous. " "The coordinates of each point are contiguous. "
"For a two element array: \"x1 y1 z1 x2 y2 z2\""); "For a two element array: \"x1 y1 z1 x2 y2 z2\"");
// Azimuth of the beamforming target. The value is given in degrees; main()
// converts it with DegreesToRadians() when building the target direction.
DEFINE_double(target_angle_degrees, 90,
              "The azimuth of the target in degrees");
DEFINE_bool(aec, false, "Enable echo cancellation."); DEFINE_bool(aec, false, "Enable echo cancellation.");
DEFINE_bool(agc, false, "Enable automatic gain control."); DEFINE_bool(agc, false, "Enable automatic gain control.");
@ -107,7 +108,10 @@ int main(int argc, char* argv[]) {
ParseArrayGeometry(FLAGS_mic_positions, num_mics); ParseArrayGeometry(FLAGS_mic_positions, num_mics);
RTC_CHECK_EQ(array_geometry.size(), num_mics); RTC_CHECK_EQ(array_geometry.size(), num_mics);
config.Set<Beamforming>(new Beamforming(true, array_geometry)); config.Set<Beamforming>(new Beamforming(
true, array_geometry,
SphericalPointf(DegreesToRadians(FLAGS_target_angle_degrees), 0.f,
1.f)));
} }
rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config)); rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));

View File

@ -171,6 +171,7 @@
'audio_processing/beamformer/covariance_matrix_generator_unittest.cc', 'audio_processing/beamformer/covariance_matrix_generator_unittest.cc',
'audio_processing/beamformer/matrix_unittest.cc', 'audio_processing/beamformer/matrix_unittest.cc',
'audio_processing/beamformer/mock_nonlinear_beamformer.h', 'audio_processing/beamformer/mock_nonlinear_beamformer.h',
'audio_processing/beamformer/nonlinear_beamformer_unittest.cc',
'audio_processing/echo_cancellation_impl_unittest.cc', 'audio_processing/echo_cancellation_impl_unittest.cc',
'audio_processing/intelligibility/intelligibility_enhancer_unittest.cc', 'audio_processing/intelligibility/intelligibility_enhancer_unittest.cc',
'audio_processing/intelligibility/intelligibility_utils_unittest.cc', 'audio_processing/intelligibility/intelligibility_utils_unittest.cc',