Allow more than 2 input channels in AudioProcessing.

The number of output channels is constrained to be equal to either 1 or the
number of input channels.
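
A minimal sketch of the new constraint, using the StreamConfig and ProcessingConfig
types introduced in this change (|apm| is an assumed AudioProcessing instance, e.g.
obtained from AudioProcessing::Create(); webrtc:: qualifiers and error handling are
omitted):

  ProcessingConfig config;
  config.input_stream() = StreamConfig(48000, 2);   // Stereo capture.
  config.output_stream() = StreamConfig(48000, 1);  // Mono output: allowed.
  config.reverse_stream() = StreamConfig(48000, 2);
  apm->Initialize(config);

  // Requesting more output channels than input channels (e.g. 1 in, 2 out)
  // is expected to be rejected by Initialize().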

R=aluebs@webrtc.org, andrew@webrtc.org, pbos@webrtc.org

Review URL: https://codereview.webrtc.org/1226093007 .

Cr-Commit-Position: refs/heads/master@{#9619}
Michael Graczyk
2015-07-22 21:06:11 -07:00
parent 0b6a204b21
commit c204754b7a
13 changed files with 711 additions and 374 deletions

@@ -29,6 +29,9 @@ class AudioFrame;
template<typename T>
class Beamformer;
class StreamConfig;
class ProcessingConfig;
class EchoCancellation;
class EchoControlMobile;
class GainControl;
@@ -84,7 +87,7 @@ static const int kAgcStartupMinVolume = 0;
#endif // defined(WEBRTC_CHROMIUM_BUILD)
struct ExperimentalAgc {
ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
ExperimentalAgc(bool enabled)
explicit ExperimentalAgc(bool enabled)
: enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
ExperimentalAgc(bool enabled, int startup_min_volume)
: enabled(enabled), startup_min_volume(startup_min_volume) {}
@@ -199,6 +202,7 @@ static const int kAudioProcMaxNativeSampleRateHz = 32000;
//
class AudioProcessing {
public:
// TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
enum ChannelLayout {
kMono,
// Left, right.
@@ -236,10 +240,17 @@ class AudioProcessing {
// The int16 interfaces require:
// - only |NativeRate|s be used
// - that the input, output and reverse rates must match
// - that |output_layout| matches |input_layout|
// - that |processing_config.output_stream()| matches
// |processing_config.input_stream()|.
//
// The float interfaces accept arbitrary rates and support differing input
// and output layouts, but the output may only remove channels, not add.
// The float interfaces accept arbitrary rates and support differing input and
// output layouts, but the output must have either one channel or the same
// number of channels as the input.
virtual int Initialize(const ProcessingConfig& processing_config) = 0;
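As a sketch of the requirements above, a configuration whose output stream mirrors
its input stream at a native rate keeps both the float and the int16 paths usable
(|apm| is an assumed AudioProcessing pointer; kNoError is the class's existing
success code; webrtc:: qualifiers omitted):

  ProcessingConfig processing_config;
  processing_config.input_stream() = StreamConfig(32000, 2);   // Native rate.
  processing_config.output_stream() = StreamConfig(32000, 2);  // Matches input.
  processing_config.reverse_stream() = StreamConfig(32000, 1);
  if (apm->Initialize(processing_config) != AudioProcessing::kNoError) {
    // Handle the initialization error.
  }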
// Initialize with unpacked parameters. See Initialize() above for details.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
@@ -292,8 +303,10 @@ class AudioProcessing {
// |input_layout|. At output, the channels will be arranged according to
// |output_layout| at |output_sample_rate_hz| in |dest|.
//
// The output layout may only remove channels, not add. |src| and |dest|
// may use the same memory, if desired.
// The output layout must have one channel or as many channels as the input.
// |src| and |dest| may use the same memory, if desired.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int ProcessStream(const float* const* src,
int samples_per_channel,
int input_sample_rate_hz,
@@ -302,6 +315,18 @@
ChannelLayout output_layout,
float* const* dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |src| points to a channel buffer, arranged according to |input_stream|. At
// output, the channels will be arranged according to |output_stream| in
// |dest|.
//
// The output must have one channel or as many channels as the input. |src|
// and |dest| may use the same memory, if desired.
virtual int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
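The sketch below shows one possible use of this new overload: one 10 ms chunk of
48 kHz stereo capture audio is processed down to mono. The helper name, the include
path and the buffer setup are assumptions for illustration, not part of this change:

#include <vector>

#include "webrtc/modules/audio_processing/include/audio_processing.h"

// Hypothetical helper: processes one 10 ms stereo capture chunk down to mono.
int ProcessOneCaptureChunk(webrtc::AudioProcessing* apm) {
  const webrtc::StreamConfig input_config(48000, 2);   // Stereo in.
  const webrtc::StreamConfig output_config(48000, 1);  // Mono out.

  // One buffer per channel; num_frames() is 480 samples (10 ms) at 48 kHz.
  // In real code these would hold the deinterleaved capture samples.
  std::vector<float> left(input_config.num_frames());
  std::vector<float> right(input_config.num_frames());
  std::vector<float> mono(output_config.num_frames());

  const float* src[] = {left.data(), right.data()};
  float* dest[] = {mono.data()};
  return apm->ProcessStream(src, input_config, output_config, dest);
}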
// Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
// will not be modified. On the client-side, this is the far-end (or to be
// rendered) audio.
@@ -321,11 +346,18 @@
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of |data| points to a channel buffer, arranged according to |layout|.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
int sample_rate_hz,
ChannelLayout layout) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |data| points to a channel buffer, arranged according to |reverse_config|.
virtual int AnalyzeReverseStream(const float* const* data,
const StreamConfig& reverse_config) = 0;
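A corresponding sketch for the far-end (render) side, reusing the assumed |apm|
from the capture example above (webrtc:: qualifiers omitted):

  const StreamConfig reverse_config(48000, 1);  // Mono render stream.
  std::vector<float> render(reverse_config.num_frames());
  const float* render_channels[] = {render.data()};
  // Expected to return kNoError once |apm| has been initialized.
  int err = apm->AnalyzeReverseStream(render_channels, reverse_config);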
// This must be called if and only if echo processing is enabled.
//
// Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
@@ -432,6 +464,102 @@
static const int kChunkSizeMs = 10;
};
class StreamConfig {
public:
// sample_rate_hz: The sampling rate of the stream.
//
// num_channels: The number of audio channels in the stream, excluding the
// keyboard channel if it is present. When passing a
// StreamConfig with an array of arrays T*[N],
//
// N == {num_channels + 1 if has_keyboard
// {num_channels if !has_keyboard
//
// has_keyboard: True if the stream has a keyboard channel. When has_keyboard
// is true, the last channel in any corresponding list of
// channels is the keyboard channel.
StreamConfig(int sample_rate_hz = 0,
int num_channels = 0,
bool has_keyboard = false)
: sample_rate_hz_(sample_rate_hz),
num_channels_(num_channels),
has_keyboard_(has_keyboard),
num_frames_(calculate_frames(sample_rate_hz)) {}
void set_sample_rate_hz(int value) {
sample_rate_hz_ = value;
num_frames_ = calculate_frames(value);
}
void set_num_channels(int value) { num_channels_ = value; }
void set_has_keyboard(bool value) { has_keyboard_ = value; }
int sample_rate_hz() const { return sample_rate_hz_; }
// The number of channels in the stream, not including the keyboard channel if
// present.
int num_channels() const { return num_channels_; }
bool has_keyboard() const { return has_keyboard_; }
int num_frames() const { return num_frames_; }
bool operator==(const StreamConfig& other) const {
return sample_rate_hz_ == other.sample_rate_hz_ &&
num_channels_ == other.num_channels_ &&
has_keyboard_ == other.has_keyboard_;
}
bool operator!=(const StreamConfig& other) const { return !(*this == other); }
private:
static int calculate_frames(int sample_rate_hz) {
return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
}
int sample_rate_hz_;
int num_channels_;
bool has_keyboard_;
int num_frames_;
};
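A short sketch of how the keyboard flag affects buffer layout on the client side;
the allocation shown is illustrative and assumes <vector> (webrtc:: qualifiers
omitted):

  // 48 kHz stereo capture with a trailing keyboard channel.
  StreamConfig stream(48000, 2, true /* has_keyboard */);

  // num_channels() == 2 (keyboard excluded) and num_frames() == 480 (10 ms),
  // so a matching T*[N] array needs num_channels() + 1 == 3 channel buffers,
  // with the keyboard channel last.
  std::vector<std::vector<float>> channels(
      stream.num_channels() + 1, std::vector<float>(stream.num_frames()));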
class ProcessingConfig {
public:
enum StreamName {
kInputStream,
kOutputStream,
kReverseStream,
kNumStreamNames,
};
const StreamConfig& input_stream() const {
return streams[StreamName::kInputStream];
}
const StreamConfig& output_stream() const {
return streams[StreamName::kOutputStream];
}
const StreamConfig& reverse_stream() const {
return streams[StreamName::kReverseStream];
}
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; }
bool operator==(const ProcessingConfig& other) const {
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
if (this->streams[i] != other.streams[i]) {
return false;
}
}
return true;
}
bool operator!=(const ProcessingConfig& other) const {
return !(*this == other);
}
StreamConfig streams[StreamName::kNumStreamNames];
};
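One plausible client-side use of the equality operators is to reinitialize only
when the configuration actually changes; the helper below is purely illustrative
and not part of the API (it assumes the include used in the earlier sketch):

// Hypothetical helper: re-run Initialize() only when |desired| differs from the
// currently applied configuration.
int MaybeReinitialize(webrtc::AudioProcessing* apm,
                      webrtc::ProcessingConfig* applied,
                      const webrtc::ProcessingConfig& desired) {
  if (desired == *applied)
    return webrtc::AudioProcessing::kNoError;
  *applied = desired;
  return apm->Initialize(desired);
}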
// The acoustic echo cancellation (AEC) component provides better performance
// than AECM but also requires more processing power and is dependent on delay
// stability and reporting accuracy. As such it is well-suited and recommended