Allow more than 2 input channels in AudioProcessing.
The number of output channels is constrained to be equal to either 1 or the
number of input channels.

R=aluebs@webrtc.org, andrew@webrtc.org, pbos@webrtc.org

Review URL: https://codereview.webrtc.org/1226093007 .

Cr-Commit-Position: refs/heads/master@{#9619}
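For orientation, this is roughly how a caller exercises the new StreamConfig-based float interface once this change lands. The rates, channel counts, and the apm/src/dest names below are illustrative assumptions, not code from this commit:

    // Downmix four input channels to mono, which the new constraint permits.
    webrtc::StreamConfig input_config(48000, 4);   // 4 channels, no keyboard.
    webrtc::StreamConfig output_config(48000, 1);  // Mono output is always valid.
    // src[0..3] and dest[0] each hold input_config.num_frames() float samples
    // (10 ms, i.e. 480 frames at 48 kHz) in the range [-1, 1].
    int err = apm->ProcessStream(src, input_config, output_config, dest);
    // Requesting, say, 4 in / 3 out instead would return
    // kBadNumberChannelsError: the output must have either one channel or
    // exactly as many channels as the input.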
@@ -23,39 +23,13 @@ const int kSamplesPer16kHzChannel = 160;
 const int kSamplesPer32kHzChannel = 320;
 const int kSamplesPer48kHzChannel = 480;
 
-bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
-  switch (layout) {
-    case AudioProcessing::kMono:
-    case AudioProcessing::kStereo:
-      return false;
-    case AudioProcessing::kMonoAndKeyboard:
-    case AudioProcessing::kStereoAndKeyboard:
-      return true;
-  }
-  assert(false);
-  return false;
-}
-
-int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
-  switch (layout) {
-    case AudioProcessing::kMono:
-    case AudioProcessing::kStereo:
-      assert(false);
-      return -1;
-    case AudioProcessing::kMonoAndKeyboard:
-      return 1;
-    case AudioProcessing::kStereoAndKeyboard:
-      return 2;
-  }
-  assert(false);
-  return -1;
-}
-
-template <typename T>
-void StereoToMono(const T* left, const T* right, T* out,
-                  int num_frames) {
-  for (int i = 0; i < num_frames; ++i)
-    out[i] = (left[i] + right[i]) / 2;
+int KeyboardChannelIndex(const StreamConfig& stream_config) {
+  if (!stream_config.has_keyboard()) {
+    assert(false);
+    return -1;
+  }
+
+  return stream_config.num_channels();
 }
 
 int NumBandsFromSamplesPerChannel(int num_frames) {
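The replacement KeyboardChannelIndex encodes the convention, documented later in this diff on StreamConfig, that the keyboard channel is the last pointer in the channel array. A small hedged illustration (the left/right/keyboard buffers are assumed to exist):

    // StreamConfig(16000, 2, true /* has_keyboard */): callers pass T*[3],
    // i.e. num_channels() + 1 pointers, with the keyboard channel last.
    float* channels[3] = {left, right, keyboard};
    // KeyboardChannelIndex(config) returns config.num_channels() == 2,
    // the slot just past the regular audio channels.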
@@ -91,7 +65,7 @@ AudioBuffer::AudioBuffer(int input_num_frames,
   assert(input_num_frames_ > 0);
   assert(proc_num_frames_ > 0);
   assert(output_num_frames_ > 0);
-  assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
+  assert(num_input_channels_ > 0);
   assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
 
   if (input_num_frames_ != proc_num_frames_ ||
@@ -130,29 +104,28 @@ AudioBuffer::AudioBuffer(int input_num_frames,
 AudioBuffer::~AudioBuffer() {}
 
 void AudioBuffer::CopyFrom(const float* const* data,
-                           int num_frames,
-                           AudioProcessing::ChannelLayout layout) {
-  assert(num_frames == input_num_frames_);
-  assert(ChannelsFromLayout(layout) == num_input_channels_);
+                           const StreamConfig& stream_config) {
+  assert(stream_config.num_frames() == input_num_frames_);
+  assert(stream_config.num_channels() == num_input_channels_);
   InitForNewData();
   // Initialized lazily because there's a different condition in
   // DeinterleaveFrom.
-  if ((num_input_channels_ == 2 && num_proc_channels_ == 1) && !input_buffer_) {
+  const bool need_to_downmix =
+      num_input_channels_ > 1 && num_proc_channels_ == 1;
+  if (need_to_downmix && !input_buffer_) {
     input_buffer_.reset(
         new IFChannelBuffer(input_num_frames_, num_proc_channels_));
   }
 
-  if (HasKeyboardChannel(layout)) {
-    keyboard_data_ = data[KeyboardChannelIndex(layout)];
+  if (stream_config.has_keyboard()) {
+    keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
   }
 
   // Downmix.
   const float* const* data_ptr = data;
-  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
-    StereoToMono(data[0],
-                 data[1],
-                 input_buffer_->fbuf()->channels()[0],
-                 input_num_frames_);
+  if (need_to_downmix) {
+    DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
+                                input_buffer_->fbuf()->channels()[0]);
     data_ptr = input_buffer_->fbuf_const()->channels();
   }
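StereoToMono's fixed two-channel average gives way to DownmixToMono, which handles any channel count. The template below is an illustrative sketch consistent with the two call sites in this file, not the actual webrtc utility:

    // Sketch: average num_channels deinterleaved channels into out.
    // IntermediateT is a wider accumulator (e.g. int32_t for int16_t samples)
    // so the running sum cannot overflow before the division.
    template <typename T, typename IntermediateT>
    void DownmixToMono(const T* const* input_channels,
                       int num_frames,
                       int num_channels,
                       T* out) {
      for (int i = 0; i < num_frames; ++i) {
        IntermediateT value = input_channels[0][i];
        for (int j = 1; j < num_channels; ++j)
          value += input_channels[j][i];
        out[i] = value / num_channels;
      }
    }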
@@ -175,11 +148,10 @@ void AudioBuffer::CopyFrom(const float* const* data,
   }
 }
 
-void AudioBuffer::CopyTo(int num_frames,
-                         AudioProcessing::ChannelLayout layout,
+void AudioBuffer::CopyTo(const StreamConfig& stream_config,
                          float* const* data) {
-  assert(num_frames == output_num_frames_);
-  assert(ChannelsFromLayout(layout) == num_channels_);
+  assert(stream_config.num_frames() == output_num_frames_);
+  assert(stream_config.num_channels() == num_channels_);
 
   // Convert to the float range.
   float* const* data_ptr = data;
@@ -327,9 +299,6 @@ const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
 }
 
 const int16_t* AudioBuffer::mixed_low_pass_data() {
-  // Currently only mixing stereo to mono is supported.
-  assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
-
   if (num_proc_channels_ == 1) {
     return split_bands_const(0)[kBand0To8kHz];
   }
@@ -339,10 +308,10 @@ const int16_t* AudioBuffer::mixed_low_pass_data() {
       mixed_low_pass_channels_.reset(
           new ChannelBuffer<int16_t>(num_split_frames_, 1));
     }
-    StereoToMono(split_bands_const(0)[kBand0To8kHz],
-                 split_bands_const(1)[kBand0To8kHz],
-                 mixed_low_pass_channels_->channels()[0],
-                 num_split_frames_);
+
+    DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
+                                    num_split_frames_, num_channels_,
+                                    mixed_low_pass_channels_->channels()[0]);
     mixed_low_pass_valid_ = true;
   }
   return mixed_low_pass_channels_->channels()[0];
@@ -411,11 +380,10 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   } else {
     deinterleaved = input_buffer_->ibuf()->channels();
   }
-  if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
-    // Downmix directly; no explicit deinterleaving needed.
-    for (int i = 0; i < input_num_frames_; ++i) {
-      deinterleaved[0][i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
-    }
+  if (num_proc_channels_ == 1) {
+    // Downmix and deinterleave simultaneously.
+    DownmixInterleavedToMono(frame->data_, input_num_frames_,
+                             num_input_channels_, deinterleaved[0]);
   } else {
     assert(num_proc_channels_ == num_input_channels_);
     Deinterleave(frame->data_,
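DownmixInterleavedToMono folds an interleaved frame buffer straight into one mono channel, generalizing the old hard-coded stereo loop. Again a hedged sketch matching the call above rather than the real utility:

    // Sketch: interleaved holds num_frames * num_channels samples,
    // frame-major; the mono result lands in deinterleaved_out.
    void DownmixInterleavedToMono(const int16_t* interleaved,
                                  int num_frames,
                                  int num_channels,
                                  int16_t* deinterleaved_out) {
      for (int i = 0; i < num_frames; ++i) {
        int32_t sum = 0;  // Wider accumulator avoids int16_t overflow.
        for (int j = 0; j < num_channels; ++j)
          sum += interleaved[i * num_channels + j];
        deinterleaved_out[i] = static_cast<int16_t>(sum / num_channels);
      }
    }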
@@ -112,12 +112,8 @@ class AudioBuffer {
   void InterleaveTo(AudioFrame* frame, bool data_changed) const;
 
   // Use for float deinterleaved data.
-  void CopyFrom(const float* const* data,
-                int num_frames,
-                AudioProcessing::ChannelLayout layout);
-  void CopyTo(int num_frames,
-              AudioProcessing::ChannelLayout layout,
-              float* const* data);
+  void CopyFrom(const float* const* data, const StreamConfig& stream_config);
+  void CopyTo(const StreamConfig& stream_config, float* const* data);
   void CopyLowPassToReference();
 
   // Splits the signal into different bands.
@@ -11,6 +11,7 @@
 #include "webrtc/modules/audio_processing/audio_processing_impl.h"
 
 #include <assert.h>
+#include <algorithm>
 
 #include "webrtc/base/checks.h"
 #include "webrtc/base/platform_file.h"
@@ -48,15 +49,32 @@ extern "C" {
 #endif
 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 
-#define RETURN_ON_ERR(expr)  \
-  do {                       \
-    int err = (expr);        \
-    if (err != kNoError) {   \
-      return err;            \
-    }                        \
+#define RETURN_ON_ERR(expr) \
+  do {                      \
+    int err = (expr);       \
+    if (err != kNoError) {  \
+      return err;           \
+    }                       \
   } while (0)
 
 namespace webrtc {
+namespace {
+
+static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+    case AudioProcessing::kStereo:
+      return false;
+    case AudioProcessing::kMonoAndKeyboard:
+    case AudioProcessing::kStereoAndKeyboard:
+      return true;
+  }
+
+  assert(false);
+  return false;
+}
+
+}  // namespace
 
 // Throughout webrtc, it's assumed that success is represented by zero.
 static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
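Only the backslash alignment of RETURN_ON_ERR changes above; the do { ... } while (0) wrapper is the usual trick that lets a multi-statement macro behave as a single statement. A hypothetical Step()/HandleFailure() pair shows why it matters:

    if (ok)
      RETURN_ON_ERR(Step());  // One statement; the trailing ';' terminates it.
    else
      HandleFailure();        // With a bare {...} block instead of while (0),
                              // the stray ';' would break this else.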
@@ -75,9 +93,7 @@ static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
 class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
  public:
   explicit GainControlForNewAgc(GainControlImpl* gain_control)
-      : real_gain_control_(gain_control),
-        volume_(0) {
-  }
+      : real_gain_control_(gain_control), volume_(0) {}
 
   // GainControl implementation.
   int Enable(bool enable) override {
@@ -166,10 +182,10 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
       debug_file_(FileWrapper::Create()),
       event_msg_(new audioproc::Event()),
 #endif
-      fwd_in_format_(kSampleRate16kHz, 1),
+      api_format_({{{kSampleRate16kHz, 1, false},
+                    {kSampleRate16kHz, 1, false},
+                    {kSampleRate16kHz, 1, false}}}),
       fwd_proc_format_(kSampleRate16kHz),
-      fwd_out_format_(kSampleRate16kHz, 1),
-      rev_in_format_(kSampleRate16kHz, 1),
       rev_proc_format_(kSampleRate16kHz, 1),
       split_rate_(kSampleRate16kHz),
       stream_delay_ms_(0),
@@ -253,12 +269,11 @@ int AudioProcessingImpl::Initialize() {
 
 int AudioProcessingImpl::set_sample_rate_hz(int rate) {
   CriticalSectionScoped crit_scoped(crit_);
-  return InitializeLocked(rate,
-                          rate,
-                          rev_in_format_.rate(),
-                          fwd_in_format_.num_channels(),
-                          fwd_out_format_.num_channels(),
-                          rev_in_format_.num_channels());
+
+  ProcessingConfig processing_config = api_format_;
+  processing_config.input_stream().set_sample_rate_hz(rate);
+  processing_config.output_stream().set_sample_rate_hz(rate);
+  return InitializeLocked(processing_config);
 }
 
 int AudioProcessingImpl::Initialize(int input_sample_rate_hz,
@@ -267,29 +282,39 @@ int AudioProcessingImpl::Initialize(int input_sample_rate_hz,
                                     ChannelLayout input_layout,
                                     ChannelLayout output_layout,
                                     ChannelLayout reverse_layout) {
+  const ProcessingConfig processing_config = {
+      {{input_sample_rate_hz, ChannelsFromLayout(input_layout),
+        LayoutHasKeyboard(input_layout)},
+       {output_sample_rate_hz, ChannelsFromLayout(output_layout),
+        LayoutHasKeyboard(output_layout)},
+       {reverse_sample_rate_hz, ChannelsFromLayout(reverse_layout),
+        LayoutHasKeyboard(reverse_layout)}}};
+
+  return Initialize(processing_config);
+}
+
+int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
   CriticalSectionScoped crit_scoped(crit_);
-  return InitializeLocked(input_sample_rate_hz,
-                          output_sample_rate_hz,
-                          reverse_sample_rate_hz,
-                          ChannelsFromLayout(input_layout),
-                          ChannelsFromLayout(output_layout),
-                          ChannelsFromLayout(reverse_layout));
+  return InitializeLocked(processing_config);
 }
 
 int AudioProcessingImpl::InitializeLocked() {
-  const int fwd_audio_buffer_channels = beamformer_enabled_ ?
-                                        fwd_in_format_.num_channels() :
-                                        fwd_out_format_.num_channels();
-  render_audio_.reset(new AudioBuffer(rev_in_format_.samples_per_channel(),
-                                      rev_in_format_.num_channels(),
-                                      rev_proc_format_.samples_per_channel(),
-                                      rev_proc_format_.num_channels(),
-                                      rev_proc_format_.samples_per_channel()));
-  capture_audio_.reset(new AudioBuffer(fwd_in_format_.samples_per_channel(),
-                                       fwd_in_format_.num_channels(),
-                                       fwd_proc_format_.samples_per_channel(),
-                                       fwd_audio_buffer_channels,
-                                       fwd_out_format_.samples_per_channel()));
+  const int fwd_audio_buffer_channels =
+      beamformer_enabled_ ? api_format_.input_stream().num_channels()
+                          : api_format_.output_stream().num_channels();
+  if (api_format_.reverse_stream().num_channels() > 0) {
+    render_audio_.reset(new AudioBuffer(
+        api_format_.reverse_stream().num_frames(),
+        api_format_.reverse_stream().num_channels(),
+        rev_proc_format_.num_frames(), rev_proc_format_.num_channels(),
+        rev_proc_format_.num_frames()));
+  } else {
+    render_audio_.reset(nullptr);
+  }
+  capture_audio_.reset(new AudioBuffer(
+      api_format_.input_stream().num_frames(),
+      api_format_.input_stream().num_channels(), fwd_proc_format_.num_frames(),
+      fwd_audio_buffer_channels, api_format_.output_stream().num_frames()));
 
   // Initialize all components.
   for (auto item : component_list_) {
@@ -317,38 +342,38 @@ int AudioProcessingImpl::InitializeLocked() {
   return kNoError;
 }
 
-int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
-                                          int output_sample_rate_hz,
-                                          int reverse_sample_rate_hz,
-                                          int num_input_channels,
-                                          int num_output_channels,
-                                          int num_reverse_channels) {
-  if (input_sample_rate_hz <= 0 ||
-      output_sample_rate_hz <= 0 ||
-      reverse_sample_rate_hz <= 0) {
-    return kBadSampleRateError;
-  }
-  if (num_output_channels > num_input_channels) {
-    return kBadNumberChannelsError;
-  }
-  // Only mono and stereo supported currently.
-  if (num_input_channels > 2 || num_input_channels < 1 ||
-      num_output_channels > 2 || num_output_channels < 1 ||
-      num_reverse_channels > 2 || num_reverse_channels < 1) {
-    return kBadNumberChannelsError;
-  }
-  if (beamformer_enabled_ &&
-      (static_cast<size_t>(num_input_channels) != array_geometry_.size() ||
-       num_output_channels > 1)) {
-    return kBadNumberChannelsError;
-  }
-
-  fwd_in_format_.set(input_sample_rate_hz, num_input_channels);
-  fwd_out_format_.set(output_sample_rate_hz, num_output_channels);
-  rev_in_format_.set(reverse_sample_rate_hz, num_reverse_channels);
+int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
+  for (const auto& stream : config.streams) {
+    if (stream.num_channels() < 0) {
+      return kBadNumberChannelsError;
+    }
+    if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) {
+      return kBadSampleRateError;
+    }
+  }
+
+  const int num_in_channels = config.input_stream().num_channels();
+  const int num_out_channels = config.output_stream().num_channels();
+
+  // Need at least one input channel.
+  // Need either one output channel or as many outputs as there are inputs.
+  if (num_in_channels == 0 ||
+      !(num_out_channels == 1 || num_out_channels == num_in_channels)) {
+    return kBadNumberChannelsError;
+  }
+
+  if (beamformer_enabled_ &&
+      (static_cast<size_t>(num_in_channels) != array_geometry_.size() ||
+       num_out_channels > 1)) {
+    return kBadNumberChannelsError;
+  }
+
+  api_format_ = config;
 
   // We process at the closest native rate >= min(input rate, output rate)...
-  int min_proc_rate = std::min(fwd_in_format_.rate(), fwd_out_format_.rate());
+  const int min_proc_rate =
+      std::min(api_format_.input_stream().sample_rate_hz(),
+               api_format_.output_stream().sample_rate_hz());
   int fwd_proc_rate;
   if (supports_48kHz_ && min_proc_rate > kSampleRate32kHz) {
     fwd_proc_rate = kSampleRate48kHz;
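The new check collapses the old mono/stereo whitelist into a single rule. Restated as a standalone predicate for clarity (not code from the commit):

    bool ValidForwardChannels(int num_in, int num_out) {
      return num_in >= 1 && (num_out == 1 || num_out == num_in);
    }
    // ValidForwardChannels(4, 1) -> true   (downmix to mono)
    // ValidForwardChannels(4, 4) -> true   (pass-through channel count)
    // ValidForwardChannels(4, 2) -> false  (kBadNumberChannelsError)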
@@ -364,15 +389,15 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
     fwd_proc_rate = kSampleRate16kHz;
   }
 
-  fwd_proc_format_.set(fwd_proc_rate);
+  fwd_proc_format_ = StreamConfig(fwd_proc_rate);
 
   // We normally process the reverse stream at 16 kHz. Unless...
   int rev_proc_rate = kSampleRate16kHz;
-  if (fwd_proc_format_.rate() == kSampleRate8kHz) {
+  if (fwd_proc_format_.sample_rate_hz() == kSampleRate8kHz) {
     // ...the forward stream is at 8 kHz.
     rev_proc_rate = kSampleRate8kHz;
   } else {
-    if (rev_in_format_.rate() == kSampleRate32kHz) {
+    if (api_format_.reverse_stream().sample_rate_hz() == kSampleRate32kHz) {
       // ...or the input is at 32 kHz, in which case we use the splitting
       // filter rather than the resampler.
       rev_proc_rate = kSampleRate32kHz;
@@ -381,13 +406,13 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
 
   // Always downmix the reverse stream to mono for analysis. This has been
   // demonstrated to work well for AEC in most practical scenarios.
-  rev_proc_format_.set(rev_proc_rate, 1);
+  rev_proc_format_ = StreamConfig(rev_proc_rate, 1);
 
-  if (fwd_proc_format_.rate() == kSampleRate32kHz ||
-      fwd_proc_format_.rate() == kSampleRate48kHz) {
+  if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz ||
+      fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) {
     split_rate_ = kSampleRate16kHz;
   } else {
-    split_rate_ = fwd_proc_format_.rate();
+    split_rate_ = fwd_proc_format_.sample_rate_hz();
   }
 
   return InitializeLocked();
@@ -395,26 +420,12 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
 
 // Calls InitializeLocked() if any of the audio parameters have changed from
 // their current values.
-int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz,
-                                               int output_sample_rate_hz,
-                                               int reverse_sample_rate_hz,
-                                               int num_input_channels,
-                                               int num_output_channels,
-                                               int num_reverse_channels) {
-  if (input_sample_rate_hz == fwd_in_format_.rate() &&
-      output_sample_rate_hz == fwd_out_format_.rate() &&
-      reverse_sample_rate_hz == rev_in_format_.rate() &&
-      num_input_channels == fwd_in_format_.num_channels() &&
-      num_output_channels == fwd_out_format_.num_channels() &&
-      num_reverse_channels == rev_in_format_.num_channels()) {
+int AudioProcessingImpl::MaybeInitializeLocked(
+    const ProcessingConfig& processing_config) {
+  if (processing_config == api_format_) {
     return kNoError;
   }
-  return InitializeLocked(input_sample_rate_hz,
-                          output_sample_rate_hz,
-                          reverse_sample_rate_hz,
-                          num_input_channels,
-                          num_output_channels,
-                          num_reverse_channels);
+  return InitializeLocked(processing_config);
 }
 
 void AudioProcessingImpl::SetExtraOptions(const Config& config) {
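Reinitialization is now keyed off ProcessingConfig::operator==, which compares the rate, channel count, and keyboard flag of every stream, so repeated calls with an unchanged format skip InitializeLocked() entirely. A hedged sketch of the effect:

    ProcessingConfig candidate = api_format_;            // current format
    candidate.input_stream().set_sample_rate_hz(32000);  // one field changes
    // candidate != api_format_, so MaybeInitializeLocked(candidate) performs
    // a full InitializeLocked(candidate); an identical candidate would
    // return kNoError immediately.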
@@ -431,16 +442,16 @@ void AudioProcessingImpl::SetExtraOptions(const Config& config) {
 
 int AudioProcessingImpl::input_sample_rate_hz() const {
   CriticalSectionScoped crit_scoped(crit_);
-  return fwd_in_format_.rate();
+  return api_format_.input_stream().sample_rate_hz();
 }
 
 int AudioProcessingImpl::sample_rate_hz() const {
   CriticalSectionScoped crit_scoped(crit_);
-  return fwd_in_format_.rate();
+  return api_format_.input_stream().sample_rate_hz();
 }
 
 int AudioProcessingImpl::proc_sample_rate_hz() const {
-  return fwd_proc_format_.rate();
+  return fwd_proc_format_.sample_rate_hz();
 }
 
 int AudioProcessingImpl::proc_split_sample_rate_hz() const {
@@ -452,11 +463,11 @@ int AudioProcessingImpl::num_reverse_channels() const {
 }
 
 int AudioProcessingImpl::num_input_channels() const {
-  return fwd_in_format_.num_channels();
+  return api_format_.input_stream().num_channels();
 }
 
 int AudioProcessingImpl::num_output_channels() const {
-  return fwd_out_format_.num_channels();
+  return api_format_.output_stream().num_channels();
 }
 
 void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
@@ -479,44 +490,60 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
                                        int output_sample_rate_hz,
                                        ChannelLayout output_layout,
                                        float* const* dest) {
+  StreamConfig input_stream = api_format_.input_stream();
+  input_stream.set_sample_rate_hz(input_sample_rate_hz);
+  input_stream.set_num_channels(ChannelsFromLayout(input_layout));
+  input_stream.set_has_keyboard(LayoutHasKeyboard(input_layout));
+
+  StreamConfig output_stream = api_format_.output_stream();
+  output_stream.set_sample_rate_hz(output_sample_rate_hz);
+  output_stream.set_num_channels(ChannelsFromLayout(output_layout));
+  output_stream.set_has_keyboard(LayoutHasKeyboard(output_layout));
+
+  if (samples_per_channel != input_stream.num_frames()) {
+    return kBadDataLengthError;
+  }
+  return ProcessStream(src, input_stream, output_stream, dest);
+}
+
+int AudioProcessingImpl::ProcessStream(const float* const* src,
+                                       const StreamConfig& input_config,
+                                       const StreamConfig& output_config,
+                                       float* const* dest) {
   CriticalSectionScoped crit_scoped(crit_);
   if (!src || !dest) {
     return kNullPointerError;
   }
 
-  RETURN_ON_ERR(MaybeInitializeLocked(input_sample_rate_hz,
-                                      output_sample_rate_hz,
-                                      rev_in_format_.rate(),
-                                      ChannelsFromLayout(input_layout),
-                                      ChannelsFromLayout(output_layout),
-                                      rev_in_format_.num_channels()));
-  if (samples_per_channel != fwd_in_format_.samples_per_channel()) {
-    return kBadDataLengthError;
-  }
+  ProcessingConfig processing_config = api_format_;
+  processing_config.input_stream() = input_config;
+  processing_config.output_stream() = output_config;
+
+  RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
+  assert(processing_config.input_stream().num_frames() ==
+         api_format_.input_stream().num_frames());
 
 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
   if (debug_file_->Open()) {
     event_msg_->set_type(audioproc::Event::STREAM);
     audioproc::Stream* msg = event_msg_->mutable_stream();
     const size_t channel_size =
-        sizeof(float) * fwd_in_format_.samples_per_channel();
-    for (int i = 0; i < fwd_in_format_.num_channels(); ++i)
+        sizeof(float) * api_format_.input_stream().num_frames();
+    for (int i = 0; i < api_format_.input_stream().num_channels(); ++i)
       msg->add_input_channel(src[i], channel_size);
   }
 #endif
 
-  capture_audio_->CopyFrom(src, samples_per_channel, input_layout);
+  capture_audio_->CopyFrom(src, api_format_.input_stream());
   RETURN_ON_ERR(ProcessStreamLocked());
-  capture_audio_->CopyTo(fwd_out_format_.samples_per_channel(),
-                         output_layout,
-                         dest);
+  capture_audio_->CopyTo(api_format_.output_stream(), dest);
 
 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
   if (debug_file_->Open()) {
     audioproc::Stream* msg = event_msg_->mutable_stream();
     const size_t channel_size =
-        sizeof(float) * fwd_out_format_.samples_per_channel();
-    for (int i = 0; i < fwd_out_format_.num_channels(); ++i)
+        sizeof(float) * api_format_.input_stream().num_frames();
+    for (int i = 0; i < api_format_.input_stream().num_channels(); ++i)
       msg->add_output_channel(dest[i], channel_size);
     RETURN_ON_ERR(WriteMessageToDebugFile());
   }
@@ -545,13 +572,14 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
 
   // TODO(ajm): The input and output rates and channels are currently
   // constrained to be identical in the int16 interface.
-  RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_,
-                                      frame->sample_rate_hz_,
-                                      rev_in_format_.rate(),
-                                      frame->num_channels_,
-                                      frame->num_channels_,
-                                      rev_in_format_.num_channels()));
-  if (frame->samples_per_channel_ != fwd_in_format_.samples_per_channel()) {
+  ProcessingConfig processing_config = api_format_;
+  processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_);
+  processing_config.input_stream().set_num_channels(frame->num_channels_);
+  processing_config.output_stream().set_sample_rate_hz(frame->sample_rate_hz_);
+  processing_config.output_stream().set_num_channels(frame->num_channels_);
+
+  RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
+  if (frame->samples_per_channel_ != api_format_.input_stream().num_frames()) {
     return kBadDataLengthError;
   }
 
@@ -559,9 +587,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
   if (debug_file_->Open()) {
     event_msg_->set_type(audioproc::Event::STREAM);
     audioproc::Stream* msg = event_msg_->mutable_stream();
-    const size_t data_size = sizeof(int16_t) *
-                             frame->samples_per_channel_ *
-                             frame->num_channels_;
+    const size_t data_size =
+        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
     msg->set_input_data(frame->data_, data_size);
   }
 #endif
@@ -573,9 +600,8 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
   if (debug_file_->Open()) {
     audioproc::Stream* msg = event_msg_->mutable_stream();
-    const size_t data_size = sizeof(int16_t) *
-                             frame->samples_per_channel_ *
-                             frame->num_channels_;
+    const size_t data_size =
+        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
     msg->set_output_data(frame->data_, data_size);
     RETURN_ON_ERR(WriteMessageToDebugFile());
   }
@@ -584,7 +610,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
   return kNoError;
 }
 
-
 int AudioProcessingImpl::ProcessStreamLocked() {
 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
   if (debug_file_->Open()) {
@@ -600,9 +625,8 @@ int AudioProcessingImpl::ProcessStreamLocked() {
 
   AudioBuffer* ca = capture_audio_.get();  // For brevity.
   if (use_new_agc_ && gain_control_->is_enabled()) {
-    agc_manager_->AnalyzePreProcess(ca->channels()[0],
-                                    ca->num_channels(),
-                                    fwd_proc_format_.samples_per_channel());
+    agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(),
+                                    fwd_proc_format_.num_frames());
   }
 
   bool data_processed = is_data_processed();
@@ -627,12 +651,10 @@ int AudioProcessingImpl::ProcessStreamLocked() {
   RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
   RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
 
-  if (use_new_agc_ &&
-      gain_control_->is_enabled() &&
+  if (use_new_agc_ && gain_control_->is_enabled() &&
       (!beamformer_enabled_ || beamformer_->is_target_present())) {
     agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz],
-                          ca->num_frames_per_band(),
-                          split_rate_);
+                          ca->num_frames_per_band(), split_rate_);
   }
   RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
 
@@ -646,15 +668,11 @@ int AudioProcessingImpl::ProcessStreamLocked() {
     float voice_probability =
         agc_manager_.get() ? agc_manager_->voice_probability() : 1.f;
 
-    transient_suppressor_->Suppress(ca->channels_f()[0],
-                                    ca->num_frames(),
-                                    ca->num_channels(),
-                                    ca->split_bands_const_f(0)[kBand0To8kHz],
-                                    ca->num_frames_per_band(),
-                                    ca->keyboard_data(),
-                                    ca->num_keyboard_frames(),
-                                    voice_probability,
-                                    key_pressed_);
+    transient_suppressor_->Suppress(
+        ca->channels_f()[0], ca->num_frames(), ca->num_channels(),
+        ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),
+        ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,
+        key_pressed_);
   }
 
   // The level estimator operates on the recombined data.
@@ -668,35 +686,47 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
                                               int samples_per_channel,
                                               int sample_rate_hz,
                                               ChannelLayout layout) {
+  const StreamConfig reverse_config = {
+      sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
+  };
+  if (samples_per_channel != reverse_config.num_frames()) {
+    return kBadDataLengthError;
+  }
+  return AnalyzeReverseStream(data, reverse_config);
+}
+
+int AudioProcessingImpl::AnalyzeReverseStream(
+    const float* const* data,
+    const StreamConfig& reverse_config) {
   CriticalSectionScoped crit_scoped(crit_);
   if (data == NULL) {
     return kNullPointerError;
   }
 
-  const int num_channels = ChannelsFromLayout(layout);
-  RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(),
-                                      fwd_out_format_.rate(),
-                                      sample_rate_hz,
-                                      fwd_in_format_.num_channels(),
-                                      fwd_out_format_.num_channels(),
-                                      num_channels));
-  if (samples_per_channel != rev_in_format_.samples_per_channel()) {
-    return kBadDataLengthError;
+  if (reverse_config.num_channels() <= 0) {
+    return kBadNumberChannelsError;
   }
 
+  ProcessingConfig processing_config = api_format_;
+  processing_config.reverse_stream() = reverse_config;
+
+  RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
+  assert(reverse_config.num_frames() ==
+         api_format_.reverse_stream().num_frames());
+
 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
   if (debug_file_->Open()) {
     event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
     audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
     const size_t channel_size =
-        sizeof(float) * rev_in_format_.samples_per_channel();
-    for (int i = 0; i < num_channels; ++i)
+        sizeof(float) * api_format_.reverse_stream().num_frames();
+    for (int i = 0; i < api_format_.reverse_stream().num_channels(); ++i)
       msg->add_channel(data[i], channel_size);
     RETURN_ON_ERR(WriteMessageToDebugFile());
   }
 #endif
 
-  render_audio_->CopyFrom(data, samples_per_channel, layout);
+  render_audio_->CopyFrom(data, api_format_.reverse_stream());
   return AnalyzeReverseStreamLocked();
 }
 
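A matching sketch for the reverse (far-end) path under the new overload; apm and far_end_channels are assumed to exist:

    // Analyze a 10 ms far-end chunk. Internally the reverse stream is still
    // downmixed to mono for analysis, whatever channel count is supplied.
    webrtc::StreamConfig reverse_config(48000, 2);
    int err = apm->AnalyzeReverseStream(far_end_channels, reverse_config);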
@@ -713,17 +743,21 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
     return kBadSampleRateError;
   }
   // This interface does not tolerate different forward and reverse rates.
-  if (frame->sample_rate_hz_ != fwd_in_format_.rate()) {
+  if (frame->sample_rate_hz_ != api_format_.input_stream().sample_rate_hz()) {
     return kBadSampleRateError;
   }
 
-  RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(),
-                                      fwd_out_format_.rate(),
-                                      frame->sample_rate_hz_,
-                                      fwd_in_format_.num_channels(),
-                                      fwd_in_format_.num_channels(),
-                                      frame->num_channels_));
-  if (frame->samples_per_channel_ != rev_in_format_.samples_per_channel()) {
+  if (frame->num_channels_ <= 0) {
+    return kBadNumberChannelsError;
+  }
+
+  ProcessingConfig processing_config = api_format_;
+  processing_config.reverse_stream().set_sample_rate_hz(frame->sample_rate_hz_);
+  processing_config.reverse_stream().set_num_channels(frame->num_channels_);
+
+  RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
+  if (frame->samples_per_channel_ !=
+      api_format_.reverse_stream().num_frames()) {
     return kBadDataLengthError;
   }
 
@@ -731,9 +765,8 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
   if (debug_file_->Open()) {
     event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
     audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
-    const size_t data_size = sizeof(int16_t) *
-                             frame->samples_per_channel_ *
-                             frame->num_channels_;
+    const size_t data_size =
+        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
     msg->set_data(frame->data_, data_size);
     RETURN_ON_ERR(WriteMessageToDebugFile());
   }
@@ -745,7 +778,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
 
 int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
   AudioBuffer* ra = render_audio_.get();  // For brevity.
-  if (rev_proc_format_.rate() == kSampleRate32kHz) {
+  if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) {
     ra->SplitIntoFrequencyBands();
   }
 
@@ -947,13 +980,15 @@ bool AudioProcessingImpl::is_data_processed() const {
 
 bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
   // Check if we've upmixed or downmixed the audio.
-  return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) ||
+  return ((api_format_.output_stream().num_channels() !=
+           api_format_.input_stream().num_channels()) ||
           is_data_processed || transient_suppressor_enabled_);
 }
 
 bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
-  return (is_data_processed && (fwd_proc_format_.rate() == kSampleRate32kHz ||
-                                fwd_proc_format_.rate() == kSampleRate48kHz));
+  return (is_data_processed &&
+          (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz ||
+           fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz));
 }
 
 bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
@@ -961,8 +996,8 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
       !transient_suppressor_enabled_) {
     // Only level_estimator_ is enabled.
     return false;
-  } else if (fwd_proc_format_.rate() == kSampleRate32kHz ||
-             fwd_proc_format_.rate() == kSampleRate48kHz) {
+  } else if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz ||
+             fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) {
     // Something besides level_estimator_ is enabled, and we have super-wb.
     return true;
   }
@@ -986,9 +1021,9 @@ void AudioProcessingImpl::InitializeTransient() {
     if (!transient_suppressor_.get()) {
       transient_suppressor_.reset(new TransientSuppressor());
     }
-    transient_suppressor_->Initialize(fwd_proc_format_.rate(),
-                                      split_rate_,
-                                      fwd_out_format_.num_channels());
+    transient_suppressor_->Initialize(
+        fwd_proc_format_.sample_rate_hz(), split_rate_,
+        api_format_.output_stream().num_channels());
   }
 }
 
@@ -1031,8 +1066,8 @@ void AudioProcessingImpl::MaybeUpdateHistograms() {
     const int frames_per_ms = rtc::CheckedDivExact(split_rate_, 1000);
     const int aec_system_delay_ms =
         WebRtcAec_system_delay(echo_cancellation()->aec_core()) / frames_per_ms;
-    const int diff_aec_system_delay_ms = aec_system_delay_ms -
-        last_aec_system_delay_ms_;
+    const int diff_aec_system_delay_ms =
+        aec_system_delay_ms - last_aec_system_delay_ms_;
     if (diff_aec_system_delay_ms > kMinDiffDelayMs &&
         last_aec_system_delay_ms_ != 0) {
       RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AecSystemDelayJump",
@@ -1072,8 +1107,8 @@ int AudioProcessingImpl::WriteMessageToDebugFile() {
     return kUnspecifiedError;
   }
 #if defined(WEBRTC_ARCH_BIG_ENDIAN)
-  // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
-  //            pretty safe in assuming little-endian.
+// TODO(ajm): Use little-endian "on the wire". For the moment, we can be
+//            pretty safe in assuming little-endian.
 #endif
 
   if (!event_msg_->SerializeToString(&event_str_)) {
@@ -1096,12 +1131,12 @@ int AudioProcessingImpl::WriteMessageToDebugFile() {
 int AudioProcessingImpl::WriteInitMessage() {
   event_msg_->set_type(audioproc::Event::INIT);
   audioproc::Init* msg = event_msg_->mutable_init();
-  msg->set_sample_rate(fwd_in_format_.rate());
-  msg->set_num_input_channels(fwd_in_format_.num_channels());
-  msg->set_num_output_channels(fwd_out_format_.num_channels());
-  msg->set_num_reverse_channels(rev_in_format_.num_channels());
-  msg->set_reverse_sample_rate(rev_in_format_.rate());
-  msg->set_output_sample_rate(fwd_out_format_.rate());
+  msg->set_sample_rate(api_format_.input_stream().sample_rate_hz());
+  msg->set_num_input_channels(api_format_.input_stream().num_channels());
+  msg->set_num_output_channels(api_format_.output_stream().num_channels());
+  msg->set_num_reverse_channels(api_format_.reverse_stream().num_channels());
+  msg->set_reverse_sample_rate(api_format_.reverse_stream().sample_rate_hz());
+  msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz());
 
   int err = WriteMessageToDebugFile();
   if (err != kNoError) {
@@ -13,6 +13,7 @@
 
 #include <list>
 #include <string>
+#include <vector>
 
 #include "webrtc/base/scoped_ptr.h"
 #include "webrtc/base/thread_annotations.h"
@@ -47,42 +48,6 @@ class Event;
 }  // namespace audioproc
 #endif
 
-class AudioRate {
- public:
-  explicit AudioRate(int sample_rate_hz) { set(sample_rate_hz); }
-  virtual ~AudioRate() {}
-
-  void set(int rate) {
-    rate_ = rate;
-    samples_per_channel_ = AudioProcessing::kChunkSizeMs * rate_ / 1000;
-  }
-
-  int rate() const { return rate_; }
-  int samples_per_channel() const { return samples_per_channel_; }
-
- private:
-  int rate_;
-  int samples_per_channel_;
-};
-
-class AudioFormat : public AudioRate {
- public:
-  AudioFormat(int sample_rate_hz, int num_channels)
-      : AudioRate(sample_rate_hz),
-        num_channels_(num_channels) {}
-  virtual ~AudioFormat() {}
-
-  void set(int rate, int num_channels) {
-    AudioRate::set(rate);
-    num_channels_ = num_channels;
-  }
-
-  int num_channels() const { return num_channels_; }
-
- private:
-  int num_channels_;
-};
-
 class AudioProcessingImpl : public AudioProcessing {
  public:
   explicit AudioProcessingImpl(const Config& config);
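The deleted AudioRate/AudioFormat helpers fold into StreamConfig; as a quick mapping against the accessors visible elsewhere in this diff:

    // AudioRate::rate()                -> StreamConfig::sample_rate_hz()
    // AudioRate::samples_per_channel() -> StreamConfig::num_frames()
    // AudioFormat::num_channels()      -> StreamConfig::num_channels()
    // AudioFormat::set(rate, channels) -> StreamConfig(rate, channels)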
@@ -99,6 +64,7 @@ class AudioProcessingImpl : public AudioProcessing {
                  ChannelLayout input_layout,
                  ChannelLayout output_layout,
                  ChannelLayout reverse_layout) override;
+  int Initialize(const ProcessingConfig& processing_config) override;
   void SetExtraOptions(const Config& config) override;
   int set_sample_rate_hz(int rate) override;
   int input_sample_rate_hz() const override;
@@ -118,11 +84,17 @@ class AudioProcessingImpl : public AudioProcessing {
                     int output_sample_rate_hz,
                     ChannelLayout output_layout,
                     float* const* dest) override;
+  int ProcessStream(const float* const* src,
+                    const StreamConfig& input_config,
+                    const StreamConfig& output_config,
+                    float* const* dest) override;
   int AnalyzeReverseStream(AudioFrame* frame) override;
   int AnalyzeReverseStream(const float* const* data,
                            int samples_per_channel,
                            int sample_rate_hz,
                            ChannelLayout layout) override;
+  int AnalyzeReverseStream(const float* const* data,
+                           const StreamConfig& reverse_config) override;
   int set_stream_delay_ms(int delay) override;
   int stream_delay_ms() const override;
   bool was_stream_delay_set() const override;
@@ -148,19 +120,9 @@ class AudioProcessingImpl : public AudioProcessing {
   virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
  private:
-  int InitializeLocked(int input_sample_rate_hz,
-                       int output_sample_rate_hz,
-                       int reverse_sample_rate_hz,
-                       int num_input_channels,
-                       int num_output_channels,
-                       int num_reverse_channels)
+  int InitializeLocked(const ProcessingConfig& config)
       EXCLUSIVE_LOCKS_REQUIRED(crit_);
-  int MaybeInitializeLocked(int input_sample_rate_hz,
-                            int output_sample_rate_hz,
-                            int reverse_sample_rate_hz,
-                            int num_input_channels,
-                            int num_output_channels,
-                            int num_reverse_channels)
+  int MaybeInitializeLocked(const ProcessingConfig& config)
       EXCLUSIVE_LOCKS_REQUIRED(crit_);
   int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
   int AnalyzeReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
@@ -197,13 +159,14 @@ class AudioProcessingImpl : public AudioProcessing {
   std::string event_str_;  // Memory for protobuf serialization.
 #endif
 
-  AudioFormat fwd_in_format_;
-  // This one is an AudioRate, because the forward processing number of channels
-  // is mutable and is tracked by the capture_audio_.
-  AudioRate fwd_proc_format_;
-  AudioFormat fwd_out_format_;
-  AudioFormat rev_in_format_;
-  AudioFormat rev_proc_format_;
+  // Format of processing streams at input/output call sites.
+  ProcessingConfig api_format_;
+
+  // Only the rate and samples fields of fwd_proc_format_ are used because the
+  // forward processing number of channels is mutable and is tracked by the
+  // capture_audio_.
+  StreamConfig fwd_proc_format_;
+  StreamConfig rev_proc_format_;
   int split_rate_;
 
   int stream_delay_ms_;
@@ -29,6 +29,9 @@ class AudioFrame;
 template<typename T>
 class Beamformer;
 
+class StreamConfig;
+class ProcessingConfig;
+
 class EchoCancellation;
 class EchoControlMobile;
 class GainControl;
@@ -84,7 +87,7 @@ static const int kAgcStartupMinVolume = 0;
 #endif  // defined(WEBRTC_CHROMIUM_BUILD)
 struct ExperimentalAgc {
   ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
-  ExperimentalAgc(bool enabled)
+  explicit ExperimentalAgc(bool enabled)
       : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
   ExperimentalAgc(bool enabled, int startup_min_volume)
       : enabled(enabled), startup_min_volume(startup_min_volume) {}
@@ -199,6 +202,7 @@ static const int kAudioProcMaxNativeSampleRateHz = 32000;
 //
 class AudioProcessing {
  public:
+  // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
   enum ChannelLayout {
     kMono,
     // Left, right.
@@ -236,10 +240,17 @@ class AudioProcessing {
   // The int16 interfaces require:
   //   - only |NativeRate|s be used
   //   - that the input, output and reverse rates must match
-  //   - that |output_layout| matches |input_layout|
+  //   - that |processing_config.output_stream()| matches
+  //     |processing_config.input_stream()|.
   //
-  // The float interfaces accept arbitrary rates and support differing input
-  // and output layouts, but the output may only remove channels, not add.
+  // The float interfaces accept arbitrary rates and support differing input and
+  // output layouts, but the output must have either one channel or the same
+  // number of channels as the input.
+  virtual int Initialize(const ProcessingConfig& processing_config) = 0;
+
+  // Initialize with unpacked parameters. See Initialize() above for details.
+  //
+  // TODO(mgraczyk): Remove once clients are updated to use the new interface.
   virtual int Initialize(int input_sample_rate_hz,
                          int output_sample_rate_hz,
                          int reverse_sample_rate_hz,
@@ -292,8 +303,10 @@ class AudioProcessing {
   // |input_layout|. At output, the channels will be arranged according to
   // |output_layout| at |output_sample_rate_hz| in |dest|.
   //
-  // The output layout may only remove channels, not add. |src| and |dest|
-  // may use the same memory, if desired.
+  // The output layout must have one channel or as many channels as the input.
+  // |src| and |dest| may use the same memory, if desired.
+  //
+  // TODO(mgraczyk): Remove once clients are updated to use the new interface.
   virtual int ProcessStream(const float* const* src,
                             int samples_per_channel,
                             int input_sample_rate_hz,
@@ -302,6 +315,18 @@ class AudioProcessing {
                             ChannelLayout output_layout,
                             float* const* dest) = 0;
 
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
+  // |src| points to a channel buffer, arranged according to |input_stream|. At
+  // output, the channels will be arranged according to |output_stream| in
+  // |dest|.
+  //
+  // The output must have one channel or as many channels as the input. |src|
+  // and |dest| may use the same memory, if desired.
+  virtual int ProcessStream(const float* const* src,
+                            const StreamConfig& input_config,
+                            const StreamConfig& output_config,
+                            float* const* dest) = 0;
+
   // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
   // will not be modified. On the client-side, this is the far-end (or to be
   // rendered) audio.
@@ -321,11 +346,18 @@ class AudioProcessing {
 
   // Accepts deinterleaved float audio with the range [-1, 1]. Each element
   // of |data| points to a channel buffer, arranged according to |layout|.
+  //
+  // TODO(mgraczyk): Remove once clients are updated to use the new interface.
   virtual int AnalyzeReverseStream(const float* const* data,
                                    int samples_per_channel,
                                    int sample_rate_hz,
                                    ChannelLayout layout) = 0;
 
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
+  // |data| points to a channel buffer, arranged according to |reverse_config|.
+  virtual int AnalyzeReverseStream(const float* const* data,
+                                   const StreamConfig& reverse_config) = 0;
+
   // This must be called if and only if echo processing is enabled.
   //
   // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
@@ -432,6 +464,102 @@ class AudioProcessing {
   static const int kChunkSizeMs = 10;
 };
 
+class StreamConfig {
+ public:
+  // sample_rate_hz: The sampling rate of the stream.
+  //
+  // num_channels: The number of audio channels in the stream, excluding the
+  //               keyboard channel if it is present. When passing a
+  //               StreamConfig with an array of arrays T*[N],
+  //
+  //                N == {num_channels + 1  if  has_keyboard
+  //                     {num_channels      if  !has_keyboard
+  //
+  // has_keyboard: True if the stream has a keyboard channel. When has_keyboard
+  //               is true, the last channel in any corresponding list of
+  //               channels is the keyboard channel.
+  StreamConfig(int sample_rate_hz = 0,
+               int num_channels = 0,
+               bool has_keyboard = false)
+      : sample_rate_hz_(sample_rate_hz),
+        num_channels_(num_channels),
+        has_keyboard_(has_keyboard),
+        num_frames_(calculate_frames(sample_rate_hz)) {}
+
+  void set_sample_rate_hz(int value) {
+    sample_rate_hz_ = value;
+    num_frames_ = calculate_frames(value);
+  }
+  void set_num_channels(int value) { num_channels_ = value; }
+  void set_has_keyboard(bool value) { has_keyboard_ = value; }
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+
+  // The number of channels in the stream, not including the keyboard channel
+  // if present.
+  int num_channels() const { return num_channels_; }
+
+  bool has_keyboard() const { return has_keyboard_; }
+  int num_frames() const { return num_frames_; }
+
+  bool operator==(const StreamConfig& other) const {
+    return sample_rate_hz_ == other.sample_rate_hz_ &&
+           num_channels_ == other.num_channels_ &&
+           has_keyboard_ == other.has_keyboard_;
+  }
+
+  bool operator!=(const StreamConfig& other) const { return !(*this == other); }
+
+ private:
+  static int calculate_frames(int sample_rate_hz) {
+    return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
+  }
+
+  int sample_rate_hz_;
+  int num_channels_;
+  bool has_keyboard_;
+  int num_frames_;
+};
+
+class ProcessingConfig {
+ public:
+  enum StreamName {
+    kInputStream,
+    kOutputStream,
+    kReverseStream,
+    kNumStreamNames,
+  };
+
+  const StreamConfig& input_stream() const {
+    return streams[StreamName::kInputStream];
+  }
+  const StreamConfig& output_stream() const {
+    return streams[StreamName::kOutputStream];
+  }
+  const StreamConfig& reverse_stream() const {
+    return streams[StreamName::kReverseStream];
+  }
+
+  StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
+  StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
+  StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; }
+
+  bool operator==(const ProcessingConfig& other) const {
+    for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
+      if (this->streams[i] != other.streams[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool operator!=(const ProcessingConfig& other) const {
+    return !(*this == other);
+  }
+
+  StreamConfig streams[StreamName::kNumStreamNames];
+};
+
 // The acoustic echo cancellation (AEC) component provides better performance
 // than AECM but also requires more processing power and is dependent on delay
 // stability and reporting accuracy. As such it is well-suited and recommended
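To make the two new types concrete, a short usage sketch (rates and channel counts are illustrative):

    // num_frames() is derived from the rate at construction: 10 ms chunks.
    webrtc::StreamConfig stereo_keyboard(44100, 2, true /* has_keyboard */);
    // stereo_keyboard.num_frames() == 10 * 44100 / 1000 == 441, and callers
    // pass float*[3]: two audio channels plus the trailing keyboard channel.

    // Brace initialization fills ProcessingConfig::streams in declaration
    // order: input, output, reverse.
    webrtc::ProcessingConfig config = {{{48000, 4},    // input
                                        {48000, 1},    // output (mono downmix)
                                        {16000, 2}}};  // reverse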
@@ -186,6 +186,8 @@ class MockAudioProcessing : public AudioProcessing {
                  ChannelLayout input_layout,
                  ChannelLayout output_layout,
                  ChannelLayout reverse_layout));
+  MOCK_METHOD1(Initialize,
+      int(const ProcessingConfig& processing_config));
   MOCK_METHOD1(SetExtraOptions,
       void(const Config& config));
   MOCK_METHOD1(set_sample_rate_hz,
@@ -218,11 +220,18 @@ class MockAudioProcessing : public AudioProcessing {
                     int output_sample_rate_hz,
                     ChannelLayout output_layout,
                     float* const* dest));
+  MOCK_METHOD4(ProcessStream,
+      int(const float* const* src,
+          const StreamConfig& input_config,
+          const StreamConfig& output_config,
+          float* const* dest));
   MOCK_METHOD1(AnalyzeReverseStream,
       int(AudioFrame* frame));
   MOCK_METHOD4(AnalyzeReverseStream,
       int(const float* const* data, int frames, int sample_rate_hz,
           ChannelLayout input_layout));
+  MOCK_METHOD2(AnalyzeReverseStream,
+      int(const float* const* data, const StreamConfig& reverse_config));
   MOCK_METHOD1(set_stream_delay_ms,
       int(int delay));
   MOCK_CONST_METHOD0(stream_delay_ms,
@@ -354,8 +354,14 @@ class ApmTest : public ::testing::Test {
   void ProcessWithDefaultStreamParameters(AudioFrame* frame);
   void ProcessDelayVerificationTest(int delay_ms, int system_delay_ms,
                                     int delay_min, int delay_max);
-  void TestChangingChannels(int num_channels,
-                            AudioProcessing::Error expected_return);
+  void TestChangingChannelsInt16Interface(
+      int num_channels,
+      AudioProcessing::Error expected_return);
+  void TestChangingForwardChannels(int num_in_channels,
+                                   int num_out_channels,
+                                   AudioProcessing::Error expected_return);
+  void TestChangingReverseChannels(int num_rev_channels,
+                                   AudioProcessing::Error expected_return);
   void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate);
   void RunManualVolumeChangeIsPossibleTest(int sample_rate);
   void StreamParametersTest(Format format);
@@ -449,12 +455,10 @@ void ApmTest::TearDown() {
 
 void ApmTest::Init(AudioProcessing* ap) {
   ASSERT_EQ(kNoErr,
-            ap->Initialize(frame_->sample_rate_hz_,
-                           output_sample_rate_hz_,
-                           revframe_->sample_rate_hz_,
-                           LayoutFromChannels(frame_->num_channels_),
-                           LayoutFromChannels(num_output_channels_),
-                           LayoutFromChannels(revframe_->num_channels_)));
+            ap->Initialize(
+                {{{frame_->sample_rate_hz_, frame_->num_channels_},
+                  {output_sample_rate_hz_, num_output_channels_},
+                  {revframe_->sample_rate_hz_, revframe_->num_channels_}}}));
 }
 
 void ApmTest::Init(int sample_rate_hz,
@@ -791,26 +795,79 @@ TEST_F(ApmTest, DelayOffsetWithLimitsIsSetProperly) {
   EXPECT_EQ(50, apm_->stream_delay_ms());
 }
 
-void ApmTest::TestChangingChannels(int num_channels,
-                                   AudioProcessing::Error expected_return) {
+void ApmTest::TestChangingChannelsInt16Interface(
+    int num_channels,
+    AudioProcessing::Error expected_return) {
   frame_->num_channels_ = num_channels;
   EXPECT_EQ(expected_return, apm_->ProcessStream(frame_));
   EXPECT_EQ(expected_return, apm_->AnalyzeReverseStream(frame_));
 }
 
-TEST_F(ApmTest, Channels) {
-  // Testing number of invalid channels.
-  TestChangingChannels(0, apm_->kBadNumberChannelsError);
-  TestChangingChannels(3, apm_->kBadNumberChannelsError);
-  // Testing number of valid channels.
-  for (int i = 1; i < 3; i++) {
-    TestChangingChannels(i, kNoErr);
+void ApmTest::TestChangingForwardChannels(
+    int num_in_channels,
+    int num_out_channels,
+    AudioProcessing::Error expected_return) {
+  const StreamConfig input_stream = {frame_->sample_rate_hz_, num_in_channels};
+  const StreamConfig output_stream = {output_sample_rate_hz_, num_out_channels};
+
+  EXPECT_EQ(expected_return,
+            apm_->ProcessStream(float_cb_->channels(), input_stream,
+                                output_stream, float_cb_->channels()));
+}
+
+void ApmTest::TestChangingReverseChannels(
+    int num_rev_channels,
+    AudioProcessing::Error expected_return) {
+  const ProcessingConfig processing_config = {
+      {{ frame_->sample_rate_hz_, apm_->num_input_channels() },
+       { output_sample_rate_hz_, apm_->num_output_channels() },
+       { frame_->sample_rate_hz_, num_rev_channels }}};
+
+  EXPECT_EQ(expected_return,
+            apm_->AnalyzeReverseStream(float_cb_->channels(),
+                                       processing_config.reverse_stream()));
+}
+
+TEST_F(ApmTest, ChannelsInt16Interface) {
+  // Testing number of invalid and valid channels.
+  Init(16000, 16000, 16000, 4, 4, 4, false);
+
+  TestChangingChannelsInt16Interface(0, apm_->kBadNumberChannelsError);
+
+  for (int i = 1; i < 4; i++) {
+    TestChangingChannelsInt16Interface(i, kNoErr);
     EXPECT_EQ(i, apm_->num_input_channels());
     // We always force the number of reverse channels used for processing to 1.
     EXPECT_EQ(1, apm_->num_reverse_channels());
   }
 }
 
+TEST_F(ApmTest, Channels) {
+  // Testing number of invalid and valid channels.
+  Init(16000, 16000, 16000, 4, 4, 4, false);
+
+  TestChangingForwardChannels(0, 1, apm_->kBadNumberChannelsError);
+  TestChangingReverseChannels(0, apm_->kBadNumberChannelsError);
+
+  for (int i = 1; i < 4; ++i) {
+    for (int j = 0; j <= i; ++j) {
+      // Output channels must be one or match input channels.
+      if (j == 1 || i == j) {
+        TestChangingForwardChannels(i, j, kNoErr);
+        TestChangingReverseChannels(i, kNoErr);
+
+        EXPECT_EQ(i, apm_->num_input_channels());
+        EXPECT_EQ(j, apm_->num_output_channels());
+        // The number of reverse channels used for processing is always 1.
+        EXPECT_EQ(1, apm_->num_reverse_channels());
+      } else {
+        TestChangingForwardChannels(i, j,
+                                    AudioProcessing::kBadNumberChannelsError);
+      }
+    }
+  }
+}
+
 TEST_F(ApmTest, SampleRatesInt) {
   // Testing invalid sample rates
   SetContainerFormat(10000, 2, frame_, &float_cb_);
@@ -2294,12 +2351,9 @@ class AudioProcessingTest
     config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
     rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));
     EnableAllAPComponents(ap.get());
-    ap->Initialize(input_rate,
-                   output_rate,
-                   reverse_rate,
-                   LayoutFromChannels(num_input_channels),
-                   LayoutFromChannels(num_output_channels),
-                   LayoutFromChannels(num_reverse_channels));
+    ap->Initialize({{{input_rate, num_input_channels},
+                     {output_rate, num_output_channels},
+                     {reverse_rate, num_reverse_channels}}});
 
     FILE* far_file = fopen(ResourceFilePath("far", reverse_rate).c_str(), "rb");
     FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb");
@@ -127,6 +127,13 @@ int main(int argc, char* argv[]) {
   TickTime processing_start_time;
   TickInterval accumulated_time;
   int num_chunks = 0;
+
+  const StreamConfig input_config = {
+      in_file.sample_rate(), in_buf.num_channels(),
+  };
+  const StreamConfig output_config = {
+      out_file.sample_rate(), out_buf.num_channels(),
+  };
   while (in_file.ReadSamples(in_interleaved.size(),
                              &in_interleaved[0]) == in_interleaved.size()) {
     // Have logs display the file time rather than wallclock time.
@@ -139,14 +146,8 @@ int main(int argc, char* argv[]) {
     if (FLAGS_perf) {
      processing_start_time = TickTime::Now();
    }
-    CHECK_EQ(kNoErr,
-             ap->ProcessStream(in_buf.channels(),
-                               in_buf.num_frames(),
-                               in_file.sample_rate(),
-                               LayoutFromChannels(in_buf.num_channels()),
-                               out_file.sample_rate(),
-                               LayoutFromChannels(out_buf.num_channels()),
-                               out_buf.channels()));
+    CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config,
+                                       output_config, out_buf.channels()));
     if (FLAGS_perf) {
       accumulated_time += TickTime::Now() - processing_start_time;
    }