Add a deinterleaved float interface to AudioProcessing.

This is mainly to support the native audio format in Chrome. For now,
this implementation just moves the float->int conversion under the
hood, but we will transition AudioProcessing towards supporting this
format throughout.

- Add a test that verifies we get identical output from the float and
int interfaces (a sketch of the idea follows this list).
- The float and int wrappers are tasked only with conversion to and
from the AudioBuffer format; the shared ProcessStreamLocked() and
AnalyzeReverseStreamLocked() methods do most of the work (see the
usage sketch below).
- Add a new field to the debug.proto to hold deinterleaved data.
- Add helpers to audio_util.cc, and start using numeric_limits (see the
conversion sketch below).
- Note that there was no performance difference between numeric_limits
and a literal value when measured on Linux using gcc or clang.
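
For the audio_util.cc helpers, here is a minimal sketch of what
numeric_limits-based float<->int16 scaling can look like. The names
FloatToS16 and S16ToFloat are placeholders for illustration, not
necessarily the exact functions this change adds:

  #include <stdint.h>

  #include <algorithm>
  #include <limits>

  // Scale a float sample in [-1, 1] to int16, clamping out-of-range
  // input. Scaling by -min() (32768) maps -1.0 exactly onto the most
  // negative int16 value; +1.0 saturates to max().
  static inline int16_t FloatToS16(float v) {
    static const float kScaling = -std::numeric_limits<int16_t>::min();
    v = std::max(-1.f, std::min(v, 1.f)) * kScaling;
    if (v >= std::numeric_limits<int16_t>::max())
      return std::numeric_limits<int16_t>::max();
    // Round to nearest; the cast truncates toward zero.
    return static_cast<int16_t>(v >= 0 ? v + 0.5f : v - 0.5f);
  }

  // The inverse mapping back to a nominal [-1, 1] float.
  static inline float S16ToFloat(int16_t v) {
    static const float kScaling =
        1.f / -std::numeric_limits<int16_t>::min();
    return v * kScaling;
  }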
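
The new deinterleaved float interface then gets called roughly as
follows. Everything here (the apm pointer, 32 kHz stereo, zero stream
delay) is an assumption for illustration; chunks are 10 ms per
kChunkSizeMs:

  #include <vector>

  #include "webrtc/modules/audio_processing/include/audio_processing.h"

  void ProcessOneChunk(webrtc::AudioProcessing* apm) {
    const int kRateHz = 32000;
    const int kSamples = kRateHz / 100;  // 10 ms chunk.

    // Deinterleaved stereo: one contiguous float array per channel.
    std::vector<float> near_l(kSamples), near_r(kSamples);
    std::vector<float> far_l(kSamples), far_r(kSamples);
    float* near_ch[] = {&near_l[0], &near_r[0]};
    float* far_ch[] = {&far_l[0], &far_r[0]};

    // Far-end (render) audio is only analyzed; near-end (capture)
    // audio is processed in place.
    apm->AnalyzeReverseStream(far_ch, kSamples, kRateHz,
                              webrtc::AudioProcessing::kStereo);
    apm->set_stream_delay_ms(0);  // Assumed delay; required by AEC.
    apm->ProcessStream(near_ch, kSamples, kRateHz,
                       webrtc::AudioProcessing::kStereo,
                       webrtc::AudioProcessing::kStereo);
  }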
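
And the identical-output test reduces to this idea (a sketch with
assumed names building on the placeholders above, not the actual
unittest): push the same chunk through both interfaces and require
bit-exact int16 results.

  #include <assert.h>
  #include <string.h>

  #include <vector>

  #include "webrtc/modules/audio_processing/include/audio_processing.h"
  #include "webrtc/modules/interface/module_common_types.h"

  // int_apm and float_apm must be identically configured instances.
  void ExpectIdenticalOutput(webrtc::AudioProcessing* int_apm,
                             webrtc::AudioProcessing* float_apm,
                             const int16_t* interleaved,  // stereo, 10 ms
                             int samples_per_channel) {
    // Int path: process an interleaved AudioFrame.
    webrtc::AudioFrame frame;
    frame.sample_rate_hz_ = 32000;
    frame.num_channels_ = 2;
    frame.samples_per_channel_ = samples_per_channel;
    memcpy(frame.data_, interleaved,
           sizeof(int16_t) * 2 * samples_per_channel);
    int_apm->set_stream_delay_ms(0);
    int_apm->ProcessStream(&frame);

    // Float path: deinterleave and convert, then process.
    std::vector<float> left(samples_per_channel);
    std::vector<float> right(samples_per_channel);
    for (int i = 0; i < samples_per_channel; ++i) {
      left[i] = S16ToFloat(interleaved[2 * i]);
      right[i] = S16ToFloat(interleaved[2 * i + 1]);
    }
    float* channels[] = {&left[0], &right[0]};
    float_apm->set_stream_delay_ms(0);
    float_apm->ProcessStream(channels, samples_per_channel, 32000,
                             webrtc::AudioProcessing::kStereo,
                             webrtc::AudioProcessing::kStereo);

    // Converting the float output back to int16 should agree exactly.
    for (int i = 0; i < samples_per_channel; ++i) {
      assert(frame.data_[2 * i] == FloatToS16(left[i]));
      assert(frame.data_[2 * i + 1] == FloatToS16(right[i]));
    }
  }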

BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org, henrikg@webrtc.org, tommi@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/9179004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5641 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: andrew@webrtc.org
Date: 2014-03-04 20:58:13 +00:00
Parent: b90991dade
Commit: 17e40641b3
12 changed files with 660 additions and 250 deletions

webrtc/modules/audio_processing/audio_processing_impl.cc

@@ -12,6 +12,7 @@
#include <assert.h>
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
@@ -37,8 +38,6 @@
#endif
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
static const int kChunkSizeMs = 10;
#define RETURN_ON_ERR(expr) \
do { \
int err = expr; \
@@ -48,6 +47,24 @@ static const int kChunkSizeMs = 10;
} while (0)
namespace webrtc {
namespace {
const int kChunkSizeMs = 10;
int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
case AudioProcessing::kMonoAndKeyboard:
return 1;
case AudioProcessing::kStereo:
case AudioProcessing::kStereoAndKeyboard:
return 2;
}
assert(false);
return -1;
}
} // namespace
// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
@@ -299,6 +316,8 @@ bool AudioProcessingImpl::output_will_be_muted() const {
return output_will_be_muted_;
}
// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values.
int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
int num_input_channels, int num_output_channels, int num_reverse_channels) {
if (sample_rate_hz == sample_rate_hz_ &&
@@ -342,15 +361,62 @@ int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
return InitializeLocked();
}
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
int AudioProcessingImpl::ProcessStream(float* const* data,
int samples_per_channel,
int sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout) {
CriticalSectionScoped crit_scoped(crit_);
int err = kNoError;
if (frame == NULL) {
if (!data) {
return kNullPointerError;
}
const int num_input_channels = ChannelsFromLayout(input_layout);
// TODO(ajm): We now always set the output channels equal to the input
// channels here. Remove the ability to downmix entirely.
// channels here. Restore the ability to downmix.
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz,
num_input_channels, num_input_channels, num_reverse_channels_));
if (samples_per_channel != samples_per_channel_) {
return kBadDataLengthError;
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
event_msg_->set_type(audioproc::Event::STREAM);
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t channel_size = sizeof(float) * samples_per_channel;
for (int i = 0; i < num_input_channels; ++i)
msg->set_input_channel(i, data[i], channel_size);
}
#endif
capture_audio_->CopyFrom(data, samples_per_channel, num_output_channels_);
RETURN_ON_ERR(ProcessStreamLocked());
if (output_copy_needed(is_data_processed())) {
capture_audio_->CopyTo(samples_per_channel, num_output_channels_, data);
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t channel_size = sizeof(float) * samples_per_channel;
for (int i = 0; i < num_output_channels_; ++i)
msg->set_output_channel(i, data[i], channel_size);
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
return kNoError;
}
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
CriticalSectionScoped crit_scoped(crit_);
if (!frame) {
return kNullPointerError;
}
// TODO(ajm): We now always set the output channels equal to the input
// channels here. Restore the ability to downmix.
RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_,
frame->num_channels_, frame->num_channels_, num_reverse_channels_));
if (frame->samples_per_channel_ != samples_per_channel_) {
@@ -365,6 +431,36 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_input_data(frame->data_, data_size);
}
#endif
capture_audio_->DeinterleaveFrom(frame);
if (num_output_channels_ < num_input_channels_) {
capture_audio_->Mix(num_output_channels_);
frame->num_channels_ = num_output_channels_;
}
RETURN_ON_ERR(ProcessStreamLocked());
capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed()));
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t data_size = sizeof(int16_t) *
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_output_data(frame->data_, data_size);
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
return kNoError;
}
int AudioProcessingImpl::ProcessStreamLocked() {
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
msg->set_delay(stream_delay_ms_);
msg->set_drift(echo_cancellation_->stream_drift_samples());
msg->set_level(gain_control_->stream_analog_level());
@@ -372,14 +468,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
#endif
capture_audio_->DeinterleaveFrom(frame);
// TODO(ajm): experiment with mixing and AEC placement.
if (num_output_channels_ < num_input_channels_) {
capture_audio_->Mix(num_output_channels_);
frame->num_channels_ = num_output_channels_;
}
bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
@@ -393,45 +481,18 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
}
err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_));
RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_));
if (echo_control_mobile_->is_enabled() &&
noise_suppression_->is_enabled()) {
capture_audio_->CopyLowPassToReference();
}
err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = voice_detection_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = gain_control_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_));
if (synthesis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
@@ -446,38 +507,48 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
// The level estimator operates on the recombined data.
err = level_estimator_->ProcessStream(capture_audio_);
if (err != kNoError) {
return err;
}
capture_audio_->InterleaveTo(frame, interleave_needed(data_processed));
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t data_size = sizeof(int16_t) *
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_output_data(frame->data_, data_size);
err = WriteMessageToDebugFile();
if (err != kNoError) {
return err;
}
}
#endif
RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_));
was_stream_delay_set_ = false;
return kNoError;
}
// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
// primary stream and convert ourselves rather than having the user manage it.
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
int sample_rate_hz,
ChannelLayout layout) {
CriticalSectionScoped crit_scoped(crit_);
if (data == NULL) {
return kNullPointerError;
}
if (sample_rate_hz != sample_rate_hz_) {
return kBadSampleRateError;
}
const int num_channels = ChannelsFromLayout(layout);
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
num_output_channels_, num_channels));
if (samples_per_channel != samples_per_channel_) {
return kBadDataLengthError;
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
const size_t channel_size = sizeof(float) * samples_per_channel;
for (int i = 0; i < num_channels; ++i)
msg->set_channel(i, data[i], channel_size);
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
render_audio_->CopyFrom(data, samples_per_channel, num_channels);
return AnalyzeReverseStreamLocked();
}
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
CriticalSectionScoped crit_scoped(crit_);
int err = kNoError;
if (frame == NULL) {
return kNullPointerError;
}
@@ -486,6 +557,9 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
}
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
num_output_channels_, frame->num_channels_));
if (frame->samples_per_channel_ != samples_per_channel_) {
return kBadDataLengthError;
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
@@ -495,15 +569,19 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_data(frame->data_, data_size);
err = WriteMessageToDebugFile();
if (err != kNoError) {
return err;
}
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
render_audio_->DeinterleaveFrom(frame);
return AnalyzeReverseStreamLocked();
}
// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
// primary stream and convert ourselves rather than having the user manage it.
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
if (sample_rate_hz_ == kSampleRate32kHz) {
for (int i = 0; i < num_reverse_channels_; i++) {
// Split into low and high band.
@@ -516,23 +594,11 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
}
}
// TODO(ajm): warnings possible from components?
err = echo_cancellation_->ProcessRenderAudio(render_audio_);
if (err != kNoError) {
return err;
}
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_));
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_));
err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
if (err != kNoError) {
return err;
}
err = gain_control_->ProcessRenderAudio(render_audio_);
if (err != kNoError) {
return err;
}
return err; // TODO(ajm): this is for returning warnings; necessary?
return kNoError;
}
int AudioProcessingImpl::set_stream_delay_ms(int delay) {
@@ -563,6 +629,14 @@ bool AudioProcessingImpl::was_stream_delay_set() const {
return was_stream_delay_set_;
}
void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
key_pressed_ = key_pressed;
}
bool AudioProcessingImpl::stream_key_pressed() const {
return key_pressed_;
}
void AudioProcessingImpl::set_delay_offset_ms(int offset) {
CriticalSectionScoped crit_scoped(crit_);
delay_offset_ms_ = offset;
@@ -572,14 +646,6 @@ int AudioProcessingImpl::delay_offset_ms() const {
return delay_offset_ms_;
}
void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
key_pressed_ = key_pressed;
}
bool AudioProcessingImpl::stream_key_pressed() const {
return key_pressed_;
}
int AudioProcessingImpl::StartDebugRecording(
const char filename[AudioProcessing::kMaxFilenameSize]) {
CriticalSectionScoped crit_scoped(crit_);
@@ -710,7 +776,7 @@ bool AudioProcessingImpl::is_data_processed() const {
return true;
}
bool AudioProcessingImpl::interleave_needed(bool is_data_processed) const {
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
// Check if we've upmixed or downmixed the audio.
return (num_output_channels_ != num_input_channels_ || is_data_processed);
}
@@ -755,7 +821,7 @@ int AudioProcessingImpl::WriteMessageToDebugFile() {
event_msg_->Clear();
return 0;
return kNoError;
}
int AudioProcessingImpl::WriteInitMessage() {