Add a deinterleaved float interface to AudioProcessing.
This is mainly to support the native audio format in Chrome. Although this
implementation just moves the float->int conversion under the hood, we will
transition AudioProcessing towards supporting this format throughout.

- Add a test which verifies we get identical output with the float and int
  interfaces.
- The float and int wrappers are tasked with conversion to the AudioBuffer
  format. A new shared Process/Analyze method does most of the work.
- Add a new field to the debug.proto to hold deinterleaved data.
- Add helpers to audio_utils.cc, and start using numeric_limits.
- Note that there was no performance difference between numeric_limits and a
  literal value when measured on Linux using gcc or clang.

BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org, henrikg@webrtc.org, tommi@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/9179004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5641 4adac7df-926f-26a2-2b94-8c16560cd09d
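For context, a minimal sketch of how a caller might drive the new deinterleaved float interface. Only the ProcessStream/AnalyzeReverseStream signatures and error codes come from this diff; the AudioProcessing::Create() factory, the ProcessOneChunk name, and the stereo layout choice are illustrative assumptions. Both calls expect exactly one 10 ms chunk at the configured sample rate; otherwise they return kBadDataLengthError or kBadSampleRateError, as the implementation below shows.

// Sketch only. Signatures come from this diff; everything else (factory,
// function name, stereo layouts) is illustrative.
#include "webrtc/modules/audio_processing/include/audio_processing.h"

int ProcessOneChunk(webrtc::AudioProcessing* apm,
                    float* const* capture,       // deinterleaved near-end, one pointer per channel
                    const float* const* render,  // deinterleaved far-end
                    int samples_per_channel,     // exactly 10 ms worth of samples
                    int sample_rate_hz) {
  // The far-end (reverse) stream is only analyzed, never modified.
  int err = apm->AnalyzeReverseStream(render, samples_per_channel, sample_rate_hz,
                                      webrtc::AudioProcessing::kStereo);
  if (err != webrtc::AudioProcessing::kNoError)
    return err;

  // The near-end (capture) stream is processed in place; float->int
  // conversion to the internal AudioBuffer format happens under the hood.
  return apm->ProcessStream(capture, samples_per_channel, sample_rate_hz,
                            webrtc::AudioProcessing::kStereo,   // input layout
                            webrtc::AudioProcessing::kStereo);  // output layout
}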
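The conversion helpers mentioned above live in common_audio and are not part of this file's diff. As a rough illustration of the clamped float-to-int16 conversion and the numeric_limits usage the message refers to (the helper name, scaling convention, and rounding below are assumptions):

// Illustrative only; the real helpers may differ in name, scaling
// convention, and rounding.
#include <cmath>
#include <limits>

#include "webrtc/typedefs.h"  // int16_t (assumed include)

inline int16_t ClampedFloatToS16(float v) {
  // Saturate to the int16 range; numeric_limits keeps the bounds out of the
  // code as literals. As the commit notes, this costs nothing at runtime,
  // since the bounds fold to compile-time constants either way.
  if (v >= std::numeric_limits<int16_t>::max())
    return std::numeric_limits<int16_t>::max();
  if (v <= std::numeric_limits<int16_t>::min())
    return std::numeric_limits<int16_t>::min();
  return static_cast<int16_t>(std::floor(v + 0.5f));
}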
@@ -12,6 +12,7 @@

#include <assert.h>

#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
@@ -37,8 +38,6 @@
#endif
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

static const int kChunkSizeMs = 10;

#define RETURN_ON_ERR(expr) \
  do { \
    int err = expr; \
@@ -48,6 +47,24 @@ static const int kChunkSizeMs = 10;
  } while (0)

namespace webrtc {
namespace {

const int kChunkSizeMs = 10;

int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
    case AudioProcessing::kMonoAndKeyboard:
      return 1;
    case AudioProcessing::kStereo:
    case AudioProcessing::kStereoAndKeyboard:
      return 2;
  }
  assert(false);
  return -1;
}

}  // namespace

// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
@@ -299,6 +316,8 @@ bool AudioProcessingImpl::output_will_be_muted() const {
  return output_will_be_muted_;
}

// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values.
int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
    int num_input_channels, int num_output_channels, int num_reverse_channels) {
  if (sample_rate_hz == sample_rate_hz_ &&
@@ -342,15 +361,62 @@ int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
  return InitializeLocked();
}

int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
int AudioProcessingImpl::ProcessStream(float* const* data,
                                       int samples_per_channel,
                                       int sample_rate_hz,
                                       ChannelLayout input_layout,
                                       ChannelLayout output_layout) {
  CriticalSectionScoped crit_scoped(crit_);
  int err = kNoError;

  if (frame == NULL) {
  if (!data) {
    return kNullPointerError;
  }

  const int num_input_channels = ChannelsFromLayout(input_layout);
  // TODO(ajm): We now always set the output channels equal to the input
  // channels here. Remove the ability to downmix entirely.
  // channels here. Restore the ability to downmix.
  RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz,
      num_input_channels, num_input_channels, num_reverse_channels_));
  if (samples_per_channel != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t channel_size = sizeof(float) * samples_per_channel;
    for (int i = 0; i < num_input_channels; ++i)
      msg->set_input_channel(i, data[i], channel_size);
  }
#endif

  capture_audio_->CopyFrom(data, samples_per_channel, num_output_channels_);
  RETURN_ON_ERR(ProcessStreamLocked());
  if (output_copy_needed(is_data_processed())) {
    capture_audio_->CopyTo(samples_per_channel, num_output_channels_, data);
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t channel_size = sizeof(float) * samples_per_channel;
    for (int i = 0; i < num_output_channels_; ++i)
      msg->set_output_channel(i, data[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile());
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(crit_);
  if (!frame) {
    return kNullPointerError;
  }

  // TODO(ajm): We now always set the output channels equal to the input
  // channels here. Restore the ability to downmix.
  RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_,
      frame->num_channels_, frame->num_channels_, num_reverse_channels_));
  if (frame->samples_per_channel_ != samples_per_channel_) {
@@ -365,6 +431,36 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
                             frame->samples_per_channel_ *
                             frame->num_channels_;
    msg->set_input_data(frame->data_, data_size);
  }
#endif

  capture_audio_->DeinterleaveFrom(frame);
  if (num_output_channels_ < num_input_channels_) {
    capture_audio_->Mix(num_output_channels_);
    frame->num_channels_ = num_output_channels_;
  }
  RETURN_ON_ERR(ProcessStreamLocked());
  capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed()));

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->samples_per_channel_ *
                             frame->num_channels_;
    msg->set_output_data(frame->data_, data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile());
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::ProcessStreamLocked() {
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    msg->set_delay(stream_delay_ms_);
    msg->set_drift(echo_cancellation_->stream_drift_samples());
    msg->set_level(gain_control_->stream_analog_level());
@@ -372,14 +468,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  }
#endif

  capture_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): experiment with mixing and AEC placement.
  if (num_output_channels_ < num_input_channels_) {
    capture_audio_->Mix(num_output_channels_);
    frame->num_channels_ = num_output_channels_;
  }

  bool data_processed = is_data_processed();
  if (analysis_needed(data_processed)) {
    for (int i = 0; i < num_output_channels_; i++) {
@@ -393,45 +481,18 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
    }
  }

  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }
  RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_));
  RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_));
  RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_));

  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }

  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }
  RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_));
  RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(capture_audio_));
  RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_));
  RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_));

  if (synthesis_needed(data_processed)) {
    for (int i = 0; i < num_output_channels_; i++) {
@@ -446,38 +507,48 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  }

  // The level estimator operates on the recombined data.
  err = level_estimator_->ProcessStream(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  capture_audio_->InterleaveTo(frame, interleave_needed(data_processed));

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->samples_per_channel_ *
                             frame->num_channels_;
    msg->set_output_data(frame->data_, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif
  RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_));

  was_stream_delay_set_ = false;
  return kNoError;
}

// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
// primary stream and convert ourselves rather than having the user manage it.
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
                                              int samples_per_channel,
                                              int sample_rate_hz,
                                              ChannelLayout layout) {
  CriticalSectionScoped crit_scoped(crit_);
  if (data == NULL) {
    return kNullPointerError;
  }
  if (sample_rate_hz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  const int num_channels = ChannelsFromLayout(layout);
  RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
      num_output_channels_, num_channels));
  if (samples_per_channel != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
    const size_t channel_size = sizeof(float) * samples_per_channel;
    for (int i = 0; i < num_channels; ++i)
      msg->set_channel(i, data[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile());
  }
#endif

  render_audio_->CopyFrom(data, samples_per_channel, num_channels);
  return AnalyzeReverseStreamLocked();
}

int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(crit_);
  int err = kNoError;
  if (frame == NULL) {
    return kNullPointerError;
  }
@@ -486,6 +557,9 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  }
  RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
      num_output_channels_, frame->num_channels_));
  if (frame->samples_per_channel_ != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
@@ -495,15 +569,19 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
                             frame->samples_per_channel_ *
                             frame->num_channels_;
    msg->set_data(frame->data_, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
    RETURN_ON_ERR(WriteMessageToDebugFile());
  }
#endif

  render_audio_->DeinterleaveFrom(frame);
  return AnalyzeReverseStreamLocked();
}

// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
// primary stream and convert ourselves rather than having the user manage it.
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
  if (sample_rate_hz_ == kSampleRate32kHz) {
    for (int i = 0; i < num_reverse_channels_; i++) {
      // Split into low and high band.
@@ -516,23 +594,11 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
    }
  }

  // TODO(ajm): warnings possible from components?
  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }
  RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_));
  RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_));
  RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_));

  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  return err;  // TODO(ajm): this is for returning warnings; necessary?
  return kNoError;
}

int AudioProcessingImpl::set_stream_delay_ms(int delay) {
@@ -563,6 +629,14 @@ bool AudioProcessingImpl::was_stream_delay_set() const {
  return was_stream_delay_set_;
}

void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
  key_pressed_ = key_pressed;
}

bool AudioProcessingImpl::stream_key_pressed() const {
  return key_pressed_;
}

void AudioProcessingImpl::set_delay_offset_ms(int offset) {
  CriticalSectionScoped crit_scoped(crit_);
  delay_offset_ms_ = offset;
@@ -572,14 +646,6 @@ int AudioProcessingImpl::delay_offset_ms() const {
  return delay_offset_ms_;
}

void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
  key_pressed_ = key_pressed;
}

bool AudioProcessingImpl::stream_key_pressed() const {
  return key_pressed_;
}

int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  CriticalSectionScoped crit_scoped(crit_);
@@ -710,7 +776,7 @@ bool AudioProcessingImpl::is_data_processed() const {
  return true;
}

bool AudioProcessingImpl::interleave_needed(bool is_data_processed) const {
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
  // Check if we've upmixed or downmixed the audio.
  return (num_output_channels_ != num_input_channels_ || is_data_processed);
}
@@ -755,7 +821,7 @@ int AudioProcessingImpl::WriteMessageToDebugFile() {

  event_msg_->Clear();

  return 0;
  return kNoError;
}

int AudioProcessingImpl::WriteInitMessage() {
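The diff above covers only audio_processing_impl.cc; the commit also adds a test that the float and int interfaces produce identical output. A very rough sketch of such a check follows; the fixture, include paths, scaling convention (float samples kept in the int16 range), and tolerance are assumptions, not taken from this change.

// Sketch of a float-vs-int equivalence check; details are assumptions.
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/interface/module_common_types.h"  // AudioFrame

TEST(ApmFloatInterface, MatchesIntInterface) {
  const int kSampleRateHz = 16000;
  const int kSamples = kSampleRateHz / 100;  // one 10 ms chunk

  webrtc::AudioProcessing* apm_int = webrtc::AudioProcessing::Create(0);
  webrtc::AudioProcessing* apm_float = webrtc::AudioProcessing::Create(1);

  // Identical mono input for both paths, kept within the int16 range.
  webrtc::AudioFrame frame;
  frame.sample_rate_hz_ = kSampleRateHz;
  frame.samples_per_channel_ = kSamples;
  frame.num_channels_ = 1;
  float float_data[kSamples];
  float* channels[] = {float_data};
  for (int i = 0; i < kSamples; ++i) {
    frame.data_[i] = static_cast<int16_t>((i * 7) % 1000 - 500);
    float_data[i] = frame.data_[i];
  }

  EXPECT_EQ(webrtc::AudioProcessing::kNoError, apm_int->ProcessStream(&frame));
  EXPECT_EQ(webrtc::AudioProcessing::kNoError,
            apm_float->ProcessStream(channels, kSamples, kSampleRateHz,
                                     webrtc::AudioProcessing::kMono,
                                     webrtc::AudioProcessing::kMono));

  // With the float->int conversion under the hood, both paths should agree
  // to within rounding.
  for (int i = 0; i < kSamples; ++i) {
    EXPECT_NEAR(frame.data_[i], float_data[i], 1.0f);
  }

  delete apm_int;
  delete apm_float;
}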