Add a deinterleaved float interface to AudioProcessing.

This is mainly to support the native audio format in Chrome. For now,
this implementation just moves the float->int conversion under the
hood, but we will transition AudioProcessing towards supporting this
format throughout.

- Add a test that verifies we get identical output from the float and
int interfaces (a sketch of the idea follows this list).
- The float and int wrappers are tasked only with conversion to and
from the AudioBuffer format; the shared ProcessStreamLocked() and
AnalyzeReverseStreamLocked() methods do most of the work (see the
usage sketch below).
- Add a new field to the debug.proto to hold deinterleaved data.
- Add helpers to audio_util.cc, and start using numeric_limits (see the
conversion sketch below).
- Note that there was no performance difference between numeric_limits
and a literal value when measured on Linux using gcc or clang.
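
For the audio_util.cc helpers, here is a minimal sketch of what
numeric_limits-based float<->int16 scaling can look like. The names
FloatToS16 and S16ToFloat are placeholders for illustration, not
necessarily the exact functions this change adds:

  #include <stdint.h>

  #include <algorithm>
  #include <limits>

  // Scale a float sample in [-1, 1] to int16, clamping out-of-range
  // input. Scaling by -min() (32768) maps -1.0 exactly onto the most
  // negative int16 value; +1.0 saturates to max().
  static inline int16_t FloatToS16(float v) {
    static const float kScaling = -std::numeric_limits<int16_t>::min();
    v = std::max(-1.f, std::min(v, 1.f)) * kScaling;
    if (v >= std::numeric_limits<int16_t>::max())
      return std::numeric_limits<int16_t>::max();
    // Round to nearest; the cast truncates toward zero.
    return static_cast<int16_t>(v >= 0 ? v + 0.5f : v - 0.5f);
  }

  // The inverse mapping back to a nominal [-1, 1] float.
  static inline float S16ToFloat(int16_t v) {
    static const float kScaling =
        1.f / -std::numeric_limits<int16_t>::min();
    return v * kScaling;
  }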
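
The new deinterleaved float interface then gets called roughly as
follows. Everything here (the apm pointer, 32 kHz stereo, zero stream
delay) is an assumption for illustration; chunks are 10 ms per
kChunkSizeMs:

  #include <vector>

  #include "webrtc/modules/audio_processing/include/audio_processing.h"

  void ProcessOneChunk(webrtc::AudioProcessing* apm) {
    const int kRateHz = 32000;
    const int kSamples = kRateHz / 100;  // 10 ms chunk.

    // Deinterleaved stereo: one contiguous float array per channel.
    std::vector<float> near_l(kSamples), near_r(kSamples);
    std::vector<float> far_l(kSamples), far_r(kSamples);
    float* near_ch[] = {&near_l[0], &near_r[0]};
    float* far_ch[] = {&far_l[0], &far_r[0]};

    // Far-end (render) audio is only analyzed; near-end (capture)
    // audio is processed in place.
    apm->AnalyzeReverseStream(far_ch, kSamples, kRateHz,
                              webrtc::AudioProcessing::kStereo);
    apm->set_stream_delay_ms(0);  // Assumed delay; required by AEC.
    apm->ProcessStream(near_ch, kSamples, kRateHz,
                       webrtc::AudioProcessing::kStereo,
                       webrtc::AudioProcessing::kStereo);
  }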
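
And the identical-output test reduces to this idea (a sketch with
assumed names building on the placeholders above, not the actual
unittest): push the same chunk through both interfaces and require
bit-exact int16 results.

  #include <assert.h>
  #include <string.h>

  #include <vector>

  #include "webrtc/modules/audio_processing/include/audio_processing.h"
  #include "webrtc/modules/interface/module_common_types.h"

  // int_apm and float_apm must be identically configured instances.
  void ExpectIdenticalOutput(webrtc::AudioProcessing* int_apm,
                             webrtc::AudioProcessing* float_apm,
                             const int16_t* interleaved,  // stereo, 10 ms
                             int samples_per_channel) {
    // Int path: process an interleaved AudioFrame.
    webrtc::AudioFrame frame;
    frame.sample_rate_hz_ = 32000;
    frame.num_channels_ = 2;
    frame.samples_per_channel_ = samples_per_channel;
    memcpy(frame.data_, interleaved,
           sizeof(int16_t) * 2 * samples_per_channel);
    int_apm->set_stream_delay_ms(0);
    int_apm->ProcessStream(&frame);

    // Float path: deinterleave and convert, then process.
    std::vector<float> left(samples_per_channel);
    std::vector<float> right(samples_per_channel);
    for (int i = 0; i < samples_per_channel; ++i) {
      left[i] = S16ToFloat(interleaved[2 * i]);
      right[i] = S16ToFloat(interleaved[2 * i + 1]);
    }
    float* channels[] = {&left[0], &right[0]};
    float_apm->set_stream_delay_ms(0);
    float_apm->ProcessStream(channels, samples_per_channel, 32000,
                             webrtc::AudioProcessing::kStereo,
                             webrtc::AudioProcessing::kStereo);

    // Converting the float output back to int16 should agree exactly.
    for (int i = 0; i < samples_per_channel; ++i) {
      assert(frame.data_[2 * i] == FloatToS16(left[i]));
      assert(frame.data_[2 * i + 1] == FloatToS16(right[i]));
    }
  }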

BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org, henrikg@webrtc.org, tommi@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/9179004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5641 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: andrew@webrtc.org
Date: 2014-03-04 20:58:13 +00:00
Parent: b90991dade
Commit: 17e40641b3
12 changed files with 660 additions and 250 deletions

webrtc/modules/audio_processing/audio_processing_impl.cc

@@ -12,6 +12,7 @@
#include <assert.h>
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
@@ -37,8 +38,6 @@
#endif
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
static const int kChunkSizeMs = 10;
#define RETURN_ON_ERR(expr) \
do { \
int err = expr; \
@@ -48,6 +47,24 @@ static const int kChunkSizeMs = 10;
} while (0)
namespace webrtc {
namespace {
const int kChunkSizeMs = 10;
int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
case AudioProcessing::kMonoAndKeyboard:
return 1;
case AudioProcessing::kStereo:
case AudioProcessing::kStereoAndKeyboard:
return 2;
}
assert(false);
return -1;
}
} // namespace
// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
@@ -299,6 +316,8 @@ bool AudioProcessingImpl::output_will_be_muted() const {
return output_will_be_muted_;
}
// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values.
int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
int num_input_channels, int num_output_channels, int num_reverse_channels) {
if (sample_rate_hz == sample_rate_hz_ &&
@@ -342,15 +361,62 @@ int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
return InitializeLocked();
}
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
int AudioProcessingImpl::ProcessStream(float* const* data,
int samples_per_channel,
int sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout) {
CriticalSectionScoped crit_scoped(crit_);
int err = kNoError;
if (frame == NULL) {
if (!data) {
return kNullPointerError;
}
const int num_input_channels = ChannelsFromLayout(input_layout);
// TODO(ajm): We now always set the output channels equal to the input
// channels here. Remove the ability to downmix entirely.
// channels here. Restore the ability to downmix.
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz,
num_input_channels, num_input_channels, num_reverse_channels_));
if (samples_per_channel != samples_per_channel_) {
return kBadDataLengthError;
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
event_msg_->set_type(audioproc::Event::STREAM);
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t channel_size = sizeof(float) * samples_per_channel;
for (int i = 0; i < num_input_channels; ++i)
msg->set_input_channel(i, data[i], channel_size);
}
#endif
capture_audio_->CopyFrom(data, samples_per_channel, num_output_channels_);
RETURN_ON_ERR(ProcessStreamLocked());
if (output_copy_needed(is_data_processed())) {
capture_audio_->CopyTo(samples_per_channel, num_output_channels_, data);
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t channel_size = sizeof(float) * samples_per_channel;
for (int i = 0; i < num_output_channels_; ++i)
msg->set_output_channel(i, data[i], channel_size);
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
return kNoError;
}
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
CriticalSectionScoped crit_scoped(crit_);
if (!frame) {
return kNullPointerError;
}
// TODO(ajm): We now always set the output channels equal to the input
// channels here. Restore the ability to downmix.
RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_,
frame->num_channels_, frame->num_channels_, num_reverse_channels_));
if (frame->samples_per_channel_ != samples_per_channel_) {
@@ -365,6 +431,36 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_input_data(frame->data_, data_size);
}
#endif
capture_audio_->DeinterleaveFrom(frame);
if (num_output_channels_ < num_input_channels_) {
capture_audio_->Mix(num_output_channels_);
frame->num_channels_ = num_output_channels_;
}
RETURN_ON_ERR(ProcessStreamLocked());
capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed()));
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t data_size = sizeof(int16_t) *
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_output_data(frame->data_, data_size);
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
return kNoError;
}
int AudioProcessingImpl::ProcessStreamLocked() {
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
msg->set_delay(stream_delay_ms_);
msg->set_drift(echo_cancellation_->stream_drift_samples());
msg->set_level(gain_control_->stream_analog_level());
@@ -372,14 +468,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
#endif
capture_audio_->DeinterleaveFrom(frame);
// TODO(ajm): experiment with mixing and AEC placement.
if (num_output_channels_ < num_input_channels_) {
capture_audio_->Mix(num_output_channels_);
frame->num_channels_ = num_output_channels_;
}
bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
@@ -393,45 +481,18 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
}
err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_));
RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_));
if (echo_control_mobile_->is_enabled() &&
noise_suppression_->is_enabled()) {
capture_audio_->CopyLowPassToReference();
}
err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = voice_detection_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
err = gain_control_->ProcessCaptureAudio(capture_audio_);
if (err != kNoError) {
return err;
}
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_));
if (synthesis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
@@ -446,38 +507,48 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
}
// The level estimator operates on the recombined data.
err = level_estimator_->ProcessStream(capture_audio_);
if (err != kNoError) {
return err;
}
capture_audio_->InterleaveTo(frame, interleave_needed(data_processed));
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t data_size = sizeof(int16_t) *
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_output_data(frame->data_, data_size);
err = WriteMessageToDebugFile();
if (err != kNoError) {
return err;
}
}
#endif
RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_));
was_stream_delay_set_ = false;
return kNoError;
}
// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
// primary stream and convert ourselves rather than having the user manage it.
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
int sample_rate_hz,
ChannelLayout layout) {
CriticalSectionScoped crit_scoped(crit_);
if (data == NULL) {
return kNullPointerError;
}
if (sample_rate_hz != sample_rate_hz_) {
return kBadSampleRateError;
}
const int num_channels = ChannelsFromLayout(layout);
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
num_output_channels_, num_channels));
if (samples_per_channel != samples_per_channel_) {
return kBadDataLengthError;
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
const size_t channel_size = sizeof(float) * samples_per_channel;
for (int i = 0; i < num_channels; ++i)
msg->set_channel(i, data[i], channel_size);
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
render_audio_->CopyFrom(data, samples_per_channel, num_channels);
return AnalyzeReverseStreamLocked();
}
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
CriticalSectionScoped crit_scoped(crit_);
int err = kNoError;
if (frame == NULL) {
return kNullPointerError;
}
@@ -486,6 +557,9 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
}
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
num_output_channels_, frame->num_channels_));
if (frame->samples_per_channel_ != samples_per_channel_) {
return kBadDataLengthError;
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
@@ -495,15 +569,19 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
frame->samples_per_channel_ *
frame->num_channels_;
msg->set_data(frame->data_, data_size);
err = WriteMessageToDebugFile();
if (err != kNoError) {
return err;
}
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
render_audio_->DeinterleaveFrom(frame);
return AnalyzeReverseStreamLocked();
}
// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
// primary stream and convert ourselves rather than having the user manage it.
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
if (sample_rate_hz_ == kSampleRate32kHz) {
for (int i = 0; i < num_reverse_channels_; i++) {
// Split into low and high band.
@@ -516,23 +594,11 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
}
}
// TODO(ajm): warnings possible from components?
err = echo_cancellation_->ProcessRenderAudio(render_audio_);
if (err != kNoError) {
return err;
}
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_));
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_));
err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
if (err != kNoError) {
return err;
}
err = gain_control_->ProcessRenderAudio(render_audio_);
if (err != kNoError) {
return err;
}
return err; // TODO(ajm): this is for returning warnings; necessary?
return kNoError;
}
int AudioProcessingImpl::set_stream_delay_ms(int delay) {
@@ -563,6 +629,14 @@ bool AudioProcessingImpl::was_stream_delay_set() const {
return was_stream_delay_set_;
}
void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
key_pressed_ = key_pressed;
}
bool AudioProcessingImpl::stream_key_pressed() const {
return key_pressed_;
}
void AudioProcessingImpl::set_delay_offset_ms(int offset) {
CriticalSectionScoped crit_scoped(crit_);
delay_offset_ms_ = offset;
@@ -572,14 +646,6 @@ int AudioProcessingImpl::delay_offset_ms() const {
return delay_offset_ms_;
}
void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
key_pressed_ = key_pressed;
}
bool AudioProcessingImpl::stream_key_pressed() const {
return key_pressed_;
}
int AudioProcessingImpl::StartDebugRecording(
const char filename[AudioProcessing::kMaxFilenameSize]) {
CriticalSectionScoped crit_scoped(crit_);
@@ -710,7 +776,7 @@ bool AudioProcessingImpl::is_data_processed() const {
return true;
}
bool AudioProcessingImpl::interleave_needed(bool is_data_processed) const {
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
// Check if we've upmixed or downmixed the audio.
return (num_output_channels_ != num_input_channels_ || is_data_processed);
}
@@ -755,7 +821,7 @@ int AudioProcessingImpl::WriteMessageToDebugFile() {
event_msg_->Clear();
return 0;
return kNoError;
}
int AudioProcessingImpl::WriteInitMessage() {