Audio processing: Feed each processing step its choice of int or float data

Each audio processing step is given a pointer to an AudioBuffer, where
it can read and write int data. This patch adds corresponding
AudioBuffer methods to read and write float data; the buffer will
automatically convert the stored data between int and float as
necessary.

This patch also modifies the echo cancellation step to make use of the
new methods (it was already using floats internally; now it doesn't
have to convert from and to ints anymore).

(The reference data for the ApmTest.Process test had to be modified
slightly; this is because the echo canceller no longer unnecessarily
converts float data to int and then immediately back to float for each
iteration in the loop in EchoCancellationImpl::ProcessCaptureAudio.)

BUG=
R=aluebs@webrtc.org, andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/18399005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@6138 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
kwiberg@webrtc.org
2014-05-14 09:01:35 +00:00
parent 3d5cb33da4
commit 934a265a47
9 changed files with 180 additions and 95 deletions

View File

@ -68,6 +68,64 @@ void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
} // namespace
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf() and fbuf() until the next
// call to the other method.
// Pair of ChannelBuffers (one int16_t, one float) that hold the same audio
// data in two formats. Handing out a writable pointer to one format marks
// the other as stale; the stale copy is regenerated from the fresh one the
// next time it is requested. Pointers returned by ibuf()/fbuf() therefore
// remain usable until the other accessor is called.
class IFChannelBuffer {
 public:
  IFChannelBuffer(int samples_per_channel, int num_channels)
      : ivalid_(true),
        ibuf_(samples_per_channel, num_channels),
        fvalid_(true),
        fbuf_(samples_per_channel, num_channels) {}

  // Writable int16 view; invalidates the float copy.
  ChannelBuffer<int16_t>* ibuf() {
    RefreshI();
    fvalid_ = false;
    return &ibuf_;
  }

  // Writable float view; invalidates the int16 copy.
  ChannelBuffer<float>* fbuf() {
    RefreshF();
    ivalid_ = false;
    return &fbuf_;
  }

 private:
  // Regenerate the float copy from the int16 copy if it is stale.
  void RefreshF() {
    if (fvalid_)
      return;
    assert(ivalid_);
    const int16_t* const src = ibuf_.data();
    float* const dst = fbuf_.data();
    const int samples = fbuf_.length();
    for (int i = 0; i < samples; ++i)
      dst[i] = src[i];
    fvalid_ = true;
  }

  // Regenerate the int16 copy from the float copy if it is stale,
  // saturating each sample to the int16 range.
  void RefreshI() {
    if (ivalid_)
      return;
    assert(fvalid_);
    const float* const src = fbuf_.data();
    int16_t* const dst = ibuf_.data();
    const int samples = ibuf_.length();
    for (int i = 0; i < samples; ++i)
      dst[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
                              src[i],
                              std::numeric_limits<int16_t>::min());
    ivalid_ = true;
  }

  bool ivalid_;
  ChannelBuffer<int16_t> ibuf_;
  bool fvalid_;
  ChannelBuffer<float> fbuf_;
};
class SplitChannelBuffer {
public:
SplitChannelBuffer(int samples_per_split_channel, int num_channels)
@ -76,12 +134,14 @@ class SplitChannelBuffer {
}
~SplitChannelBuffer() {}
int16_t* low_channel(int i) { return low_.channel(i); }
int16_t* high_channel(int i) { return high_.channel(i); }
int16_t* low_channel(int i) { return low_.ibuf()->channel(i); }
int16_t* high_channel(int i) { return high_.ibuf()->channel(i); }
float* low_channel_f(int i) { return low_.fbuf()->channel(i); }
float* high_channel_f(int i) { return high_.fbuf()->channel(i); }
private:
ChannelBuffer<int16_t> low_;
ChannelBuffer<int16_t> high_;
IFChannelBuffer low_;
IFChannelBuffer high_;
};
AudioBuffer::AudioBuffer(int input_samples_per_channel,
@ -102,8 +162,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
is_muted_(false),
data_(NULL),
keyboard_data_(NULL),
channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
num_proc_channels_)) {
channels_(new IFChannelBuffer(proc_samples_per_channel_,
num_proc_channels_)) {
assert(input_samples_per_channel_ > 0);
assert(proc_samples_per_channel_ > 0);
assert(output_samples_per_channel_ > 0);
@ -185,7 +245,7 @@ void AudioBuffer::CopyFrom(const float* const* data,
// Convert to int16.
for (int i = 0; i < num_proc_channels_; ++i) {
ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
channels_->channel(i));
channels_->ibuf()->channel(i));
}
}
@ -202,7 +262,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
data_ptr = process_buffer_->channels();
}
for (int i = 0; i < num_proc_channels_; ++i) {
ScaleToFloat(channels_->channel(i), proc_samples_per_channel_, data_ptr[i]);
ScaleToFloat(channels_->ibuf()->channel(i),
proc_samples_per_channel_,
data_ptr[i]);
}
// Resample.
@ -233,7 +295,7 @@ const int16_t* AudioBuffer::data(int channel) const {
return data_;
}
return channels_->channel(channel);
return channels_->ibuf()->channel(channel);
}
int16_t* AudioBuffer::data(int channel) {
@ -241,6 +303,19 @@ int16_t* AudioBuffer::data(int channel) {
return const_cast<int16_t*>(t->data(channel));
}
// Returns a writable float pointer to |channel|'s processing data. The
// underlying IFChannelBuffer converts the stored int16 data to float on
// demand, so callers no longer need to convert manually.
float* AudioBuffer::data_f(int channel) {
  assert(channel >= 0 && channel < num_proc_channels_);
  if (data_ != NULL) {
    // Need to make a copy of the data instead of just pointing to it, since
    // we're about to convert it to float.
    // NOTE(review): data_ is only set in the mono fast path, hence the
    // single-channel assert — confirm against the constructor/DeinterleaveFrom.
    assert(channel == 0 && num_proc_channels_ == 1);
    memcpy(channels_->ibuf()->channel(0), data_,
           sizeof(*data_) * proc_samples_per_channel_);
    data_ = NULL;
  }
  return channels_->fbuf()->channel(channel);
}
const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_proc_channels_);
if (split_channels_.get() == NULL) {
@ -255,6 +330,12 @@ int16_t* AudioBuffer::low_pass_split_data(int channel) {
return const_cast<int16_t*>(t->low_pass_split_data(channel));
}
// Float view of the low band of |channel|. When the signal has not been
// split into bands, falls back to the full-band float data via data_f().
float* AudioBuffer::low_pass_split_data_f(int channel) {
  assert(channel >= 0 && channel < num_proc_channels_);
  return split_channels_.get() ? split_channels_->low_channel_f(channel)
                               : data_f(channel);
}
const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_proc_channels_);
if (split_channels_.get() == NULL) {
@ -269,6 +350,12 @@ int16_t* AudioBuffer::high_pass_split_data(int channel) {
return const_cast<int16_t*>(t->high_pass_split_data(channel));
}
// Float view of the high band of |channel|. Returns NULL when no band split
// has been performed — unlike low_pass_split_data_f(), which falls back to
// the full-band data; presumably there is no meaningful high band in that
// case, but callers must handle the NULL.
float* AudioBuffer::high_pass_split_data_f(int channel) {
  assert(channel >= 0 && channel < num_proc_channels_);
  return split_channels_.get() ? split_channels_->high_channel_f(channel)
                               : NULL;
}
const int16_t* AudioBuffer::mixed_data(int channel) const {
assert(channel >= 0 && channel < num_mixed_channels_);
@ -348,7 +435,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
int16_t* interleaved = frame->data_;
for (int i = 0; i < num_proc_channels_; i++) {
int16_t* deinterleaved = channels_->channel(i);
int16_t* deinterleaved = channels_->ibuf()->channel(i);
int interleaved_idx = i;
for (int j = 0; j < proc_samples_per_channel_; j++) {
deinterleaved[j] = interleaved[interleaved_idx];
@ -368,14 +455,15 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
return;
}
if (num_proc_channels_ == 1) {
if (data_) {
assert(num_proc_channels_ == 1);
assert(data_ == frame->data_);
return;
}
int16_t* interleaved = frame->data_;
for (int i = 0; i < num_proc_channels_; i++) {
int16_t* deinterleaved = channels_->channel(i);
int16_t* deinterleaved = channels_->ibuf()->channel(i);
int interleaved_idx = i;
for (int j = 0; j < proc_samples_per_channel_; j++) {
interleaved[interleaved_idx] = deinterleaved[j];
@ -394,8 +482,8 @@ void AudioBuffer::CopyAndMix(int num_mixed_channels) {
num_mixed_channels));
}
StereoToMono(channels_->channel(0),
channels_->channel(1),
StereoToMono(channels_->ibuf()->channel(0),
channels_->ibuf()->channel(1),
mixed_channels_->channel(0),
proc_samples_per_channel_);