Allow more than 2 input channels in AudioProcessing.

The number of output channels is constrained to be equal to either 1 or the
number of input channels.
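
A minimal sketch of the new constraint, using the StreamConfig and ProcessingConfig
types introduced in this change (|apm| is an assumed AudioProcessing instance, e.g.
obtained from AudioProcessing::Create(); webrtc:: qualifiers and error handling are
omitted):

  ProcessingConfig config;
  config.input_stream() = StreamConfig(48000, 2);   // Stereo capture.
  config.output_stream() = StreamConfig(48000, 1);  // Mono output: allowed.
  config.reverse_stream() = StreamConfig(48000, 2);
  apm->Initialize(config);

  // Requesting more output channels than input channels (e.g. 1 in, 2 out)
  // is expected to be rejected by Initialize().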

R=aluebs@webrtc.org, andrew@webrtc.org, pbos@webrtc.org

Review URL: https://codereview.webrtc.org/1226093007 .

Cr-Commit-Position: refs/heads/master@{#9619}
Michael Graczyk
2015-07-22 21:06:11 -07:00
parent 0b6a204b21
commit c204754b7a
13 changed files with 711 additions and 374 deletions

@@ -29,6 +29,9 @@ class AudioFrame;
template<typename T>
class Beamformer;
class StreamConfig;
class ProcessingConfig;
class EchoCancellation;
class EchoControlMobile;
class GainControl;
@@ -84,7 +87,7 @@ static const int kAgcStartupMinVolume = 0;
#endif // defined(WEBRTC_CHROMIUM_BUILD)
struct ExperimentalAgc {
ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
ExperimentalAgc(bool enabled)
explicit ExperimentalAgc(bool enabled)
: enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
ExperimentalAgc(bool enabled, int startup_min_volume)
: enabled(enabled), startup_min_volume(startup_min_volume) {}
@@ -199,6 +202,7 @@ static const int kAudioProcMaxNativeSampleRateHz = 32000;
//
class AudioProcessing {
public:
// TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
enum ChannelLayout {
kMono,
// Left, right.
@@ -236,10 +240,17 @@ class AudioProcessing {
// The int16 interfaces require:
// - only |NativeRate|s be used
// - that the input, output and reverse rates must match
// - that |output_layout| matches |input_layout|
// - that |processing_config.output_stream()| matches
// |processing_config.input_stream()|.
//
// The float interfaces accept arbitrary rates and support differing input
// and output layouts, but the output may only remove channels, not add.
// The float interfaces accept arbitrary rates and support differing input and
// output layouts, but the output must have either one channel or the same
// number of channels as the input.
virtual int Initialize(const ProcessingConfig& processing_config) = 0;
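As a sketch of the requirements above, a configuration whose output stream mirrors
its input stream at a native rate keeps both the float and the int16 paths usable
(|apm| is an assumed AudioProcessing pointer; kNoError is the class's existing
success code; webrtc:: qualifiers omitted):

  ProcessingConfig processing_config;
  processing_config.input_stream() = StreamConfig(32000, 2);   // Native rate.
  processing_config.output_stream() = StreamConfig(32000, 2);  // Matches input.
  processing_config.reverse_stream() = StreamConfig(32000, 1);
  if (apm->Initialize(processing_config) != AudioProcessing::kNoError) {
    // Handle the initialization error.
  }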
// Initialize with unpacked parameters. See Initialize() above for details.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
@@ -292,8 +303,10 @@ class AudioProcessing {
// |input_layout|. At output, the channels will be arranged according to
// |output_layout| at |output_sample_rate_hz| in |dest|.
//
// The output layout may only remove channels, not add. |src| and |dest|
// may use the same memory, if desired.
// The output layout must have one channel or as many channels as the input.
// |src| and |dest| may use the same memory, if desired.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int ProcessStream(const float* const* src,
int samples_per_channel,
int input_sample_rate_hz,
@@ -302,6 +315,18 @@
ChannelLayout output_layout,
float* const* dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |src| points to a channel buffer, arranged according to |input_stream|. At
// output, the channels will be arranged according to |output_stream| in
// |dest|.
//
// The output must have one channel or as many channels as the input. |src|
// and |dest| may use the same memory, if desired.
virtual int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
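The sketch below shows one possible use of this new overload: one 10 ms chunk of
48 kHz stereo capture audio is processed down to mono. The helper name, the include
path and the buffer setup are assumptions for illustration, not part of this change:

#include <vector>

#include "webrtc/modules/audio_processing/include/audio_processing.h"

// Hypothetical helper: processes one 10 ms stereo capture chunk down to mono.
int ProcessOneCaptureChunk(webrtc::AudioProcessing* apm) {
  const webrtc::StreamConfig input_config(48000, 2);   // Stereo in.
  const webrtc::StreamConfig output_config(48000, 1);  // Mono out.

  // One buffer per channel; num_frames() is 480 samples (10 ms) at 48 kHz.
  // In real code these would hold the deinterleaved capture samples.
  std::vector<float> left(input_config.num_frames());
  std::vector<float> right(input_config.num_frames());
  std::vector<float> mono(output_config.num_frames());

  const float* src[] = {left.data(), right.data()};
  float* dest[] = {mono.data()};
  return apm->ProcessStream(src, input_config, output_config, dest);
}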
// Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
// will not be modified. On the client-side, this is the far-end (or to be
// rendered) audio.
@@ -321,11 +346,18 @@
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of |data| points to a channel buffer, arranged according to |layout|.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
int sample_rate_hz,
ChannelLayout layout) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |data| points to a channel buffer, arranged according to |reverse_config|.
virtual int AnalyzeReverseStream(const float* const* data,
const StreamConfig& reverse_config) = 0;
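A corresponding sketch for the far-end (render) side, reusing the assumed |apm|
from the capture example above (webrtc:: qualifiers omitted):

  const StreamConfig reverse_config(48000, 1);  // Mono render stream.
  std::vector<float> render(reverse_config.num_frames());
  const float* render_channels[] = {render.data()};
  // Expected to return kNoError once |apm| has been initialized.
  int err = apm->AnalyzeReverseStream(render_channels, reverse_config);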
// This must be called if and only if echo processing is enabled.
//
// Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
@@ -432,6 +464,102 @@
static const int kChunkSizeMs = 10;
};
class StreamConfig {
public:
// sample_rate_hz: The sampling rate of the stream.
//
// num_channels: The number of audio channels in the stream, excluding the
// keyboard channel if it is present. When passing a
// StreamConfig with an array of arrays T*[N],
//
// N == {num_channels + 1 if has_keyboard
// {num_channels if !has_keyboard
//
// has_keyboard: True if the stream has a keyboard channel. When has_keyboard
// is true, the last channel in any corresponding list of
// channels is the keyboard channel.
StreamConfig(int sample_rate_hz = 0,
int num_channels = 0,
bool has_keyboard = false)
: sample_rate_hz_(sample_rate_hz),
num_channels_(num_channels),
has_keyboard_(has_keyboard),
num_frames_(calculate_frames(sample_rate_hz)) {}
void set_sample_rate_hz(int value) {
sample_rate_hz_ = value;
num_frames_ = calculate_frames(value);
}
void set_num_channels(int value) { num_channels_ = value; }
void set_has_keyboard(bool value) { has_keyboard_ = value; }
int sample_rate_hz() const { return sample_rate_hz_; }
// The number of channels in the stream, not including the keyboard channel if
// present.
int num_channels() const { return num_channels_; }
bool has_keyboard() const { return has_keyboard_; }
int num_frames() const { return num_frames_; }
bool operator==(const StreamConfig& other) const {
return sample_rate_hz_ == other.sample_rate_hz_ &&
num_channels_ == other.num_channels_ &&
has_keyboard_ == other.has_keyboard_;
}
bool operator!=(const StreamConfig& other) const { return !(*this == other); }
private:
static int calculate_frames(int sample_rate_hz) {
return AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
}
int sample_rate_hz_;
int num_channels_;
bool has_keyboard_;
int num_frames_;
};
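A short sketch of how the keyboard flag affects buffer layout on the client side;
the allocation shown is illustrative and assumes <vector> (webrtc:: qualifiers
omitted):

  // 48 kHz stereo capture with a trailing keyboard channel.
  StreamConfig stream(48000, 2, true /* has_keyboard */);

  // num_channels() == 2 (keyboard excluded) and num_frames() == 480 (10 ms),
  // so a matching T*[N] array needs num_channels() + 1 == 3 channel buffers,
  // with the keyboard channel last.
  std::vector<std::vector<float>> channels(
      stream.num_channels() + 1, std::vector<float>(stream.num_frames()));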
class ProcessingConfig {
public:
enum StreamName {
kInputStream,
kOutputStream,
kReverseStream,
kNumStreamNames,
};
const StreamConfig& input_stream() const {
return streams[StreamName::kInputStream];
}
const StreamConfig& output_stream() const {
return streams[StreamName::kOutputStream];
}
const StreamConfig& reverse_stream() const {
return streams[StreamName::kReverseStream];
}
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
StreamConfig& reverse_stream() { return streams[StreamName::kReverseStream]; }
bool operator==(const ProcessingConfig& other) const {
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
if (this->streams[i] != other.streams[i]) {
return false;
}
}
return true;
}
bool operator!=(const ProcessingConfig& other) const {
return !(*this == other);
}
StreamConfig streams[StreamName::kNumStreamNames];
};
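One plausible client-side use of the equality operators is to reinitialize only
when the configuration actually changes; the helper below is purely illustrative
and not part of the API (it assumes the include used in the earlier sketch):

// Hypothetical helper: re-run Initialize() only when |desired| differs from the
// currently applied configuration.
int MaybeReinitialize(webrtc::AudioProcessing* apm,
                      webrtc::ProcessingConfig* applied,
                      const webrtc::ProcessingConfig& desired) {
  if (desired == *applied)
    return webrtc::AudioProcessing::kNoError;
  *applied = desired;
  return apm->Initialize(desired);
}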
// The acoustic echo cancellation (AEC) component provides better performance
// than AECM but also requires more processing power and is dependent on delay
// stability and reporting accuracy. As such it is well-suited and recommended