ACM: Adding support for more than 2 channels in the send pipeline

This CL adds support in the audio coding module for sending more than
2 channels to the encoder.

Bug: webrtc:11007
Change-Id: I0909b5c37a54c9d2e1353b864e55008cda50ffae
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155583
Reviewed-by: Henrik Andersson <henrika@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29385}
Per Åhgren authored 2019-10-04 11:06:15 +02:00, committed by Commit Bot
parent dc34a25ca4, commit 4f2e9406c9
2 changed files with 179 additions and 44 deletions


@@ -33,6 +33,10 @@ namespace webrtc {
 namespace {
 
+// Initial size for the buffer in InputData. This matches 6 channels of 10 ms
+// 48 kHz data.
+constexpr size_t kInitialInputDataBufferSize = 6 * 480;
+
 class AudioCodingModuleImpl final : public AudioCodingModule {
  public:
   explicit AudioCodingModuleImpl(const AudioCodingModule::Config& config);
@@ -97,15 +101,18 @@ class AudioCodingModuleImpl final : public AudioCodingModule {
  private:
   struct InputData {
+    InputData() : buffer(kInitialInputDataBufferSize) {}
     uint32_t input_timestamp;
     const int16_t* audio;
     size_t length_per_channel;
     size_t audio_channel;
     // If a re-mix is required (up or down), this buffer will store a re-mixed
     // version of the input.
-    int16_t buffer[WEBRTC_10MS_PCM_AUDIO];
+    std::vector<int16_t> buffer;
   };
+
+  InputData input_data_ RTC_GUARDED_BY(acm_crit_sect_);
+
   // This member class writes values to the named UMA histogram, but only if
   // the value has changed since the last time (and always for the first call).
   class ChangeLogger {
@@ -193,7 +200,7 @@ void UpdateCodecTypeHistogram(size_t codec_type) {
 }
 
 // Stereo-to-mono can be used as in-place.
-int DownMix(const AudioFrame& frame,
+void DownMix(const AudioFrame& frame,
             size_t length_out_buff,
             int16_t* out_buff) {
   RTC_DCHECK_EQ(frame.num_channels_, 2);
@@ -210,26 +217,70 @@ int DownMix(const AudioFrame& frame,
   } else {
     std::fill(out_buff, out_buff + frame.samples_per_channel_, 0);
   }
-  return 0;
 }
 
-// Mono-to-stereo can be used as in-place.
-int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) {
-  RTC_DCHECK_EQ(frame.num_channels_, 1);
-  RTC_DCHECK_GE(length_out_buff, 2 * frame.samples_per_channel_);
+// Remixes the input frame to an output data vector. The output vector is
+// resized if needed.
+void ReMix(const AudioFrame& input,
+           size_t num_output_channels,
+           std::vector<int16_t>* output) {
+  const size_t output_size = num_output_channels * input.samples_per_channel_;
+  if (output->size() != output_size) {
+    output->resize(output_size);
+  }
 
-  if (!frame.muted()) {
-    const int16_t* frame_data = frame.data();
-    for (size_t n = frame.samples_per_channel_; n != 0; --n) {
-      size_t i = n - 1;
-      int16_t sample = frame_data[i];
-      out_buff[2 * i + 1] = sample;
-      out_buff[2 * i] = sample;
-    }
-  } else {
-    std::fill(out_buff, out_buff + frame.samples_per_channel_ * 2, 0);
+  // For muted frames, fill the frame with zeros.
+  if (input.muted()) {
+    std::fill(output->begin(), output->end(), 0);
+    return;
+  }
+
+  // Ensure that the special case of zero input channels is handled correctly
+  // (zero samples per channel is already handled correctly in the code below).
+  if (input.num_channels_ == 0) {
+    return;
+  }
+
+  const int16_t* input_data = input.data();
+  size_t in_index = 0;
+  size_t out_index = 0;
+
+  // When upmixing is needed, duplicate the last channel of the input.
+  if (input.num_channels_ < num_output_channels) {
+    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+      for (size_t j = 0; j < input.num_channels_; ++j) {
+        (*output)[out_index++] = input_data[in_index++];
+      }
+      RTC_DCHECK_GT(in_index, 0);
+      const int16_t value_last_channel = input_data[in_index - 1];
+      for (size_t j = input.num_channels_; j < num_output_channels; ++j) {
+        (*output)[out_index++] = value_last_channel;
+      }
+    }
+    return;
+  }
+
+  // When downmixing is needed, and the input is stereo, average the channels.
+  if (input.num_channels_ == 2) {
+    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
+      (*output)[n] =
+          static_cast<int16_t>((static_cast<int32_t>(input_data[2 * n]) +
+                                static_cast<int32_t>(input_data[2 * n + 1])) >>
+                               1);
+    }
+    return;
+  }
+
+  // When downmixing is needed, and the input is multichannel, drop the surplus
+  // channels.
+  const size_t num_channels_to_drop =
+      input.num_channels_ - num_output_channels;
+  for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < num_output_channels; ++j) {
+      (*output)[out_index++] = input_data[in_index++];
+    }
+    in_index += num_channels_to_drop;
   }
-  return 0;
 }
 
 void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
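As a quick illustration of the remix policy in the hunk above (upmixing replicates the last input channel, stereo-to-mono averages with an arithmetic shift, wider downmixes keep the leading channels and drop the surplus), here is a self-contained sketch. RemixSketch and the sample values are illustrative only, not part of this CL; the real ReMix() operates on an AudioFrame and a caller-owned vector.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Standalone restatement of the ReMix() policy, using plain interleaved
// vectors instead of AudioFrame.
std::vector<int16_t> RemixSketch(const std::vector<int16_t>& in,
                                 size_t in_ch,
                                 size_t out_ch) {
  const size_t samples_per_channel = in_ch == 0 ? 0 : in.size() / in_ch;
  std::vector<int16_t> out(out_ch * samples_per_channel);
  for (size_t k = 0; k < samples_per_channel; ++k) {
    if (in_ch == 2 && out_ch == 1) {
      // Stereo-to-mono: average the two channels. The arithmetic shift
      // rounds toward negative infinity for negative sums.
      out[k] = static_cast<int16_t>(
          (static_cast<int32_t>(in[2 * k]) + in[2 * k + 1]) >> 1);
    } else {
      for (size_t j = 0; j < out_ch; ++j) {
        // Upmix replicates the last input channel; downmix keeps the first
        // out_ch channels and drops the surplus.
        out[k * out_ch + j] = in[k * in_ch + (j < in_ch ? j : in_ch - 1)];
      }
    }
  }
  return out;
}

int main() {
  // Mono upmixed to 3 channels: each sample is replicated.
  assert((RemixSketch({10, -20}, 1, 3) ==
          std::vector<int16_t>({10, 10, 10, -20, -20, -20})));
  // Stereo downmixed to mono: (1000 + 2000) >> 1 == 1500.
  assert((RemixSketch({1000, 2000}, 2, 1) == std::vector<int16_t>({1500})));
  // 4 channels downmixed to stereo: surplus channels are dropped.
  assert((RemixSketch({1, 2, 3, 4}, 4, 2) == std::vector<int16_t>({1, 2})));
  return 0;
}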
@@ -367,10 +418,9 @@ int AudioCodingModuleImpl::RegisterTransportCallback(
 
 // Add 10MS of raw (PCM) audio data to the encoder.
 int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
-  InputData input_data;
   rtc::CritScope lock(&acm_crit_sect_);
-  int r = Add10MsDataInternal(audio_frame, &input_data);
-  return r < 0 ? r : Encode(input_data);
+  int r = Add10MsDataInternal(audio_frame, &input_data_);
+  return r < 0 ? r : Encode(input_data_);
 }
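A note on the hunk above: with the old fixed-size int16_t array, a stack-local InputData was cheap, but the buffer is now a std::vector, so a local would presumably heap-allocate on every 10 ms call (my reading of the motivation, not stated in the CL). Keeping a single InputData as a member, guarded by acm_crit_sect_ per the declaration earlier, allocates the buffer once and grows it only when a larger frame arrives. A generic sketch of that pattern, with hypothetical names (Sender, Process, scratch_):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <vector>

// Hypothetical illustration (not WebRTC code): a scratch buffer allocated
// once, reused across calls, and grown on demand under the same lock that
// already serializes the calls.
class Sender {
 public:
  void Process(const int16_t* samples, size_t count) {
    std::lock_guard<std::mutex> lock(mutex_);
    if (scratch_.size() < count) {
      scratch_.resize(count);  // Grows only when a larger frame arrives.
    }
    std::copy(samples, samples + count, scratch_.begin());
    // ... remix/encode from scratch_.data() ...
  }

 private:
  std::mutex mutex_;
  // Initial capacity matches 6 channels of 10 ms at 48 kHz, like
  // kInitialInputDataBufferSize in the CL.
  std::vector<int16_t> scratch_ = std::vector<int16_t>(6 * 480);
};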
@@ -421,30 +471,26 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
   const bool same_num_channels =
       ptr_frame->num_channels_ == current_num_channels;
 
-  if (!same_num_channels) {
-    if (ptr_frame->num_channels_ == 1) {
-      if (UpMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
-        return -1;
-    } else {
-      if (DownMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
-        return -1;
-    }
-  }
-
-  // When adding data to encoders this pointer is pointing to an audio buffer
-  // with correct number of channels.
-  const int16_t* ptr_audio = ptr_frame->data();
-
-  // For pushing data to primary, point the |ptr_audio| to correct buffer.
-  if (!same_num_channels)
-    ptr_audio = input_data->buffer;
-
   // TODO(yujo): Skip encode of muted frames.
   input_data->input_timestamp = ptr_frame->timestamp_;
-  input_data->audio = ptr_audio;
   input_data->length_per_channel = ptr_frame->samples_per_channel_;
   input_data->audio_channel = current_num_channels;
 
+  if (!same_num_channels) {
+    // Remix the input frame to the output data, resizing the output data if
+    // needed.
+    ReMix(*ptr_frame, current_num_channels, &input_data->buffer);
+
+    // For pushing data to the primary encoder, point |input_data->audio| to
+    // the remixed buffer.
+    input_data->audio = input_data->buffer.data();
+    RTC_DCHECK_GE(input_data->buffer.size(),
+                  input_data->length_per_channel * input_data->audio_channel);
+  } else {
+    // When adding data to encoders, this pointer points to an audio buffer
+    // with the correct number of channels.
+    input_data->audio = ptr_frame->data();
+  }
+
   return 0;
 }
@@ -508,8 +554,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
     // local buffer, otherwise, it will be written to the output frame.
     int16_t* dest_ptr_audio =
         resample ? audio : preprocess_frame_.mutable_data();
-    if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0)
-      return -1;
+    DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio);
     preprocess_frame_.num_channels_ = 1;
     // Set the input of the resampler to the down-mixed signal.
     src_ptr_audio = audio;


@@ -1634,6 +1634,96 @@ TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_50kbps) {
   RunInner(40000, 60000);
 }
 
+// Verify that it works when the data to send is mono and the encoder is set to
+// send surround audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForMonoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+  audio_format_ = SdpAudioFormat({"multiopus",
+                                  kSampleRateHz,
+                                  6,
+                                  {{"minptime", "10"},
+                                   {"useinbandfec", "1"},
+                                   {"channel_mapping", "0,4,1,2,3,5"},
+                                   {"num_streams", "4"},
+                                   {"coupled_streams", "2"}}});
+  RegisterCodec();
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 1;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// Verify that it works when the data to send is stereo and the encoder is set
+// to send surround audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForStereoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+  audio_format_ = SdpAudioFormat({"multiopus",
+                                  kSampleRateHz,
+                                  6,
+                                  {{"minptime", "10"},
+                                   {"useinbandfec", "1"},
+                                   {"channel_mapping", "0,4,1,2,3,5"},
+                                   {"num_streams", "4"},
+                                   {"coupled_streams", "2"}}});
+  RegisterCodec();
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 2;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// Verify that it works when the data to send is mono and the encoder is set to
+// send stereo audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingStereoForMonoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+  audio_format_ = SdpAudioFormat("opus", kSampleRateHz, 2);
+  RegisterCodec();
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 1;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// Verify that it works when the data to send is stereo and the encoder is set
+// to send mono audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMonoForStereoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+  audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 1);
+  RegisterCodec();
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 2;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
 // The result on the Android platforms is inconsistent for this test case.
 // On android_rel the result is different from android and android arm64 rel.
 #if defined(WEBRTC_ANDROID)
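For reference on the multiopus parameters used in the new tests: in the Opus multistream layout, a coupled stream carries two channels and an uncoupled stream carries one, so num_streams=4 with coupled_streams=2 yields exactly the six channels the tests request. A one-line sanity check of that arithmetic:

// Channel count implied by the tests' multiopus parameters:
// 2 channels per coupled stream + 1 per uncoupled stream.
constexpr int kNumStreams = 4;
constexpr int kCoupledStreams = 2;
static_assert(2 * kCoupledStreams + (kNumStreams - kCoupledStreams) == 6,
              "4 streams with 2 coupled pairs carry 6 channels");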