ACM: Adding support for more than 2 channels in the send pipeline
This CL adds support in the audio coding module for sending more than 2 channels to the encoder. Bug: webrtc:11007 Change-Id: I0909b5c37a54c9d2e1353b864e55008cda50ffae Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155583 Reviewed-by: Henrik Andreassson <henrika@webrtc.org> Reviewed-by: Alex Loiko <aleloi@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#29385}
This commit is contained in:
@ -33,6 +33,10 @@ namespace webrtc {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
// Initial size for the buffer in InputBuffer. This matches 6 channels of 10 ms
|
||||||
|
// 48 kHz data.
|
||||||
|
constexpr size_t kInitialInputDataBufferSize = 6 * 480;
|
||||||
|
|
||||||
class AudioCodingModuleImpl final : public AudioCodingModule {
|
class AudioCodingModuleImpl final : public AudioCodingModule {
|
||||||
public:
|
public:
|
||||||
explicit AudioCodingModuleImpl(const AudioCodingModule::Config& config);
|
explicit AudioCodingModuleImpl(const AudioCodingModule::Config& config);
|
||||||
@ -97,15 +101,18 @@ class AudioCodingModuleImpl final : public AudioCodingModule {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
struct InputData {
|
struct InputData {
|
||||||
|
InputData() : buffer(kInitialInputDataBufferSize) {}
|
||||||
uint32_t input_timestamp;
|
uint32_t input_timestamp;
|
||||||
const int16_t* audio;
|
const int16_t* audio;
|
||||||
size_t length_per_channel;
|
size_t length_per_channel;
|
||||||
size_t audio_channel;
|
size_t audio_channel;
|
||||||
// If a re-mix is required (up or down), this buffer will store a re-mixed
|
// If a re-mix is required (up or down), this buffer will store a re-mixed
|
||||||
// version of the input.
|
// version of the input.
|
||||||
int16_t buffer[WEBRTC_10MS_PCM_AUDIO];
|
std::vector<int16_t> buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
InputData input_data_ RTC_GUARDED_BY(acm_crit_sect_);
|
||||||
|
|
||||||
// This member class writes values to the named UMA histogram, but only if
|
// This member class writes values to the named UMA histogram, but only if
|
||||||
// the value has changed since the last time (and always for the first call).
|
// the value has changed since the last time (and always for the first call).
|
||||||
class ChangeLogger {
|
class ChangeLogger {
|
||||||
@ -193,7 +200,7 @@ void UpdateCodecTypeHistogram(size_t codec_type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Stereo-to-mono can be used as in-place.
|
// Stereo-to-mono can be used as in-place.
|
||||||
int DownMix(const AudioFrame& frame,
|
void DownMix(const AudioFrame& frame,
|
||||||
size_t length_out_buff,
|
size_t length_out_buff,
|
||||||
int16_t* out_buff) {
|
int16_t* out_buff) {
|
||||||
RTC_DCHECK_EQ(frame.num_channels_, 2);
|
RTC_DCHECK_EQ(frame.num_channels_, 2);
|
||||||
@ -210,26 +217,70 @@ int DownMix(const AudioFrame& frame,
|
|||||||
} else {
|
} else {
|
||||||
std::fill(out_buff, out_buff + frame.samples_per_channel_, 0);
|
std::fill(out_buff, out_buff + frame.samples_per_channel_, 0);
|
||||||
}
|
}
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mono-to-stereo can be used as in-place.
|
// Remixes the input frame to an output data vector. The output vector is
|
||||||
int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) {
|
// resized if needed.
|
||||||
RTC_DCHECK_EQ(frame.num_channels_, 1);
|
void ReMix(const AudioFrame& input,
|
||||||
RTC_DCHECK_GE(length_out_buff, 2 * frame.samples_per_channel_);
|
size_t num_output_channels,
|
||||||
|
std::vector<int16_t>* output) {
|
||||||
|
const size_t output_size = num_output_channels * input.samples_per_channel_;
|
||||||
|
|
||||||
if (!frame.muted()) {
|
if (output->size() != output_size) {
|
||||||
const int16_t* frame_data = frame.data();
|
output->resize(output_size);
|
||||||
for (size_t n = frame.samples_per_channel_; n != 0; --n) {
|
|
||||||
size_t i = n - 1;
|
|
||||||
int16_t sample = frame_data[i];
|
|
||||||
out_buff[2 * i + 1] = sample;
|
|
||||||
out_buff[2 * i] = sample;
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
std::fill(out_buff, out_buff + frame.samples_per_channel_ * 2, 0);
|
// For muted frames, fill the frame with zeros.
|
||||||
|
if (input.muted()) {
|
||||||
|
std::fill(output->begin(), output->end(), 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that the special case of zero input channels is handled correctly
|
||||||
|
// (zero samples per channel is already handled correctly in the code below).
|
||||||
|
if (input.num_channels_ == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const int16_t* input_data = input.data();
|
||||||
|
size_t in_index = 0;
|
||||||
|
size_t out_index = 0;
|
||||||
|
|
||||||
|
// When upmixing is needed, duplicate the last channel of the input.
|
||||||
|
if (input.num_channels_ < num_output_channels) {
|
||||||
|
for (size_t k = 0; k < input.samples_per_channel_; ++k) {
|
||||||
|
for (size_t j = 0; j < input.num_channels_; ++j) {
|
||||||
|
(*output)[out_index++] = input_data[in_index++];
|
||||||
|
}
|
||||||
|
RTC_DCHECK_GT(in_index, 0);
|
||||||
|
const int16_t value_last_channel = input_data[in_index - 1];
|
||||||
|
for (size_t j = input.num_channels_; j < num_output_channels; ++j) {
|
||||||
|
(*output)[out_index++] = value_last_channel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// When downmixing is needed, and the input is stereo, average the channels.
|
||||||
|
if (input.num_channels_ == 2) {
|
||||||
|
for (size_t n = 0; n < input.samples_per_channel_; ++n) {
|
||||||
|
(*output)[n] =
|
||||||
|
static_cast<int16_t>((static_cast<int32_t>(input_data[2 * n]) +
|
||||||
|
static_cast<int32_t>(input_data[2 * n + 1])) >>
|
||||||
|
1);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// When downmixing is needed, and the input is multichannel, drop the surplus
|
||||||
|
// channels.
|
||||||
|
const size_t num_channels_to_drop = input.num_channels_ - num_output_channels;
|
||||||
|
for (size_t k = 0; k < input.samples_per_channel_; ++k) {
|
||||||
|
for (size_t j = 0; j < num_output_channels; ++j) {
|
||||||
|
(*output)[out_index++] = input_data[in_index++];
|
||||||
|
}
|
||||||
|
in_index += num_channels_to_drop;
|
||||||
}
|
}
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
|
void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
|
||||||
@ -367,10 +418,9 @@ int AudioCodingModuleImpl::RegisterTransportCallback(
|
|||||||
|
|
||||||
// Add 10MS of raw (PCM) audio data to the encoder.
|
// Add 10MS of raw (PCM) audio data to the encoder.
|
||||||
int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
|
int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
|
||||||
InputData input_data;
|
|
||||||
rtc::CritScope lock(&acm_crit_sect_);
|
rtc::CritScope lock(&acm_crit_sect_);
|
||||||
int r = Add10MsDataInternal(audio_frame, &input_data);
|
int r = Add10MsDataInternal(audio_frame, &input_data_);
|
||||||
return r < 0 ? r : Encode(input_data);
|
return r < 0 ? r : Encode(input_data_);
|
||||||
}
|
}
|
||||||
|
|
||||||
int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
|
int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
|
||||||
@ -421,30 +471,26 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
|
|||||||
const bool same_num_channels =
|
const bool same_num_channels =
|
||||||
ptr_frame->num_channels_ == current_num_channels;
|
ptr_frame->num_channels_ == current_num_channels;
|
||||||
|
|
||||||
if (!same_num_channels) {
|
|
||||||
if (ptr_frame->num_channels_ == 1) {
|
|
||||||
if (UpMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
|
|
||||||
return -1;
|
|
||||||
} else {
|
|
||||||
if (DownMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// When adding data to encoders this pointer is pointing to an audio buffer
|
|
||||||
// with correct number of channels.
|
|
||||||
const int16_t* ptr_audio = ptr_frame->data();
|
|
||||||
|
|
||||||
// For pushing data to primary, point the |ptr_audio| to correct buffer.
|
|
||||||
if (!same_num_channels)
|
|
||||||
ptr_audio = input_data->buffer;
|
|
||||||
|
|
||||||
// TODO(yujo): Skip encode of muted frames.
|
// TODO(yujo): Skip encode of muted frames.
|
||||||
input_data->input_timestamp = ptr_frame->timestamp_;
|
input_data->input_timestamp = ptr_frame->timestamp_;
|
||||||
input_data->audio = ptr_audio;
|
|
||||||
input_data->length_per_channel = ptr_frame->samples_per_channel_;
|
input_data->length_per_channel = ptr_frame->samples_per_channel_;
|
||||||
input_data->audio_channel = current_num_channels;
|
input_data->audio_channel = current_num_channels;
|
||||||
|
|
||||||
|
if (!same_num_channels) {
|
||||||
|
// Remixes the input frame to the output data and in the process resizes the
|
||||||
|
// output data if needed.
|
||||||
|
ReMix(*ptr_frame, current_num_channels, &input_data->buffer);
|
||||||
|
|
||||||
|
// For pushing data to primary, point |input_data->audio| to the remixed buffer.
|
||||||
|
input_data->audio = input_data->buffer.data();
|
||||||
|
RTC_DCHECK_GE(input_data->buffer.size(),
|
||||||
|
input_data->length_per_channel * input_data->audio_channel);
|
||||||
|
} else {
|
||||||
|
// When adding data to encoders this pointer is pointing to an audio buffer
|
||||||
|
// with correct number of channels.
|
||||||
|
input_data->audio = ptr_frame->data();
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -508,8 +554,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
|
|||||||
// local buffer, otherwise, it will be written to the output frame.
|
// local buffer, otherwise, it will be written to the output frame.
|
||||||
int16_t* dest_ptr_audio =
|
int16_t* dest_ptr_audio =
|
||||||
resample ? audio : preprocess_frame_.mutable_data();
|
resample ? audio : preprocess_frame_.mutable_data();
|
||||||
if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0)
|
DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio);
|
||||||
return -1;
|
|
||||||
preprocess_frame_.num_channels_ = 1;
|
preprocess_frame_.num_channels_ = 1;
|
||||||
// Set the input of the resampler to the down-mixed signal.
|
// Set the input of the resampler to the down-mixed signal.
|
||||||
src_ptr_audio = audio;
|
src_ptr_audio = audio;
|
||||||
|
@ -1634,6 +1634,96 @@ TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_50kbps) {
|
|||||||
RunInner(40000, 60000);
|
RunInner(40000, 60000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify that it works when the data to send is mono and the encoder is set to
|
||||||
|
// send surround audio.
|
||||||
|
TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForMonoInput) {
|
||||||
|
constexpr int kSampleRateHz = 48000;
|
||||||
|
constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
|
||||||
|
|
||||||
|
audio_format_ = SdpAudioFormat({"multiopus",
|
||||||
|
kSampleRateHz,
|
||||||
|
6,
|
||||||
|
{{"minptime", "10"},
|
||||||
|
{"useinbandfec", "1"},
|
||||||
|
{"channel_mapping", "0,4,1,2,3,5"},
|
||||||
|
{"num_streams", "4"},
|
||||||
|
{"coupled_streams", "2"}}});
|
||||||
|
|
||||||
|
RegisterCodec();
|
||||||
|
|
||||||
|
input_frame_.sample_rate_hz_ = kSampleRateHz;
|
||||||
|
input_frame_.num_channels_ = 1;
|
||||||
|
input_frame_.samples_per_channel_ = kSamplesPerChannel;
|
||||||
|
for (size_t k = 0; k < 10; ++k) {
|
||||||
|
ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
|
||||||
|
input_frame_.timestamp_ += kSamplesPerChannel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that it works when the data to send is stereo and the encoder is set
|
||||||
|
// to send surround audio.
|
||||||
|
TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForStereoInput) {
|
||||||
|
constexpr int kSampleRateHz = 48000;
|
||||||
|
constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
|
||||||
|
|
||||||
|
audio_format_ = SdpAudioFormat({"multiopus",
|
||||||
|
kSampleRateHz,
|
||||||
|
6,
|
||||||
|
{{"minptime", "10"},
|
||||||
|
{"useinbandfec", "1"},
|
||||||
|
{"channel_mapping", "0,4,1,2,3,5"},
|
||||||
|
{"num_streams", "4"},
|
||||||
|
{"coupled_streams", "2"}}});
|
||||||
|
|
||||||
|
RegisterCodec();
|
||||||
|
|
||||||
|
input_frame_.sample_rate_hz_ = kSampleRateHz;
|
||||||
|
input_frame_.num_channels_ = 2;
|
||||||
|
input_frame_.samples_per_channel_ = kSamplesPerChannel;
|
||||||
|
for (size_t k = 0; k < 10; ++k) {
|
||||||
|
ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
|
||||||
|
input_frame_.timestamp_ += kSamplesPerChannel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that it works when the data to send is mono and the encoder is set to
|
||||||
|
// send stereo audio.
|
||||||
|
TEST_F(AudioCodingModuleTestOldApi, SendingStereoForMonoInput) {
|
||||||
|
constexpr int kSampleRateHz = 48000;
|
||||||
|
constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
|
||||||
|
|
||||||
|
audio_format_ = SdpAudioFormat("opus", kSampleRateHz, 2);
|
||||||
|
|
||||||
|
RegisterCodec();
|
||||||
|
|
||||||
|
input_frame_.sample_rate_hz_ = kSampleRateHz;
|
||||||
|
input_frame_.num_channels_ = 1;
|
||||||
|
input_frame_.samples_per_channel_ = kSamplesPerChannel;
|
||||||
|
for (size_t k = 0; k < 10; ++k) {
|
||||||
|
ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
|
||||||
|
input_frame_.timestamp_ += kSamplesPerChannel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify that it works when the data to send is stereo and the encoder is set
|
||||||
|
// to send mono audio.
|
||||||
|
TEST_F(AudioCodingModuleTestOldApi, SendingMonoForStereoInput) {
|
||||||
|
constexpr int kSampleRateHz = 48000;
|
||||||
|
constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
|
||||||
|
|
||||||
|
audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 1);
|
||||||
|
|
||||||
|
RegisterCodec();
|
||||||
|
|
||||||
|
input_frame_.sample_rate_hz_ = kSampleRateHz;
|
||||||
|
input_frame_.num_channels_ = 1;
|
||||||
|
input_frame_.samples_per_channel_ = kSamplesPerChannel;
|
||||||
|
for (size_t k = 0; k < 10; ++k) {
|
||||||
|
ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
|
||||||
|
input_frame_.timestamp_ += kSamplesPerChannel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// The result on the Android platforms is inconsistent for this test case.
|
// The result on the Android platforms is inconsistent for this test case.
|
||||||
// On android_rel the result is different from android and android arm64 rel.
|
// On android_rel the result is different from android and android arm64 rel.
|
||||||
#if defined(WEBRTC_ANDROID)
|
#if defined(WEBRTC_ANDROID)
|
||||||
|
Reference in New Issue
Block a user