diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
index da9d1fa32c..82480d2e65 100644
--- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
+++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc
@@ -290,8 +290,9 @@ int32_t AudioConferenceMixerImpl::Process() {

     // We only use the limiter if it supports the output sample rate and
     // we're actually mixing multiple streams.
-    use_limiter_ = _numMixedParticipants > 1 &&
-                   _outputFrequency <= kAudioProcMaxNativeSampleRateHz;
+    use_limiter_ =
+        _numMixedParticipants > 1 &&
+        _outputFrequency <= AudioProcessing::kMaxNativeSampleRateHz;

     MixFromList(mixedAudio, mixList);
     MixAnonomouslyFromList(mixedAudio, additionalFramesList);
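Note: the hunk above widens the limiter's rate gate. The old free constant kAudioProcMaxNativeSampleRateHz (32000) silently disabled the limiter for 48 kHz output; its replacement, AudioProcessing::kMaxNativeSampleRateHz, is defined in the next file as the last entry of the sorted native-rate table, i.e. 48000. A minimal sketch of the gate, using a hypothetical free function with the constant inlined:

    #include <cstddef>

    // The limiter runs only when more than one stream is actually mixed and
    // the output rate is one APM can process natively.
    bool ShouldUseLimiter(size_t num_mixed_participants,
                          int output_frequency_hz) {
      const int kMaxNativeSampleRateHz = 48000;  // Mirrors the new constant.
      return num_mixed_participants > 1 &&
             output_frequency_hz <= kMaxNativeSampleRateHz;
    }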
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index ff4128b6ed..4ef4e6da6a 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -147,6 +147,17 @@ class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
   int volume_;
 };

+const int AudioProcessing::kNativeSampleRatesHz[] = {
+    AudioProcessing::kSampleRate8kHz,
+    AudioProcessing::kSampleRate16kHz,
+    AudioProcessing::kSampleRate32kHz,
+    AudioProcessing::kSampleRate48kHz};
+const size_t AudioProcessing::kNumNativeSampleRates =
+    arraysize(AudioProcessing::kNativeSampleRatesHz);
+const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
+    kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
+const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;
+
 AudioProcessing* AudioProcessing::Create() {
   Config config;
   return Create(config, nullptr);
@@ -400,18 +411,16 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
       std::min(api_format_.input_stream().sample_rate_hz(),
                api_format_.output_stream().sample_rate_hz());
   int fwd_proc_rate;
-  if (min_proc_rate > kSampleRate32kHz) {
-    fwd_proc_rate = kSampleRate48kHz;
-  } else if (min_proc_rate > kSampleRate16kHz) {
-    fwd_proc_rate = kSampleRate32kHz;
-  } else if (min_proc_rate > kSampleRate8kHz) {
-    fwd_proc_rate = kSampleRate16kHz;
-  } else {
-    fwd_proc_rate = kSampleRate8kHz;
+  for (size_t i = 0; i < kNumNativeSampleRates; ++i) {
+    fwd_proc_rate = kNativeSampleRatesHz[i];
+    if (fwd_proc_rate >= min_proc_rate) {
+      break;
+    }
   }
   // ...with one exception.
-  if (echo_control_mobile_->is_enabled() && min_proc_rate > kSampleRate16kHz) {
-    fwd_proc_rate = kSampleRate16kHz;
+  if (echo_control_mobile_->is_enabled() &&
+      min_proc_rate > kMaxAECMSampleRateHz) {
+    fwd_proc_rate = kMaxAECMSampleRateHz;
   }

   fwd_proc_format_ = StreamConfig(fwd_proc_rate);
@@ -592,7 +601,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
     return kBadSampleRateError;
   }
   if (echo_control_mobile_->is_enabled() &&
-      frame->sample_rate_hz_ > kSampleRate16kHz) {
+      frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
     LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
     return kUnsupportedComponentError;
   }
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index 445d5c8c24..5eb3b62f98 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -15,6 +15,7 @@
 #include <stdio.h>  // FILE
 #include <vector>

+#include "webrtc/base/arraysize.h"
 #include "webrtc/base/platform_file.h"
 #include "webrtc/common.h"
 #include "webrtc/modules/audio_processing/beamformer/array_util.h"
@@ -128,8 +129,6 @@ struct Intelligibility {
   bool enabled;
 };

-static const int kAudioProcMaxNativeSampleRateHz = 32000;
-
 // The Audio Processing Module (APM) provides a collection of voice processing
 // components designed for real-time communications software.
 //
@@ -471,6 +470,11 @@ class AudioProcessing {
     kSampleRate48kHz = 48000
   };

+  static const int kNativeSampleRatesHz[];
+  static const size_t kNumNativeSampleRates;
+  static const int kMaxNativeSampleRateHz;
+  static const int kMaxAECMSampleRateHz;
+
   static const int kChunkSizeMs = 10;
 };

diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index 4ac4b8944e..fa44785344 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -3190,19 +3190,12 @@ void Channel::Demultiplex(const int16_t* audio_data,
     CodecInst codec;
     GetSendCodec(codec);

-    if (!mono_recording_audio_.get()) {
-        // Temporary space for DownConvertToCodecFormat.
-        mono_recording_audio_.reset(new int16_t[kMaxMonoDataSizeSamples]);
-    }
-    DownConvertToCodecFormat(audio_data,
-                             number_of_frames,
-                             number_of_channels,
-                             sample_rate,
-                             codec.channels,
-                             codec.plfreq,
-                             mono_recording_audio_.get(),
-                             &input_resampler_,
-                             &_audioFrame);
+    // Never upsample or upmix the capture signal here. This should be done at
+    // the end of the send chain.
+    _audioFrame.sample_rate_hz_ = std::min(codec.plfreq, sample_rate);
+    _audioFrame.num_channels_ = std::min(number_of_channels, codec.channels);
+    RemixAndResample(audio_data, number_of_frames, number_of_channels,
+                     sample_rate, &input_resampler_, &_audioFrame);
 }

 uint32_t
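Note: Channel::Demultiplex (above) now picks the capture frame's target format itself and funnels everything through RemixAndResample, which is why the mono scratch buffer is deleted from channel.h below. A sketch of the format choice, with hypothetical names:

    #include <algorithm>

    struct Format {
      int sample_rate_hz;
      int num_channels;
    };

    // Take the min of device and codec formats: the capture path never
    // upsamples or upmixes; that is deferred to the end of the send chain.
    Format ChooseCaptureFormat(int device_rate_hz, int device_channels,
                               int codec_rate_hz, int codec_channels) {
      Format f;
      f.sample_rate_hz = std::min(codec_rate_hz, device_rate_hz);
      f.num_channels = std::min(device_channels, codec_channels);
      return f;
    }

For example, a 48 kHz stereo device feeding a 16 kHz mono codec yields 16 kHz mono here, so no work is spent on samples the codec would discard.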
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index d9e4575032..bf792a5fb6 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -499,7 +499,6 @@ private:
     AudioLevel _outputAudioLevel;
     bool _externalTransport;
     AudioFrame _audioFrame;
-    rtc::scoped_ptr<int16_t[]> mono_recording_audio_;
     // Downsamples to the codec rate if necessary.
    PushResampler<int16_t> input_resampler_;
     FilePlayer* _inputFilePlayerPtr;
diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc
index 92b9c6e41c..5e62939d54 100644
--- a/webrtc/voice_engine/transmit_mixer.cc
+++ b/webrtc/voice_engine/transmit_mixer.cc
@@ -1133,31 +1133,25 @@ void TransmitMixer::GenerateAudioFrame(const int16_t* audio,
   int codec_rate;
   int num_codec_channels;
   GetSendCodecInfo(&codec_rate, &num_codec_channels);
-  // TODO(ajm): This currently restricts the sample rate to 32 kHz.
-  // See: https://code.google.com/p/webrtc/issues/detail?id=3146
-  // When 48 kHz is supported natively by AudioProcessing, this will have
-  // to be changed to handle 44.1 kHz.
-  int max_sample_rate_hz = kAudioProcMaxNativeSampleRateHz;
-  if (audioproc_->echo_control_mobile()->is_enabled()) {
-    // AECM only supports 8 and 16 kHz.
-    max_sample_rate_hz = 16000;
-  }
-  codec_rate = std::min(codec_rate, max_sample_rate_hz);
   stereo_codec_ = num_codec_channels == 2;
-  if (!mono_buffer_.get()) {
-    // Temporary space for DownConvertToCodecFormat.
-    mono_buffer_.reset(new int16_t[kMaxMonoDataSizeSamples]);
+  // We want to process at the lowest rate possible without losing information.
+  // Choose the lowest native rate at least equal to the input and codec rates.
+  const int min_processing_rate = std::min(sample_rate_hz, codec_rate);
+  for (size_t i = 0; i < AudioProcessing::kNumNativeSampleRates; ++i) {
+    _audioFrame.sample_rate_hz_ = AudioProcessing::kNativeSampleRatesHz[i];
+    if (_audioFrame.sample_rate_hz_ >= min_processing_rate) {
+      break;
+    }
   }
-  DownConvertToCodecFormat(audio,
-                           samples_per_channel,
-                           num_channels,
-                           sample_rate_hz,
-                           num_codec_channels,
-                           codec_rate,
-                           mono_buffer_.get(),
-                           &resampler_,
-                           &_audioFrame);
+  if (audioproc_->echo_control_mobile()->is_enabled()) {
+    // AECM only supports 8 and 16 kHz.
+    _audioFrame.sample_rate_hz_ = std::min(
+        _audioFrame.sample_rate_hz_, AudioProcessing::kMaxAECMSampleRateHz);
+  }
+  _audioFrame.num_channels_ = std::min(num_channels, num_codec_channels);
+  RemixAndResample(audio, samples_per_channel, num_channels, sample_rate_hz,
+                   &resampler_, &_audioFrame);
 }

 int32_t TransmitMixer::RecordAudioToFile(
diff --git a/webrtc/voice_engine/transmit_mixer.h b/webrtc/voice_engine/transmit_mixer.h
index 8bbb421a3b..714efb48dc 100644
--- a/webrtc/voice_engine/transmit_mixer.h
+++ b/webrtc/voice_engine/transmit_mixer.h
@@ -229,7 +229,6 @@ private:
     int32_t _remainingMuteMicTimeMs;
     bool stereo_codec_;
     bool swap_stereo_channels_;
-    rtc::scoped_ptr<int16_t[]> mono_buffer_;
 };

 }  // namespace voe
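Note: TransmitMixer::GenerateAudioFrame (above) now combines two constraints: pick the lowest native APM rate that covers both the input and codec rates, then clamp to AECM's 16 kHz ceiling when mobile echo control is active. A standalone sketch with the table and constants inlined (the real code reads them from AudioProcessing):

    #include <algorithm>

    int ChooseProcessingRate(int input_rate_hz, int codec_rate_hz,
                             bool aecm_enabled) {
      static const int kNativeRatesHz[] = {8000, 16000, 32000, 48000};
      const int min_rate = std::min(input_rate_hz, codec_rate_hz);
      // Lowest native rate >= min_rate; falls through to 48 kHz otherwise.
      int rate = kNativeRatesHz[3];
      for (int native : kNativeRatesHz) {
        if (native >= min_rate) {
          rate = native;
          break;
        }
      }
      if (aecm_enabled) {
        rate = std::min(rate, 16000);  // AudioProcessing::kMaxAECMSampleRateHz.
      }
      return rate;
    }

E.g. a 44.1 kHz input with a 32 kHz codec gives min_rate = 32000 and a processing rate of 32 kHz; enabling AECM drops it to 16 kHz. The conversion itself is done by RemixAndResample, reworked in the next file.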
diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc
index 82ef076d41..498620acaf 100644
--- a/webrtc/voice_engine/utility.cc
+++ b/webrtc/voice_engine/utility.cc
@@ -21,34 +21,43 @@
 namespace webrtc {
 namespace voe {

-// TODO(ajm): There is significant overlap between RemixAndResample and
-// ConvertToCodecFormat. Consolidate using AudioConverter.
 void RemixAndResample(const AudioFrame& src_frame,
                       PushResampler<int16_t>* resampler,
                       AudioFrame* dst_frame) {
-  const int16_t* audio_ptr = src_frame.data_;
-  int audio_ptr_num_channels = src_frame.num_channels_;
+  RemixAndResample(src_frame.data_, src_frame.samples_per_channel_,
+                   src_frame.num_channels_, src_frame.sample_rate_hz_,
+                   resampler, dst_frame);
+  dst_frame->timestamp_ = src_frame.timestamp_;
+  dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_;
+  dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_;
+}
+
+void RemixAndResample(const int16_t* src_data,
+                      size_t samples_per_channel,
+                      int num_channels,
+                      int sample_rate_hz,
+                      PushResampler<int16_t>* resampler,
+                      AudioFrame* dst_frame) {
+  const int16_t* audio_ptr = src_data;
+  int audio_ptr_num_channels = num_channels;
   int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];

   // Downmix before resampling.
-  if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) {
-    AudioFrameOperations::StereoToMono(src_frame.data_,
-                                       src_frame.samples_per_channel_,
+  if (num_channels == 2 && dst_frame->num_channels_ == 1) {
+    AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
                                        mono_audio);
     audio_ptr = mono_audio;
     audio_ptr_num_channels = 1;
   }

-  if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_,
-                                    dst_frame->sample_rate_hz_,
+  if (resampler->InitializeIfNeeded(sample_rate_hz, dst_frame->sample_rate_hz_,
                                     audio_ptr_num_channels) == -1) {
-    LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_,
+    LOG_FERR3(LS_ERROR, InitializeIfNeeded, sample_rate_hz,
               dst_frame->sample_rate_hz_, audio_ptr_num_channels);
     assert(false);
   }

-  const size_t src_length = src_frame.samples_per_channel_ *
-                            audio_ptr_num_channels;
+  const size_t src_length = samples_per_channel * audio_ptr_num_channels;
   int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
                                        AudioFrame::kMaxDataSizeSamples);
   if (out_length == -1) {
@@ -59,66 +68,12 @@ void RemixAndResample(const AudioFrame& src_frame,
       static_cast<size_t>(out_length / audio_ptr_num_channels);

   // Upmix after resampling.
-  if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) {
+  if (num_channels == 1 && dst_frame->num_channels_ == 2) {
     // The audio in dst_frame really is mono at this point; MonoToStereo will
     // set this back to stereo.
     dst_frame->num_channels_ = 1;
     AudioFrameOperations::MonoToStereo(dst_frame);
   }
-
-  dst_frame->timestamp_ = src_frame.timestamp_;
-  dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_;
-  dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_;
-}
-
-void DownConvertToCodecFormat(const int16_t* src_data,
-                              size_t samples_per_channel,
-                              int num_channels,
-                              int sample_rate_hz,
-                              int codec_num_channels,
-                              int codec_rate_hz,
-                              int16_t* mono_buffer,
-                              PushResampler<int16_t>* resampler,
-                              AudioFrame* dst_af) {
-  assert(samples_per_channel <= kMaxMonoDataSizeSamples);
-  assert(num_channels == 1 || num_channels == 2);
-  assert(codec_num_channels == 1 || codec_num_channels == 2);
-  dst_af->Reset();
-
-  // Never upsample the capture signal here. This should be done at the
-  // end of the send chain.
-  int destination_rate = std::min(codec_rate_hz, sample_rate_hz);
-
-  // If no stereo codecs are in use, we downmix a stereo stream from the
-  // device early in the chain, before resampling.
-  if (num_channels == 2 && codec_num_channels == 1) {
-    AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
-                                       mono_buffer);
-    src_data = mono_buffer;
-    num_channels = 1;
-  }
-
-  if (resampler->InitializeIfNeeded(
-          sample_rate_hz, destination_rate, num_channels) != 0) {
-    LOG_FERR3(LS_ERROR,
-              InitializeIfNeeded,
-              sample_rate_hz,
-              destination_rate,
-              num_channels);
-    assert(false);
-  }
-
-  const size_t in_length = samples_per_channel * num_channels;
-  int out_length = resampler->Resample(
-      src_data, in_length, dst_af->data_, AudioFrame::kMaxDataSizeSamples);
-  if (out_length == -1) {
-    LOG_FERR3(LS_ERROR, Resample, src_data, in_length, dst_af->data_);
-    assert(false);
-  }
-
-  dst_af->samples_per_channel_ = static_cast<size_t>(out_length / num_channels);
-  dst_af->sample_rate_hz_ = destination_rate;
-  dst_af->num_channels_ = num_channels;
 }

 void MixWithSat(int16_t target[],
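Note: callers of the new pointer-based overload (declared in utility.h below) pre-set the destination format on the AudioFrame; the function fills data_ and updates samples_per_channel_. Hypothetical usage; the include paths are assumptions based on this tree's layout:

    #include "webrtc/common_audio/resampler/include/push_resampler.h"
    #include "webrtc/modules/interface/module_common_types.h"
    #include "webrtc/voice_engine/utility.h"

    void Example() {
      webrtc::AudioFrame dst;
      dst.sample_rate_hz_ = 16000;  // Desired output rate, set by the caller.
      dst.num_channels_ = 1;        // Desired output channel count.

      webrtc::PushResampler<int16_t> resampler;
      int16_t capture[480 * 2] = {0};  // 10 ms of 48 kHz stereo capture audio.

      webrtc::voe::RemixAndResample(capture, 480, 2, 48000, &resampler, &dst);
      // dst now holds 160 mono samples (10 ms at 16 kHz) and
      // dst.samples_per_channel_ reflects that.
    }

Unlike the AudioFrame overload, this one leaves timestamp_, elapsed_time_ms_ and ntp_time_ms_ untouched, since a raw buffer carries no such metadata.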
diff --git a/webrtc/voice_engine/utility.h b/webrtc/voice_engine/utility.h
index 87003c4258..cc44533665 100644
--- a/webrtc/voice_engine/utility.h
+++ b/webrtc/voice_engine/utility.h
@@ -24,32 +24,26 @@ class AudioFrame;

 namespace voe {

-// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|.
-// Expects |dst_frame| to have its sample rate and channels members set to the
-// desired values. Updates the samples per channel member accordingly. No other
-// members will be changed.
+// Upmix or downmix and resample the audio to |dst_frame|. Expects |dst_frame|
+// to have its sample rate and channels members set to the desired values.
+// Updates the |samples_per_channel_| member accordingly.
+//
+// This version has an AudioFrame |src_frame| as input and sets the output
+// |timestamp_|, |elapsed_time_ms_| and |ntp_time_ms_| members equal to the
+// input ones.
 void RemixAndResample(const AudioFrame& src_frame,
                       PushResampler<int16_t>* resampler,
                       AudioFrame* dst_frame);

-// Downmix and downsample the audio in |src_data| to |dst_af| as necessary,
-// specified by |codec_num_channels| and |codec_rate_hz|. |mono_buffer| is
-// temporary space and must be of sufficient size to hold the downmixed source
-// audio (recommend using a size of kMaxMonoDataSizeSamples).
-//
-// |dst_af| will have its data and format members (sample rate, channels and
-// samples per channel) set appropriately. No other members will be changed.
-// TODO(ajm): For now, this still calls Reset() on |dst_af|. Remove this, as
-// it shouldn't be needed.
-void DownConvertToCodecFormat(const int16_t* src_data,
-                              size_t samples_per_channel,
-                              int num_channels,
-                              int sample_rate_hz,
-                              int codec_num_channels,
-                              int codec_rate_hz,
-                              int16_t* mono_buffer,
-                              PushResampler<int16_t>* resampler,
-                              AudioFrame* dst_af);
+// This version has a pointer to the samples |src_data| as input and receives
+// |samples_per_channel|, |num_channels| and |sample_rate_hz| of the data as
+// parameters.
+void RemixAndResample(const int16_t* src_data,
+                      size_t samples_per_channel,
+                      int num_channels,
+                      int sample_rate_hz,
+                      PushResampler<int16_t>* resampler,
+                      AudioFrame* dst_frame);

 void MixWithSat(int16_t target[],
                 int target_channel,
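Note: RemixAndResample keeps the remix steps on the cheap side of the resampler: stereo-to-mono happens before resampling (half the samples to resample) and mono-to-stereo only after. The downmix averages each interleaved pair, roughly as below (an illustrative re-implementation, not the exact library code):

    #include <cstddef>
    #include <cstdint>

    // Average each interleaved L/R pair into one mono sample, as
    // AudioFrameOperations::StereoToMono does ahead of the resampler.
    void StereoToMonoSketch(const int16_t* src_audio,
                            size_t samples_per_channel, int16_t* dst_audio) {
      for (size_t i = 0; i < samples_per_channel; ++i) {
        dst_audio[i] = static_cast<int16_t>(
            (static_cast<int32_t>(src_audio[2 * i]) + src_audio[2 * i + 1]) /
            2);
      }
    }

With both conversion paths collapsed into RemixAndResample, the test below no longer needs a FunctionToTest switch and can sweep the full rate/channel matrix through a single code path.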
diff --git a/webrtc/voice_engine/utility_unittest.cc b/webrtc/voice_engine/utility_unittest.cc
index 5f02f512fd..226e38366d 100644
--- a/webrtc/voice_engine/utility_unittest.cc
+++ b/webrtc/voice_engine/utility_unittest.cc
@@ -21,11 +21,6 @@ namespace webrtc {
 namespace voe {
 namespace {

-enum FunctionToTest {
-  TestRemixAndResample,
-  TestDownConvertToCodecFormat
-};
-
 class UtilityTest : public ::testing::Test {
  protected:
   UtilityTest() {
@@ -36,9 +31,10 @@ class UtilityTest : public ::testing::Test {
     golden_frame_.CopyFrom(src_frame_);
   }

-  void RunResampleTest(int src_channels, int src_sample_rate_hz,
-                       int dst_channels, int dst_sample_rate_hz,
-                       FunctionToTest function);
+  void RunResampleTest(int src_channels,
+                       int src_sample_rate_hz,
+                       int dst_channels,
+                       int dst_sample_rate_hz);

   PushResampler<int16_t> resampler_;
   AudioFrame src_frame_;
@@ -130,8 +126,7 @@ void VerifyFramesAreEqual(const AudioFrame& ref_frame,
 void UtilityTest::RunResampleTest(int src_channels,
                                   int src_sample_rate_hz,
                                   int dst_channels,
-                                  int dst_sample_rate_hz,
-                                  FunctionToTest function) {
+                                  int dst_sample_rate_hz) {
   PushResampler<int16_t> resampler;  // Create a new one with every test.
   const int16_t kSrcLeft = 30;  // Shouldn't overflow for any used sample rate.
   const int16_t kSrcRight = 15;
@@ -168,20 +163,7 @@ void UtilityTest::RunResampleTest(int src_channels,
                    kInputKernelDelaySamples * dst_channels * 2);
   printf("(%d, %d Hz) -> (%d, %d Hz) ",  // SNR reported on the same line later.
          src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
-  if (function == TestRemixAndResample) {
-    RemixAndResample(src_frame_, &resampler, &dst_frame_);
-  } else {
-    int16_t mono_buffer[kMaxMonoDataSizeSamples];
-    DownConvertToCodecFormat(src_frame_.data_,
-                             src_frame_.samples_per_channel_,
-                             src_frame_.num_channels_,
-                             src_frame_.sample_rate_hz_,
-                             dst_frame_.num_channels_,
-                             dst_frame_.sample_rate_hz_,
-                             mono_buffer,
-                             &resampler,
-                             &dst_frame_);
-  }
+  RemixAndResample(src_frame_, &resampler, &dst_frame_);

   if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) {
     // The sinc resampler gives poor SNR at this extreme conversion, but we
@@ -232,28 +214,7 @@ TEST_F(UtilityTest, RemixAndResampleSucceeds) {
   for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
     for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
       RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
-                      kChannels[dst_channel], kSampleRates[dst_rate],
-                      TestRemixAndResample);
-      }
-    }
-  }
-}
-
-TEST_F(UtilityTest, ConvertToCodecFormatSucceeds) {
-  const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
-  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
-  const int kChannels[] = {1, 2};
-  const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
-  for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
-    for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
-      for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
-        for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
-          if (dst_rate <= src_rate && dst_channel <= src_channel) {
-            RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
-                            kChannels[src_channel], kSampleRates[dst_rate],
-                            TestDownConvertToCodecFormat);
-          }
+                      kChannels[dst_channel], kSampleRates[dst_rate]);
       }
     }
   }