Before this change, the APM limiter used in FrameCombiner (a sub-component of AudioMixer) only gets to process the data when the number of non-muted streams is >1. If this number varies between <=1 and >1, the limiter's view of the data will have gaps during the periods with <= 1 active stream. This leads to discontinuities in the applied gain. These discontinuities cause clicks in the output audio. This change activates APM limiter processing based on the number of audio streams, independently of their mutedness status. BUG=chromium:695993 Review-Url: https://codereview.webrtc.org/2776113002 Cr-Commit-Position: refs/heads/master@{#17442}
211 lines
8.0 KiB
C++
211 lines
8.0 KiB
C++
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
#include "webrtc/modules/audio_mixer/frame_combiner.h"
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <functional>
|
|
#include <memory>
|
|
|
|
#include "webrtc/audio/utility/audio_frame_operations.h"
|
|
#include "webrtc/base/array_view.h"
|
|
#include "webrtc/base/checks.h"
|
|
#include "webrtc/base/logging.h"
|
|
#include "webrtc/modules/audio_mixer/audio_frame_manipulator.h"
|
|
#include "webrtc/modules/audio_mixer/audio_mixer_impl.h"
|
|
|
|
namespace webrtc {
|
|
namespace {
|
|
|
|
// Stereo, 48 kHz, 10 ms.
|
|
// Capacity of the int32 mixing buffer: the largest frame, counted in
// samples over all channels, that CombineMultipleFrames() accepts.
constexpr int kMaximalFrameSize =
    /* channels */ 2 * /* samples per ms */ 48 * /* ms */ 10;
|
|
|
|
void CombineZeroFrames(bool use_limiter,
|
|
AudioProcessing* limiter,
|
|
AudioFrame* audio_frame_for_mixing) {
|
|
audio_frame_for_mixing->elapsed_time_ms_ = -1;
|
|
AudioFrameOperations::Mute(audio_frame_for_mixing);
|
|
// The limiter should still process a zero frame to avoid jumps in
|
|
// its gain curve.
|
|
if (use_limiter) {
|
|
RTC_DCHECK(limiter);
|
|
// The limiter smoothly increases frames with half gain to full
|
|
// volume. Here there's no need to apply half gain, since the frame
|
|
// is zero anyway.
|
|
limiter->ProcessStream(audio_frame_for_mixing);
|
|
}
|
|
}
|
|
|
|
void CombineOneFrame(const AudioFrame* input_frame,
|
|
bool use_limiter,
|
|
AudioProcessing* limiter,
|
|
AudioFrame* audio_frame_for_mixing) {
|
|
audio_frame_for_mixing->timestamp_ = input_frame->timestamp_;
|
|
audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_;
|
|
std::copy(input_frame->data_,
|
|
input_frame->data_ +
|
|
input_frame->num_channels_ * input_frame->samples_per_channel_,
|
|
audio_frame_for_mixing->data_);
|
|
if (use_limiter) {
|
|
AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing);
|
|
RTC_DCHECK(limiter);
|
|
limiter->ProcessStream(audio_frame_for_mixing);
|
|
AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
|
|
}
|
|
}
|
|
|
|
// Lower-level helper function called from Combine(...) when there
|
|
// are several input frames.
|
|
//
|
|
// TODO(aleloi): change interface to ArrayView<int16_t> output_frame
|
|
// once we have gotten rid of the APM limiter.
|
|
//
|
|
// Only the 'data' field of output_frame should be modified. The
|
|
// rest are used for potentially sending the output to the APM
|
|
// limiter.
|
|
void CombineMultipleFrames(
|
|
const std::vector<rtc::ArrayView<const int16_t>>& input_frames,
|
|
bool use_limiter,
|
|
AudioProcessing* limiter,
|
|
AudioFrame* audio_frame_for_mixing) {
|
|
RTC_DCHECK(!input_frames.empty());
|
|
RTC_DCHECK(audio_frame_for_mixing);
|
|
|
|
const size_t frame_length = input_frames.front().size();
|
|
for (const auto& frame : input_frames) {
|
|
RTC_DCHECK_EQ(frame_length, frame.size());
|
|
}
|
|
|
|
// Algorithm: int16 frames are added to a sufficiently large
|
|
// statically allocated int32 buffer. For > 2 participants this is
|
|
// more efficient than addition in place in the int16 audio
|
|
// frame. The audio quality loss due to halving the samples is
|
|
// smaller than 16-bit addition in place.
|
|
RTC_DCHECK_GE(kMaximalFrameSize, frame_length);
|
|
std::array<int32_t, kMaximalFrameSize> add_buffer;
|
|
|
|
add_buffer.fill(0);
|
|
|
|
for (const auto& frame : input_frames) {
|
|
std::transform(frame.begin(), frame.end(), add_buffer.begin(),
|
|
add_buffer.begin(), std::plus<int32_t>());
|
|
}
|
|
|
|
if (use_limiter) {
|
|
// Halve all samples to avoid saturation before limiting.
|
|
std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
|
|
audio_frame_for_mixing->data_, [](int32_t a) {
|
|
return rtc::saturated_cast<int16_t>(a / 2);
|
|
});
|
|
|
|
// Smoothly limit the audio.
|
|
RTC_DCHECK(limiter);
|
|
const int error = limiter->ProcessStream(audio_frame_for_mixing);
|
|
if (error != limiter->kNoError) {
|
|
LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error;
|
|
RTC_NOTREACHED();
|
|
}
|
|
|
|
// And now we can safely restore the level. This procedure results in
|
|
// some loss of resolution, deemed acceptable.
|
|
//
|
|
// It's possible to apply the gain in the AGC (with a target level of 0 dbFS
|
|
// and compression gain of 6 dB). However, in the transition frame when this
|
|
// is enabled (moving from one to two audio sources) it has the potential to
|
|
// create discontinuities in the mixed frame.
|
|
//
|
|
// Instead we double the frame (with addition since left-shifting a
|
|
// negative value is undefined).
|
|
AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
|
|
} else {
|
|
std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
|
|
audio_frame_for_mixing->data_,
|
|
[](int32_t a) { return rtc::saturated_cast<int16_t>(a); });
|
|
}
|
|
}
|
|
|
|
std::unique_ptr<AudioProcessing> CreateLimiter() {
|
|
Config config;
|
|
config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
|
|
std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config));
|
|
RTC_DCHECK(limiter);
|
|
|
|
const auto check_no_error = [](int x) {
|
|
RTC_DCHECK_EQ(x, AudioProcessing::kNoError);
|
|
};
|
|
auto* const gain_control = limiter->gain_control();
|
|
check_no_error(gain_control->set_mode(GainControl::kFixedDigital));
|
|
|
|
// We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the
|
|
// divide-by-2 but -7 is used instead to give a bit of headroom since the
|
|
// AGC is not a hard limiter.
|
|
check_no_error(gain_control->set_target_level_dbfs(7));
|
|
|
|
check_no_error(gain_control->set_compression_gain_db(0));
|
|
check_no_error(gain_control->enable_limiter(true));
|
|
check_no_error(gain_control->Enable(true));
|
|
return limiter;
|
|
}
|
|
} // namespace
|
|
|
|
FrameCombiner::FrameCombiner(bool use_apm_limiter)
|
|
: use_apm_limiter_(use_apm_limiter),
|
|
limiter_(use_apm_limiter ? CreateLimiter() : nullptr) {}
|
|
|
|
FrameCombiner::~FrameCombiner() = default;
|
|
|
|
// Mixes the frames in |mix_list| into |audio_frame_for_mixing|.
//
//   mix_list               Frames to mix. Each is DCHECKed to match
//                          |sample_rate| and the samples-per-channel count
//                          derived from it, and each is passed to
//                          RemixFrame() with |number_of_channels| before
//                          mixing.
//   number_of_channels     Channel count of the mixed output.
//   sample_rate            Sample rate of the inputs and the output, in Hz.
//   number_of_streams      Number of audio streams. Used only to decide
//                          whether the limiter runs this round.
//   audio_frame_for_mixing Output frame; must be non-null.
//
// The limiter is applied when this combiner was created with the APM
// limiter and |number_of_streams| > 1. The decision is deliberately based
// on the stream count and not on mix_list.size(), so the limiter keeps
// processing data even when zero or one frames are actually mixed.
void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list,
                            size_t number_of_channels,
                            int sample_rate,
                            size_t number_of_streams,
                            AudioFrame* audio_frame_for_mixing) const {
  RTC_DCHECK(audio_frame_for_mixing);
  const size_t samples_per_channel = static_cast<size_t>(
      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);

  for (const auto* frame : mix_list) {
    RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
    RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
  }

  // Frames could be both stereo and mono.
  for (auto* frame : mix_list) {
    RemixFrame(number_of_channels, frame);
  }

  // TODO(aleloi): Issue bugs.webrtc.org/3390.
  // Audio frame timestamp. The 'timestamp_' field is set to dummy
  // value '0', because it is only supported in the one channel case and
  // is then updated in the helper functions.
  audio_frame_for_mixing->UpdateFrame(
      -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined,
      AudioFrame::kVadUnknown, number_of_channels);

  const bool use_limiter_this_round = use_apm_limiter_ && number_of_streams > 1;

  if (mix_list.empty()) {
    CombineZeroFrames(use_limiter_this_round, limiter_.get(),
                      audio_frame_for_mixing);
  } else if (mix_list.size() == 1) {
    CombineOneFrame(mix_list.front(), use_limiter_this_round, limiter_.get(),
                    audio_frame_for_mixing);
  } else {
    // Every frame has the same total sample count, so compute it once.
    const size_t samples_per_frame = samples_per_channel * number_of_channels;

    // Reserve up front so the vector allocates once per call instead of
    // growing repeatedly while the views are appended.
    std::vector<rtc::ArrayView<const int16_t>> input_frames;
    input_frames.reserve(mix_list.size());
    for (const AudioFrame* frame : mix_list) {
      input_frames.push_back(
          rtc::ArrayView<const int16_t>(frame->data_, samples_per_frame));
    }
    CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(),
                          audio_frame_for_mixing);
  }
}
|
|
} // namespace webrtc
|