Files
platform-external-webrtc/webrtc/modules/audio_mixer/audio_mixer_impl.cc
aleloi 623427c522 Injectable output rate calculator for AudioMixer.
This CL breaks out the output sample rate calculation from
webrtc::AudioMixerImpl. A new OutputRateCalculator interface is added
to make the sample rate configurable. There are at least three reasons
for this change:

  1. The mixer will be used for an internal project, in which no
     resampling is done after the mixing. There the sample rate should
     be static. Currently, it can differ across mix iterations and
     depends on the number of audio sources. If there are no sources,
     the WebRTC mixer behavior is to produce silence at 48 kHz.

  2. A planned change to WebRTC will make audio processing steps
     happen at constant sample rates. A configurable sample rate
     calculator will make the transition simpler for the mixer.

  3. The current mixer design is a single large file. Behavior is not
     always simple to change (e.g. as in this case, to mix at a
     constant rate), unrelated behavior can be broken, and reusing the
     mixer in internal projects is tricky. Using DI for the sample
     rate calculation solves some of these issues.

Changes:

The protected mixer c-tor now takes
unique_ptr<OutputRateCalculator>. The current output rate calculation
is moved to DefaultOutputRateCalculator. A new factory method
AudioMixerImpl::CreateWithOutputRateCalculator is added. The old
factory method passes the default rate calculator.

BUG=webrtc:6346

Review-Url: https://codereview.webrtc.org/2557713006
Cr-Commit-Position: refs/heads/master@{#15472}
2016-12-08 10:38:07 +00:00

378 lines
12 KiB
C++

/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_mixer/audio_mixer_impl.h"
#include <algorithm>
#include <functional>
#include <iterator>
#include <utility>
#include "webrtc/audio/utility/audio_frame_operations.h"
#include "webrtc/base/logging.h"
#include "webrtc/modules/audio_mixer/audio_frame_manipulator.h"
#include "webrtc/modules/audio_mixer/default_output_rate_calculator.h"
namespace webrtc {
namespace {
// Ties a source's bookkeeping entry to the frame it delivered for the current
// mix iteration, together with the values used to rank it against the other
// sources (mute state and energy).
struct SourceFrame {
  // Creates an entry and measures the frame's energy. Muted frames are not
  // measured; their energy stays 0.
  SourceFrame(AudioMixerImpl::SourceStatus* source_status,
              AudioFrame* audio_frame,
              bool muted)
      : SourceFrame(source_status, audio_frame, muted, 0) {
    if (!muted) {
      energy = AudioMixerCalculateEnergy(*audio_frame);
    }
  }

  // Creates an entry with a caller-supplied energy value. Both pointers must
  // be non-null.
  SourceFrame(AudioMixerImpl::SourceStatus* source_status,
              AudioFrame* audio_frame,
              bool muted,
              uint32_t energy)
      : source_status(source_status),
        audio_frame(audio_frame),
        muted(muted),
        energy(energy) {
    RTC_DCHECK(source_status);
    RTC_DCHECK(audio_frame);
  }

  AudioMixerImpl::SourceStatus* source_status = nullptr;
  AudioFrame* audio_frame = nullptr;
  bool muted = true;
  uint32_t energy = 0;
};
// ShouldMixBefore(a, b) is used to select mixer sources. It returns true when
// |a| should be preferred over |b|. Priority, highest first:
//   1. unmuted frames over muted frames,
//   2. frames flagged AudioFrame::kVadActive over all other frames,
//   3. higher energy.
//
// Every VAD state other than kVadActive falls into the same class. Comparing
// the raw states instead would leave two frames with *different* non-active
// states unordered relative to each other, while frames sharing a state are
// still ordered by energy. That relation is not a strict weak ordering, which
// std::sort requires of its comparator.
bool ShouldMixBefore(const SourceFrame& a, const SourceFrame& b) {
  if (a.muted != b.muted) {
    return b.muted;
  }

  const bool a_active = a.audio_frame->vad_activity_ == AudioFrame::kVadActive;
  const bool b_active = b.audio_frame->vad_activity_ == AudioFrame::kVadActive;
  if (a_active != b_active) {
    return a_active;
  }

  return a.energy > b.energy;
}
void RampAndUpdateGain(
const std::vector<SourceFrame>& mixed_sources_and_frames) {
for (const auto& source_frame : mixed_sources_and_frames) {
float target_gain = source_frame.source_status->is_mixed ? 1.0f : 0.0f;
Ramp(source_frame.source_status->gain, target_gain,
source_frame.audio_frame);
source_frame.source_status->gain = target_gain;
}
}
// Adds every frame in |audio_frame_list| into |mixed_audio|. An empty list
// leaves |mixed_audio| untouched. When |use_limiter| is set, each input frame
// is halved in place before being added. Always returns 0.
int32_t MixFromList(AudioFrame* mixed_audio,
                    const AudioFrameList& audio_frame_list,
                    bool use_limiter) {
  if (audio_frame_list.empty()) {
    return 0;
  }

  if (audio_frame_list.size() != 1) {
    // TODO(wu): Issue 3390.
    // Audio frame timestamp is only supported in one channel case.
    mixed_audio->timestamp_ = 0;
    mixed_audio->elapsed_time_ms_ = -1;
  } else {
    // A single contributor: its timing information is forwarded as is.
    const auto& only_frame = audio_frame_list.front();
    mixed_audio->timestamp_ = only_frame->timestamp_;
    mixed_audio->elapsed_time_ms_ = only_frame->elapsed_time_ms_;
  }

  for (AudioFrame* frame : audio_frame_list) {
    // Each input must already match the output rate and hold exactly one
    // frame duration worth of samples.
    RTC_DCHECK_EQ(mixed_audio->sample_rate_hz_, frame->sample_rate_hz_);
    RTC_DCHECK_EQ(
        frame->samples_per_channel_,
        static_cast<size_t>((mixed_audio->sample_rate_hz_ *
                             webrtc::AudioMixerImpl::kFrameDurationInMs) /
                            1000));

    // Mix |frame| into |mixed_audio|, with saturation protection. The
    // protection is applied to |frame| itself prior to mixing.
    if (use_limiter) {
      // Halving avoids saturation in the mixing. It is only meaningful if
      // the limiter will be used.
      AudioFrameOperations::ApplyHalfGain(frame);
    }
    RTC_DCHECK_EQ(frame->num_channels_, mixed_audio->num_channels_);
    AudioFrameOperations::Add(*frame, mixed_audio);
  }
  return 0;
}
// Returns an iterator to the status entry that wraps |audio_source|, or
// |audio_source_list->end()| if no entry refers to it. Read-only overload.
AudioMixerImpl::SourceStatusList::const_iterator FindSourceInList(
    AudioMixerImpl::Source const* audio_source,
    AudioMixerImpl::SourceStatusList const* audio_source_list) {
  const auto wraps_source =
      [audio_source](
          const std::unique_ptr<AudioMixerImpl::SourceStatus>& status) {
        return status->audio_source == audio_source;
      };
  return std::find_if(audio_source_list->begin(), audio_source_list->end(),
                      wraps_source);
}
// Mutable overload: returns an iterator to the status entry that wraps
// |audio_source|, or |audio_source_list->end()| if no entry refers to it.
// TODO(aleloi): remove non-const version when WEBRTC only supports modern STL.
AudioMixerImpl::SourceStatusList::iterator FindSourceInList(
    AudioMixerImpl::Source const* audio_source,
    AudioMixerImpl::SourceStatusList* audio_source_list) {
  auto position = audio_source_list->begin();
  const auto list_end = audio_source_list->end();
  // Linear scan; stop at the first entry that refers to |audio_source|.
  while (position != list_end && (*position)->audio_source != audio_source) {
    ++position;
  }
  return position;
}
// Builds the AudioProcessing instance used as limiter for the mixed signal:
// experimental AGC off, gain control in fixed-digital mode with its limiter
// enabled. Returns nullptr if the instance cannot be created or if any of the
// configuration calls reports an error.
std::unique_ptr<AudioProcessing> CreateLimiter() {
  Config config;
  config.Set<ExperimentalAgc>(new ExperimentalAgc(false));

  std::unique_ptr<AudioProcessing> limiter(AudioProcessing::Create(config));
  if (!limiter) {
    return nullptr;
  }

  // The calls run in this order and stop at the first failure.
  //
  // We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the
  // divide-by-2 but -7 is used instead to give a bit of headroom since the
  // AGC is not a hard limiter.
  const bool configured =
      limiter->gain_control()->set_mode(GainControl::kFixedDigital) ==
          limiter->kNoError &&
      limiter->gain_control()->set_target_level_dbfs(7) ==
          limiter->kNoError &&
      limiter->gain_control()->set_compression_gain_db(0) ==
          limiter->kNoError &&
      limiter->gain_control()->enable_limiter(true) == limiter->kNoError &&
      limiter->gain_control()->Enable(true) == limiter->kNoError;
  if (!configured) {
    return nullptr;
  }
  return limiter;
}
} // namespace
// Takes ownership of |limiter| and |output_rate_calculator|.
// |limiter| is stored without a null check; the factory in this file passes
// the result of CreateLimiter(), which returns nullptr on failure.
AudioMixerImpl::AudioMixerImpl(
std::unique_ptr<AudioProcessing> limiter,
std::unique_ptr<OutputRateCalculator> output_rate_calculator)
: output_rate_calculator_(std::move(output_rate_calculator)),
// Rate and frame size start at 0; CalculateOutputFrequency() assigns both.
output_frequency_(0),
sample_size_(0),
audio_source_list_(),
// Initial value only: Mix() reassigns |use_limiter_| on every call.
use_limiter_(true),
time_stamp_(0),
limiter_(std::move(limiter)) {}
// No explicit teardown: the members release what they own.
AudioMixerImpl::~AudioMixerImpl() = default;
// Creates a mixer that derives its output rate with
// DefaultOutputRateCalculator.
rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create() {
  std::unique_ptr<OutputRateCalculator> default_calculator(
      new DefaultOutputRateCalculator());
  return CreateWithOutputRateCalculator(std::move(default_calculator));
}
// Creates a ref-counted mixer that uses |output_rate_calculator| to pick the
// output sample rate. The limiter comes from CreateLimiter(); its result is
// forwarded to the constructor as is, including nullptr on failure.
rtc::scoped_refptr<AudioMixerImpl>
AudioMixerImpl::CreateWithOutputRateCalculator(
    std::unique_ptr<OutputRateCalculator> output_rate_calculator) {
  std::unique_ptr<AudioProcessing> limiter = CreateLimiter();
  rtc::scoped_refptr<AudioMixerImpl> mixer(
      new rtc::RefCountedObject<AudioMixerImpl>(
          std::move(limiter), std::move(output_rate_calculator)));
  return mixer;
}
// Produces one mixed frame in |audio_frame_for_mixing|.
//
// |number_of_channels| must be 1 or 2. Steps:
//   1. Recompute the output rate and frame size from the current sources.
//   2. Under |crit_|: fetch the frames selected for mixing, remix each to
//      |number_of_channels|, reset the output frame, advance |time_stamp_| by
//      one frame worth of samples and add the selected frames together.
//   3. If the output frame holds no samples afterwards, fill it with silence;
//      otherwise run the limiter step.
void AudioMixerImpl::Mix(size_t number_of_channels,
                         AudioFrame* audio_frame_for_mixing) {
  RTC_DCHECK(number_of_channels == 1 || number_of_channels == 2);
  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);

  CalculateOutputFrequency();

  AudioFrameList mix_list;
  {
    rtc::CritScope lock(&crit_);
    mix_list = GetAudioFromSources();

    for (const auto& frame : mix_list) {
      RemixFrame(number_of_channels, frame);
    }

    // Reset the output frame: no payload pointer and a sample count of 0, at
    // the current output rate and channel count.
    audio_frame_for_mixing->UpdateFrame(
        -1, time_stamp_, nullptr, 0, OutputFrequency(),
        AudioFrame::kNormalSpeech, AudioFrame::kVadPassive,
        number_of_channels);

    time_stamp_ += static_cast<uint32_t>(sample_size_);

    // We only use the limiter if we're actually mixing multiple streams.
    use_limiter_ = mix_list.size() > 1;
    MixFromList(audio_frame_for_mixing, mix_list, use_limiter_);
  }

  if (audio_frame_for_mixing->samples_per_channel_ == 0) {
    // Nothing was mixed, set the audio samples to silence.
    audio_frame_for_mixing->samples_per_channel_ = sample_size_;
    AudioFrameOperations::Mute(audio_frame_for_mixing);
  } else {
    // Only call the limiter if we have something to mix.
    LimitMixedAudio(audio_frame_for_mixing);
  }
}
// Collects the preferred sample rate of every registered source, lets the
// injected OutputRateCalculator choose the output rate, and derives the
// number of samples per channel in one frame of kFrameDurationInMs.
void AudioMixerImpl::CalculateOutputFrequency() {
  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
  rtc::CritScope lock(&crit_);

  std::vector<int> source_rates;
  for (const auto& status : audio_source_list_) {
    source_rates.push_back(status->audio_source->PreferredSampleRate());
  }

  output_frequency_ =
      output_rate_calculator_->CalculateOutputRate(source_rates);
  sample_size_ = (output_frequency_ * kFrameDurationInMs) / 1000;
}
// Returns the output sample rate stored by the most recent
// CalculateOutputFrequency() call (0 before the first one).
int AudioMixerImpl::OutputFrequency() const {
  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
  const int current_rate = output_frequency_;
  return current_rate;
}
// Registers |audio_source| with the mixer. The source must be non-null and
// must not already be registered (checked with RTC_DCHECK only). Always
// returns true.
bool AudioMixerImpl::AddSource(Source* audio_source) {
  RTC_DCHECK(audio_source);
  rtc::CritScope lock(&crit_);
  RTC_DCHECK(FindSourceInList(audio_source, &audio_source_list_) ==
             audio_source_list_.end())
      << "Source already added to mixer";

  // Take ownership of the new status entry *before* handing it to the
  // container, so the allocation cannot leak if the insertion itself fails.
  std::unique_ptr<SourceStatus> status(
      new SourceStatus(audio_source, false, 0));
  audio_source_list_.push_back(std::move(status));
  return true;
}
// Unregisters |audio_source|. Removing a source that is not registered is a
// caller error and trips the RTC_DCHECK; where that check is compiled out the
// call is a no-op.
void AudioMixerImpl::RemoveSource(Source* audio_source) {
  RTC_DCHECK(audio_source);
  rtc::CritScope lock(&crit_);
  const auto iter = FindSourceInList(audio_source, &audio_source_list_);
  RTC_DCHECK(iter != audio_source_list_.end()) << "Source not present in mixer";
  if (iter == audio_source_list_.end()) {
    // Erasing the end iterator is undefined behaviour, so bail out when the
    // source is unknown.
    return;
  }
  audio_source_list_.erase(iter);
}
// Asks every registered source for a frame at the current output rate and
// returns the frames that are to be mixed in this iteration.
//
// Sources whose GetAudioFrameWithInfo() reports kError are skipped (with a
// warning) and their status is left untouched. The remaining frames are
// ranked with ShouldMixBefore; at most kMaximumAmountOfMixedAudioSources
// unmuted frames are selected. Each ranked source's |is_mixed| flag is
// updated, and the selected frames are passed through RampAndUpdateGain
// before being returned.
AudioFrameList AudioMixerImpl::GetAudioFromSources() {
  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
  AudioFrameList result;
  std::vector<SourceFrame> audio_source_mixing_data_list;
  std::vector<SourceFrame> ramp_list;
  audio_source_mixing_data_list.reserve(audio_source_list_.size());

  // Get audio from the audio sources and put it in the SourceFrame vector.
  for (auto& source_and_status : audio_source_list_) {
    const auto audio_frame_info =
        source_and_status->audio_source->GetAudioFrameWithInfo(
            OutputFrequency(), &source_and_status->audio_frame);

    if (audio_frame_info == Source::AudioFrameInfo::kError) {
      LOG_F(LS_WARNING) << "failed to GetAudioFrameWithInfo() from source";
      continue;
    }
    audio_source_mixing_data_list.emplace_back(
        source_and_status.get(), &source_and_status->audio_frame,
        audio_frame_info == Source::AudioFrameInfo::kMuted);
  }

  // Sort frames by sorting function.
  std::sort(audio_source_mixing_data_list.begin(),
            audio_source_mixing_data_list.end(), ShouldMixBefore);

  int max_audio_frame_counter = kMaximumAmountOfMixedAudioSources;

  // Go through list in order and put unmuted frames in result list.
  for (const auto& p : audio_source_mixing_data_list) {
    // Filter muted.
    if (p.muted) {
      p.source_status->is_mixed = false;
      continue;
    }

    // Add frame to result vector for mixing.
    bool is_mixed = false;
    if (max_audio_frame_counter > 0) {
      --max_audio_frame_counter;
      result.push_back(p.audio_frame);
      // Carry the already computed energy along instead of relying on -1
      // wrapping around in the unsigned energy field.
      ramp_list.emplace_back(p.source_status, p.audio_frame, false, p.energy);
      is_mixed = true;
    }
    p.source_status->is_mixed = is_mixed;
  }

  // NOTE(review): |ramp_list| only ever holds sources selected for mixing, so
  // from here RampAndUpdateGain always ramps towards gain 1 and the gain of a
  // source that drops out is never lowered. Confirm that this is intended.
  RampAndUpdateGain(ramp_list);
  return result;
}
// Runs the limiter over |mixed_audio| and restores the level that was halved
// before mixing. Does nothing and returns true when the limiter is not in use
// for this iteration. Returns false if no limiter instance is available or if
// the limiter reports an error; the level is restored in those cases too.
bool AudioMixerImpl::LimitMixedAudio(AudioFrame* mixed_audio) const {
  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
  if (!use_limiter_) {
    return true;
  }

  if (!limiter_) {
    // CreateLimiter() returns nullptr on failure and the mixer is constructed
    // regardless, so |limiter_| may be null here. The inputs were already
    // halved before mixing, so still undo that to keep the output level.
    // NOTE(review): without the limiter this relies on
    // AudioFrameOperations::Add for overflow handling - confirm it saturates.
    LOG_F(LS_ERROR) << "No limiter available; mixed audio is not limited";
    AudioFrameOperations::Add(*mixed_audio, mixed_audio);
    return false;
  }

  // Smoothly limit the mixed frame.
  const int error = limiter_->ProcessStream(mixed_audio);

  // And now we can safely restore the level. This procedure results in
  // some loss of resolution, deemed acceptable.
  //
  // It's possible to apply the gain in the AGC (with a target level of 0 dbFS
  // and compression gain of 6 dB). However, in the transition frame when this
  // is enabled (moving from one to two audio sources) it has the potential to
  // create discontinuities in the mixed frame.
  //
  // Instead we double the frame (with addition since left-shifting a
  // negative value is undefined).
  AudioFrameOperations::Add(*mixed_audio, mixed_audio);

  if (error != limiter_->kNoError) {
    LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error;
    RTC_NOTREACHED();
    return false;
  }
  return true;
}
// Test helper: reports the |is_mixed| flag currently stored for
// |audio_source|. Logs an error and returns false if the source is not
// registered with this mixer.
bool AudioMixerImpl::GetAudioSourceMixabilityStatusForTest(
    AudioMixerImpl::Source* audio_source) const {
  RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
  rtc::CritScope lock(&crit_);

  const auto position = FindSourceInList(audio_source, &audio_source_list_);
  if (position == audio_source_list_.end()) {
    LOG(LS_ERROR) << "Audio source unknown";
    return false;
  }
  return (*position)->is_mixed;
}
} // namespace webrtc