Implement Opus bandwidth adjustment behind a FieldTrial

Bug: webrtc:8522
Change-Id: I3a32ebfecd27ff74b507c2cee9e16aab17153442
Reviewed-on: https://webrtc-review.googlesource.com/22210
Commit-Queue: Alejandro Luebs <aluebs@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20799}
This commit is contained in:
Alex Luebs
2017-11-20 11:13:56 -08:00
committed by Commit Bot
parent 64eaa99cfc
commit eeb2765f6c
9 changed files with 368 additions and 3 deletions

View File

@ -330,6 +330,28 @@ rtc::Optional<int> AudioEncoderOpusImpl::GetNewComplexity(
}
}
// Decides whether the encoder bandwidth should be changed for the bitrate
// derived from |config|.
//
// Above kAutomaticThreshold the choice is handed back to Opus (OPUS_AUTO).
// Otherwise the bandwidth currently reported by |inst| is compared against
// two bitrate limits: wideband is requested once the bitrate exceeds
// kMaxNarrowbandBitrate, narrowband once it drops below kMinWidebandBitrate.
// An empty optional means "leave the bandwidth as it is".
rtc::Optional<int> AudioEncoderOpusImpl::GetNewBandwidth(
    const AudioEncoderOpusConfig& config,
    OpusEncInst* inst) {
  constexpr int kMinWidebandBitrate = 8000;
  constexpr int kMaxNarrowbandBitrate = 9000;
  constexpr int kAutomaticThreshold = 11000;
  RTC_DCHECK(config.IsOk());

  const int target_bps = GetBitrateBps(config);
  if (target_bps > kAutomaticThreshold) {
    return rtc::Optional<int>(OPUS_AUTO);
  }

  const int current_bandwidth = WebRtcOpus_GetBandwidth(inst);
  RTC_DCHECK_GE(current_bandwidth, 0);

  // The two conditions are mutually exclusive because
  // kMinWidebandBitrate < kMaxNarrowbandBitrate.
  const bool widen = target_bps > kMaxNarrowbandBitrate &&
                     current_bandwidth < OPUS_BANDWIDTH_WIDEBAND;
  const bool narrow = target_bps < kMinWidebandBitrate &&
                      current_bandwidth > OPUS_BANDWIDTH_NARROWBAND;
  if (widen) {
    return rtc::Optional<int>(OPUS_BANDWIDTH_WIDEBAND);
  }
  if (narrow) {
    return rtc::Optional<int>(OPUS_BANDWIDTH_NARROWBAND);
  }
  return rtc::Optional<int>();
}
class AudioEncoderOpusImpl::PacketLossFractionSmoother {
public:
explicit PacketLossFractionSmoother()
@ -376,6 +398,9 @@ AudioEncoderOpusImpl::AudioEncoderOpusImpl(
: payload_type_(payload_type),
send_side_bwe_with_overhead_(
webrtc::field_trial::IsEnabled("WebRTC-SendSideBwe-WithOverhead")),
adjust_bandwidth_(
webrtc::field_trial::IsEnabled("WebRTC-AdjustOpusBandwidth")),
bitrate_changed_(true),
packet_loss_rate_(0.0),
inst_(nullptr),
packet_loss_fraction_smoother_(new PacketLossFractionSmoother()),
@ -609,6 +634,14 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
// Will use new packet size for next encoding.
config_.frame_size_ms = next_frame_length_ms_;
if (adjust_bandwidth_ && bitrate_changed_) {
const auto bandwidth = GetNewBandwidth(config_, inst_);
if (bandwidth) {
RTC_CHECK_EQ(0, WebRtcOpus_SetBandwidth(inst_, *bandwidth));
}
bitrate_changed_ = false;
}
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.send_even_if_empty = true; // Allows Opus to send empty packets.
@ -672,6 +705,7 @@ bool AudioEncoderOpusImpl::RecreateEncoderInstance(
// window.
complexity_ = GetNewComplexity(config).value_or(config.complexity);
RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_));
bitrate_changed_ = true;
if (config.dtx_enabled) {
RTC_CHECK_EQ(0, WebRtcOpus_EnableDtx(inst_));
} else {
@ -727,6 +761,7 @@ void AudioEncoderOpusImpl::SetTargetBitrate(int bits_per_second) {
complexity_ = *new_complexity;
RTC_CHECK_EQ(0, WebRtcOpus_SetComplexity(inst_, complexity_));
}
bitrate_changed_ = true;
}
void AudioEncoderOpusImpl::ApplyAudioNetworkAdaptor() {

View File

@ -43,6 +43,13 @@ class AudioEncoderOpusImpl final : public AudioEncoder {
static rtc::Optional<int> GetNewComplexity(
const AudioEncoderOpusConfig& config);
// Decides which bandwidth the encoder |inst| should be switched to for the
// bitrate derived from |config|.
// Returns OPUS_AUTO if the bitrate is above the automatic threshold
// (11000 bps). Otherwise returns OPUS_BANDWIDTH_WIDEBAND if the bitrate is
// above 9000 bps and |inst| currently reports less than wideband, or
// OPUS_BANDWIDTH_NARROWBAND if the bitrate is below 8000 bps and |inst|
// currently reports more than narrowband.
// Returns empty in all remaining cases, i.e. when the current bandwidth
// should be kept.
static rtc::Optional<int> GetNewBandwidth(
const AudioEncoderOpusConfig& config,
OpusEncInst* inst);
using AudioNetworkAdaptorCreator =
std::function<std::unique_ptr<AudioNetworkAdaptor>(const std::string&,
RtcEventLog*)>;
@ -148,6 +155,8 @@ class AudioEncoderOpusImpl final : public AudioEncoder {
AudioEncoderOpusConfig config_;
const int payload_type_;
const bool send_side_bwe_with_overhead_;
// True when the "WebRTC-AdjustOpusBandwidth" field trial is enabled; gates
// the bandwidth re-evaluation done in EncodeImpl().
const bool adjust_bandwidth_;
// Starts out true and is set again by SetTargetBitrate() and
// RecreateEncoderInstance(). EncodeImpl() clears it after re-evaluating the
// bandwidth, which it only does when |adjust_bandwidth_| is set.
bool bitrate_changed_;
float packet_loss_rate_;
std::vector<int16_t> input_buffer_;
OpusEncInst* inst_;

View File

@ -17,6 +17,7 @@
#include "common_types.h" // NOLINT(build/include)
#include "modules/audio_coding/audio_network_adaptor/mock/mock_audio_network_adaptor.h"
#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h"
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "modules/audio_coding/neteq/tools/audio_loop.h"
#include "rtc_base/checks.h"
#include "rtc_base/fakeclock.h"
@ -473,6 +474,64 @@ TEST(AudioEncoderOpusTest, ConfigComplexityAdaptation) {
EXPECT_EQ(6, AudioEncoderOpusImpl::GetNewComplexity(config));
}
// Verifies that the bandwidth adaptation in the config works as intended.
// A raw Opus encoder is walked through narrowband -> wideband -> automatic
// and GetNewBandwidth() is checked on both sides of every bitrate limit.
TEST(AudioEncoderOpusTest, ConfigBandwidthAdaptation) {
  AudioEncoderOpusConfig config;
  // Sample rate of Opus.
  constexpr size_t kOpusRateKhz = 48;
  std::vector<int16_t> silence(
      kOpusRateKhz * config.frame_size_ms * config.num_channels, 0);
  constexpr size_t kMaxBytes = 1000;
  uint8_t bitstream[kMaxBytes];

  // Every later step uses |inst|, so a failed creation must abort the test
  // instead of continuing with an invalid encoder pointer.
  OpusEncInst* inst = nullptr;
  ASSERT_EQ(0, WebRtcOpus_EncoderCreate(
                   &inst, config.num_channels,
                   config.application ==
                           AudioEncoderOpusConfig::ApplicationMode::kVoip
                       ? 0
                       : 1));

  // Bitrate below minimum wideband. Expect narrowband.
  config.bitrate_bps = rtc::Optional<int>(7999);
  auto bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
  // Fatal on mismatch: |bandwidth| is dereferenced on the next line and may be
  // empty if the expectation does not hold.
  ASSERT_EQ(rtc::Optional<int>(OPUS_BANDWIDTH_NARROWBAND), bandwidth);
  EXPECT_EQ(0, WebRtcOpus_SetBandwidth(inst, *bandwidth));
  // It is necessary to encode here because Opus has some logic in the encoder
  // that goes from the user-set bandwidth to the used and returned one.
  EXPECT_GE(WebRtcOpus_Encode(
                inst, silence.data(),
                rtc::CheckedDivExact(silence.size(), config.num_channels),
                kMaxBytes, bitstream),
            0);

  // Bitrate not yet above maximum narrowband. Expect empty.
  config.bitrate_bps = rtc::Optional<int>(9000);
  bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
  EXPECT_EQ(rtc::Optional<int>(), bandwidth);

  // Bitrate above maximum narrowband. Expect wideband.
  config.bitrate_bps = rtc::Optional<int>(9001);
  bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
  // Fatal on mismatch for the same reason as above.
  ASSERT_EQ(rtc::Optional<int>(OPUS_BANDWIDTH_WIDEBAND), bandwidth);
  EXPECT_EQ(0, WebRtcOpus_SetBandwidth(inst, *bandwidth));
  // It is necessary to encode here because Opus has some logic in the encoder
  // that goes from the user-set bandwidth to the used and returned one.
  EXPECT_GE(WebRtcOpus_Encode(
                inst, silence.data(),
                rtc::CheckedDivExact(silence.size(), config.num_channels),
                kMaxBytes, bitstream),
            0);

  // Bitrate not yet below minimum wideband. Expect empty.
  config.bitrate_bps = rtc::Optional<int>(8000);
  bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
  EXPECT_EQ(rtc::Optional<int>(), bandwidth);

  // Bitrate above automatic threshold. Expect automatic.
  config.bitrate_bps = rtc::Optional<int>(12001);
  bandwidth = AudioEncoderOpusImpl::GetNewBandwidth(config, inst);
  EXPECT_EQ(rtc::Optional<int>(OPUS_AUTO), bandwidth);

  EXPECT_EQ(0, WebRtcOpus_EncoderFree(inst));
}
TEST(AudioEncoderOpusTest, EmptyConfigDoesNotAffectEncoderSettings) {
auto states = CreateCodec(2);
states.encoder->EnableAudioNetworkAdaptor("", nullptr);

View File

@ -0,0 +1,151 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/audio_codecs/opus/audio_decoder_opus.h"
#include "api/audio_codecs/opus/audio_encoder_opus.h"
#include "common_audio/include/audio_util.h"
#include "common_audio/lapped_transform.h"
#include "common_audio/window_generator.h"
#include "modules/audio_coding/neteq/tools/audio_loop.h"
#include "test/field_trial.h"
#include "test/gtest.h"
#include "test/testsupport/fileutils.h"
namespace webrtc {
namespace {
constexpr size_t kNumChannels = 1u;
constexpr int kSampleRateHz = 48000;
constexpr size_t kMaxLoopLengthSamples = kSampleRateHz * 50; // 50 seconds.
constexpr size_t kInputBlockSizeSamples = 10 * kSampleRateHz / 1000; // 10 ms
constexpr size_t kOutputBlockSizeSamples = 20 * kSampleRateHz / 1000; // 20 ms
constexpr size_t kFftSize = 1024;
constexpr size_t kNarrowbandSize = 4000 * kFftSize / kSampleRateHz;
constexpr float kKbdAlpha = 1.5f;
// Accumulates the spectral power of the audio it is fed in two adjacent
// frequency ranges of kNarrowbandSize FFT bins each (bins [0, kNarrowbandSize)
// and [kNarrowbandSize, 2 * kNarrowbandSize)) and reports high / low.
// NOTE(review): the transform is configured with a chunk length of
// kInputBlockSizeSamples while callers in this file hand in buffers of
// kOutputBlockSizeSamples -- confirm that analysing only part of each decoded
// block is intended.
class PowerRatioEstimator : public LappedTransform::Callback {
 public:
  PowerRatioEstimator() : low_pow_(0.f), high_pow_(0.f) {
    WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
    // Single input channel, no output channels: the transform is used for
    // analysis only and calls back into ProcessAudioBlock().
    transform_.reset(new LappedTransform(kNumChannels, 0u,
                                         kInputBlockSizeSamples, window_,
                                         kFftSize, kFftSize / 2, this));
  }

  // Feeds one block of mono float samples to the transform.
  void ProcessBlock(float* data) { transform_->ProcessChunk(&data, nullptr); }

  // Ratio of the accumulated upper-range power to the lower-range power.
  float PowerRatio() { return high_pow_ / low_pow_; }

 protected:
  // LappedTransform callback: adds this block's normalized power in both
  // ranges to the running totals. |output| is never written.
  void ProcessAudioBlock(const std::complex<float>* const* input,
                         size_t num_input_channels,
                         size_t num_freq_bins,
                         size_t num_output_channels,
                         std::complex<float>* const* output) override {
    float block_low = 0.f;
    float block_high = 0.f;
    for (size_t channel = 0u; channel < num_input_channels; ++channel) {
      const std::complex<float>* const bins = input[channel];
      for (size_t bin = 0u; bin < kNarrowbandSize; ++bin) {
        const float lower_magnitude = std::abs(bins[bin]);
        block_low += lower_magnitude * lower_magnitude;
        const float upper_magnitude = std::abs(bins[bin + kNarrowbandSize]);
        block_high += upper_magnitude * upper_magnitude;
      }
    }
    low_pow_ += block_low / (num_input_channels * kFftSize);
    high_pow_ += block_high / (num_input_channels * kFftSize);
  }

 private:
  std::unique_ptr<LappedTransform> transform_;
  float window_[kFftSize];
  float low_pow_;
  float high_pow_;
};
// Pushes 1000 input blocks from |audio_loop| through |encoder|, decodes every
// packet the encoder emits with |decoder|, and returns the high/low power
// ratio measured on the decoded audio.
float EncodedPowerRatio(AudioEncoder* encoder,
                        AudioDecoder* decoder,
                        test::AudioLoop* audio_loop) {
  constexpr size_t kBufferSize = 500;
  constexpr size_t kNumInputBlocks = 1000;

  uint32_t rtp_timestamp = 0u;
  rtc::Buffer encoded(kBufferSize);
  std::vector<int16_t> decoded(kOutputBlockSizeSamples);
  std::vector<float> decoded_float(kOutputBlockSizeSamples);
  AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
  PowerRatioEstimator power_ratio_estimator;

  for (size_t block = 0; block < kNumInputBlocks; ++block) {
    // Encode.
    encoded.Clear();
    AudioEncoder::EncodedInfo encoder_info =
        encoder->Encode(rtp_timestamp, audio_loop->GetNextBlock(), &encoded);
    rtp_timestamp += kInputBlockSizeSamples;
    if (encoded.size() == 0) {
      // Nothing was emitted for this input block.
      continue;
    }

    // Decode.
    const int decoder_info = decoder->Decode(
        encoded.data(), encoded.size(), kSampleRateHz,
        decoded.size() * sizeof(decoded[0]), decoded.data(), &speech_type);
    if (decoder_info <= 0) {
      // Only positive decoder results are analysed.
      continue;
    }

    S16ToFloat(decoded.data(), decoded.size(), decoded_float.data());
    power_ratio_estimator.ProcessBlock(decoded_float.data());
  }
  return power_ratio_estimator.PowerRatio();
}
} // namespace
// End-to-end check of the "WebRTC-AdjustOpusBandwidth" field trial: encodes
// and decodes speech at several target bitrates and uses the high/low power
// ratio of the decoded audio to tell whether it is narrowband or wider.
TEST(BandwidthAdaptationTest, BandwidthAdaptationTest) {
  test::ScopedFieldTrials override_field_trials(
      "WebRTC-AdjustOpusBandwidth/Enabled/");

  constexpr float kMaxNarrowbandRatio = 0.003f;
  constexpr float kMinWidebandRatio = 0.03f;

  // Create encoder.
  constexpr int kPayloadType = 17;
  AudioEncoderOpusConfig encoder_config;
  encoder_config.bitrate_bps = rtc::Optional<int>(7999);
  encoder_config.num_channels = kNumChannels;
  auto encoder = AudioEncoderOpus::MakeAudioEncoder(encoder_config,
                                                    kPayloadType);

  // Create decoder.
  AudioDecoderOpus::Config decoder_config;
  decoder_config.num_channels = kNumChannels;
  auto decoder = AudioDecoderOpus::MakeAudioDecoder(decoder_config);

  // Open speech file.
  const std::string input_file_name =
      webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm");
  test::AudioLoop audio_loop;
  EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
  ASSERT_TRUE(audio_loop.Init(input_file_name, kMaxLoopLengthSamples,
                              kInputBlockSizeSamples));

  // One measurement run at whatever bitrate the encoder currently targets.
  const auto measure_power_ratio = [&] {
    return EncodedPowerRatio(encoder.get(), decoder.get(), &audio_loop);
  };

  // Initial bitrate of 7999 bps.
  EXPECT_LT(measure_power_ratio(), kMaxNarrowbandRatio);

  encoder->OnReceivedTargetAudioBitrate(9000);
  EXPECT_LT(measure_power_ratio(), kMaxNarrowbandRatio);

  encoder->OnReceivedTargetAudioBitrate(9001);
  EXPECT_GT(measure_power_ratio(), kMinWidebandRatio);

  encoder->OnReceivedTargetAudioBitrate(8000);
  EXPECT_GT(measure_power_ratio(), kMinWidebandRatio);

  encoder->OnReceivedTargetAudioBitrate(12001);
  EXPECT_GT(measure_power_ratio(), kMinWidebandRatio);
}
} // namespace webrtc

View File

@ -11,7 +11,6 @@
#include "modules/audio_coding/codecs/opus/opus_interface.h"
#include "rtc_base/checks.h"
#include "modules/audio_coding/codecs/opus/opus_inst.h"
#include <stdlib.h>
#include <string.h>
@ -229,6 +228,27 @@ int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
}
}
/* Queries the encoder for its current bandwidth via OPUS_GET_BANDWIDTH.
 * Returns the reported bandwidth, or -1 if |inst| is NULL or the query
 * does not return 0. */
int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) {
  int32_t reported_bandwidth = -1;
  int ctl_result;

  if (!inst) {
    return -1;
  }
  ctl_result =
      opus_encoder_ctl(inst->encoder, OPUS_GET_BANDWIDTH(&reported_bandwidth));
  return ctl_result == 0 ? reported_bandwidth : -1;
}
/* Forwards |bandwidth| to the encoder via OPUS_SET_BANDWIDTH and returns the
 * result of that call, or -1 if |inst| is NULL. */
int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_BANDWIDTH(bandwidth));
}
int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) {
if (!inst)
return -1;

View File

@ -13,6 +13,7 @@
#include <stddef.h>
#include "modules/audio_coding/codecs/opus/opus_inst.h"
#include "typedefs.h" // NOLINT(build/include)
#ifdef __cplusplus
@ -221,6 +222,40 @@ int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst);
*/
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity);
/*
 * WebRtcOpus_GetBandwidth(...)
 *
 * This function returns the bandwidth currently reported by the encoder
 * (queried with OPUS_GET_BANDWIDTH).
 *
 * Input:
 *      - inst               : Encoder context
 *
 * Return value              : Bandwidth - Success
 *                             -1 - Error (|inst| is NULL or the query failed)
 */
int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst);
/*
 * WebRtcOpus_SetBandwidth(...)
 *
 * By default Opus decides which bandwidth to encode the signal in depending on
 * the bitrate. This function overrules the previous setting and forces the
 * encoder to encode in narrowband/wideband/fullband/etc.
 *
 * Input:
 *      - inst               : Encoder context
 *      - bandwidth          : New target bandwidth. Valid values are:
 *                             OPUS_BANDWIDTH_NARROWBAND
 *                             OPUS_BANDWIDTH_MEDIUMBAND
 *                             OPUS_BANDWIDTH_WIDEBAND
 *                             OPUS_BANDWIDTH_SUPERWIDEBAND
 *                             OPUS_BANDWIDTH_FULLBAND
 *                             OPUS_AUTO is also passed by callers to hand the
 *                             decision back to Opus.
 *
 * Return value              :  0 - Success
 *                             -1 - Error (|inst| is NULL, or |bandwidth| is
 *                                  outside the range listed above)
 */
int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth);
/*
* WebRtcOpus_SetForceChannels(...)
*

View File

@ -81,10 +81,11 @@ float OpusSpeedTest::DecodeABlock(const uint8_t* bit_stream,
return 1000.0 * clocks / CLOCKS_PER_SEC;
}
/* Test audio length in seconds. */
constexpr size_t kDurationSec = 400;
#define ADD_TEST(complexity) \
TEST_P(OpusSpeedTest, OpusSetComplexityTest##complexity) { \
/* Test audio length in second. */ \
size_t kDurationSec = 400; \
/* Set complexity. */ \
printf("Setting complexity to %d ...\n", complexity); \
EXPECT_EQ(0, WebRtcOpus_SetComplexity(opus_encoder_, complexity)); \
@ -103,6 +104,20 @@ ADD_TEST(2);
ADD_TEST(1);
ADD_TEST(0);
// Defines a parameterized speed test that forces the encoder to |bandwidth|
// with WebRtcOpus_SetBandwidth() (expecting success) and then runs
// EncodeDecode(kDurationSec).
// The test name is built with ##, so the macro argument is pasted as written:
// ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_WIDEBAND) defines
// OpusSetBandwidthTestOPUS_BANDWIDTH_WIDEBAND.
#define ADD_BANDWIDTH_TEST(bandwidth) \
TEST_P(OpusSpeedTest, OpusSetBandwidthTest##bandwidth) { \
/* Set bandwidth. */ \
printf("Setting bandwidth to %d ...\n", bandwidth); \
EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, bandwidth)); \
EncodeDecode(kDurationSec); \
}
// One speed test for each bandwidth listed for WebRtcOpus_SetBandwidth().
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_NARROWBAND);
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_MEDIUMBAND);
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_WIDEBAND);
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_SUPERWIDEBAND);
ADD_BANDWIDTH_TEST(OPUS_BANDWIDTH_FULLBAND);
// List all test cases: (channel, bit rate, filename, extension).
const coding_param param_set[] = {
std::make_tuple(1,

View File

@ -458,6 +458,45 @@ TEST_P(OpusTest, OpusSetComplexity) {
EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
}
// Exercises WebRtcOpus_SetBandwidth() / WebRtcOpus_GetBandwidth(): both must
// fail without an encoder, out-of-range values must be rejected, and an
// accepted value must be reported back after a frame has been encoded.
TEST_P(OpusTest, OpusSetBandwidth) {
  PrepareSpeechData(channels_, 20, 20);

  int16_t audio_type;
  std::unique_ptr<int16_t[]> decoded_audio(
      new int16_t[kOpus20msFrameSamples * channels_]());

  // Runs one block of speech through the encoder and decoder.
  const auto encode_decode_next_block = [&] {
    EncodeDecode(opus_encoder_, speech_data_.GetNextBlock(), opus_decoder_,
                 decoded_audio.get(), &audio_type);
  };

  // Test without creating encoder memory.
  EXPECT_EQ(-1,
            WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND));
  EXPECT_EQ(-1, WebRtcOpus_GetBandwidth(opus_encoder_));

  // Create encoder memory, try with different bandwidths.
  EXPECT_EQ(0,
            WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, application_));
  EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, channels_));

  // One below the lowest bandwidth constant is rejected.
  EXPECT_EQ(-1, WebRtcOpus_SetBandwidth(opus_encoder_,
                                        OPUS_BANDWIDTH_NARROWBAND - 1));

  EXPECT_EQ(0,
            WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_NARROWBAND));
  encode_decode_next_block();
  EXPECT_EQ(OPUS_BANDWIDTH_NARROWBAND, WebRtcOpus_GetBandwidth(opus_encoder_));

  EXPECT_EQ(0, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND));
  encode_decode_next_block();
  EXPECT_EQ(OPUS_BANDWIDTH_FULLBAND, WebRtcOpus_GetBandwidth(opus_encoder_));

  // One above the highest bandwidth constant is rejected and the previous
  // setting stays in effect.
  EXPECT_EQ(
      -1, WebRtcOpus_SetBandwidth(opus_encoder_, OPUS_BANDWIDTH_FULLBAND + 1));
  encode_decode_next_block();
  EXPECT_EQ(OPUS_BANDWIDTH_FULLBAND, WebRtcOpus_GetBandwidth(opus_encoder_));

  // Free memory.
  EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_));
  EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_));
}
TEST_P(OpusTest, OpusForceChannels) {
// Test without creating encoder memory.
EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 1));