Fold AudioEncoderMutable into AudioEncoder

It makes more sense to combine the two interfaces, since there wasn't
a clear line separating them. The result is a combined interface with
just over a dozen methods, half of which need to be implemented by
every subclass, while the other half have sensible (and trivial)
default implementations and are implemented only by the few subclasses
that need non-default behavior.

Review URL: https://codereview.webrtc.org/1322973004

Cr-Commit-Position: refs/heads/master@{#9894}
This commit is contained in:
kwiberg
2015-09-08 05:57:53 -07:00
committed by Commit bot
parent cd3c475407
commit 12cfc9b4da
40 changed files with 851 additions and 984 deletions

View File

@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
// TODO(kwiberg): Merge these tests into audio_encoder_opus_unittest.cc
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/codecs/opus/interface/audio_encoder_opus.h"
@ -26,14 +28,14 @@ class AudioEncoderMutableOpusTest : public ::testing::Test {
void CreateCodec(int num_channels) {
codec_inst_.channels = num_channels;
encoder_.reset(new AudioEncoderMutableOpus(codec_inst_));
encoder_.reset(new AudioEncoderOpus(codec_inst_));
auto expected_app =
num_channels == 1 ? AudioEncoderOpus::kVoip : AudioEncoderOpus::kAudio;
EXPECT_EQ(expected_app, encoder_->application());
}
CodecInst codec_inst_;
rtc::scoped_ptr<AudioEncoderMutableOpus> encoder_;
rtc::scoped_ptr<AudioEncoderOpus> encoder_;
};
TEST_F(AudioEncoderMutableOpusTest, DefaultApplicationModeMono) {
@ -46,8 +48,7 @@ TEST_F(AudioEncoderMutableOpusTest, DefaultApplicationModeStereo) {
TEST_F(AudioEncoderMutableOpusTest, ChangeApplicationMode) {
CreateCodec(2);
EXPECT_TRUE(
encoder_->SetApplication(AudioEncoderMutable::kApplicationSpeech));
EXPECT_TRUE(encoder_->SetApplication(AudioEncoder::Application::kSpeech));
EXPECT_EQ(AudioEncoderOpus::kVoip, encoder_->application());
}
@ -60,8 +61,7 @@ TEST_F(AudioEncoderMutableOpusTest, ResetWontChangeApplicationMode) {
EXPECT_EQ(AudioEncoderOpus::kAudio, encoder_->application());
// Now change to kVoip.
EXPECT_TRUE(
encoder_->SetApplication(AudioEncoderMutable::kApplicationSpeech));
EXPECT_TRUE(encoder_->SetApplication(AudioEncoder::Application::kSpeech));
EXPECT_EQ(AudioEncoderOpus::kVoip, encoder_->application());
// Trigger a reset again.

View File

@ -11,6 +11,7 @@
#include "webrtc/modules/audio_coding/codecs/opus/interface/audio_encoder_opus.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/safe_conversions.h"
#include "webrtc/common_types.h"
#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
@ -18,36 +19,63 @@ namespace webrtc {
namespace {
const int kSampleRateHz = 48000;
const int kMinBitrateBps = 500;
const int kMaxBitrateBps = 512000;
// TODO(tlegrand): Remove this code when we have proper APIs to set the
// complexity at a higher level.
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) || defined(WEBRTC_ARCH_ARM)
// If we are on Android, iOS and/or ARM, use a lower complexity setting as
// default, to save encoder complexity.
const int kDefaultComplexity = 5;
#else
const int kDefaultComplexity = 9;
#endif
// Builds an AudioEncoderOpus::Config from a CodecInst. Payload type, bitrate,
// channel count and frame length are taken from |codec_inst|; every other
// field keeps the value a default-constructed Config has. A single-channel
// stream selects the kVoip application mode, anything else selects kAudio.
AudioEncoderOpus::Config CreateConfig(const CodecInst& codec_inst) {
  AudioEncoderOpus::Config result;
  result.payload_type = codec_inst.pltype;
  result.bitrate_bps = codec_inst.rate;
  result.num_channels = codec_inst.channels;
  // The packet size must be an exact multiple of 48 (presumably samples per
  // millisecond at the 48 kHz encoder rate -- confirm against CodecInst docs).
  result.frame_size_ms = rtc::CheckedDivExact(codec_inst.pacsize, 48);
  if (result.num_channels == 1) {
    result.application = AudioEncoderOpus::kVoip;
  } else {
    result.application = AudioEncoderOpus::kAudio;
  }
  return result;
}
// We always encode at 48 kHz.
const int kSampleRateHz = 48000;
// Maps a reported packet loss rate onto the loss rate used to configure Opus.
// The result is |new_loss_rate| rounded down to one of the levels 0.20, 0.10,
// 0.05, 0.01 or 0.0.
// To prevent toggling between adjacent levels, the three highest levels use a
// margin: when |old_loss_rate| is below a level, |new_loss_rate| has to reach
// level + margin to select it; when |old_loss_rate| is at or above the level,
// reaching level - margin is enough. The 0.01 level has no margin.
double OptimizePacketLossRate(double new_loss_rate, double old_loss_rate) {
  DCHECK_GE(new_loss_rate, 0.0);
  DCHECK_LE(new_loss_rate, 1.0);
  DCHECK_GE(old_loss_rate, 0.0);
  DCHECK_LE(old_loss_rate, 1.0);

  struct Level {
    double rate;    // Loss rate reported to the encoder for this level.
    double margin;  // Hysteresis applied around |rate|.
  };
  // Ordered from highest to lowest; the first level that matches wins.
  const Level kLevels[] = {
      {0.20, 0.02}, {0.10, 0.01}, {0.05, 0.01}, {0.01, 0.0}};

  for (const Level& level : kLevels) {
    // +1 when approaching the level from below, -1 otherwise.
    const int direction = (level.rate - old_loss_rate > 0) ? 1 : -1;
    if (new_loss_rate >= level.rate + level.margin * direction)
      return level.rate;
  }
  return 0.0;
}
} // namespace
AudioEncoderOpus::Config::Config()
: frame_size_ms(20),
num_channels(1),
payload_type(120),
application(kVoip),
bitrate_bps(64000),
fec_enabled(false),
max_playback_rate_hz(48000),
complexity(kDefaultComplexity),
dtx_enabled(false) {
}
bool AudioEncoderOpus::Config::IsOk() const {
if (frame_size_ms <= 0 || frame_size_ms % 10 != 0)
return false;
@ -61,19 +89,150 @@ bool AudioEncoderOpus::Config::IsOk() const {
}
AudioEncoderOpus::AudioEncoderOpus(const Config& config)
: num_10ms_frames_per_packet_(
static_cast<size_t>(rtc::CheckedDivExact(config.frame_size_ms, 10))),
num_channels_(config.num_channels),
payload_type_(config.payload_type),
application_(config.application),
dtx_enabled_(config.dtx_enabled),
samples_per_10ms_frame_(static_cast<size_t>(
rtc::CheckedDivExact(kSampleRateHz, 100) * num_channels_)),
packet_loss_rate_(0.0) {
CHECK(config.IsOk());
input_buffer_.reserve(num_10ms_frames_per_packet_ * samples_per_10ms_frame_);
CHECK_EQ(0, WebRtcOpus_EncoderCreate(&inst_, num_channels_, application_));
SetTargetBitrate(config.bitrate_bps);
: packet_loss_rate_(0.0), inst_(nullptr) {
CHECK(RecreateEncoderInstance(config));
}
// Constructs the encoder from a CodecInst by converting it with CreateConfig()
// and delegating to the Config-based constructor.
AudioEncoderOpus::AudioEncoderOpus(const CodecInst& codec_inst)
: AudioEncoderOpus(CreateConfig(codec_inst)) {}
// Frees the Opus encoder instance; the CHECK fails if the free call does not
// return 0.
AudioEncoderOpus::~AudioEncoderOpus() {
CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
}
// Returns an upper bound for the size of one encoded packet: the number of
// bytes expected at the configured bitrate for one packet's duration, doubled
// to leave a wide margin for error.
size_t AudioEncoderOpus::MaxEncodedBytes() const {
  // Bits per second -> bytes per millisecond, with 1 added to the truncated
  // quotient.
  const size_t bytes_per_ms =
      static_cast<size_t>(config_.bitrate_bps / (1000 * 8) + 1);
  const size_t packet_duration_ms = Num10msFramesPerPacket() * 10;
  return 2 * (packet_duration_ms * bytes_per_ms);
}
// Returns the fixed sample rate constant kSampleRateHz; it does not depend on
// the configuration.
int AudioEncoderOpus::SampleRateHz() const {
return kSampleRateHz;
}
// Returns the channel count stored in the current config.
int AudioEncoderOpus::NumChannels() const {
return config_.num_channels;
}
// Returns the number of 10 ms frames in the next packet. This is always the
// configured frames-per-packet value.
size_t AudioEncoderOpus::Num10MsFramesInNextPacket() const {
return Num10msFramesPerPacket();
}
// Returns the largest number of 10 ms frames a packet can contain, which is
// the same configured frames-per-packet value used for every packet.
size_t AudioEncoderOpus::Max10MsFramesInAPacket() const {
return Num10msFramesPerPacket();
}
// Returns the bitrate (bits per second) stored in the current config.
int AudioEncoderOpus::GetTargetBitrate() const {
return config_.bitrate_bps;
}
// Appends one 10 ms block of input (SamplesPer10msFrame() samples read from
// |audio|) to the internal buffer. Until a full packet's worth of audio has
// been collected, a default-constructed EncodedInfo is returned and nothing is
// written to |encoded|. Once the buffer is full, it is encoded into |encoded|
// (at most |max_encoded_bytes|, saturated to the int16_t range), the buffer is
// emptied, and the returned info describes the packet, stamped with the RTP
// timestamp of the first block that went into it.
AudioEncoder::EncodedInfo AudioEncoderOpus::EncodeInternal(
    uint32_t rtp_timestamp,
    const int16_t* audio,
    size_t max_encoded_bytes,
    uint8_t* encoded) {
  // Remember the timestamp of the first block of each packet.
  if (input_buffer_.empty())
    first_timestamp_in_buffer_ = rtp_timestamp;

  const int16_t* const audio_end = audio + SamplesPer10msFrame();
  input_buffer_.insert(input_buffer_.end(), audio, audio_end);

  const size_t samples_per_packet =
      static_cast<size_t>(Num10msFramesPerPacket()) * SamplesPer10msFrame();
  if (input_buffer_.size() < samples_per_packet) {
    // Still collecting audio for the current packet.
    return EncodedInfo();
  }
  CHECK_EQ(input_buffer_.size(), static_cast<size_t>(Num10msFramesPerPacket()) *
                                     SamplesPer10msFrame());

  const int status = WebRtcOpus_Encode(
      inst_, &input_buffer_[0],
      rtc::CheckedDivExact(input_buffer_.size(),
                           static_cast<size_t>(config_.num_channels)),
      rtc::saturated_cast<int16_t>(max_encoded_bytes), encoded);
  CHECK_GE(status, 0);  // Fails only if fed invalid data.
  input_buffer_.clear();

  // A non-negative status is the number of bytes written to |encoded|.
  EncodedInfo info;
  info.payload_type = config_.payload_type;
  info.encoded_timestamp = first_timestamp_in_buffer_;
  info.encoded_bytes = static_cast<size_t>(status);
  info.speech = (status > 0);
  info.send_even_if_empty = true;  // Allows Opus to send empty packets.
  return info;
}
// Recreates the Opus encoder instance from the current config. The CHECK
// fails if RecreateEncoderInstance() reports failure.
void AudioEncoderOpus::Reset() {
CHECK(RecreateEncoderInstance(config_));
}
bool AudioEncoderOpus::SetFec(bool enable) {
auto conf = config_;
conf.fec_enabled = enable;
return RecreateEncoderInstance(conf);
}
bool AudioEncoderOpus::SetDtx(bool enable) {
auto conf = config_;
conf.dtx_enabled = enable;
return RecreateEncoderInstance(conf);
}
// Selects the Opus application mode matching the generic |application| value
// (kSpeech -> kVoip, kAudio -> kAudio) and recreates the encoder instance with
// it. Returns the result of RecreateEncoderInstance().
bool AudioEncoderOpus::SetApplication(Application application) {
  Config updated = config_;
  // Deliberately no default case: any value outside the two enumerators
  // leaves the current application mode in place.
  switch (application) {
    case Application::kAudio:
      updated.application = AudioEncoderOpus::kAudio;
      break;
    case Application::kSpeech:
      updated.application = AudioEncoderOpus::kVoip;
      break;
  }
  return RecreateEncoderInstance(updated);
}
bool AudioEncoderOpus::SetMaxPlaybackRate(int frequency_hz) {
auto conf = config_;
conf.max_playback_rate_hz = frequency_hz;
return RecreateEncoderInstance(conf);
}
void AudioEncoderOpus::SetProjectedPacketLossRate(double fraction) {
double opt_loss_rate = OptimizePacketLossRate(fraction, packet_loss_rate_);
if (packet_loss_rate_ != opt_loss_rate) {
packet_loss_rate_ = opt_loss_rate;
CHECK_EQ(0, WebRtcOpus_SetPacketLossRate(
inst_, static_cast<int32_t>(packet_loss_rate_ * 100 + .5)));
}
}
// Stores |bits_per_second|, limited to the range
// [kMinBitrateBps, kMaxBitrateBps], in the config and applies it to the
// running encoder instance. The CHECK fails if Opus does not return 0.
void AudioEncoderOpus::SetTargetBitrate(int bits_per_second) {
  const int capped_bps = std::min(bits_per_second, kMaxBitrateBps);
  config_.bitrate_bps = std::max(capped_bps, kMinBitrateBps);
  DCHECK(config_.IsOk());
  CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, config_.bitrate_bps));
}
// Returns the configured frame length expressed in 10 ms units.
// CheckedDivExact requires |frame_size_ms| to be an exact multiple of 10.
int AudioEncoderOpus::Num10msFramesPerPacket() const {
return rtc::CheckedDivExact(config_.frame_size_ms, 10);
}
// Returns the number of samples in 10 ms of input, counted over all channels:
// kSampleRateHz / 100 samples for each configured channel.
int AudioEncoderOpus::SamplesPer10msFrame() const {
return rtc::CheckedDivExact(kSampleRateHz, 100) * config_.num_channels;
}
// If the given config is OK, recreate the Opus encoder instance with those
// settings, save the config, and return true. Otherwise, do nothing and return
// false.
bool AudioEncoderOpus::RecreateEncoderInstance(const Config& config) {
if (!config.IsOk())
return false;
if (inst_)
CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
input_buffer_.clear();
input_buffer_.reserve(Num10msFramesPerPacket() * SamplesPer10msFrame());
CHECK_EQ(0, WebRtcOpus_EncoderCreate(&inst_, config.num_channels,
config.application));
CHECK_EQ(0, WebRtcOpus_SetBitRate(inst_, config.bitrate_bps));
if (config.fec_enabled) {
CHECK_EQ(0, WebRtcOpus_EnableFec(inst_));
} else {
@ -87,172 +246,10 @@ AudioEncoderOpus::AudioEncoderOpus(const Config& config)
} else {
CHECK_EQ(0, WebRtcOpus_DisableDtx(inst_));
}
}
AudioEncoderOpus::~AudioEncoderOpus() {
CHECK_EQ(0, WebRtcOpus_EncoderFree(inst_));
}
int AudioEncoderOpus::SampleRateHz() const {
return kSampleRateHz;
}
int AudioEncoderOpus::NumChannels() const {
return num_channels_;
}
size_t AudioEncoderOpus::MaxEncodedBytes() const {
// Calculate the number of bytes we expect the encoder to produce,
// then multiply by two to give a wide margin for error.
size_t bytes_per_millisecond =
static_cast<size_t>(bitrate_bps_ / (1000 * 8) + 1);
size_t approx_encoded_bytes =
num_10ms_frames_per_packet_ * 10 * bytes_per_millisecond;
return 2 * approx_encoded_bytes;
}
size_t AudioEncoderOpus::Num10MsFramesInNextPacket() const {
return num_10ms_frames_per_packet_;
}
size_t AudioEncoderOpus::Max10MsFramesInAPacket() const {
return num_10ms_frames_per_packet_;
}
int AudioEncoderOpus::GetTargetBitrate() const {
return bitrate_bps_;
}
void AudioEncoderOpus::SetTargetBitrate(int bits_per_second) {
bitrate_bps_ = std::max(std::min(bits_per_second, kMaxBitrateBps),
kMinBitrateBps);
CHECK_EQ(WebRtcOpus_SetBitRate(inst_, bitrate_bps_), 0);
}
void AudioEncoderOpus::SetProjectedPacketLossRate(double fraction) {
DCHECK_GE(fraction, 0.0);
DCHECK_LE(fraction, 1.0);
// Optimize the loss rate to configure Opus. Basically, optimized loss rate is
// the input loss rate rounded down to various levels, because a robustly good
// audio quality is achieved by lowering the packet loss down.
// Additionally, to prevent toggling, margins are used, i.e., when jumping to
// a loss rate from below, a higher threshold is used than jumping to the same
// level from above.
const double kPacketLossRate20 = 0.20;
const double kPacketLossRate10 = 0.10;
const double kPacketLossRate5 = 0.05;
const double kPacketLossRate1 = 0.01;
const double kLossRate20Margin = 0.02;
const double kLossRate10Margin = 0.01;
const double kLossRate5Margin = 0.01;
double opt_loss_rate;
if (fraction >=
kPacketLossRate20 +
kLossRate20Margin *
(kPacketLossRate20 - packet_loss_rate_ > 0 ? 1 : -1)) {
opt_loss_rate = kPacketLossRate20;
} else if (fraction >=
kPacketLossRate10 +
kLossRate10Margin *
(kPacketLossRate10 - packet_loss_rate_ > 0 ? 1 : -1)) {
opt_loss_rate = kPacketLossRate10;
} else if (fraction >=
kPacketLossRate5 +
kLossRate5Margin *
(kPacketLossRate5 - packet_loss_rate_ > 0 ? 1 : -1)) {
opt_loss_rate = kPacketLossRate5;
} else if (fraction >= kPacketLossRate1) {
opt_loss_rate = kPacketLossRate1;
} else {
opt_loss_rate = 0;
}
if (packet_loss_rate_ != opt_loss_rate) {
// Ask the encoder to change the target packet loss rate.
CHECK_EQ(WebRtcOpus_SetPacketLossRate(
inst_, static_cast<int32_t>(opt_loss_rate * 100 + .5)),
0);
packet_loss_rate_ = opt_loss_rate;
}
}
AudioEncoder::EncodedInfo AudioEncoderOpus::EncodeInternal(
uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded) {
if (input_buffer_.empty())
first_timestamp_in_buffer_ = rtp_timestamp;
input_buffer_.insert(input_buffer_.end(), audio,
audio + samples_per_10ms_frame_);
if (input_buffer_.size() <
(num_10ms_frames_per_packet_ * samples_per_10ms_frame_)) {
return EncodedInfo();
}
CHECK_EQ(input_buffer_.size(),
num_10ms_frames_per_packet_ * samples_per_10ms_frame_);
int status = WebRtcOpus_Encode(
inst_, &input_buffer_[0],
rtc::CheckedDivExact(input_buffer_.size(),
static_cast<size_t>(num_channels_)),
max_encoded_bytes, encoded);
CHECK_GE(status, 0); // Fails only if fed invalid data.
input_buffer_.clear();
EncodedInfo info;
info.encoded_bytes = static_cast<size_t>(status);
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.send_even_if_empty = true; // Allows Opus to send empty packets.
info.speech = (status > 0);
return info;
}
namespace {
AudioEncoderOpus::Config CreateConfig(const CodecInst& codec_inst) {
AudioEncoderOpus::Config config;
config.frame_size_ms = rtc::CheckedDivExact(codec_inst.pacsize, 48);
config.num_channels = codec_inst.channels;
config.bitrate_bps = codec_inst.rate;
config.payload_type = codec_inst.pltype;
config.application = (config.num_channels == 1 ? AudioEncoderOpus::kVoip
: AudioEncoderOpus::kAudio);
return config;
}
} // namespace
AudioEncoderMutableOpus::AudioEncoderMutableOpus(const CodecInst& codec_inst)
: AudioEncoderMutableImpl<AudioEncoderOpus>(CreateConfig(codec_inst)) {
}
bool AudioEncoderMutableOpus::SetFec(bool enable) {
auto conf = config();
conf.fec_enabled = enable;
return Reconstruct(conf);
}
bool AudioEncoderMutableOpus::SetDtx(bool enable) {
auto conf = config();
conf.dtx_enabled = enable;
return Reconstruct(conf);
}
bool AudioEncoderMutableOpus::SetApplication(Application application) {
auto conf = config();
switch (application) {
case kApplicationSpeech:
conf.application = AudioEncoderOpus::kVoip;
break;
case kApplicationAudio:
conf.application = AudioEncoderOpus::kAudio;
break;
}
return Reconstruct(conf);
}
bool AudioEncoderMutableOpus::SetMaxPlaybackRate(int frequency_hz) {
auto conf = config();
conf.max_playback_rate_hz = frequency_hz;
return Reconstruct(conf);
CHECK_EQ(0, WebRtcOpus_SetPacketLossRate(
inst_, static_cast<int32_t>(packet_loss_rate_ * 100 + .5)));
config_ = config;
return true;
}
} // namespace webrtc

View File

@ -13,15 +13,14 @@
#include <vector>
#include "webrtc/base/checks.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_coding/codecs/audio_encoder_mutable_impl.h"
#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h"
#include "webrtc/modules/audio_coding/codecs/audio_encoder.h"
namespace webrtc {
// NOTE: This class has neither ThreadChecker, nor locks. The owner of an
// AudioEncoderOpus object must ensure that it is not accessed concurrently.
struct CodecInst;
class AudioEncoderOpus final : public AudioEncoder {
public:
@ -31,60 +30,44 @@ class AudioEncoderOpus final : public AudioEncoder {
};
struct Config {
Config();
bool IsOk() const;
int frame_size_ms;
int num_channels;
int payload_type;
ApplicationMode application;
int bitrate_bps;
bool fec_enabled;
int max_playback_rate_hz;
int complexity;
bool dtx_enabled;
int frame_size_ms = 20;
int num_channels = 1;
int payload_type = 120;
ApplicationMode application = kVoip;
int bitrate_bps = 64000;
bool fec_enabled = false;
int max_playback_rate_hz = 48000;
int complexity = kDefaultComplexity;
bool dtx_enabled = false;
private:
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) || defined(WEBRTC_ARCH_ARM)
// On Android, iOS and/or ARM, default to a lower complexity setting to
// reduce the encoder's computational load.
static const int kDefaultComplexity = 5;
#else
static const int kDefaultComplexity = 9;
#endif
};
explicit AudioEncoderOpus(const Config& config);
explicit AudioEncoderOpus(const CodecInst& codec_inst);
~AudioEncoderOpus() override;
size_t MaxEncodedBytes() const override;
int SampleRateHz() const override;
int NumChannels() const override;
size_t MaxEncodedBytes() const override;
size_t Num10MsFramesInNextPacket() const override;
size_t Max10MsFramesInAPacket() const override;
int GetTargetBitrate() const override;
void SetTargetBitrate(int bits_per_second) override;
void SetProjectedPacketLossRate(double fraction) override;
double packet_loss_rate() const { return packet_loss_rate_; }
ApplicationMode application() const { return application_; }
bool dtx_enabled() const { return dtx_enabled_; }
EncodedInfo EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded) override;
private:
const size_t num_10ms_frames_per_packet_;
const int num_channels_;
const int payload_type_;
const ApplicationMode application_;
int bitrate_bps_;
const bool dtx_enabled_;
const size_t samples_per_10ms_frame_;
std::vector<int16_t> input_buffer_;
OpusEncInst* inst_;
uint32_t first_timestamp_in_buffer_;
double packet_loss_rate_;
};
struct CodecInst;
class AudioEncoderMutableOpus
: public AudioEncoderMutableImpl<AudioEncoderOpus> {
public:
explicit AudioEncoderMutableOpus(const CodecInst& codec_inst);
void Reset() override;
bool SetFec(bool enable) override;
// Set Opus DTX. Once enabled, Opus stops transmission, when it detects voice
@ -94,18 +77,24 @@ class AudioEncoderMutableOpus
bool SetApplication(Application application) override;
bool SetMaxPlaybackRate(int frequency_hz) override;
AudioEncoderOpus::ApplicationMode application() const {
CriticalSectionScoped cs(encoder_lock_.get());
return encoder()->application();
}
double packet_loss_rate() const {
CriticalSectionScoped cs(encoder_lock_.get());
return encoder()->packet_loss_rate();
}
bool dtx_enabled() const {
CriticalSectionScoped cs(encoder_lock_.get());
return encoder()->dtx_enabled();
}
void SetProjectedPacketLossRate(double fraction) override;
void SetTargetBitrate(int target_bps) override;
// Getters for testing.
double packet_loss_rate() const { return packet_loss_rate_; }
ApplicationMode application() const { return config_.application; }
bool dtx_enabled() const { return config_.dtx_enabled; }
private:
int Num10msFramesPerPacket() const;
int SamplesPer10msFrame() const;
bool RecreateEncoderInstance(const Config& config);
Config config_;
double packet_loss_rate_;
std::vector<int16_t> input_buffer_;
OpusEncInst* inst_;
uint32_t first_timestamp_in_buffer_;
};
} // namespace webrtc