Moved Opus-specific payload splitting into AudioDecoderOpus.

The biggest change to NetEq is the move from a primary flag, to a
Priority with two separate levels: one set by RED splitting and one
set by the codec itself. This allows us to unambiguously prioritize
"fallback" packets from these two sources. I've chosen what I believe
is the sensible ordering: packets that the codec prioritizes are
chosen first, regardless of whether they are secondary RED packets or
not. So if we were to use Opus w/ FEC in RED, we'd only do Opus FEC
decoding if there was no RED packet that could cover the time slot.

With this change, PayloadSplitter now only deals with RED
packets. Maybe it should be renamed RedPayloadSplitter?

BUG=webrtc:5805

Review-Url: https://codereview.webrtc.org/2342443005
Cr-Commit-Position: refs/heads/master@{#14347}
This commit is contained in:
ossu
2016-09-22 02:06:28 -07:00
committed by Commit bot
parent 2beb42983c
commit a70695a3e1
32 changed files with 441 additions and 464 deletions

View File

@ -14,8 +14,6 @@
#include <memory>
#include <utility>
#include <utility>
#include "webrtc/base/array_view.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/sanitizer.h"
@ -27,9 +25,11 @@ namespace webrtc {
AudioDecoder::ParseResult::ParseResult() = default;
AudioDecoder::ParseResult::ParseResult(ParseResult&& b) = default;
AudioDecoder::ParseResult::ParseResult(uint32_t timestamp,
bool primary,
int priority,
std::unique_ptr<EncodedAudioFrame> frame)
: timestamp(timestamp), primary(primary), frame(std::move(frame)) {}
: timestamp(timestamp), priority(priority), frame(std::move(frame)) {
RTC_DCHECK_GE(priority, 0);
}
AudioDecoder::ParseResult::~ParseResult() = default;
@ -38,12 +38,11 @@ AudioDecoder::ParseResult& AudioDecoder::ParseResult::operator=(
std::vector<AudioDecoder::ParseResult> AudioDecoder::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) {
uint32_t timestamp) {
std::vector<ParseResult> results;
std::unique_ptr<EncodedAudioFrame> frame(
new LegacyEncodedAudioFrame(this, std::move(payload), is_primary));
results.emplace_back(timestamp, is_primary, std::move(frame));
new LegacyEncodedAudioFrame(this, std::move(payload)));
results.emplace_back(timestamp, 0, std::move(frame));
return results;
}

View File

@ -8,11 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_
#include <memory>
#include <vector>
#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_
#define WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_
#include <memory>
#include <vector>
@ -66,7 +63,7 @@ class AudioDecoder {
struct ParseResult {
ParseResult();
ParseResult(uint32_t timestamp,
bool primary,
int priority,
std::unique_ptr<EncodedAudioFrame> frame);
ParseResult(ParseResult&& b);
~ParseResult();
@ -75,7 +72,10 @@ class AudioDecoder {
// The timestamp of the frame is in samples per channel.
uint32_t timestamp;
bool primary;
// The relative priority of the frame compared to other frames of the same
// payload and the same timeframe. A higher value means a lower priority.
// The highest priority is zero - negative values are not allowed.
int priority;
std::unique_ptr<EncodedAudioFrame> frame;
};
@ -86,8 +86,7 @@ class AudioDecoder {
// buffer. |timestamp| is the input timestamp, in samples, corresponding to
// the start of the payload.
virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary);
uint32_t timestamp);
// Decodes |encode_len| bytes from |encoded| and writes the result in
// |decoded|. The maximum bytes allowed to be written into |decoded| is
@ -177,4 +176,4 @@ class AudioDecoder {
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_
#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_

View File

@ -19,10 +19,9 @@ void AudioDecoderPcmU::Reset() {}
std::vector<AudioDecoder::ParseResult> AudioDecoderPcmU::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) {
uint32_t timestamp) {
return LegacyEncodedAudioFrame::SplitBySamples(
this, std::move(payload), timestamp, is_primary, 8 * num_channels_, 8);
this, std::move(payload), timestamp, 8 * num_channels_, 8);
}
int AudioDecoderPcmU::SampleRateHz() const {
@ -55,10 +54,9 @@ void AudioDecoderPcmA::Reset() {}
std::vector<AudioDecoder::ParseResult> AudioDecoderPcmA::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) {
uint32_t timestamp) {
return LegacyEncodedAudioFrame::SplitBySamples(
this, std::move(payload), timestamp, is_primary, 8 * num_channels_, 8);
this, std::move(payload), timestamp, 8 * num_channels_, 8);
}
int AudioDecoderPcmA::SampleRateHz() const {

View File

@ -24,8 +24,7 @@ class AudioDecoderPcmU final : public AudioDecoder {
}
void Reset() override;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) override;
uint32_t timestamp) override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
int SampleRateHz() const override;
size_t Channels() const override;
@ -49,8 +48,7 @@ class AudioDecoderPcmA final : public AudioDecoder {
}
void Reset() override;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) override;
uint32_t timestamp) override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
int SampleRateHz() const override;
size_t Channels() const override;

View File

@ -50,10 +50,9 @@ void AudioDecoderG722::Reset() {
std::vector<AudioDecoder::ParseResult> AudioDecoderG722::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) {
uint32_t timestamp) {
return LegacyEncodedAudioFrame::SplitBySamples(this, std::move(payload),
timestamp, is_primary, 8, 16);
timestamp, 8, 16);
}
int AudioDecoderG722::PacketDuration(const uint8_t* encoded,
@ -128,10 +127,9 @@ void AudioDecoderG722Stereo::Reset() {
std::vector<AudioDecoder::ParseResult> AudioDecoderG722Stereo::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) {
return LegacyEncodedAudioFrame::SplitBySamples(
this, std::move(payload), timestamp, is_primary, 2 * 8, 16);
uint32_t timestamp) {
return LegacyEncodedAudioFrame::SplitBySamples(this, std::move(payload),
timestamp, 2 * 8, 16);
}
// Split the stereo packet and place left and right channel after each other

View File

@ -25,8 +25,7 @@ class AudioDecoderG722 final : public AudioDecoder {
bool HasDecodePlc() const override;
void Reset() override;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) override;
uint32_t timestamp) override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
int SampleRateHz() const override;
size_t Channels() const override;
@ -49,8 +48,7 @@ class AudioDecoderG722Stereo final : public AudioDecoder {
~AudioDecoderG722Stereo() override;
void Reset() override;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) override;
uint32_t timestamp) override;
int SampleRateHz() const override;
size_t Channels() const override;

View File

@ -10,6 +10,8 @@
#include "webrtc/modules/audio_coding/codecs/ilbc/audio_decoder_ilbc.h"
#include <utility>
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/modules/audio_coding/codecs/ilbc/ilbc.h"
@ -53,8 +55,7 @@ void AudioDecoderIlbc::Reset() {
std::vector<AudioDecoder::ParseResult> AudioDecoderIlbc::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) {
uint32_t timestamp) {
std::vector<ParseResult> results;
size_t bytes_per_frame;
int timestamps_per_frame;
@ -78,8 +79,8 @@ std::vector<AudioDecoder::ParseResult> AudioDecoderIlbc::ParsePayload(
RTC_DCHECK_EQ(0u, payload.size() % bytes_per_frame);
if (payload.size() == bytes_per_frame) {
std::unique_ptr<EncodedAudioFrame> frame(
new LegacyEncodedAudioFrame(this, std::move(payload), is_primary));
results.emplace_back(timestamp, is_primary, std::move(frame));
new LegacyEncodedAudioFrame(this, std::move(payload)));
results.emplace_back(timestamp, 0, std::move(frame));
} else {
size_t byte_offset;
uint32_t timestamp_offset;
@ -87,11 +88,9 @@ std::vector<AudioDecoder::ParseResult> AudioDecoderIlbc::ParsePayload(
byte_offset < payload.size();
byte_offset += bytes_per_frame,
timestamp_offset += timestamps_per_frame) {
rtc::Buffer new_payload(payload.data() + byte_offset, bytes_per_frame);
std::unique_ptr<EncodedAudioFrame> frame(new LegacyEncodedAudioFrame(
this, std::move(new_payload), is_primary));
results.emplace_back(timestamp + timestamp_offset, is_primary,
std::move(frame));
this, rtc::Buffer(payload.data() + byte_offset, bytes_per_frame)));
results.emplace_back(timestamp + timestamp_offset, 0, std::move(frame));
}
}

View File

@ -26,8 +26,7 @@ class AudioDecoderIlbc final : public AudioDecoder {
size_t DecodePlc(size_t num_frames, int16_t* decoded) override;
void Reset() override;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) override;
uint32_t timestamp) override;
int SampleRateHz() const override;
size_t Channels() const override;

View File

@ -81,7 +81,7 @@ TEST_P(SplitIlbcTest, NumFrames) {
};
const auto results = decoder.ParsePayload(
generate_payload(frame_length_bytes_ * num_frames_), 0, true);
generate_payload(frame_length_bytes_ * num_frames_), 0);
EXPECT_EQ(num_frames_, results.size());
size_t frame_num = 0;
@ -123,7 +123,7 @@ TEST(IlbcTest, SplitTooLargePayload) {
AudioDecoderIlbc decoder;
constexpr size_t kPayloadLengthBytes = 950;
const auto results =
decoder.ParsePayload(rtc::Buffer(kPayloadLengthBytes), 0, true);
decoder.ParsePayload(rtc::Buffer(kPayloadLengthBytes), 0);
EXPECT_TRUE(results.empty());
}
@ -132,7 +132,7 @@ TEST(IlbcTest, SplitUnevenPayload) {
AudioDecoderIlbc decoder;
constexpr size_t kPayloadLengthBytes = 39; // Not an even number of frames.
const auto results =
decoder.ParsePayload(rtc::Buffer(kPayloadLengthBytes), 0, true);
decoder.ParsePayload(rtc::Buffer(kPayloadLengthBytes), 0);
EXPECT_TRUE(results.empty());
}

View File

@ -17,37 +17,22 @@
namespace webrtc {
LegacyEncodedAudioFrame::LegacyEncodedAudioFrame(AudioDecoder* decoder,
rtc::Buffer&& payload,
bool is_primary_payload)
: decoder_(decoder),
payload_(std::move(payload)),
is_primary_payload_(is_primary_payload) {}
rtc::Buffer&& payload)
: decoder_(decoder), payload_(std::move(payload)) {}
LegacyEncodedAudioFrame::~LegacyEncodedAudioFrame() = default;
size_t LegacyEncodedAudioFrame::Duration() const {
int ret;
if (is_primary_payload_) {
ret = decoder_->PacketDuration(payload_.data(), payload_.size());
} else {
ret = decoder_->PacketDurationRedundant(payload_.data(), payload_.size());
}
const int ret = decoder_->PacketDuration(payload_.data(), payload_.size());
return (ret < 0) ? 0 : static_cast<size_t>(ret);
}
rtc::Optional<AudioDecoder::EncodedAudioFrame::DecodeResult>
LegacyEncodedAudioFrame::Decode(rtc::ArrayView<int16_t> decoded) const {
AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
int ret;
if (is_primary_payload_) {
ret = decoder_->Decode(
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
} else {
ret = decoder_->DecodeRedundant(
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
}
const int ret = decoder_->Decode(
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
if (ret < 0)
return rtc::Optional<DecodeResult>();
@ -59,7 +44,6 @@ std::vector<AudioDecoder::ParseResult> LegacyEncodedAudioFrame::SplitBySamples(
AudioDecoder* decoder,
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary,
size_t bytes_per_ms,
uint32_t timestamps_per_ms) {
RTC_DCHECK(payload.data());
@ -70,8 +54,8 @@ std::vector<AudioDecoder::ParseResult> LegacyEncodedAudioFrame::SplitBySamples(
const size_t min_chunk_size = bytes_per_ms * 20;
if (min_chunk_size >= payload.size()) {
std::unique_ptr<LegacyEncodedAudioFrame> frame(
new LegacyEncodedAudioFrame(decoder, std::move(payload), is_primary));
results.emplace_back(timestamp, is_primary, std::move(frame));
new LegacyEncodedAudioFrame(decoder, std::move(payload)));
results.emplace_back(timestamp, 0, std::move(frame));
} else {
// Reduce the split size by half as long as |split_size_bytes| is at least
// twice the minimum chunk size (so that the resulting size is at least as
@ -92,10 +76,8 @@ std::vector<AudioDecoder::ParseResult> LegacyEncodedAudioFrame::SplitBySamples(
std::min(split_size_bytes, payload.size() - byte_offset);
rtc::Buffer new_payload(payload.data() + byte_offset, split_size_bytes);
std::unique_ptr<LegacyEncodedAudioFrame> frame(
new LegacyEncodedAudioFrame(decoder, std::move(new_payload),
is_primary));
results.emplace_back(timestamp + timestamp_offset, is_primary,
std::move(frame));
new LegacyEncodedAudioFrame(decoder, std::move(new_payload)));
results.emplace_back(timestamp + timestamp_offset, 0, std::move(frame));
}
}

View File

@ -20,16 +20,13 @@ namespace webrtc {
class LegacyEncodedAudioFrame final : public AudioDecoder::EncodedAudioFrame {
public:
LegacyEncodedAudioFrame(AudioDecoder* decoder,
rtc::Buffer&& payload,
bool is_primary_payload);
LegacyEncodedAudioFrame(AudioDecoder* decoder, rtc::Buffer&& payload);
~LegacyEncodedAudioFrame() override;
static std::vector<AudioDecoder::ParseResult> SplitBySamples(
AudioDecoder* decoder,
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary,
size_t bytes_per_ms,
uint32_t timestamps_per_ms);
@ -44,7 +41,6 @@ class LegacyEncodedAudioFrame final : public AudioDecoder::EncodedAudioFrame {
private:
AudioDecoder* const decoder_;
const rtc::Buffer payload_;
const bool is_primary_payload_;
};
} // namespace webrtc

View File

@ -123,7 +123,7 @@ TEST_P(SplitBySamplesTest, PayloadSizes) {
const auto results = LegacyEncodedAudioFrame::SplitBySamples(
nullptr,
generate_payload(expected_split.payload_size_ms * bytes_per_ms_),
kBaseTimestamp, true, bytes_per_ms_, samples_per_ms_);
kBaseTimestamp, bytes_per_ms_, samples_per_ms_);
EXPECT_EQ(expected_split.num_frames, results.size());
uint32_t expected_timestamp = kBaseTimestamp;

View File

@ -10,10 +10,60 @@
#include "webrtc/modules/audio_coding/codecs/opus/audio_decoder_opus.h"
#include <utility>
#include "webrtc/base/checks.h"
namespace webrtc {
namespace {
class OpusFrame : public AudioDecoder::EncodedAudioFrame {
public:
OpusFrame(AudioDecoderOpus* decoder,
rtc::Buffer&& payload,
bool is_primary_payload)
: decoder_(decoder),
payload_(std::move(payload)),
is_primary_payload_(is_primary_payload) {}
size_t Duration() const override {
int ret;
if (is_primary_payload_) {
ret = decoder_->PacketDuration(payload_.data(), payload_.size());
} else {
ret = decoder_->PacketDurationRedundant(payload_.data(), payload_.size());
}
return (ret < 0) ? 0 : static_cast<size_t>(ret);
}
rtc::Optional<DecodeResult> Decode(
rtc::ArrayView<int16_t> decoded) const override {
AudioDecoder::SpeechType speech_type = AudioDecoder::kSpeech;
int ret;
if (is_primary_payload_) {
ret = decoder_->Decode(
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
} else {
ret = decoder_->DecodeRedundant(
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
}
if (ret < 0)
return rtc::Optional<DecodeResult>();
return rtc::Optional<DecodeResult>({static_cast<size_t>(ret), speech_type});
}
private:
AudioDecoderOpus* const decoder_;
const rtc::Buffer payload_;
const bool is_primary_payload_;
};
} // namespace
AudioDecoderOpus::AudioDecoderOpus(size_t num_channels)
: channels_(num_channels) {
RTC_DCHECK(num_channels == 1 || num_channels == 2);
@ -25,6 +75,26 @@ AudioDecoderOpus::~AudioDecoderOpus() {
WebRtcOpus_DecoderFree(dec_state_);
}
std::vector<AudioDecoder::ParseResult> AudioDecoderOpus::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp) {
std::vector<ParseResult> results;
if (PacketHasFec(payload.data(), payload.size())) {
const int duration =
PacketDurationRedundant(payload.data(), payload.size());
RTC_DCHECK_GE(duration, 0);
rtc::Buffer payload_copy(payload.data(), payload.size());
std::unique_ptr<EncodedAudioFrame> fec_frame(
new OpusFrame(this, std::move(payload_copy), false));
results.emplace_back(timestamp - duration, 1, std::move(fec_frame));
}
std::unique_ptr<EncodedAudioFrame> frame(
new OpusFrame(this, std::move(payload), true));
results.emplace_back(timestamp, 0, std::move(frame));
return results;
}
int AudioDecoderOpus::DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,

View File

@ -22,6 +22,8 @@ class AudioDecoderOpus final : public AudioDecoder {
explicit AudioDecoderOpus(size_t num_channels);
~AudioDecoderOpus() override;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp) override;
void Reset() override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
int PacketDurationRedundant(const uint8_t* encoded,

View File

@ -47,12 +47,11 @@ int AudioDecoderPcm16B::DecodeInternal(const uint8_t* encoded,
std::vector<AudioDecoder::ParseResult> AudioDecoderPcm16B::ParsePayload(
rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) {
uint32_t timestamp) {
const int samples_per_ms = rtc::CheckedDivExact(sample_rate_hz_, 1000);
return LegacyEncodedAudioFrame::SplitBySamples(
this, std::move(payload), timestamp, is_primary,
samples_per_ms * 2 * num_channels_, samples_per_ms);
this, std::move(payload), timestamp, samples_per_ms * 2 * num_channels_,
samples_per_ms);
}
int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded,

View File

@ -21,8 +21,7 @@ class AudioDecoderPcm16B final : public AudioDecoder {
AudioDecoderPcm16B(int sample_rate_hz, size_t num_channels);
void Reset() override;
std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
uint32_t timestamp,
bool is_primary) override;
uint32_t timestamp) override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
int SampleRateHz() const override;
size_t Channels() const override;