Prepare to move SPS VUI rewriting out of H.264 packetizer.

- add ParseOutgoingBitstreamAndRewriteSps to SpsVuiRewriter
  which takes encoded H.264 bitstream and NAL unit boundaries,
  rewrites SPS if needed and updates the NAL unit boundaries
  accordingly
- move SPS rewriting stats updates to SpsVuiRewriter

Bug: webrtc:10559
Change-Id: I7ca21756628ee6d6abbcbd501bdb4f3df024168b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/133174
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Mirta Dvornicic <mirtad@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#27665}
This commit is contained in:
Mirta Dvornicic
2019-04-17 13:43:39 +02:00
committed by Commit Bot
parent aa274d0287
commit 8a7dcb163e
5 changed files with 310 additions and 113 deletions

View File

@ -21,13 +21,28 @@
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
// The maximum expected growth from adding a VUI to the SPS. It's actually
// closer to 24 or so, but better safe than sorry.
const size_t kMaxVuiSpsIncrease = 64;
const char* kSpsValidHistogramName = "WebRTC.Video.H264.SpsValid";
// Sample values recorded in the |kSpsValidHistogramName| enumeration
// histogram. The kReceived* values are reported for incoming SPS NAL units
// and the kSent* values for outgoing ones (see SpsVuiRewriter::UpdateStats).
// The numeric values are spelled out explicitly; presumably they must stay
// stable because they are histogram bucket ids -- confirm before renumbering.
enum SpsValidEvent {
// Incoming SPS parsed and its VUI needed no change.
kReceivedSpsVuiOk = 1,
// Incoming SPS parsed and its VUI was rewritten.
kReceivedSpsRewritten = 2,
// Incoming SPS could not be parsed.
kReceivedSpsParseFailure = 3,
// NOTE(review): not reported by any code visible in this file.
kSentSpsPocOk = 4,
// Outgoing SPS parsed and its VUI needed no change.
kSentSpsVuiOk = 5,
// Outgoing SPS parsed and its VUI was rewritten.
kSentSpsRewritten = 6,
// Outgoing SPS could not be parsed.
kSentSpsParseFailure = 7,
// Passed as the boundary argument of the enumeration histogram.
kSpsRewrittenMax = 8
};
#define RETURN_FALSE_ON_FAIL(x) \
if (!(x)) { \
RTC_LOG_F(LS_ERROR) << " (line:" << __LINE__ << ") FAILED: " #x; \
@ -55,9 +70,7 @@ const size_t kMaxVuiSpsIncrease = 64;
RETURN_FALSE_ON_FAIL((dest)->WriteBits(tmp, bits)); \
} while (0)
typedef const SpsParser::SpsState& Sps;
bool CopyAndRewriteVui(Sps sps,
bool CopyAndRewriteVui(const SpsParser::SpsState& sps,
rtc::BitBuffer* source,
rtc::BitBufferWriter* destination,
SpsVuiRewriter::ParseResult* out_vui_rewritten);
@ -68,6 +81,37 @@ bool AddBitstreamRestriction(rtc::BitBufferWriter* destination,
bool CopyRemainingBits(rtc::BitBuffer* source,
rtc::BitBufferWriter* destination);
} // namespace
// Reports the outcome of one SPS parse/rewrite attempt to the
// |kSpsValidHistogramName| enumeration histogram. The bucket is chosen from
// |result| and from whether the SPS was received (kIncoming) or is about to be
// sent (any other direction).
void SpsVuiRewriter::UpdateStats(ParseResult result, Direction direction) {
  const bool is_incoming = direction == SpsVuiRewriter::Direction::kIncoming;

  SpsValidEvent event;
  if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) {
    event = is_incoming ? SpsValidEvent::kReceivedSpsRewritten
                        : SpsValidEvent::kSentSpsRewritten;
  } else if (result == SpsVuiRewriter::ParseResult::kVuiOk) {
    event = is_incoming ? SpsValidEvent::kReceivedSpsVuiOk
                        : SpsValidEvent::kSentSpsVuiOk;
  } else if (result == SpsVuiRewriter::ParseResult::kFailure) {
    event = is_incoming ? SpsValidEvent::kReceivedSpsParseFailure
                        : SpsValidEvent::kSentSpsParseFailure;
  } else {
    // Unknown result value: nothing is recorded.
    return;
  }

  RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, event,
                            SpsValidEvent::kSpsRewrittenMax);
}
SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps(
const uint8_t* buffer,
size_t length,
@ -142,7 +186,87 @@ SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps(
return ParseResult::kVuiRewritten;
}
bool CopyAndRewriteVui(Sps sps,
// Stats-recording wrapper: forwards to the four-argument overload to do the
// actual parsing/rewriting, records the outcome for |direction| via
// UpdateStats(), and hands the outcome back to the caller unchanged.
SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps(
    const uint8_t* buffer,
    size_t length,
    absl::optional<SpsParser::SpsState>* sps,
    rtc::Buffer* destination,
    Direction direction) {
  const ParseResult outcome =
      ParseAndRewriteSps(buffer, length, sps, destination);
  UpdateStats(outcome, direction);
  return outcome;
}
// Walks the |num_nalus| NAL units of |buffer| (located by |nalu_offsets| and
// |nalu_lengths|) and appends each one, preceded by the bytes that separate it
// from the previous NAL unit (its start code), to |output_buffer|. SPS NAL
// units whose VUI gets rewritten are replaced by the rewritten version; every
// other NAL unit is copied verbatim. For each NAL unit the position and size
// inside |output_buffer| are stored in |output_nalu_offsets[i]| and
// |output_nalu_lengths[i]|. Every SPS parse attempt is recorded in the stats
// as Direction::kOutgoing.
void SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
    rtc::ArrayView<const uint8_t> buffer,
    size_t num_nalus,
    const size_t* nalu_offsets,
    const size_t* nalu_lengths,
    rtc::Buffer* output_buffer,
    size_t* output_nalu_offsets,
    size_t* output_nalu_lengths) {
  // Allocate some extra space for potentially adding a missing VUI.
  output_buffer->EnsureCapacity(buffer.size() + num_nalus * kMaxVuiSpsIncrease);

  const uint8_t* prev_nalu_ptr = buffer.data();
  size_t prev_nalu_length = 0;

  for (size_t i = 0; i < num_nalus; ++i) {
    const uint8_t* nalu_ptr = buffer.data() + nalu_offsets[i];
    const size_t nalu_length = nalu_lengths[i];

    // Copy NAL unit start code, i.e. everything between the end of the
    // previous NAL unit and the start of this one.
    const uint8_t* start_code_ptr = prev_nalu_ptr + prev_nalu_length;
    const size_t start_code_length =
        (nalu_ptr - prev_nalu_ptr) - prev_nalu_length;
    output_buffer->AppendData(start_code_ptr, start_code_length);

    bool updated_sps = false;

    // An empty NAL unit has no type header: reading nalu_ptr[0] would touch
    // bytes outside the unit and the payload length below would underflow, so
    // such units are never treated as SPS and are just copied through.
    if (nalu_length >= H264::kNaluTypeSize &&
        H264::ParseNaluType(nalu_ptr[0]) == H264::NaluType::kSps) {
      // Check if stream uses picture order count type 0, and if so rewrite it
      // to enable faster decoding. Streams in that format incur additional
      // delay because it allows decode order to differ from render order.
      // The mechanism used is to rewrite (edit or add) the SPS's VUI to contain
      // restrictions on the maximum number of reordered pictures. This reduces
      // latency significantly, though it still adds about a frame of latency to
      // decoding.
      // Note that we do this rewriting both here (send side, in order to
      // protect legacy receive clients) and in
      // RtpDepacketizerH264::ParseSingleNalu (receive side, in order to
      // protect us from unknown or legacy send clients).
      absl::optional<SpsParser::SpsState> sps;
      rtc::Buffer output_nalu;

      // Add the type header to the output buffer first, so that the rewriter
      // can append modified payload on top of that.
      output_nalu.AppendData(nalu_ptr[0]);

      ParseResult result = ParseAndRewriteSps(
          nalu_ptr + H264::kNaluTypeSize, nalu_length - H264::kNaluTypeSize,
          &sps, &output_nalu, Direction::kOutgoing);
      if (result == ParseResult::kVuiRewritten) {
        updated_sps = true;
        output_nalu_offsets[i] = output_buffer->size();
        output_nalu_lengths[i] = output_nalu.size();
        output_buffer->AppendData(output_nalu.data(), output_nalu.size());
      }
    }

    // Not an SPS, or an SPS that was left untouched (already fine, or could
    // not be parsed): copy the original bytes.
    if (!updated_sps) {
      output_nalu_offsets[i] = output_buffer->size();
      output_nalu_lengths[i] = nalu_length;
      output_buffer->AppendData(nalu_ptr, nalu_length);
    }

    prev_nalu_ptr = nalu_ptr;
    prev_nalu_length = nalu_length;
  }
}
namespace {
bool CopyAndRewriteVui(const SpsParser::SpsState& sps,
rtc::BitBuffer* source,
rtc::BitBufferWriter* destination,
SpsVuiRewriter::ParseResult* out_vui_rewritten) {
@ -354,4 +478,6 @@ bool CopyRemainingBits(rtc::BitBuffer* source,
return true;
}
} // namespace
} // namespace webrtc

View File

@ -32,6 +32,7 @@ namespace webrtc {
class SpsVuiRewriter : private SpsParser {
public:
enum class ParseResult { kFailure, kVuiOk, kVuiRewritten };
enum class Direction { kIncoming, kOutgoing };
// Parses an SPS block and if necessary copies it and rewrites the VUI.
// Returns kFailure on failure, kVuiOk if parsing succeeded and no update
@ -42,11 +43,35 @@ class SpsVuiRewriter : private SpsParser {
// SPS state. This function assumes that any previous headers
// (NALU start, type, Stap-A, etc) have already been parsed and that RBSP
// decoding has been performed.
// |direction| tells whether the SPS was received or is being sent; it selects
// which stats bucket the parse result is recorded under.
static ParseResult ParseAndRewriteSps(
    const uint8_t* buffer,
    size_t length,
    absl::optional<SpsParser::SpsState>* sps,
    rtc::Buffer* destination,
    Direction direction);
// Parses NAL units from |buffer| based on |nalu_offsets| and |nalu_lengths|
// and rewrites VUI in SPS blocks if necessary.
// The result is written to |output_buffer| and modified NAL unit offsets
// and lengths are written to |output_nalu_offsets| and |output_nalu_lengths|
// to account for any added data.
// |nalu_offsets|, |nalu_lengths|, |output_nalu_offsets| and
// |output_nalu_lengths| are each indexed from 0 to |num_nalus| - 1.
static void ParseOutgoingBitstreamAndRewriteSps(
rtc::ArrayView<const uint8_t> buffer,
size_t num_nalus,
const size_t* nalu_offsets,
const size_t* nalu_lengths,
rtc::Buffer* output_buffer,
size_t* output_nalu_offsets,
size_t* output_nalu_lengths);
private:
// Overload without a direction argument. The public overload calls this one
// and then passes the returned result to UpdateStats().
static ParseResult ParseAndRewriteSps(
const uint8_t* buffer,
size_t length,
absl::optional<SpsParser::SpsState>* sps,
rtc::Buffer* destination);
// Records |result| in the SPS histogram, using the incoming or outgoing
// bucket depending on |direction|.
static void UpdateStats(ParseResult result, Direction direction);
};
} // namespace webrtc

View File

@ -16,6 +16,7 @@
#include "rtc_base/bit_buffer.h"
#include "rtc_base/buffer.h"
#include "rtc_base/logging.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
@ -31,6 +32,11 @@ static const size_t kSpsBufferMaxSize = 256;
static const size_t kWidth = 640;
static const size_t kHeight = 480;
static const uint8_t kStartSequence[] = {0x00, 0x00, 0x00, 0x01};
// One-byte NAL unit header carrying the SPS type; prepended to a generated SPS
// payload to form a complete SPS NAL unit.
static const uint8_t kSpsNaluType[] = {H264::NaluType::kSps};
// Two short fake NAL units whose first byte is the IDR type, used as
// non-SPS filler around the SPS in the bitstream tests.
static const uint8_t kIdr1[] = {H264::NaluType::kIdr, 0xFF, 0x00, 0x00, 0x04};
static const uint8_t kIdr2[] = {H264::NaluType::kIdr, 0xFF, 0x00, 0x11};
// Generates a fake SPS with basically everything empty and with characteristics
// based off SpsMode.
// Pass in a buffer of at least kSpsBufferMaxSize.
@ -144,7 +150,8 @@ void TestSps(SpsMode mode, SpsVuiRewriter::ParseResult expected_parse_result) {
absl::optional<SpsParser::SpsState> sps;
rtc::Buffer rewritten_sps;
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
original_sps.data(), original_sps.size(), &sps, &rewritten_sps);
original_sps.data(), original_sps.size(), &sps, &rewritten_sps,
SpsVuiRewriter::Direction::kIncoming);
EXPECT_EQ(expected_parse_result, result);
ASSERT_TRUE(sps);
EXPECT_EQ(sps->width, kWidth);
@ -157,7 +164,8 @@ void TestSps(SpsMode mode, SpsVuiRewriter::ParseResult expected_parse_result) {
// Ensure that added/rewritten SPS is parsable.
rtc::Buffer tmp;
result = SpsVuiRewriter::ParseAndRewriteSps(
rewritten_sps.data(), rewritten_sps.size(), &sps, &tmp);
rewritten_sps.data(), rewritten_sps.size(), &sps, &tmp,
SpsVuiRewriter::Direction::kIncoming);
EXPECT_EQ(SpsVuiRewriter::ParseResult::kVuiOk, result);
ASSERT_TRUE(sps);
EXPECT_EQ(sps->width, kWidth);
@ -181,4 +189,108 @@ REWRITE_TEST(AddBitstreamRestriction,
REWRITE_TEST(RewriteSuboptimalVui,
kRewriteRequired_VuiSuboptimal,
SpsVuiRewriter::ParseResult::kVuiRewritten)
// An outgoing bitstream whose SPS already carries an optimal VUI must come out
// byte-for-byte identical, with unchanged NAL unit offsets and lengths.
TEST(SpsVuiRewriterTest, ParseOutgoingBitstreamOptimalVui) {
  rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

  rtc::Buffer optimal_sps;
  GenerateFakeSps(kNoRewriteRequired_VuiOptimal, &optimal_sps);

  // Input bitstream: [start code][SPS][start code][IDR].
  rtc::Buffer buffer;
  const size_t kNumNalus = 2;
  size_t nalu_offsets[kNumNalus];
  size_t nalu_lengths[kNumNalus];
  buffer.AppendData(kStartSequence);
  nalu_offsets[0] = buffer.size();
  // The generated SPS is payload only; prepend the NAL unit type header so the
  // rewriter recognizes the unit as an SPS and actually parses it.
  nalu_lengths[0] = sizeof(kSpsNaluType) + optimal_sps.size();
  buffer.AppendData(kSpsNaluType);
  buffer.AppendData(optimal_sps);
  buffer.AppendData(kStartSequence);
  nalu_offsets[1] = buffer.size();
  nalu_lengths[1] = sizeof(kIdr1);
  buffer.AppendData(kIdr1);

  rtc::Buffer modified_buffer;
  size_t modified_nalu_offsets[kNumNalus];
  size_t modified_nalu_lengths[kNumNalus];

  SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
      buffer, kNumNalus, nalu_offsets, nalu_lengths, &modified_buffer,
      modified_nalu_offsets, modified_nalu_lengths);

  // Nothing needed rewriting, so output must equal input.
  EXPECT_THAT(
      std::vector<uint8_t>(modified_buffer.data(),
                           modified_buffer.data() + modified_buffer.size()),
      ::testing::ElementsAreArray(buffer.data(), buffer.size()));
  EXPECT_THAT(std::vector<size_t>(modified_nalu_offsets,
                                  modified_nalu_offsets + kNumNalus),
              ::testing::ElementsAreArray(nalu_offsets, kNumNalus));
  EXPECT_THAT(std::vector<size_t>(modified_nalu_lengths,
                                  modified_nalu_lengths + kNumNalus),
              ::testing::ElementsAreArray(nalu_lengths, kNumNalus));
}
// An outgoing bitstream of [IDR][SPS without VUI][IDR] must come out with the
// SPS replaced by the optimal-VUI SPS, the surrounding NAL units untouched,
// and the NAL unit offsets/lengths adjusted to the new SPS size.
TEST(SpsVuiRewriterTest, ParseOutgoingBitstreamNoVui) {
  rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

  constexpr size_t kNumNalus = 3;

  // SPS payloads: the one fed in, and the one expected to come out.
  rtc::Buffer vui_less_sps;
  GenerateFakeSps(kRewriteRequired_NoVui, &vui_less_sps);
  rtc::Buffer optimal_sps;
  GenerateFakeSps(kNoRewriteRequired_VuiOptimal, &optimal_sps);

  // Build the input bitstream and its NAL unit table.
  rtc::Buffer input;
  size_t input_offsets[kNumNalus];
  size_t input_lengths[kNumNalus];

  input.AppendData(kStartSequence);
  input_offsets[0] = input.size();
  input.AppendData(kIdr1);
  input_lengths[0] = sizeof(kIdr1);

  input.AppendData(kStartSequence);
  input_offsets[1] = input.size();
  input.AppendData(kSpsNaluType);
  input.AppendData(vui_less_sps);
  input_lengths[1] = sizeof(kSpsNaluType) + vui_less_sps.size();

  input.AppendData(kStartSequence);
  input_offsets[2] = input.size();
  input.AppendData(kIdr2);
  input_lengths[2] = sizeof(kIdr2);

  // Build the expected output bitstream and its NAL unit table.
  rtc::Buffer expected;
  size_t expected_offsets[kNumNalus];
  size_t expected_lengths[kNumNalus];

  expected.AppendData(kStartSequence);
  expected_offsets[0] = expected.size();
  expected.AppendData(kIdr1);
  expected_lengths[0] = sizeof(kIdr1);

  expected.AppendData(kStartSequence);
  expected_offsets[1] = expected.size();
  expected.AppendData(kSpsNaluType);
  expected.AppendData(optimal_sps);
  expected_lengths[1] = sizeof(kSpsNaluType) + optimal_sps.size();

  expected.AppendData(kStartSequence);
  expected_offsets[2] = expected.size();
  expected.AppendData(kIdr2);
  expected_lengths[2] = sizeof(kIdr2);

  // Run the rewriter.
  rtc::Buffer actual;
  size_t actual_offsets[kNumNalus];
  size_t actual_lengths[kNumNalus];
  SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
      input, kNumNalus, input_offsets, input_lengths, &actual, actual_offsets,
      actual_lengths);

  // Compare bytes, then the NAL unit table.
  const std::vector<uint8_t> actual_bytes(actual.data(),
                                          actual.data() + actual.size());
  EXPECT_THAT(actual_bytes,
              ::testing::ElementsAreArray(expected.data(), expected.size()));

  const std::vector<size_t> actual_offset_list(actual_offsets,
                                               actual_offsets + kNumNalus);
  EXPECT_THAT(actual_offset_list,
              ::testing::ElementsAreArray(expected_offsets, kNumNalus));

  const std::vector<size_t> actual_length_list(actual_lengths,
                                               actual_lengths + kNumNalus);
  EXPECT_THAT(actual_length_list,
              ::testing::ElementsAreArray(expected_lengths, kNumNalus));
}
} // namespace webrtc

View File

@ -30,7 +30,6 @@
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/system/fallthrough.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
@ -40,18 +39,6 @@ static const size_t kFuAHeaderSize = 2;
static const size_t kLengthFieldSize = 2;
static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize;
static const char* kSpsValidHistogramName = "WebRTC.Video.H264.SpsValid";
enum SpsValidEvent {
kReceivedSpsVuiOk = 1,
kReceivedSpsRewritten = 2,
kReceivedSpsParseFailure = 3,
kSentSpsPocOk = 4,
kSentSpsVuiOk = 5,
kSentSpsRewritten = 6,
kSentSpsParseFailure = 7,
kSpsRewrittenMax = 8
};
// Bit masks for FU (A and B) indicators.
enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
@ -88,67 +75,30 @@ RtpPacketizerH264::RtpPacketizerH264(
PayloadSizeLimits limits,
H264PacketizationMode packetization_mode,
const RTPFragmentationHeader& fragmentation)
: limits_(limits), num_packets_left_(0) {
: limits_(limits),
modified_buffer_(new rtc::Buffer()),
num_packets_left_(0) {
// Guard against uninitialized memory in packetization_mode.
RTC_CHECK(packetization_mode == H264PacketizationMode::NonInterleaved ||
packetization_mode == H264PacketizationMode::SingleNalUnit);
for (int i = 0; i < fragmentation.fragmentationVectorSize; ++i) {
const uint8_t* buffer =
payload.data() + fragmentation.fragmentationOffset[i];
size_t length = fragmentation.fragmentationLength[i];
RTPFragmentationHeader modified_fragmentation;
modified_fragmentation.CopyFrom(fragmentation);
bool updated_sps = false;
H264::NaluType nalu_type = H264::ParseNaluType(buffer[0]);
if (nalu_type == H264::NaluType::kSps) {
// Check if stream uses picture order count type 0, and if so rewrite it
// to enable faster decoding. Streams in that format incur additional
// delay because it allows decode order to differ from render order.
// The mechanism used is to rewrite (edit or add) the SPS's VUI to contain
// restrictions on the maximum number of reordered pictures. This reduces
// latency significantly, though it still adds about a frame of latency to
// decoding.
// Note that we do this rewriting both here (send side, in order to
// protect legacy receive clients) and below in
// RtpDepacketizerH264::ParseSingleNalu (receive side, in orderer to
// protect us from unknown or legacy send clients).
SpsVuiRewriter::ParseOutgoingBitstreamAndRewriteSps(
payload, fragmentation.fragmentationVectorSize,
fragmentation.fragmentationOffset, fragmentation.fragmentationLength,
modified_buffer_.get(), modified_fragmentation.fragmentationOffset,
modified_fragmentation.fragmentationLength);
absl::optional<SpsParser::SpsState> sps;
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
// Add the type header to the output buffer first, so that the rewriter
// can append modified payload on top of that.
output_buffer->AppendData(buffer[0]);
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize, &sps,
output_buffer.get());
switch (result) {
case SpsVuiRewriter::ParseResult::kVuiRewritten:
input_fragments_.push_back(
Fragment(output_buffer->data(), output_buffer->size()));
input_fragments_.rbegin()->tmp_buffer = std::move(output_buffer);
updated_sps = true;
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kSentSpsRewritten,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kVuiOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kSentSpsVuiOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kFailure:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kSentSpsParseFailure,
SpsValidEvent::kSpsRewrittenMax);
break;
}
for (size_t i = 0; i < modified_fragmentation.fragmentationVectorSize; ++i) {
const uint8_t* fragment = modified_buffer_->data() +
modified_fragmentation.fragmentationOffset[i];
const size_t fragment_length =
modified_fragmentation.fragmentationLength[i];
input_fragments_.push_back(Fragment(fragment, fragment_length));
}
if (!updated_sps)
input_fragments_.push_back(Fragment(buffer, length));
}
if (!GeneratePackets(packetization_mode)) {
// If failed to generate all the packets, discard already generated
// packets in case the caller would ignore return value and still try to
@ -519,9 +469,9 @@ bool RtpDepacketizerH264::ProcessStapAOrSingleNalu(
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
&payload_data[start_offset], end_offset - start_offset, &sps,
output_buffer.get());
switch (result) {
case SpsVuiRewriter::ParseResult::kVuiRewritten:
output_buffer.get(), SpsVuiRewriter::Direction::kIncoming);
if (result == SpsVuiRewriter::ParseResult::kVuiRewritten) {
if (modified_buffer_) {
RTC_LOG(LS_WARNING)
<< "More than one H264 SPS NAL units needing "
@ -541,27 +491,11 @@ bool RtpDepacketizerH264::ProcessStapAOrSingleNalu(
}
// Append rest of packet.
output_buffer->AppendData(
&payload_data[end_offset],
output_buffer->AppendData(&payload_data[end_offset],
nalu_length + kNalHeaderSize - end_offset);
modified_buffer_ = std::move(output_buffer);
length_ = modified_buffer_->size();
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsRewritten,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kVuiOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsVuiOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kFailure:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsParseFailure,
SpsValidEvent::kSpsRewrittenMax);
break;
}
if (sps) {

View File

@ -54,7 +54,6 @@ class RtpPacketizerH264 : public RtpPacketizer {
~Fragment();
const uint8_t* buffer = nullptr;
size_t length = 0;
std::unique_ptr<rtc::Buffer> tmp_buffer;
};
// A packet unit (H264 packet), to be put into an RTP packet:
@ -91,6 +90,7 @@ class RtpPacketizerH264 : public RtpPacketizer {
void NextFragmentPacket(RtpPacketToSend* rtp_packet);
const PayloadSizeLimits limits_;
std::unique_ptr<rtc::Buffer> modified_buffer_;
size_t num_packets_left_;
std::deque<Fragment> input_fragments_;
std::queue<PacketUnit> packets_;