From 49470c2ac460ed8cce250942e8525c5f14e32778 Mon Sep 17 00:00:00 2001 From: Danil Chapovalov Date: Thu, 14 Nov 2019 17:33:55 +0100 Subject: [PATCH] Add AV1 RtpDepacketizer class Implement Parse function that extracts is_first_packet_in_frame, is_last_packet_in_frame, and frame_type fields. Bug: webrtc:11042 Change-Id: I9360ea52ef274281b5c5e4c31955100b92155bfe Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/159180 Reviewed-by: Philip Eliasson Reviewed-by: Sam Zackrisson Commit-Queue: Danil Chapovalov Cr-Commit-Position: refs/heads/master@{#29814} --- modules/rtp_rtcp/BUILD.gn | 3 + .../rtp_rtcp/source/rtp_depacketizer_av1.cc | 162 +++++++++++++++ .../rtp_rtcp/source/rtp_depacketizer_av1.h | 34 +++ .../source/rtp_depacketizer_av1_unittest.cc | 196 ++++++++++++++++++ test/fuzzers/BUILD.gn | 9 + .../rtp_depacketizer_av1_parse_fuzzer.cc | 18 ++ 6 files changed, 422 insertions(+) create mode 100644 modules/rtp_rtcp/source/rtp_depacketizer_av1.cc create mode 100644 modules/rtp_rtcp/source/rtp_depacketizer_av1.h create mode 100644 modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc create mode 100644 test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc diff --git a/modules/rtp_rtcp/BUILD.gn b/modules/rtp_rtcp/BUILD.gn index 55cda86435..0a1dc4b1e2 100644 --- a/modules/rtp_rtcp/BUILD.gn +++ b/modules/rtp_rtcp/BUILD.gn @@ -165,6 +165,8 @@ rtc_library("rtp_rtcp") { "source/rtcp_receiver.h", "source/rtcp_sender.cc", "source/rtcp_sender.h", + "source/rtp_depacketizer_av1.cc", + "source/rtp_depacketizer_av1.h", "source/rtp_format.cc", "source/rtp_format.h", "source/rtp_format_h264.cc", @@ -442,6 +444,7 @@ if (rtc_include_tests) { "source/rtcp_sender_unittest.cc", "source/rtcp_transceiver_impl_unittest.cc", "source/rtcp_transceiver_unittest.cc", + "source/rtp_depacketizer_av1_unittest.cc", "source/rtp_fec_unittest.cc", "source/rtp_format_h264_unittest.cc", "source/rtp_format_unittest.cc", diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc 
b/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc new file mode 100644 index 0000000000..cc92526177 --- /dev/null +++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1.cc @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h" + +#include +#include + +#include "modules/rtp_rtcp/source/rtp_video_header.h" +#include "rtc_base/byte_buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { +// AV1 format: +// +// RTP payload syntax: +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |Z|Y| W |-|-|-|-| (REQUIRED) +// +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any times if W = 0) +// |1| | +// +-+ OBU fragment| +// |1| | (REQUIRED, leb128 encoded) +// +-+ size | +// |0| | +// +-+-+-+-+-+-+-+-+ +// | OBU fragment | +// | ... | +// +=+=+=+=+=+=+=+=+ +// | ... | +// +=+=+=+=+=+=+=+=+ if W > 0, last fragment MUST NOT have size field +// | OBU fragment | +// | ... | +// +=+=+=+=+=+=+=+=+ +// +// +// OBU syntax: +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |0| type |X|S|-| (REQUIRED) +// +-+-+-+-+-+-+-+-+ +// X: | TID |SID|-|-|-| (OPTIONAL) +// +-+-+-+-+-+-+-+-+ +// |1| | +// +-+ OBU payload | +// S: |1| | (OPTIONAL, variable length leb128 encoded) +// +-+ size | +// |0| | +// +-+-+-+-+-+-+-+-+ +// | OBU payload | +// | ... 
| +constexpr int kObuTypeSequenceHeader = 1; + +int ObuType(uint8_t obu_header) { + return (obu_header & 0b0'1111'000u) >> 3; +} + +bool RtpStartsWithFragment(uint8_t aggregation_header) { + return aggregation_header & 0b1000'0000u; +} +bool RtpEndsWithFragment(uint8_t aggregation_header) { + return aggregation_header & 0b0100'0000u; +} +int RtpNumObus(uint8_t aggregation_header) { // 0 for any number of obus. + return (aggregation_header & 0b0011'0000u) >> 4; +} + +} // namespace + +bool RtpDepacketizerAv1::Parse(ParsedPayload* parsed_payload, + const uint8_t* payload_data, + size_t payload_data_length) { + RTC_DCHECK(parsed_payload); + if (payload_data_length == 0) { + RTC_DLOG(LS_ERROR) << "Empty rtp payload."; + return false; + } + // To assemble frame, all of the rtp payload is required, including + // aggregation header. + parsed_payload->payload = payload_data; + parsed_payload->payload_length = payload_data_length; + + rtc::ByteBufferReader payload(reinterpret_cast(payload_data), + payload_data_length); + uint8_t aggregation_header; + RTC_CHECK(payload.ReadUInt8(&aggregation_header)); + + // TODO(danilchap): Set AV1 codec when there is such enum value + parsed_payload->video.codec = VideoCodecType::kVideoCodecGeneric; + // These are not accurate since frame may consist of several packet aligned + // chunks of obus, but should be good enough for most cases. It might produce + // frame that do not map to any real frame, but av1 decoder should be able to + // handle it since it promise to handle individual obus rather than full + // frames. + parsed_payload->video.is_first_packet_in_frame = + !RtpStartsWithFragment(aggregation_header); + parsed_payload->video.is_last_packet_in_frame = + !RtpEndsWithFragment(aggregation_header); + parsed_payload->video.frame_type = VideoFrameType::kVideoFrameDelta; + // If packet starts a frame, check if it contains Sequence Header OBU. + // In that case treat it as key frame packet. 
+ if (parsed_payload->video.is_first_packet_in_frame) { + int num_expected_obus = RtpNumObus(aggregation_header); + + // The only OBU that can preceed SequenceHeader is a TemporalDelimiter OBU, + // so check no more than two OBUs while searching for SH. + for (int obu_index = 1; payload.Length() > 0 && obu_index <= 2; + ++obu_index) { + uint64_t fragment_size; + // When num_expected_obus > 0, last OBU (fragment) is not preceeded by + // the size field. See W field in + // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header + bool has_fragment_size = (obu_index != num_expected_obus); + if (has_fragment_size) { + if (!payload.ReadUVarint(&fragment_size)) { + RTC_DLOG(LS_WARNING) + << "Failed to read OBU fragment size for OBU#" << obu_index; + return false; + } + if (fragment_size > payload.Length()) { + RTC_DLOG(LS_WARNING) << "OBU fragment size " << fragment_size + << " exceeds remaining payload size " + << payload.Length() << " for OBU#" << obu_index; + // Malformed input: written size is larger than remaining buffer. + return false; + } + } else { + fragment_size = payload.Length(); + } + // Though it is inpractical to pass empty fragments, it is allowed. + if (fragment_size == 0) { + RTC_LOG(LS_WARNING) + << "Weird obu of size 0 at offset " + << (payload_data_length - payload.Length()) << ", skipping."; + continue; + } + uint8_t obu_header = *reinterpret_cast(payload.Data()); + if (ObuType(obu_header) == kObuTypeSequenceHeader) { + // TODO(bugs.webrtc.org/11042): Check frame_header OBU and/or frame OBU + // too for other conditions of the start of a new coded video sequence. + // For proper checks checking single packet might not be enough. 
See + // https://aomediacodec.github.io/av1-spec/av1-spec.pdf section 7.5 + parsed_payload->video.frame_type = VideoFrameType::kVideoFrameKey; + break; + } + payload.Consume(fragment_size); + } + } + + return true; +} + +} // namespace webrtc diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1.h b/modules/rtp_rtcp/source/rtp_depacketizer_av1.h new file mode 100644 index 0000000000..e4a6dceb94 --- /dev/null +++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_ +#define MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_ + +#include +#include + +#include "modules/rtp_rtcp/source/rtp_format.h" + +namespace webrtc { + +class RtpDepacketizerAv1 : public RtpDepacketizer { + public: + RtpDepacketizerAv1() = default; + RtpDepacketizerAv1(const RtpDepacketizerAv1&) = delete; + RtpDepacketizerAv1& operator=(const RtpDepacketizerAv1&) = delete; + ~RtpDepacketizerAv1() override = default; + + bool Parse(ParsedPayload* parsed_payload, + const uint8_t* payload_data, + size_t payload_data_length) override; +}; + +} // namespace webrtc +#endif // MODULES_RTP_RTCP_SOURCE_RTP_DEPACKETIZER_AV1_H_ diff --git a/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc b/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc new file mode 100644 index 0000000000..2520f74279 --- /dev/null +++ b/modules/rtp_rtcp/source/rtp_depacketizer_av1_unittest.cc @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. 
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
+
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+// Signals the number of OBUs (fragments) in the packet.
+constexpr uint8_t kObuCountAny = 0b0000'0000;
+constexpr uint8_t kObuCountOne = 0b0001'0000;
+constexpr uint8_t kObuCountTwo = 0b0010'0000;
+
+constexpr uint8_t kObuHeaderSequenceHeader = 0b0'0001'000;
+constexpr uint8_t kObuHeaderTemporalDelimiter = 0b0'0010'000;
+constexpr uint8_t kObuHeaderFrame = 0b0'0110'000;
+
+TEST(RtpDepacketizerAv1Test, ParsePassFullRtpPayloadAsCodecPayload) {
+  const uint8_t packet[] = {(uint8_t{1} << 7) | kObuCountOne, 1, 2, 3, 4};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_EQ(parsed.payload_length, sizeof(packet));
+  EXPECT_TRUE(parsed.payload == packet);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsContinuationFlagAsNotBeginningOfFrame) {
+  const uint8_t packet[] = {
+      (uint8_t{1} << 7) | kObuCountOne,
+      kObuHeaderFrame};  // Value doesn't matter since it is a
+                         // continuation of the OBU from the previous packet.
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_FALSE(parsed.video.is_first_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNoContinuationFlagAsBeginningOfFrame) {
+  const uint8_t packet[] = {(uint8_t{0} << 7) | kObuCountOne, kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsWillContinueFlagAsNotEndOfFrame) {
+  const uint8_t packet[] = {(uint8_t{1} << 6) | kObuCountOne, kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_FALSE(parsed.video.is_last_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNoWillContinueFlagAsEndOfFrame) {
+  const uint8_t packet[] = {(uint8_t{0} << 6) | kObuCountOne, kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.is_last_packet_in_frame);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsStartOfSequenceHeaderAsKeyFrame) {
+  const uint8_t packet[] = {kObuCountOne, kObuHeaderSequenceHeader};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseTreatsNotStartOfFrameAsDeltaFrame) {
+  const uint8_t packet[] = {
+      (uint8_t{1} << 7) | kObuCountOne,
+      // Byte that looks like the start of a sequence header, but since it is
+      // not the start of an OBU, it is not actually a sequence header.
+      kObuHeaderSequenceHeader};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_FALSE(parsed.video.is_first_packet_in_frame);
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+TEST(RtpDepacketizerAv1Test,
+     ParseTreatsStartOfFrameWithoutSequenceHeaderAsDeltaFrame) {
+  const uint8_t packet[] = {kObuCountOne, kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.is_first_packet_in_frame);
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindFragmentSize1) {
+  const uint8_t packet[] = {kObuCountAny,
+                            1,  // size of the next fragment
+                            kObuHeaderSequenceHeader};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindFragmentSize2) {
+  const uint8_t packet[] = {kObuCountTwo,
+                            2,  // size of the next fragment
+                            kObuHeaderSequenceHeader,
+                            42,  // SH payload.
+                            kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test,
+     ParseFindsSequenceHeaderBehindMultiByteFragmentSize) {
+  const uint8_t packet[] = {kObuCountTwo,
+                            0b1000'0101,  // leb128 encoded value of 5
+                            0b1000'0000,  // using 3 bytes
+                            0b0000'0000,  // to encode the value.
+                            kObuHeaderSequenceHeader,
+                            8,  // 4 bytes of SH payload.
+                            0,
+                            0,
+                            0,
+                            kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseFindsSequenceHeaderBehindTemporalDelimiter) {
+  const uint8_t packet[] = {kObuCountTwo,
+                            1,  // size of the next fragment
+                            kObuHeaderTemporalDelimiter,
+                            kObuHeaderSequenceHeader,
+                            8,  // 4 bytes of SH payload.
+                            0,
+                            0,
+                            0};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test,
+     ParseFindsSequenceHeaderBehindTemporalDelimiterAndSize) {
+  const uint8_t packet[] = {kObuCountAny,
+                            1,  // size of the next fragment
+                            kObuHeaderTemporalDelimiter,
+                            5,  // size of the next fragment
+                            kObuHeaderSequenceHeader,
+                            8,  // 4 bytes of SH payload.
+                            0,
+                            0,
+                            0,
+                            1,  // size of the next fragment
+                            kObuHeaderFrame};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameKey);
+}
+
+TEST(RtpDepacketizerAv1Test, ParseSkipsEmptyFragments) {
+  static_assert(kObuHeaderSequenceHeader == 8, "");
+  const uint8_t packet[] = {kObuCountAny,
+                            0,  // size of the next fragment
+                            8,  // size of the next fragment that looks like SH
+                            kObuHeaderFrame,
+                            1,
+                            2,
+                            3,
+                            4,
+                            5,
+                            6,
+                            7};
+  RtpDepacketizerAv1 depacketizer;
+  RtpDepacketizer::ParsedPayload parsed;
+  ASSERT_TRUE(depacketizer.Parse(&parsed, packet, sizeof(packet)));
+  EXPECT_TRUE(parsed.video.frame_type == VideoFrameType::kVideoFrameDelta);
+}
+
+}  // namespace
+}  // namespace webrtc
diff --git a/test/fuzzers/BUILD.gn b/test/fuzzers/BUILD.gn
index 9bd8cef5da..b1723e939b 100644
--- a/test/fuzzers/BUILD.gn
+++ b/test/fuzzers/BUILD.gn
@@ -537,6 +537,15 @@ webrtc_fuzzer_test("comfort_noise_decoder_fuzzer") {
   ]
 }
 
+webrtc_fuzzer_test("rtp_depacketizer_av1_parse_fuzzer") {
+  sources = [
+    "rtp_depacketizer_av1_parse_fuzzer.cc",
+  ]
+  deps = [
+    "../../modules/rtp_rtcp",
+  ]
+}
+
 webrtc_fuzzer_test("rtp_dependency_descriptor_fuzzer") {
   sources = [
     "rtp_dependency_descriptor_fuzzer.cc",
diff --git a/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc b/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc
new file mode 100644
index 0000000000..d46860a100
--- /dev/null
+++ b/test/fuzzers/rtp_depacketizer_av1_parse_fuzzer.cc
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/rtp_rtcp/source/rtp_depacketizer_av1.h"
+
+namespace webrtc {
+void FuzzOneInput(const uint8_t* data, size_t size) {
+  RtpDepacketizerAv1 av1_parser;
+  RtpDepacketizer::ParsedPayload discarded_output;
+  av1_parser.Parse(&discarded_output, data, size);  // Result is not inspected.
+}
+}  // namespace webrtc