Add h.264 AVC SPS parsing for resolution.

BUG=
R=stefan@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/48129004

Cr-Commit-Position: refs/heads/master@{#9073}
This commit is contained in:
Noah Richards
2015-04-23 16:45:56 -07:00
parent 9728241e6a
commit 5ea8eff55e
8 changed files with 389 additions and 1 deletions

View File

@ -223,6 +223,7 @@
'rtp_rtcp/source/fec_receiver_unittest.cc',
'rtp_rtcp/source/fec_test_helper.cc',
'rtp_rtcp/source/fec_test_helper.h',
'rtp_rtcp/source/h264_sps_parser_unittest.cc',
'rtp_rtcp/source/nack_rtx_unittest.cc',
'rtp_rtcp/source/producer_fec_unittest.cc',
'rtp_rtcp/source/receive_statistics_unittest.cc',

View File

@ -67,6 +67,8 @@ source_set("rtp_rtcp") {
"source/forward_error_correction.h",
"source/forward_error_correction_internal.cc",
"source/forward_error_correction_internal.h",
"source/h264_sps_parser.cc",
"source/h264_sps_parser.h",
"source/producer_fec.cc",
"source/producer_fec.h",
"source/rtp_packet_history.cc",

View File

@ -74,6 +74,8 @@
'source/forward_error_correction.h',
'source/forward_error_correction_internal.cc',
'source/forward_error_correction_internal.h',
'source/h264_sps_parser.cc',
'source/h264_sps_parser.h',
'source/producer_fec.cc',
'source/producer_fec.h',
'source/rtp_packet_history.cc',

View File

@ -0,0 +1,226 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
#include "webrtc/base/bytebuffer.h"
#include "webrtc/base/bitbuffer.h"
#include "webrtc/system_wrappers/interface/logging.h"
// Evaluates |x| and returns false from the enclosing function if it failed.
// Wrapped in do { } while (0) so the macro expands to a single statement and
// stays safe inside unbraced if/else constructs (no dangling-else surprises).
#define RETURN_FALSE_ON_FAIL(x) \
  do {                          \
    if (!(x)) {                 \
      return false;             \
    }                           \
  } while (0)
namespace webrtc {
// Stores a non-owning view of the SPS payload; |sps| must outlive the parser.
// Width/height start at zero and are only meaningful after Parse() succeeds.
H264SpsParser::H264SpsParser(const uint8* sps, size_t byte_length)
    : sps_(sps), byte_length_(byte_length), width_(0), height_(0) {
}
bool H264SpsParser::Parse() {
  // General note: this is based off the 02/2014 version of the H.264 standard.
  // You can find it on this page:
  // http://www.itu.int/rec/T-REC-H.264

  const char* sps_bytes = reinterpret_cast<const char*>(sps_);
  // First, parse out rbsp, which is basically the source buffer minus emulation
  // bytes (the 0x03 in every 0x00 0x00 0x03 sequence). RBSP is defined in
  // section 7.3.1 of the H.264 standard.
  rtc::ByteBuffer rbsp_buffer;
  for (size_t i = 0; i < byte_length_;) {
    // BUG FIX: the previous check (i < byte_length_ - 3 && sps_[i + 3] == 3)
    // never verified the two required leading zero bytes, tested the byte one
    // past the triplet actually consumed, and byte_length_ - 3 underflows
    // (size_t) for buffers shorter than 3 bytes, allowing out-of-bounds reads.
    // Check the full 0x00 0x00 0x03 sequence, and use byte_length_ - i which
    // cannot underflow because i < byte_length_ here.
    if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 &&
        sps_[i + 2] == 3) {
      // Two rbsp bytes + the emulation byte.
      rbsp_buffer.WriteBytes(sps_bytes + i, 2);
      i += 3;
    } else {
      // Single rbsp byte.
      rbsp_buffer.WriteBytes(sps_bytes + i, 1);
      i++;
    }
  }

  // Now, we need to use a bit buffer to parse through the actual AVC SPS
  // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
  // H.264 standard for a complete description.
  // Since we only care about resolution, we ignore the majority of fields, but
  // we still have to actively parse through a lot of the data, since many of
  // the fields have variable size.
  // We're particularly interested in:
  // chroma_format_idc -> affects crop units
  // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
  // frame_crop_*_offset -> crop information
  rtc::BitBuffer parser(reinterpret_cast<const uint8*>(rbsp_buffer.Data()),
                        rbsp_buffer.Length());

  // The golomb values we have to read, not just consume.
  uint32 golomb_ignored;

  // separate_colour_plane_flag is optional (assumed 0), but has implications
  // about the ChromaArrayType, which modifies how we treat crop coordinates.
  uint32 separate_colour_plane_flag = 0;
  // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
  // 0. It defaults to 1, when not specified.
  uint32 chroma_format_idc = 1;

  // profile_idc: u(8). We need it to determine if we need to read/skip chroma
  // formats.
  uint8 profile_idc;
  RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc));
  // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
  // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
  RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
  // level_idc: u(8)
  RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
  // seq_parameter_set_id: ue(v)
  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
  // See if profile_idc has chroma format information.
  if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
      profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
      profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
      profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
    // chroma_format_idc: ue(v)
    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc));
    if (chroma_format_idc == 3) {
      // separate_colour_plane_flag: u(1)
      RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1));
    }
    // bit_depth_luma_minus8: ue(v)
    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    // bit_depth_chroma_minus8: ue(v)
    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    // qpprime_y_zero_transform_bypass_flag: u(1)
    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
    // seq_scaling_matrix_present_flag: u(1)
    uint32 seq_scaling_matrix_present_flag;
    RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1));
    if (seq_scaling_matrix_present_flag) {
      // seq_scaling_list_present_flags. Either 8 or 12, depending on
      // chroma_format_idc.
      uint32 seq_scaling_list_present_flags;
      if (chroma_format_idc != 3) {
        RETURN_FALSE_ON_FAIL(
            parser.ReadBits(&seq_scaling_list_present_flags, 8));
      } else {
        RETURN_FALSE_ON_FAIL(
            parser.ReadBits(&seq_scaling_list_present_flags, 12));
      }
      // We don't support reading the sequence scaling list, and we don't really
      // see/use them in practice, so we'll just reject the full sps if we see
      // any provided.
      if (seq_scaling_list_present_flags > 0) {
        LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported.";
        return false;
      }
    }
  }
  // log2_max_frame_num_minus4: ue(v)
  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
  // pic_order_cnt_type: ue(v)
  uint32 pic_order_cnt_type;
  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type));
  if (pic_order_cnt_type == 0) {
    // log2_max_pic_order_cnt_lsb_minus4: ue(v)
    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
  } else if (pic_order_cnt_type == 1) {
    // delta_pic_order_always_zero_flag: u(1)
    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
    // offset_for_non_ref_pic: se(v)
    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    // offset_for_top_to_bottom_field: se(v)
    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
    uint32 num_ref_frames_in_pic_order_cnt_cycle;
    RETURN_FALSE_ON_FAIL(
        parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
    for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
      // offset_for_ref_frame[i]: se(v)
      RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
    }
  }
  // max_num_ref_frames: ue(v)
  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
  // gaps_in_frame_num_value_allowed_flag: u(1)
  RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
  //
  // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
  // width/height in macroblocks (16x16), which gives us the base resolution,
  // and then we continue on until we hit the frame crop offsets, which are used
  // to signify resolutions that aren't multiples of 16.
  //
  // pic_width_in_mbs_minus1: ue(v)
  uint32 pic_width_in_mbs_minus1;
  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1));
  // pic_height_in_map_units_minus1: ue(v)
  uint32 pic_height_in_map_units_minus1;
  RETURN_FALSE_ON_FAIL(
      parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1));
  // frame_mbs_only_flag: u(1)
  uint32 frame_mbs_only_flag;
  RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1));
  if (!frame_mbs_only_flag) {
    // mb_adaptive_frame_field_flag: u(1)
    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
  }
  // direct_8x8_inference_flag: u(1)
  RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
  //
  // MORE IMPORTANT ONES! Now we're at the frame crop information.
  //
  // frame_cropping_flag: u(1)
  uint32 frame_cropping_flag;
  uint32 frame_crop_left_offset = 0;
  uint32 frame_crop_right_offset = 0;
  uint32 frame_crop_top_offset = 0;
  uint32 frame_crop_bottom_offset = 0;
  RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1));
  if (frame_cropping_flag) {
    // frame_crop_{left, right, top, bottom}_offset: ue(v)
    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset));
    RETURN_FALSE_ON_FAIL(
        parser.ReadExponentialGolomb(&frame_crop_right_offset));
    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset));
    RETURN_FALSE_ON_FAIL(
        parser.ReadExponentialGolomb(&frame_crop_bottom_offset));
  }

  // Far enough! We don't use the rest of the SPS.

  // Start with the resolution determined by the pic_width/pic_height fields.
  int width = 16 * (pic_width_in_mbs_minus1 + 1);
  int height =
      16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);

  // Figure out the crop units in pixels. That's based on the chroma format's
  // sampling, which is indicated by chroma_format_idc.
  if (separate_colour_plane_flag || chroma_format_idc == 0) {
    frame_crop_bottom_offset *= (2 - frame_mbs_only_flag);
    frame_crop_top_offset *= (2 - frame_mbs_only_flag);
  } else if (!separate_colour_plane_flag && chroma_format_idc > 0) {
    // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
    if (chroma_format_idc == 1 || chroma_format_idc == 2) {
      frame_crop_left_offset *= 2;
      frame_crop_right_offset *= 2;
    }
    // Height multipliers for format 1 (4:2:0).
    if (chroma_format_idc == 1) {
      frame_crop_top_offset *= 2;
      frame_crop_bottom_offset *= 2;
    }
  }
  // Subtract the crop for each dimension.
  width -= (frame_crop_left_offset + frame_crop_right_offset);
  height -= (frame_crop_top_offset + frame_crop_bottom_offset);

  width_ = width;
  height_ = height;
  return true;
}
} // namespace webrtc

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
#define WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
#include "webrtc/base/common.h"
namespace webrtc {
// A class for parsing out sequence parameter set (SPS) data from an H264 NALU.
// Currently, only the frame resolution is extracted; all other fields are
// parsed past but ignored.
class H264SpsParser {
 public:
  // |sps| must point at the SPS payload (after the NAL header byte) and must
  // remain valid for the lifetime of the parser; no copy is made.
  H264SpsParser(const uint8* sps, size_t byte_length);

  // Parses the SPS to completion. Returns true if the SPS was parsed correctly.
  bool Parse();

  // Parsed resolution in pixels. Only meaningful after Parse() returns true.
  // Accessors are const: they do not modify the parser state.
  uint16 width() const { return width_; }
  uint16 height() const { return height_; }

 private:
  const uint8* const sps_;   // Non-owning pointer to the SPS payload.
  const size_t byte_length_; // Length of |sps_| in bytes.
  uint16 width_;
  uint16 height_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace webrtc {
// Example SPS can be generated with ffmpeg. Here's an example set of commands,
// runnable on OS X:
// 1) Generate a video, from the camera:
// ffmpeg -f avfoundation -i "0" -video_size 640x360 camera.mov
//
// 2) Scale the video to the desired size:
// ffmpeg -i camera.mov -vf scale=640x360 scaled.mov
//
// 3) Get just the H.264 bitstream in AnnexB:
// ffmpeg -i scaled.mov -vcodec copy -vbsf h264_mp4toannexb -an out.h264
//
// 4) Open out.h264 and find the SPS, generally everything between the first
// two start codes (0 0 0 1 or 0 0 1). The first byte should be 0x67,
// which should be stripped out before being passed to the parser.
TEST(H264SpsParserTest, TestSampleSPSHdLandscape) {
  // A 1280x720 capture from ffmpeg on OS X. The payload carries emulation
  // bytes but no cropping (both dimensions are multiples of 16).
  const uint8 buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05,
                          0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00,
                          0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
  H264SpsParser sps_parser(buffer, ARRAY_SIZE(buffer));
  EXPECT_TRUE(sps_parser.Parse());
  EXPECT_EQ(1280u, sps_parser.width());
  EXPECT_EQ(720u, sps_parser.height());
}
TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) {
  // A 640x360 capture from ffmpeg on OS X. The payload carries emulation
  // bytes and a vertical crop, since 360 is not divisible by 16.
  const uint8 buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F,
                          0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80,
                          0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80};
  H264SpsParser sps_parser(buffer, ARRAY_SIZE(buffer));
  EXPECT_TRUE(sps_parser.Parse());
  EXPECT_EQ(640u, sps_parser.width());
  EXPECT_EQ(360u, sps_parser.height());
}
TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) {
  // A 200x400 capture from ffmpeg on OS X. Exercises both horizontal and
  // vertical cropping, since neither dimension is divisible by 16.
  const uint8 buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E,
                          0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00,
                          0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60};
  H264SpsParser sps_parser(buffer, ARRAY_SIZE(buffer));
  EXPECT_TRUE(sps_parser.Parse());
  EXPECT_EQ(200u, sps_parser.width());
  EXPECT_EQ(400u, sps_parser.height());
}
} // namespace webrtc

View File

@ -12,6 +12,7 @@
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
namespace webrtc {
@ -30,6 +31,7 @@ enum Nalu {
static const size_t kNalHeaderSize = 1;
static const size_t kFuAHeaderSize = 2;
static const size_t kLengthFieldSize = 2;
static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize;
// Bit masks for FU (A and B) indicators.
enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
@ -47,15 +49,28 @@ void ParseSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
RTPVideoHeaderH264* h264_header =
&parsed_payload->type.Video.codecHeader.H264;
const uint8_t* nalu_start = payload_data + kNalHeaderSize;
size_t nalu_length = payload_data_length - kNalHeaderSize;
uint8_t nal_type = payload_data[0] & kTypeMask;
if (nal_type == kStapA) {
nal_type = payload_data[3] & kTypeMask;
// Skip the StapA header (StapA nal type + length).
nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
nalu_start += kStapAHeaderSize;
nalu_length -= kStapAHeaderSize;
h264_header->packetization_type = kH264StapA;
} else {
h264_header->packetization_type = kH264SingleNalu;
}
h264_header->nalu_type = nal_type;
// We can read resolution out of sps packets.
if (nal_type == kSps) {
H264SpsParser parser(nalu_start, nalu_length);
if (parser.Parse()) {
parsed_payload->type.Video.width = parser.width();
parsed_payload->type.Video.height = parser.height();
}
}
switch (nal_type) {
case kSps:
case kPps:

View File

@ -411,6 +411,23 @@ TEST_F(RtpDepacketizerH264Test, TestSingleNalu) {
EXPECT_EQ(kIdr, payload.type.Video.codecHeader.H264.nalu_type);
}
TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) {
  // A single-NALU packet whose payload is a 1280x720 SPS; the depacketizer
  // should surface the resolution in the parsed video header.
  uint8_t packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50,
                      0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
                      0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
  RtpDepacketizer::ParsedPayload parsed;
  ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
  ExpectPacket(&parsed, packet, sizeof(packet));
  EXPECT_EQ(kVideoFrameKey, parsed.frame_type);
  EXPECT_EQ(kRtpVideoH264, parsed.type.Video.codec);
  EXPECT_TRUE(parsed.type.Video.isFirstPacket);
  EXPECT_EQ(kH264SingleNalu,
            parsed.type.Video.codecHeader.H264.packetization_type);
  EXPECT_EQ(1280u, parsed.type.Video.width);
  EXPECT_EQ(720u, parsed.type.Video.height);
}
TEST_F(RtpDepacketizerH264Test, TestStapAKey) {
uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.
@ -429,6 +446,26 @@ TEST_F(RtpDepacketizerH264Test, TestStapAKey) {
EXPECT_EQ(kSps, payload.type.Video.codecHeader.H264.nalu_type);
}
TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) {
  // A STAP-A aggregate whose first NALU is a 1280x720 SPS (followed by two
  // IDR NALUs); the resolution should come from the SPS.
  uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24.
                      // Length (2 bytes), nal header, payload.
                      0, 24, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9,
                      0x40, 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03,
                      0x00, 0xC0, 0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83,
                      0x19, 0x60, 0, 0x03, kIdr, 0xFF, 0x00, 0,
                      0x04, kIdr, 0xFF, 0x00, 0x11};
  RtpDepacketizer::ParsedPayload parsed;
  ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));
  ExpectPacket(&parsed, packet, sizeof(packet));
  EXPECT_EQ(kVideoFrameKey, parsed.frame_type);
  EXPECT_EQ(kRtpVideoH264, parsed.type.Video.codec);
  EXPECT_TRUE(parsed.type.Video.isFirstPacket);
  EXPECT_EQ(kH264StapA, parsed.type.Video.codecHeader.H264.packetization_type);
  EXPECT_EQ(1280u, parsed.type.Video.width);
  EXPECT_EQ(720u, parsed.type.Video.height);
}
TEST_F(RtpDepacketizerH264Test, TestStapADelta) {
uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.