Reland of Add pps id and sps id parsing to the h.264 depacketizer. (patchset #1 id:1 of https://codereview.webrtc.org/2265023002/ )

Reason for revert:
Reland this now that downstream tests have been fixed.

Original issue's description:
> Revert of Add pps id and sps id parsing to the h.264 depacketizer. (patchset #5 id:80001 of https://codereview.webrtc.org/2238253002/ )
>
> Reason for revert:
> Breaks some h264 bitstream tests downstream. Reverting for now.
>
> Original issue's description:
> > Add pps id and sps id parsing to the h.264 depacketizer.
> >
> > BUG=webrtc:6208
> >
> > Committed: https://crrev.com/abcc3de169d8896ad60e920e5677600fb3d40180
> > Cr-Commit-Position: refs/heads/master@{#13838}
>
> TBR=sprang@webrtc.org,stefan@webrtc.org
> # Skipping CQ checks because original CL landed less than 1 days ago.
> NOPRESUBMIT=true
> NOTREECHECKS=true
> NOTRY=true
> BUG=webrtc:6208
>
> Committed: https://crrev.com/83d79cd4a2bfbdd1abc1f75480488df4446f5fe0
> Cr-Commit-Position: refs/heads/master@{#13844}

TBR=sprang@webrtc.org,kjellander@webrtc.org
# Not skipping CQ checks because original CL landed more than 1 day ago.
BUG=webrtc:6208

Review-Url: https://codereview.webrtc.org/2302893002
Cr-Commit-Position: refs/heads/master@{#14042}
This commit is contained in:
stefan
2016-09-02 04:07:28 -07:00
committed by Commit bot
parent 06a5e1aa39
commit 8a5cef8d0a
11 changed files with 264 additions and 112 deletions

View File

@ -10,6 +10,8 @@
#include "webrtc/common_video/h264/pps_parser.h"
#include <memory>
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/buffer.h"
@ -36,6 +38,25 @@ rtc::Optional<PpsParser::PpsState> PpsParser::ParsePps(const uint8_t* data,
return ParseInternal(&bit_buffer);
}
// Extracts the pic_parameter_set_id from a slice NAL unit payload.
// Unpacks the RBSP from |data|/|length|, then reads the first three ue(v)
// fields of the slice header (first_mb_in_slice, slice_type,
// pic_parameter_set_id) and returns the third one.
// Returns an empty Optional if any of those exp-Golomb reads fails.
rtc::Optional<uint32_t> PpsParser::ParsePpsIdFromSlice(const uint8_t* data,
size_t length) {
// RBSP-decode first so the bit reader is not confused by
// emulation-prevention bytes. TODO confirm: assumes H264::ParseRbsp never
// returns null.
std::unique_ptr<rtc::Buffer> slice_rbsp(H264::ParseRbsp(data, length));
rtc::BitBuffer slice_reader(slice_rbsp->data(), slice_rbsp->size());
// Scratch variable for the fields we skip over.
uint32_t golomb_tmp;
// first_mb_in_slice: ue(v)
if (!slice_reader.ReadExponentialGolomb(&golomb_tmp))
return rtc::Optional<uint32_t>();
// slice_type: ue(v)
if (!slice_reader.ReadExponentialGolomb(&golomb_tmp))
return rtc::Optional<uint32_t>();
// pic_parameter_set_id: ue(v)
uint32_t slice_pps_id;
if (!slice_reader.ReadExponentialGolomb(&slice_pps_id))
return rtc::Optional<uint32_t>();
return rtc::Optional<uint32_t>(slice_pps_id);
}
rtc::Optional<PpsParser::PpsState> PpsParser::ParseInternal(
rtc::BitBuffer* bit_buffer) {
PpsState pps;
@ -43,9 +64,9 @@ rtc::Optional<PpsParser::PpsState> PpsParser::ParseInternal(
uint32_t bits_tmp;
uint32_t golomb_ignored;
// pic_parameter_set_id: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&pps.id));
// seq_parameter_set_id: ue(v)
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&pps.sps_id));
// entropy_coding_mode_flag: u(1)
uint32_t entropy_coding_mode_flag;
RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&entropy_coding_mode_flag, 1));

View File

@ -33,11 +33,16 @@ class PpsParser {
uint32_t weighted_bipred_idc = false;
uint32_t redundant_pic_cnt_present_flag = 0;
int pic_init_qp_minus26 = 0;
uint32_t id = 0;
uint32_t sps_id = 0;
};
// Unpack RBSP and parse PPS state from the supplied buffer.
static rtc::Optional<PpsState> ParsePps(const uint8_t* data, size_t length);
static rtc::Optional<uint32_t> ParsePpsIdFromSlice(const uint8_t* data,
size_t length);
protected:
// Parse the PPS state, for a bit buffer where RBSP decoding has already been
// performed.

View File

@ -11,6 +11,7 @@
#include "webrtc/common_video/h264/pps_parser.h"
#include <limits>
#include <memory>
#include "testing/gtest/include/gtest/gtest.h"
@ -20,8 +21,17 @@
namespace webrtc {
static const size_t kPpsBufferMaxSize = 256;
static const uint32_t kIgnored = 0;
namespace {
// Contains enough of the image slice to contain slice QP.
const uint8_t kH264BitstreamChunk[] = {
0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x80, 0x20, 0xda, 0x01, 0x40, 0x16,
0xe8, 0x06, 0xd0, 0xa1, 0x35, 0x00, 0x00, 0x00, 0x01, 0x68, 0xce, 0x06,
0xe2, 0x00, 0x00, 0x00, 0x01, 0x65, 0xb8, 0x40, 0xf0, 0x8c, 0x03, 0xf2,
0x75, 0x67, 0xad, 0x41, 0x64, 0x24, 0x0e, 0xa0, 0xb2, 0x12, 0x1e, 0xf8,
};
const size_t kPpsBufferMaxSize = 256;
const uint32_t kIgnored = 0;
} // namespace
void WritePps(const PpsParser::PpsState& pps,
int slice_group_map_type,
@ -32,9 +42,9 @@ void WritePps(const PpsParser::PpsState& pps,
rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize);
// pic_parameter_set_id: ue(v)
bit_buffer.WriteExponentialGolomb(kIgnored);
bit_buffer.WriteExponentialGolomb(pps.id);
// seq_parameter_set_id: ue(v)
bit_buffer.WriteExponentialGolomb(kIgnored);
bit_buffer.WriteExponentialGolomb(pps.sps_id);
// entropy_coding_mode_flag: u(1)
bit_buffer.WriteBits(kIgnored, 1);
// bottom_field_pic_order_in_frame_present_flag: u(1)
@ -175,6 +185,8 @@ class PpsParserTest : public ::testing::Test {
EXPECT_EQ(pps.redundant_pic_cnt_present_flag,
parsed_pps_->redundant_pic_cnt_present_flag);
EXPECT_EQ(pps.pic_init_qp_minus26, parsed_pps_->pic_init_qp_minus26);
EXPECT_EQ(pps.id, parsed_pps_->id);
EXPECT_EQ(pps.sps_id, parsed_pps_->sps_id);
}
PpsParser::PpsState generated_pps_;
@ -192,10 +204,19 @@ TEST_F(PpsParserTest, MaxPps) {
generated_pps_.redundant_pic_cnt_present_flag = 1; // 1 bit value.
generated_pps_.weighted_bipred_idc = (1 << 2) - 1; // 2 bit value.
generated_pps_.weighted_pred_flag = true;
generated_pps_.id = 2;
generated_pps_.sps_id = 1;
RunTest();
generated_pps_.pic_init_qp_minus26 = std::numeric_limits<int32_t>::min() + 1;
RunTest();
}
// Checks that ParsePpsIdFromSlice recovers the expected PPS id (2) from the
// captured H.264 bitstream chunk defined above.
TEST_F(PpsParserTest, PpsIdFromSlice) {
rtc::Optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
kH264BitstreamChunk, sizeof(kH264BitstreamChunk));
ASSERT_TRUE(pps_id);
EXPECT_EQ(2u, *pps_id);
}
} // namespace webrtc

View File

@ -10,6 +10,8 @@
#include "webrtc/common_video/h264/sps_parser.h"
#include <memory>
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/bytebuffer.h"
@ -68,7 +70,7 @@ rtc::Optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui(
// level_idc: u(8)
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// seq_parameter_set_id: ue(v)
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.id));
sps.separate_colour_plane_flag = 0;
// See if profile_idc has chroma format information.
if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||

View File

@ -38,6 +38,7 @@ class SpsParser {
uint32_t pic_order_cnt_type = 0;
uint32_t max_num_ref_frames = 0;
uint32_t vui_params_present = 0;
uint32_t id = 0;
};
// Unpack RBSP and parse SPS state from the supplied buffer.

View File

@ -41,7 +41,10 @@ static const size_t kSpsBufferMaxSize = 256;
// The fake SPS that this generates also always has at least one emulation byte
// at offset 2, since the first two bytes are always 0, and has a 0x3 as the
// level_idc, to make sure the parser doesn't eat all 0x3 bytes.
void GenerateFakeSps(uint16_t width, uint16_t height, rtc::Buffer* out_buffer) {
void GenerateFakeSps(uint16_t width,
uint16_t height,
int id,
rtc::Buffer* out_buffer) {
uint8_t rbsp[kSpsBufferMaxSize] = {0};
rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize);
// Profile byte.
@ -51,7 +54,7 @@ void GenerateFakeSps(uint16_t width, uint16_t height, rtc::Buffer* out_buffer) {
// level_idc.
writer.WriteUInt8(0x3u);
// seq_paramter_set_id.
writer.WriteExponentialGolomb(0);
writer.WriteExponentialGolomb(id);
// Profile is not special, so we skip all the chroma format settings.
// Now some bit magic.
@ -151,20 +154,22 @@ TEST_F(H264SpsParserTest, TestSampleSPSWeirdResolution) {
TEST_F(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) {
rtc::Buffer buffer;
GenerateFakeSps(320u, 180u, &buffer);
GenerateFakeSps(320u, 180u, 1, &buffer);
EXPECT_TRUE(static_cast<bool>(
sps_ = SpsParser::ParseSps(buffer.data(), buffer.size())));
EXPECT_EQ(320u, sps_->width);
EXPECT_EQ(180u, sps_->height);
EXPECT_EQ(1u, sps_->id);
}
TEST_F(H264SpsParserTest, TestSyntheticSPSWeirdResolution) {
rtc::Buffer buffer;
GenerateFakeSps(156u, 122u, &buffer);
GenerateFakeSps(156u, 122u, 2, &buffer);
EXPECT_TRUE(static_cast<bool>(
sps_ = SpsParser::ParseSps(buffer.data(), buffer.size())));
EXPECT_EQ(156u, sps_->width);
EXPECT_EQ(122u, sps_->height);
EXPECT_EQ(2u, sps_->id);
}
} // namespace webrtc

View File

@ -72,7 +72,10 @@ SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps(
size_t length,
rtc::Optional<SpsParser::SpsState>* sps,
rtc::Buffer* destination) {
rtc::BitBuffer source_buffer(buffer, length);
// Create a temporary RBSP-decoded buffer of the payload (excluding the
// leading NALU type header byte); the SpsParser uses only the payload.
std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp(buffer, length);
rtc::BitBuffer source_buffer(rbsp_buffer->data(), rbsp_buffer->size());
rtc::Optional<SpsParser::SpsState> sps_state =
SpsParser::ParseSpsUpToVui(&source_buffer);
if (!sps_state)
@ -94,7 +97,7 @@ SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps(
size_t byte_offset;
size_t bit_offset;
source_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
memcpy(out_buffer.data(), buffer,
memcpy(out_buffer.data(), rbsp_buffer->data(),
byte_offset + (bit_offset > 0 ? 1 : 0)); // OK to copy the last bits.
// SpsParser will have read the vui_params_present flag, which we want to

View File

@ -164,12 +164,11 @@ void TestSps(SpsMode mode, SpsVuiRewriter::ParseResult expected_parse_result) {
index.payload_start_offset += H264::kNaluTypeSize;
index.payload_size -= H264::kNaluTypeSize;
std::unique_ptr<rtc::Buffer> rbsp_decoded =
H264::ParseRbsp(&buffer[index.payload_start_offset], index.payload_size);
rtc::Optional<SpsParser::SpsState> sps;
rtc::Buffer out_buffer;
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
rbsp_decoded->data(), rbsp_decoded->size(), &sps, &out_buffer);
SpsVuiRewriter::ParseResult result =
SpsVuiRewriter::ParseAndRewriteSps(&buffer[index.payload_start_offset],
index.payload_size, &sps, &out_buffer);
EXPECT_EQ(expected_parse_result, result);
}

View File

@ -260,6 +260,14 @@ enum H264PacketizationTypes {
// that was too large to fit into a single packet.
};
// Per-NAL-unit metadata recorded by the H.264 depacketizer.
struct NaluInfo {
uint8_t type;  // NAL unit type (low 5 bits of the NALU header).
int sps_id;    // Parsed SPS id, or -1 when not present/parsed.
int pps_id;    // Parsed PPS id, or -1 when not present/parsed.
};
const size_t kMaxNalusPerPacket = 10;
struct RTPVideoHeaderH264 {
uint8_t nalu_type; // The NAL unit type. If this is a header for a
// fragmented packet, it's the NAL unit type of
@ -267,6 +275,8 @@ struct RTPVideoHeaderH264 {
// aggregated packet, it's the NAL unit type of
// the first NAL unit in the packet.
H264PacketizationTypes packetization_type;
NaluInfo nalus[kMaxNalusPerPacket];
size_t nalus_length;
};
union RTPVideoTypeHeader {

View File

@ -11,6 +11,8 @@
#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
#include <string.h>
#include <memory>
#include <utility>
#include <vector>
#include "webrtc/base/checks.h"
@ -19,6 +21,7 @@
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
#include "webrtc/common_video/h264/sps_vui_rewriter.h"
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/common_video/h264/pps_parser.h"
#include "webrtc/common_video/h264/sps_parser.h"
#include "webrtc/system_wrappers/include/metrics.h"
@ -113,10 +116,6 @@ void RtpPacketizerH264::SetPayloadData(
// RtpDepacketizerH264::ParseSingleNalu (receive side, in orderer to
// protect us from unknown or legacy send clients).
// Create a temporary RBSP-decoded buffer of the payload (excluding the
// leading NALU type header byte); the SpsParser uses only the payload.
std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp(
buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize);
rtc::Optional<SpsParser::SpsState> sps;
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
@ -124,7 +123,8 @@ void RtpPacketizerH264::SetPayloadData(
// can append modified payload on top of that.
output_buffer->AppendData(buffer[0]);
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get());
buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize, &sps,
output_buffer.get());
switch (result) {
case SpsVuiRewriter::ParseResult::kVuiRewritten:
@ -342,6 +342,7 @@ bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload,
modified_buffer_.reset();
uint8_t nal_type = payload_data[0] & kTypeMask;
parsed_payload->type.Video.codecHeader.H264.nalus_length = 0;
if (nal_type == H264::NaluType::kFuA) {
// Fragmented NAL units (FU-A).
if (!ParseFuaNalu(parsed_payload, payload_data))
@ -408,81 +409,114 @@ bool RtpDepacketizerH264::ProcessStapAOrSingleNalu(
return false;
}
nal_type = payload_data[start_offset] & kTypeMask;
NaluInfo nalu;
nalu.type = payload_data[start_offset] & kTypeMask;
nalu.sps_id = -1;
nalu.pps_id = -1;
start_offset += H264::kNaluTypeSize;
if (nal_type == H264::NaluType::kSps) {
// Check if VUI is present in SPS and if it needs to be modified to avoid
// excessive decoder latency.
switch (nalu.type) {
case H264::NaluType::kSps: {
// Check if VUI is present in SPS and if it needs to be modified to
// avoid
// excessive decoder latency.
// Copy any previous data first (likely just the first header).
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
if (start_offset)
output_buffer->AppendData(payload_data, start_offset);
// Copy any previous data first (likely just the first header).
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
if (start_offset)
output_buffer->AppendData(payload_data, start_offset);
// RBSP decode of payload data.
std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp(
&payload_data[start_offset], end_offset - start_offset);
rtc::Optional<SpsParser::SpsState> sps;
rtc::Optional<SpsParser::SpsState> sps;
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get());
switch (result) {
case SpsVuiRewriter::ParseResult::kVuiRewritten:
if (modified_buffer_) {
LOG(LS_WARNING) << "More than one H264 SPS NAL units needing "
"rewriting found within a single STAP-A packet. "
"Keeping the first and rewriting the last.";
}
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
&payload_data[start_offset], end_offset - start_offset, &sps,
output_buffer.get());
switch (result) {
case SpsVuiRewriter::ParseResult::kVuiRewritten:
if (modified_buffer_) {
LOG(LS_WARNING)
<< "More than one H264 SPS NAL units needing "
"rewriting found within a single STAP-A packet. "
"Keeping the first and rewriting the last.";
}
// Rewrite length field to new SPS size.
if (h264_header->packetization_type == kH264StapA) {
size_t length_field_offset =
start_offset - (H264::kNaluTypeSize + kLengthFieldSize);
// Stap-A Length includes payload data and type header.
size_t rewritten_size =
output_buffer->size() - start_offset + H264::kNaluTypeSize;
ByteWriter<uint16_t>::WriteBigEndian(
&(*output_buffer)[length_field_offset], rewritten_size);
}
// Rewrite length field to new SPS size.
if (h264_header->packetization_type == kH264StapA) {
size_t length_field_offset =
start_offset - (H264::kNaluTypeSize + kLengthFieldSize);
// Stap-A Length includes payload data and type header.
size_t rewritten_size =
output_buffer->size() - start_offset + H264::kNaluTypeSize;
ByteWriter<uint16_t>::WriteBigEndian(
&(*output_buffer)[length_field_offset], rewritten_size);
}
// Append rest of packet.
output_buffer->AppendData(&payload_data[end_offset],
nalu_length + kNalHeaderSize - end_offset);
// Append rest of packet.
output_buffer->AppendData(
&payload_data[end_offset],
nalu_length + kNalHeaderSize - end_offset);
modified_buffer_ = std::move(output_buffer);
length_ = modified_buffer_->size();
modified_buffer_ = std::move(output_buffer);
length_ = modified_buffer_->size();
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsRewritten,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kPocOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsPocOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kVuiOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsVuiOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kFailure:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsParseFailure,
SpsValidEvent::kSpsRewrittenMax);
break;
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsRewritten,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kPocOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsPocOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kVuiOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsVuiOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kFailure:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsParseFailure,
SpsValidEvent::kSpsRewrittenMax);
break;
}
if (sps) {
parsed_payload->type.Video.width = sps->width;
parsed_payload->type.Video.height = sps->height;
nalu.sps_id = sps->id;
}
parsed_payload->frame_type = kVideoFrameKey;
break;
}
if (sps) {
parsed_payload->type.Video.width = sps->width;
parsed_payload->type.Video.height = sps->height;
case H264::NaluType::kPps: {
rtc::Optional<PpsParser::PpsState> pps = PpsParser::ParsePps(
&payload_data[start_offset], end_offset - start_offset);
if (pps) {
nalu.sps_id = pps->sps_id;
nalu.pps_id = pps->id;
}
break;
}
parsed_payload->frame_type = kVideoFrameKey;
} else if (nal_type == H264::NaluType::kPps ||
nal_type == H264::NaluType::kSei ||
nal_type == H264::NaluType::kIdr) {
parsed_payload->frame_type = kVideoFrameKey;
case H264::NaluType::kSei:
FALLTHROUGH();
case H264::NaluType::kIdr:
parsed_payload->frame_type = kVideoFrameKey;
FALLTHROUGH();
default: {
rtc::Optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
&payload_data[start_offset], end_offset - start_offset);
if (pps_id)
nalu.pps_id = *pps_id;
break;
}
}
RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264;
if (h264->nalus_length == kMaxNalusPerPacket) {
LOG(LS_WARNING)
<< "Received packet containing more than " << kMaxNalusPerPacket
<< " NAL units. Will not keep track sps and pps ids for all of them.";
} else {
h264->nalus[h264->nalus_length++] = nalu;
}
}
@ -499,10 +533,17 @@ bool RtpDepacketizerH264::ParseFuaNalu(
uint8_t fnri = payload_data[0] & (kFBit | kNriMask);
uint8_t original_nal_type = payload_data[1] & kTypeMask;
bool first_fragment = (payload_data[1] & kSBit) > 0;
NaluInfo nalu;
nalu.type = original_nal_type;
nalu.sps_id = -1;
nalu.pps_id = -1;
if (first_fragment) {
offset_ = 0;
length_ -= kNalHeaderSize;
rtc::Optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize);
if (pps_id)
nalu.pps_id = *pps_id;
uint8_t original_nal_header = fnri | original_nal_type;
modified_buffer_.reset(new rtc::Buffer());
modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_);
@ -521,10 +562,11 @@ bool RtpDepacketizerH264::ParseFuaNalu(
parsed_payload->type.Video.height = 0;
parsed_payload->type.Video.codec = kRtpVideoH264;
parsed_payload->type.Video.isFirstPacket = first_fragment;
RTPVideoHeaderH264* h264_header =
&parsed_payload->type.Video.codecHeader.H264;
h264_header->packetization_type = kH264FuA;
h264_header->nalu_type = original_nal_type;
RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264;
h264->packetization_type = kH264FuA;
h264->nalu_type = original_nal_type;
h264->nalus[h264->nalus_length] = nalu;
h264->nalus_length = 1;
return true;
}

View File

@ -531,21 +531,42 @@ TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) {
}
TEST_F(RtpDepacketizerH264Test, TestStapAKey) {
uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.
0, 0x02, kSps, 0xFF,
0, 0x03, kPps, 0xFF, 0x00,
0, 0x04, kIdr, 0xFF, 0x00, 0x11};
RtpDepacketizer::ParsedPayload payload;
// clang-format off
const NaluInfo kExpectedNalus[] = { {H264::kSps, 0, -1},
{H264::kPps, 1, 2},
{H264::kIdr, -1, 0} };
uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.
0, 0x18, kExpectedNalus[0].type,
0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 0xBA,
0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, 0x00, 0x03,
0x2A, 0xE0, 0xF1, 0x83, 0x25,
0, 0xD, kExpectedNalus[1].type,
0x69, 0xFC, 0x0, 0x0, 0x3, 0x0, 0x7, 0xFF, 0xFF, 0xFF,
0xF6, 0x40,
0, 0xB, kExpectedNalus[2].type,
0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0};
// clang-format on
RtpDepacketizer::ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
ExpectPacket(&payload, packet, sizeof(packet));
EXPECT_EQ(kVideoFrameKey, payload.frame_type);
EXPECT_EQ(kRtpVideoH264, payload.type.Video.codec);
EXPECT_TRUE(payload.type.Video.isFirstPacket);
EXPECT_EQ(kH264StapA, payload.type.Video.codecHeader.H264.packetization_type);
const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264;
EXPECT_EQ(kH264StapA, h264.packetization_type);
// NALU type for aggregated packets is the type of the first packet only.
EXPECT_EQ(kSps, payload.type.Video.codecHeader.H264.nalu_type);
EXPECT_EQ(kSps, h264.nalu_type);
ASSERT_EQ(3u, h264.nalus_length);
for (size_t i = 0; i < h264.nalus_length; ++i) {
EXPECT_EQ(kExpectedNalus[i].type, h264.nalus[i].type)
<< "Failed parsing nalu " << i;
EXPECT_EQ(kExpectedNalus[i].sps_id, h264.nalus[i].sps_id)
<< "Failed parsing nalu " << i;
EXPECT_EQ(kExpectedNalus[i].pps_id, h264.nalus[i].pps_id)
<< "Failed parsing nalu " << i;
}
}
TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) {
@ -697,26 +718,29 @@ TEST_F(RtpDepacketizerH264Test, TestStapADelta) {
}
TEST_F(RtpDepacketizerH264Test, TestFuA) {
uint8_t packet1[3] = {
// clang-format off
uint8_t packet1[] = {
kFuA, // F=0, NRI=0, Type=28.
kSBit | kIdr, // FU header.
0x01 // Payload.
0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0 // Payload.
};
const uint8_t kExpected1[2] = {kIdr, 0x01};
// clang-format on
const uint8_t kExpected1[] = {kIdr, 0x85, 0xB8, 0x0, 0x4, 0x0,
0x0, 0x13, 0x93, 0x12, 0x0};
uint8_t packet2[3] = {
uint8_t packet2[] = {
kFuA, // F=0, NRI=0, Type=28.
kIdr, // FU header.
0x02 // Payload.
};
const uint8_t kExpected2[1] = {0x02};
const uint8_t kExpected2[] = {0x02};
uint8_t packet3[3] = {
uint8_t packet3[] = {
kFuA, // F=0, NRI=0, Type=28.
kEBit | kIdr, // FU header.
0x03 // Payload.
};
const uint8_t kExpected3[1] = {0x03};
const uint8_t kExpected3[] = {0x03};
RtpDepacketizer::ParsedPayload payload;
@ -727,8 +751,13 @@ TEST_F(RtpDepacketizerH264Test, TestFuA) {
EXPECT_EQ(kVideoFrameKey, payload.frame_type);
EXPECT_EQ(kRtpVideoH264, payload.type.Video.codec);
EXPECT_TRUE(payload.type.Video.isFirstPacket);
EXPECT_EQ(kH264FuA, payload.type.Video.codecHeader.H264.packetization_type);
EXPECT_EQ(kIdr, payload.type.Video.codecHeader.H264.nalu_type);
const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264;
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
ASSERT_EQ(1u, h264.nalus_length);
EXPECT_EQ(static_cast<H264::NaluType>(kIdr), h264.nalus[0].type);
EXPECT_EQ(-1, h264.nalus[0].sps_id);
EXPECT_EQ(0, h264.nalus[0].pps_id);
// Following packets will be 2 bytes shorter since they will only be appended
// onto the first packet.
@ -738,8 +767,15 @@ TEST_F(RtpDepacketizerH264Test, TestFuA) {
EXPECT_EQ(kVideoFrameKey, payload.frame_type);
EXPECT_EQ(kRtpVideoH264, payload.type.Video.codec);
EXPECT_FALSE(payload.type.Video.isFirstPacket);
EXPECT_EQ(kH264FuA, payload.type.Video.codecHeader.H264.packetization_type);
EXPECT_EQ(kIdr, payload.type.Video.codecHeader.H264.nalu_type);
{
const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264;
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
ASSERT_EQ(1u, h264.nalus_length);
EXPECT_EQ(static_cast<H264::NaluType>(kIdr), h264.nalus[0].type);
EXPECT_EQ(-1, h264.nalus[0].sps_id);
EXPECT_EQ(-1, h264.nalus[0].pps_id);
}
payload = RtpDepacketizer::ParsedPayload();
ASSERT_TRUE(depacketizer_->Parse(&payload, packet3, sizeof(packet3)));
@ -747,8 +783,15 @@ TEST_F(RtpDepacketizerH264Test, TestFuA) {
EXPECT_EQ(kVideoFrameKey, payload.frame_type);
EXPECT_EQ(kRtpVideoH264, payload.type.Video.codec);
EXPECT_FALSE(payload.type.Video.isFirstPacket);
EXPECT_EQ(kH264FuA, payload.type.Video.codecHeader.H264.packetization_type);
EXPECT_EQ(kIdr, payload.type.Video.codecHeader.H264.nalu_type);
{
const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264;
EXPECT_EQ(kH264FuA, h264.packetization_type);
EXPECT_EQ(kIdr, h264.nalu_type);
ASSERT_EQ(1u, h264.nalus_length);
EXPECT_EQ(static_cast<H264::NaluType>(kIdr), h264.nalus[0].type);
EXPECT_EQ(-1, h264.nalus[0].sps_id);
EXPECT_EQ(-1, h264.nalus[0].pps_id);
}
}
TEST_F(RtpDepacketizerH264Test, TestEmptyPayload) {