Set marker bit on last encoded spatial layer.

To handle per-layer frame dropping, both the VP9 encoder wrapper and
the RTP packetizer were modified.

- The encoder wrapper buffers the last encoded frame and passes it to
  the packetizer once the next layer's frame is encoded or encoding of
  the superframe is finished.
- The encoder wrapper sets the end_of_superframe flag on the last
  encoded frame of the superframe before passing it to the packetizer.
- If end_of_superframe is true, the packetizer sets the marker bit on
  the last packet of the frame (see the sketch after this list).
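
The buffering scheme is easiest to see in a stripped-down sketch. The
sketch below is illustrative only: the class and member names are
invented for this note; the real logic lives in
VP9EncoderImpl::DeliverBufferedFrame and RtpPacketizerVp9::NextPacket
in the diffs that follow.

  #include <functional>

  // Hypothetical sketch of the per-layer buffering in the encoder wrapper.
  class LayerFrameBuffer {
   public:
    explicit LayerFrameBuffer(std::function<void(bool)> deliver_to_packetizer)
        : deliver_(std::move(deliver_to_packetizer)) {}

    // Called for every successfully encoded spatial layer frame.
    void OnLayerFrameEncoded() {
      // A newer layer frame arrived, so the buffered one was not the last.
      Flush(/*end_of_superframe=*/false);
      has_buffered_frame_ = true;
    }

    // Called when encoding of the whole superframe is finished.
    void OnSuperframeComplete() {
      // Whatever is still buffered is the last encoded layer; the
      // packetizer sets the RTP marker bit on its last packet.
      Flush(/*end_of_superframe=*/true);
    }

   private:
    void Flush(bool end_of_superframe) {
      if (!has_buffered_frame_)
        return;
      deliver_(end_of_superframe);  // Hand the frame to the packetizer.
      has_buffered_frame_ = false;
    }

    std::function<void(bool)> deliver_;
    bool has_buffered_frame_ = false;
  };

With this scheme the encoder never needs to know in advance which
layers will be dropped: the decision is deferred until either the next
layer frame arrives or the superframe ends.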

Bug: webrtc:9066
Change-Id: I1d45319fbe6bc63d01721ea67bfb7440d4c29275
Reviewed-on: https://webrtc-review.googlesource.com/65540
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Åsa Persson <asapersson@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22722}
Sergey Silkin, 2018-04-04 11:45:41 +02:00, committed by Commit Bot
parent e803dbe210, commit 2a1f183e99
9 changed files with 216 additions and 61 deletions

View File

@@ -576,9 +576,13 @@ bool RtpPacketizerVp9::NextPacket(RtpPacketToSend* packet) {
   if (!WriteHeaderAndPayload(packet_info, packet, packets_.empty())) {
     return false;
   }
-  packet->SetMarker(packets_.empty() &&
-                    (hdr_.spatial_idx == kNoSpatialIdx ||
-                     hdr_.spatial_idx == hdr_.num_spatial_layers - 1));
+
+  // Ensure end_of_superframe is always set on top spatial layer when it is not
+  // dropped.
+  RTC_DCHECK(hdr_.spatial_idx < hdr_.num_spatial_layers - 1 ||
+             hdr_.end_of_superframe);
+
+  packet->SetMarker(packets_.empty() && hdr_.end_of_superframe);
   return true;
 }

View File

@@ -478,7 +478,7 @@ TEST_F(RtpPacketizerVp9Test, TestSsDataDoesNotFitInAveragePacket) {
   CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
 }
 
-TEST_F(RtpPacketizerVp9Test, TestOnlyHighestSpatialLayerSetMarker) {
+TEST_F(RtpPacketizerVp9Test, EndOfSuperframeSetsSetMarker) {
   const size_t kFrameSize = 10;
   const size_t kPacketSize = 8;
   const size_t kLastPacketReductionLen = 0;
@@ -492,32 +492,21 @@ TEST_F(RtpPacketizerVp9Test, TestOnlyHighestSpatialLayerSetMarker) {
   RtpPacketToSend packet(kNoExtensions);
 
-  vp9_header.spatial_idx = 0;
-  RtpPacketizerVp9 packetizer0(vp9_header, kPacketSize,
-                               kLastPacketReductionLen);
-  packetizer0.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
-  ASSERT_TRUE(packetizer0.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-  ASSERT_TRUE(packetizer0.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-
-  vp9_header.spatial_idx = 1;
-  RtpPacketizerVp9 packetizer1(vp9_header, kPacketSize,
-                               kLastPacketReductionLen);
-  packetizer1.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
-  ASSERT_TRUE(packetizer1.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-  ASSERT_TRUE(packetizer1.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-
-  vp9_header.spatial_idx = 2;
-  RtpPacketizerVp9 packetizer2(vp9_header, kPacketSize,
-                               kLastPacketReductionLen);
-  packetizer2.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
-  ASSERT_TRUE(packetizer2.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-  ASSERT_TRUE(packetizer2.NextPacket(&packet));
-  EXPECT_TRUE(packet.Marker());
+  // Drop top layer and ensure that marker bit is set on last encoded layer.
+  for (size_t spatial_idx = 0; spatial_idx < vp9_header.num_spatial_layers - 1;
+       ++spatial_idx) {
+    const bool end_of_superframe =
+        spatial_idx + 1 == vp9_header.num_spatial_layers - 1;
+    vp9_header.spatial_idx = spatial_idx;
+    vp9_header.end_of_superframe = end_of_superframe;
+    RtpPacketizerVp9 packetizer(vp9_header, kPacketSize,
+                                kLastPacketReductionLen);
+    packetizer.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
+    ASSERT_TRUE(packetizer.NextPacket(&packet));
+    EXPECT_FALSE(packet.Marker());
+    ASSERT_TRUE(packetizer.NextPacket(&packet));
+    EXPECT_EQ(packet.Marker(), end_of_superframe);
+  }
 }
 
 TEST_F(RtpPacketizerVp9Test, TestGeneratesMinimumNumberOfPackets) {

View File

@@ -172,6 +172,7 @@ struct RTPVideoHeaderVP9 {
     gof_idx = kNoGofIdx;
     num_ref_pics = 0;
     num_spatial_layers = 1;
+    end_of_superframe = true;
   }
 
   bool inter_pic_predicted;  // This layer frame is dependent on previously
@@ -208,6 +209,8 @@ struct RTPVideoHeaderVP9 {
   uint16_t width[kMaxVp9NumberOfSpatialLayers];
   uint16_t height[kMaxVp9NumberOfSpatialLayers];
   GofInfoVP9 gof;
+
+  bool end_of_superframe;  // This frame is last frame in superframe.
 };
 
 }  // namespace webrtc

View File

@@ -262,4 +262,60 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
   }
 }
 
+TEST_F(TestVp9Impl, EndOfSuperframe) {
+  const size_t num_spatial_layers = 2;
+  const size_t num_temporal_layers = 1;
+  codec_settings_.VP9()->numberOfSpatialLayers =
+      static_cast<unsigned char>(num_spatial_layers);
+  codec_settings_.VP9()->numberOfTemporalLayers =
+      static_cast<unsigned char>(num_temporal_layers);
+
+  std::vector<SpatialLayer> layers =
+      GetSvcConfig(codec_settings_.width, codec_settings_.height,
+                   num_spatial_layers, num_temporal_layers);
+  for (size_t i = 0; i < layers.size(); ++i) {
+    codec_settings_.spatialLayers[i] = layers[i];
+  }
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
+  // Encode both base and upper layers. Check that end-of-superframe flag is
+  // set on upper layer frame but not on base layer frame.
+  BitrateAllocation bitrate_allocation;
+  bitrate_allocation.SetBitrate(0, 0, layers[0].targetBitrate * 1000);
+  bitrate_allocation.SetBitrate(1, 0, layers[1].targetBitrate * 1000);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->SetRateAllocation(bitrate_allocation,
+                                        codec_settings_.maxFramerate));
+  SetWaitForEncodedFramesThreshold(2);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+
+  std::vector<EncodedImage> frames;
+  std::vector<CodecSpecificInfo> codec_specific;
+  ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+  EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.end_of_superframe);
+  EXPECT_TRUE(codec_specific[1].codecSpecific.VP9.end_of_superframe);
+
+  // Encode only base layer. Check that end-of-superframe flag is
+  // set on base layer frame.
+  bitrate_allocation.SetBitrate(1, 0, 0);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->SetRateAllocation(bitrate_allocation,
+                                        codec_settings_.maxFramerate));
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
+  SetWaitForEncodedFramesThreshold(1);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+
+  ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+  EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+  EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_superframe);
+}
+
 }  // namespace webrtc

View File

@@ -586,6 +586,9 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
   }
   timestamp_ += duration;
 
+  const bool end_of_superframe = true;
+  DeliverBufferedFrame(end_of_superframe);
+
   return WEBRTC_VIDEO_CODEC_OK;
 }
@@ -688,6 +691,14 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
 int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);
 
+  if (pkt->data.frame.sz == 0) {
+    // Ignore dropped frame.
+    return WEBRTC_VIDEO_CODEC_OK;
+  }
+
+  const bool end_of_superframe = false;
+  DeliverBufferedFrame(end_of_superframe);
+
   if (pkt->data.frame.sz > encoded_image_._size) {
     delete[] encoded_image_._buffer;
     encoded_image_._size = pkt->data.frame.sz;
@@ -696,15 +707,6 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   memcpy(encoded_image_._buffer, pkt->data.frame.buf, pkt->data.frame.sz);
   encoded_image_._length = pkt->data.frame.sz;
 
-  // No data partitioning in VP9, so 1 partition only.
-  int part_idx = 0;
-  RTPFragmentationHeader frag_info;
-  frag_info.VerifyAndAllocateFragmentationHeader(1);
-  frag_info.fragmentationOffset[part_idx] = 0;
-  frag_info.fragmentationLength[part_idx] = pkt->data.frame.sz;
-  frag_info.fragmentationPlType[part_idx] = 0;
-  frag_info.fragmentationTimeDiff[part_idx] = 0;
-
   vpx_svc_layer_id_t layer_id = {0};
   vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
   if (is_flexible_mode_ && codec_.mode == kScreensharing)
@@ -720,32 +722,47 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   }
   RTC_DCHECK_LE(encoded_image_._length, encoded_image_._size);
 
-  CodecSpecificInfo codec_specific;
-  PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());
+  memset(&codec_specific_, 0, sizeof(codec_specific_));
+  PopulateCodecSpecific(&codec_specific_, *pkt, input_image_->timestamp());
 
-  if (encoded_image_._length > 0) {
-    TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
-    encoded_image_._timeStamp = input_image_->timestamp();
-    encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
-    encoded_image_.rotation_ = input_image_->rotation();
-    encoded_image_.content_type_ = (codec_.mode == kScreensharing)
-                                       ? VideoContentType::SCREENSHARE
-                                       : VideoContentType::UNSPECIFIED;
-    encoded_image_._encodedHeight =
-        pkt->data.frame.height[layer_id.spatial_layer_id];
-    encoded_image_._encodedWidth =
-        pkt->data.frame.width[layer_id.spatial_layer_id];
-    encoded_image_.timing_.flags = TimingFrameFlags::kInvalid;
-    int qp = -1;
-    vpx_codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
-    encoded_image_.qp_ = qp;
+  TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
+  encoded_image_._timeStamp = input_image_->timestamp();
+  encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
+  encoded_image_.rotation_ = input_image_->rotation();
+  encoded_image_.content_type_ = (codec_.mode == kScreensharing)
+                                     ? VideoContentType::SCREENSHARE
+                                     : VideoContentType::UNSPECIFIED;
+  encoded_image_._encodedHeight =
+      pkt->data.frame.height[layer_id.spatial_layer_id];
+  encoded_image_._encodedWidth =
+      pkt->data.frame.width[layer_id.spatial_layer_id];
+  encoded_image_.timing_.flags = TimingFrameFlags::kInvalid;
+  int qp = -1;
+  vpx_codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
+  encoded_image_.qp_ = qp;
 
-    encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific,
-                                               &frag_info);
-  }
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
+void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_superframe) {
+  if (encoded_image_._length > 0) {
+    codec_specific_.codecSpecific.VP9.end_of_superframe = end_of_superframe;
+
+    // No data partitioning in VP9, so 1 partition only.
+    int part_idx = 0;
+    RTPFragmentationHeader frag_info;
+    frag_info.VerifyAndAllocateFragmentationHeader(1);
+    frag_info.fragmentationOffset[part_idx] = 0;
+    frag_info.fragmentationLength[part_idx] = encoded_image_._length;
+    frag_info.fragmentationPlType[part_idx] = 0;
+    frag_info.fragmentationTimeDiff[part_idx] = 0;
+
+    encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific_,
+                                               &frag_info);
+
+    encoded_image_._length = 0;
+  }
+}
+
 vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags(
     const SuperFrameRefSettings& settings) {
   static const vpx_enc_frame_flags_t kAllFlags =

View File

@@ -94,6 +94,8 @@ class VP9EncoderImpl : public VP9Encoder {
   static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
                                                void* user_data);
 
+  void DeliverBufferedFrame(bool end_of_superframe);
+
   // Determine maximum target for Intra frames
   //
   // Input:
@@ -103,6 +105,7 @@ class VP9EncoderImpl : public VP9Encoder {
   uint32_t MaxIntraTarget(uint32_t optimal_buffer_size);
 
   EncodedImage encoded_image_;
+  CodecSpecificInfo codec_specific_;
   EncodedImageCallback* encoded_complete_callback_;
   VideoCodec codec_;
   bool inited_;

View File

@@ -68,6 +68,8 @@ struct CodecSpecificInfoVP9 {
   // Frame reference data.
   uint8_t num_ref_pics;
   uint8_t p_diff[kMaxVp9RefPics];
+
+  bool end_of_superframe;
 };
 
 struct CodecSpecificInfoGeneric {

View File

@@ -67,8 +67,11 @@ void CopyCodecSpecific(const CodecSpecificInfo* info, RTPVideoHeader* rtp) {
       }
       rtp->codecHeader.VP9.num_ref_pics = info->codecSpecific.VP9.num_ref_pics;
-      for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i)
+      for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i) {
         rtp->codecHeader.VP9.pid_diff[i] = info->codecSpecific.VP9.p_diff[i];
+      }
+      rtp->codecHeader.VP9.end_of_superframe =
+          info->codecSpecific.VP9.end_of_superframe;
       return;
     }
     case kVideoCodecH264:

View File

@@ -352,6 +352,84 @@ TEST(PayloadRouterTest, InfoMappedToRtpVideoHeader_Vp8) {
       payload_router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
 }
 
+TEST(PayloadRouterTest, InfoMappedToRtpVideoHeader_Vp9) {
+  RtpPayloadState state;
+  state.picture_id = kPictureId;
+  state.tl0_pic_idx = kTl0PicIdx;
+  std::map<uint32_t, RtpPayloadState> states = {{kSsrc1, state}};
+
+  NiceMock<MockRtpRtcp> rtp;
+  std::vector<RtpRtcp*> modules = {&rtp};
+  PayloadRouter router(modules, {kSsrc1}, kPayloadType, states);
+  router.SetActive(true);
+
+  EncodedImage encoded_image;
+  encoded_image.rotation_ = kVideoRotation_90;
+  encoded_image.content_type_ = VideoContentType::SCREENSHARE;
+
+  CodecSpecificInfo codec_info;
+  memset(&codec_info, 0, sizeof(CodecSpecificInfo));
+  codec_info.codecType = kVideoCodecVP9;
+  codec_info.codecSpecific.VP9.num_spatial_layers = 3;
+  codec_info.codecSpecific.VP9.first_frame_in_picture = true;
+  codec_info.codecSpecific.VP9.spatial_idx = 0;
+  codec_info.codecSpecific.VP9.temporal_idx = 2;
+  codec_info.codecSpecific.VP9.end_of_superframe = false;
+
+  EXPECT_CALL(rtp, SendOutgoingData(_, _, _, _, _, _, nullptr, _, _))
+      .WillOnce(
+          Invoke([&codec_info](Unused, Unused, Unused, Unused, Unused, Unused,
+                               Unused, const RTPVideoHeader* header, Unused) {
+            EXPECT_EQ(kVideoRotation_90, header->rotation);
+            EXPECT_EQ(VideoContentType::SCREENSHARE, header->content_type);
+            EXPECT_EQ(kRtpVideoVp9, header->codec);
+            EXPECT_EQ(kPictureId + 1, header->codecHeader.VP9.picture_id);
+            EXPECT_EQ(kTl0PicIdx, header->codecHeader.VP9.tl0_pic_idx);
+            EXPECT_EQ(header->codecHeader.VP9.temporal_idx,
+                      codec_info.codecSpecific.VP9.temporal_idx);
+            EXPECT_EQ(header->codecHeader.VP9.spatial_idx,
+                      codec_info.codecSpecific.VP9.spatial_idx);
+            EXPECT_EQ(header->codecHeader.VP9.num_spatial_layers,
+                      codec_info.codecSpecific.VP9.num_spatial_layers);
+            EXPECT_EQ(header->codecHeader.VP9.end_of_superframe,
+                      codec_info.codecSpecific.VP9.end_of_superframe);
+            return true;
+          }));
+  EXPECT_CALL(rtp, Sending()).WillOnce(Return(true));
+  EXPECT_EQ(EncodedImageCallback::Result::OK,
+            router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
+
+  // Next spatial layer.
+  codec_info.codecSpecific.VP9.first_frame_in_picture = false;
+  codec_info.codecSpecific.VP9.spatial_idx += 1;
+  codec_info.codecSpecific.VP9.end_of_superframe = true;
+
+  EXPECT_CALL(rtp, SendOutgoingData(_, _, _, _, _, _, nullptr, _, _))
+      .WillOnce(
+          Invoke([&codec_info](Unused, Unused, Unused, Unused, Unused, Unused,
                               Unused, const RTPVideoHeader* header, Unused) {
+            EXPECT_EQ(kVideoRotation_90, header->rotation);
+            EXPECT_EQ(VideoContentType::SCREENSHARE, header->content_type);
+            EXPECT_EQ(kRtpVideoVp9, header->codec);
+            EXPECT_EQ(kPictureId + 1, header->codecHeader.VP9.picture_id);
+            EXPECT_EQ(kTl0PicIdx, header->codecHeader.VP9.tl0_pic_idx);
+            EXPECT_EQ(header->codecHeader.VP9.temporal_idx,
+                      codec_info.codecSpecific.VP9.temporal_idx);
+            EXPECT_EQ(header->codecHeader.VP9.spatial_idx,
+                      codec_info.codecSpecific.VP9.spatial_idx);
+            EXPECT_EQ(header->codecHeader.VP9.num_spatial_layers,
+                      codec_info.codecSpecific.VP9.num_spatial_layers);
+            EXPECT_EQ(header->codecHeader.VP9.end_of_superframe,
+                      codec_info.codecSpecific.VP9.end_of_superframe);
+            return true;
+          }));
+  EXPECT_CALL(rtp, Sending()).WillOnce(Return(true));
+  EXPECT_EQ(EncodedImageCallback::Result::OK,
+            router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
+}
+
 TEST(PayloadRouterTest, InfoMappedToRtpVideoHeader_H264) {
   NiceMock<MockRtpRtcp> rtp1;
   std::vector<RtpRtcp*> modules = {&rtp1};