diff --git a/modules/rtp_rtcp/source/rtp_format_vp9.cc b/modules/rtp_rtcp/source/rtp_format_vp9.cc
index bb3edfced7..ad0a7cf52d 100644
--- a/modules/rtp_rtcp/source/rtp_format_vp9.cc
+++ b/modules/rtp_rtcp/source/rtp_format_vp9.cc
@@ -576,9 +576,13 @@ bool RtpPacketizerVp9::NextPacket(RtpPacketToSend* packet) {
   if (!WriteHeaderAndPayload(packet_info, packet, packets_.empty())) {
     return false;
  }
-  packet->SetMarker(packets_.empty() &&
-                    (hdr_.spatial_idx == kNoSpatialIdx ||
-                     hdr_.spatial_idx == hdr_.num_spatial_layers - 1));
+
+  // Ensure end_of_superframe is always set on top spatial layer when it is not
+  // dropped.
+  RTC_DCHECK(hdr_.spatial_idx < hdr_.num_spatial_layers - 1 ||
+             hdr_.end_of_superframe);
+
+  packet->SetMarker(packets_.empty() && hdr_.end_of_superframe);
   return true;
 }
 
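Why this change matters: the old rule keyed the RTP marker bit to the configured top spatial layer's index, so when the encoder dropped the top layer no packet of the superframe was ever marked. The new rule keys the marker to an explicit end_of_superframe flag set by the encoder on the last layer frame it actually produced. The following is a minimal standalone sketch of the two rules; Vp9HeaderSketch and both function names are illustrative stand-ins, not the real RTPVideoHeaderVP9 or WebRTC APIs, and the kNoSpatialIdx special case of the old code is omitted for brevity.

#include <cstdint>

// Illustrative stand-in for the few RTPVideoHeaderVP9 fields involved.
struct Vp9HeaderSketch {
  uint8_t spatial_idx;         // Index of this layer frame.
  uint8_t num_spatial_layers;  // Layers configured, not layers encoded.
  bool end_of_superframe;      // Set by the encoder on the last layer frame.
};

// Old rule: the marker fires only on the configured top layer. If that layer
// is dropped, no packet of the superframe is ever marked.
bool OldMarkerRule(const Vp9HeaderSketch& hdr, bool last_packet_of_frame) {
  return last_packet_of_frame &&
         hdr.spatial_idx == hdr.num_spatial_layers - 1;
}

// New rule: the marker follows the encoder's explicit end-of-superframe
// signal, so it survives top-layer drops.
bool NewMarkerRule(const Vp9HeaderSketch& hdr, bool last_packet_of_frame) {
  return last_packet_of_frame && hdr.end_of_superframe;
}

With three configured layers and the top one dropped, OldMarkerRule never returns true during the superframe, while NewMarkerRule marks the last packet of the layer-1 frame as long as the encoder flagged it.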
diff --git a/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
index c8987df6fd..d9083fb4c6 100644
--- a/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
+++ b/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc
@@ -478,7 +478,7 @@ TEST_F(RtpPacketizerVp9Test, TestSsDataDoesNotFitInAveragePacket) {
   CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum);
 }
 
-TEST_F(RtpPacketizerVp9Test, TestOnlyHighestSpatialLayerSetMarker) {
+TEST_F(RtpPacketizerVp9Test, EndOfSuperframeSetsSetMarker) {
   const size_t kFrameSize = 10;
   const size_t kPacketSize = 8;
   const size_t kLastPacketReductionLen = 0;
@@ -492,32 +492,21 @@
 
   RtpPacketToSend packet(kNoExtensions);
 
-  vp9_header.spatial_idx = 0;
-  RtpPacketizerVp9 packetizer0(vp9_header, kPacketSize,
-                               kLastPacketReductionLen);
-  packetizer0.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
-  ASSERT_TRUE(packetizer0.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-  ASSERT_TRUE(packetizer0.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-
-  vp9_header.spatial_idx = 1;
-  RtpPacketizerVp9 packetizer1(vp9_header, kPacketSize,
-                               kLastPacketReductionLen);
-  packetizer1.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
-  ASSERT_TRUE(packetizer1.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-  ASSERT_TRUE(packetizer1.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-
-  vp9_header.spatial_idx = 2;
-  RtpPacketizerVp9 packetizer2(vp9_header, kPacketSize,
-                               kLastPacketReductionLen);
-  packetizer2.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
-  ASSERT_TRUE(packetizer2.NextPacket(&packet));
-  EXPECT_FALSE(packet.Marker());
-  ASSERT_TRUE(packetizer2.NextPacket(&packet));
-  EXPECT_TRUE(packet.Marker());
+  // Drop top layer and ensure that marker bit is set on last encoded layer.
+  for (size_t spatial_idx = 0; spatial_idx < vp9_header.num_spatial_layers - 1;
+       ++spatial_idx) {
+    const bool end_of_superframe =
+        spatial_idx + 1 == vp9_header.num_spatial_layers - 1;
+    vp9_header.spatial_idx = spatial_idx;
+    vp9_header.end_of_superframe = end_of_superframe;
+    RtpPacketizerVp9 packetizer(vp9_header, kPacketSize,
+                                kLastPacketReductionLen);
+    packetizer.SetPayloadData(kFrame, sizeof(kFrame), kNoFragmentation);
+    ASSERT_TRUE(packetizer.NextPacket(&packet));
+    EXPECT_FALSE(packet.Marker());
+    ASSERT_TRUE(packetizer.NextPacket(&packet));
+    EXPECT_EQ(packet.Marker(), end_of_superframe);
+  }
 }
 
 TEST_F(RtpPacketizerVp9Test, TestGeneratesMinimumNumberOfPackets) {
diff --git a/modules/video_coding/codecs/vp9/include/vp9_globals.h b/modules/video_coding/codecs/vp9/include/vp9_globals.h
index 91507ed165..f24ab3e1b5 100644
--- a/modules/video_coding/codecs/vp9/include/vp9_globals.h
+++ b/modules/video_coding/codecs/vp9/include/vp9_globals.h
@@ -172,6 +172,7 @@ struct RTPVideoHeaderVP9 {
     gof_idx = kNoGofIdx;
     num_ref_pics = 0;
     num_spatial_layers = 1;
+    end_of_superframe = true;
   }
 
   bool inter_pic_predicted;  // This layer frame is dependent on previously
@@ -208,6 +209,8 @@
   uint16_t width[kMaxVp9NumberOfSpatialLayers];
   uint16_t height[kMaxVp9NumberOfSpatialLayers];
   GofInfoVP9 gof;
+
+  bool end_of_superframe;  // This frame is the last frame in a superframe.
 };
 
 }  // namespace webrtc
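RTPVideoHeaderVP9 defaults end_of_superframe to true, which preserves the old behavior for non-layered streams: when every picture is encoded as a single frame, every frame terminates its own superframe. Layered senders are expected to raise the flag only on the top active layer. A tiny hypothetical helper (not a WebRTC API) makes the intended invariant explicit:

#include <cstddef>

// Hypothetical helper: with layers 0..num_active_layers-1 encoded, only the
// top active layer ends the superframe. num_active_layers may be smaller
// than the configured num_spatial_layers when upper layers are dropped.
bool IsEndOfSuperframe(size_t spatial_idx, size_t num_active_layers) {
  return spatial_idx + 1 == num_active_layers;
}

This is exactly what the packetizer test above exercises: it iterates over the lower layers, flags layer num_spatial_layers - 2 as the end of the superframe to simulate a dropped top layer, and expects the marker bit to track the flag rather than the spatial index.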
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 66ceabedf1..b16419b6b6 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -262,4 +262,60 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
   }
 }
 
+TEST_F(TestVp9Impl, EndOfSuperframe) {
+  const size_t num_spatial_layers = 2;
+  const size_t num_temporal_layers = 1;
+  codec_settings_.VP9()->numberOfSpatialLayers =
+      static_cast<uint8_t>(num_spatial_layers);
+  codec_settings_.VP9()->numberOfTemporalLayers =
+      static_cast<uint8_t>(num_temporal_layers);
+
+  std::vector<SpatialLayer> layers =
+      GetSvcConfig(codec_settings_.width, codec_settings_.height,
+                   num_spatial_layers, num_temporal_layers);
+  for (size_t i = 0; i < layers.size(); ++i) {
+    codec_settings_.spatialLayers[i] = layers[i];
+  }
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
+  // Encode both base and upper layers. Check that end-of-superframe flag is
+  // set on upper layer frame but not on base layer frame.
+  BitrateAllocation bitrate_allocation;
+  bitrate_allocation.SetBitrate(0, 0, layers[0].targetBitrate * 1000);
+  bitrate_allocation.SetBitrate(1, 0, layers[1].targetBitrate * 1000);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->SetRateAllocation(bitrate_allocation,
+                                        codec_settings_.maxFramerate));
+  SetWaitForEncodedFramesThreshold(2);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+
+  std::vector<EncodedImage> frames;
+  std::vector<CodecSpecificInfo> codec_specific;
+  ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+  EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.end_of_superframe);
+  EXPECT_TRUE(codec_specific[1].codecSpecific.VP9.end_of_superframe);
+
+  // Encode only base layer. Check that end-of-superframe flag is
+  // set on base layer frame.
+  bitrate_allocation.SetBitrate(1, 0, 0);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->SetRateAllocation(bitrate_allocation,
+                                        codec_settings_.maxFramerate));
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
+  SetWaitForEncodedFramesThreshold(1);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+
+  ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
+  EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
+  EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_superframe);
+}
+
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index e329536a60..c1f7d5d5aa 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -586,6 +586,9 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
   }
   timestamp_ += duration;
 
+  const bool end_of_superframe = true;
+  DeliverBufferedFrame(end_of_superframe);
+
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
@@ -688,6 +691,14 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
 int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);
 
+  if (pkt->data.frame.sz == 0) {
+    // Ignore dropped frame.
+    return WEBRTC_VIDEO_CODEC_OK;
+  }
+
+  const bool end_of_superframe = false;
+  DeliverBufferedFrame(end_of_superframe);
+
   if (pkt->data.frame.sz > encoded_image_._size) {
     delete[] encoded_image_._buffer;
     encoded_image_._size = pkt->data.frame.sz;
@@ -696,15 +707,6 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   memcpy(encoded_image_._buffer, pkt->data.frame.buf, pkt->data.frame.sz);
   encoded_image_._length = pkt->data.frame.sz;
 
-  // No data partitioning in VP9, so 1 partition only.
-  int part_idx = 0;
-  RTPFragmentationHeader frag_info;
-  frag_info.VerifyAndAllocateFragmentationHeader(1);
-  frag_info.fragmentationOffset[part_idx] = 0;
-  frag_info.fragmentationLength[part_idx] = pkt->data.frame.sz;
-  frag_info.fragmentationPlType[part_idx] = 0;
-  frag_info.fragmentationTimeDiff[part_idx] = 0;
-
   vpx_svc_layer_id_t layer_id = {0};
   vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
   if (is_flexible_mode_ && codec_.mode == kScreensharing)
@@ -720,32 +722,47 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
   }
   RTC_DCHECK_LE(encoded_image_._length, encoded_image_._size);
 
-  CodecSpecificInfo codec_specific;
-  PopulateCodecSpecific(&codec_specific, *pkt, input_image_->timestamp());
+  memset(&codec_specific_, 0, sizeof(codec_specific_));
+  PopulateCodecSpecific(&codec_specific_, *pkt, input_image_->timestamp());
 
-  if (encoded_image_._length > 0) {
-    TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
-    encoded_image_._timeStamp = input_image_->timestamp();
-    encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
-    encoded_image_.rotation_ = input_image_->rotation();
-    encoded_image_.content_type_ = (codec_.mode == kScreensharing)
-                                       ? VideoContentType::SCREENSHARE
-                                       : VideoContentType::UNSPECIFIED;
-    encoded_image_._encodedHeight =
-        pkt->data.frame.height[layer_id.spatial_layer_id];
-    encoded_image_._encodedWidth =
-        pkt->data.frame.width[layer_id.spatial_layer_id];
-    encoded_image_.timing_.flags = TimingFrameFlags::kInvalid;
-    int qp = -1;
-    vpx_codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
-    encoded_image_.qp_ = qp;
+  TRACE_COUNTER1("webrtc", "EncodedFrameSize", encoded_image_._length);
+  encoded_image_._timeStamp = input_image_->timestamp();
+  encoded_image_.capture_time_ms_ = input_image_->render_time_ms();
+  encoded_image_.rotation_ = input_image_->rotation();
+  encoded_image_.content_type_ = (codec_.mode == kScreensharing)
+                                     ? VideoContentType::SCREENSHARE
+                                     : VideoContentType::UNSPECIFIED;
+  encoded_image_._encodedHeight =
+      pkt->data.frame.height[layer_id.spatial_layer_id];
+  encoded_image_._encodedWidth =
+      pkt->data.frame.width[layer_id.spatial_layer_id];
+  encoded_image_.timing_.flags = TimingFrameFlags::kInvalid;
+  int qp = -1;
+  vpx_codec_control(encoder_, VP8E_GET_LAST_QUANTIZER, &qp);
+  encoded_image_.qp_ = qp;
 
-    encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific,
-                                               &frag_info);
-  }
   return WEBRTC_VIDEO_CODEC_OK;
 }
 
+void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_superframe) {
+  if (encoded_image_._length > 0) {
+    codec_specific_.codecSpecific.VP9.end_of_superframe = end_of_superframe;
+
+    // No data partitioning in VP9, so 1 partition only.
+    int part_idx = 0;
+    RTPFragmentationHeader frag_info;
+    frag_info.VerifyAndAllocateFragmentationHeader(1);
+    frag_info.fragmentationOffset[part_idx] = 0;
+    frag_info.fragmentationLength[part_idx] = encoded_image_._length;
+    frag_info.fragmentationPlType[part_idx] = 0;
+    frag_info.fragmentationTimeDiff[part_idx] = 0;
+
+    encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific_,
+                                               &frag_info);
+    encoded_image_._length = 0;
+  }
+}
+
 vpx_svc_ref_frame_config VP9EncoderImpl::GenerateRefsAndFlags(
     const SuperFrameRefSettings& settings) {
   static const vpx_enc_frame_flags_t kAllFlags =
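The encoder-side mechanics deserve a note. libvpx reports layer frames one at a time through GetEncodedLayerFrame, and at that point the encoder cannot yet know whether another layer frame of the same superframe will follow. The change therefore delivers each frame one step late: an arriving layer frame first flushes the previously buffered one with end_of_superframe = false, and the flush at the end of Encode() releases the final frame with end_of_superframe = true. Below is a self-contained sketch of this buffer-one-frame pattern; SuperframeBuffer and its method names are illustrative, not the WebRTC implementation.

#include <functional>
#include <optional>
#include <string>
#include <utility>

// Sketch of the deferred-delivery pattern used by VP9EncoderImpl above.
class SuperframeBuffer {
 public:
  using Sink = std::function<void(const std::string&, bool)>;
  explicit SuperframeBuffer(Sink sink) : sink_(std::move(sink)) {}

  // One call per encoded layer frame. The previously buffered frame, if any,
  // cannot have been the last one in its superframe, so flush it with false.
  void OnLayerFrame(std::string frame) {
    Flush(/*end_of_superframe=*/false);
    buffered_ = std::move(frame);
  }

  // Called once the codec has drained all layer frames for the input picture.
  // Whatever is still buffered must end the superframe.
  void OnPictureDone() { Flush(/*end_of_superframe=*/true); }

 private:
  void Flush(bool end_of_superframe) {
    if (buffered_.has_value()) {
      sink_(*buffered_, end_of_superframe);
      buffered_.reset();  // Mirrors encoded_image_._length = 0 above.
    }
  }

  Sink sink_;
  std::optional<std::string> buffered_;
};

Feeding two layer frames and then OnPictureDone() delivers the first with false and the second with true; feeding a single frame delivers it with true, which is the dropped-top-layer case the unit tests exercise. Note that dropped frames (pkt->data.frame.sz == 0) return early before the flush, so a drop never releases the buffered frame with a false flag.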
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 37076f867d..cdc8a83169 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -94,6 +94,8 @@ class VP9EncoderImpl : public VP9Encoder {
   static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
                                                void* user_data);
 
+  void DeliverBufferedFrame(bool end_of_superframe);
+
   // Determine maximum target for Intra frames
   //
   // Input:
@@ -103,6 +105,7 @@ class VP9EncoderImpl : public VP9Encoder {
   uint32_t MaxIntraTarget(uint32_t optimal_buffer_size);
 
   EncodedImage encoded_image_;
+  CodecSpecificInfo codec_specific_;
   EncodedImageCallback* encoded_complete_callback_;
   VideoCodec codec_;
   bool inited_;
diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h
index b534f642e6..204098c6c6 100644
--- a/modules/video_coding/include/video_codec_interface.h
+++ b/modules/video_coding/include/video_codec_interface.h
@@ -68,6 +68,8 @@ struct CodecSpecificInfoVP9 {
   // Frame reference data.
   uint8_t num_ref_pics;
   uint8_t p_diff[kMaxVp9RefPics];
+
+  bool end_of_superframe;
 };
 
 struct CodecSpecificInfoGeneric {
diff --git a/video/payload_router.cc b/video/payload_router.cc
index f980bc41bf..b06908fe1f 100644
--- a/video/payload_router.cc
+++ b/video/payload_router.cc
@@ -67,8 +67,11 @@ void CopyCodecSpecific(const CodecSpecificInfo* info, RTPVideoHeader* rtp) {
       }
       rtp->codecHeader.VP9.num_ref_pics = info->codecSpecific.VP9.num_ref_pics;
-      for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i)
+      for (int i = 0; i < info->codecSpecific.VP9.num_ref_pics; ++i) {
        rtp->codecHeader.VP9.pid_diff[i] = info->codecSpecific.VP9.p_diff[i];
+      }
+      rtp->codecHeader.VP9.end_of_superframe =
+          info->codecSpecific.VP9.end_of_superframe;
       return;
     }
     case kVideoCodecH264:
diff --git a/video/payload_router_unittest.cc b/video/payload_router_unittest.cc
index 42cafc11ef..af574429a1 100644
--- a/video/payload_router_unittest.cc
+++ b/video/payload_router_unittest.cc
@@ -352,6 +352,84 @@ TEST(PayloadRouterTest, InfoMappedToRtpVideoHeader_Vp8) {
       payload_router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
 }
 
+TEST(PayloadRouterTest, InfoMappedToRtpVideoHeader_Vp9) {
+  RtpPayloadState state;
+  state.picture_id = kPictureId;
+  state.tl0_pic_idx = kTl0PicIdx;
+  std::map<uint32_t, RtpPayloadState> states = {{kSsrc1, state}};
+
+  NiceMock<MockRtpRtcp> rtp;
+  std::vector<RtpRtcp*> modules = {&rtp};
+  PayloadRouter router(modules, {kSsrc1}, kPayloadType, states);
+  router.SetActive(true);
+
+  EncodedImage encoded_image;
+  encoded_image.rotation_ = kVideoRotation_90;
+  encoded_image.content_type_ = VideoContentType::SCREENSHARE;
+
+  CodecSpecificInfo codec_info;
+  memset(&codec_info, 0, sizeof(CodecSpecificInfo));
+  codec_info.codecType = kVideoCodecVP9;
+  codec_info.codecSpecific.VP9.num_spatial_layers = 3;
+  codec_info.codecSpecific.VP9.first_frame_in_picture = true;
+  codec_info.codecSpecific.VP9.spatial_idx = 0;
+  codec_info.codecSpecific.VP9.temporal_idx = 2;
+  codec_info.codecSpecific.VP9.end_of_superframe = false;
+
+  EXPECT_CALL(rtp, SendOutgoingData(_, _, _, _, _, _, nullptr, _, _))
+      .WillOnce(
+          Invoke([&codec_info](Unused, Unused, Unused, Unused, Unused, Unused,
+                               Unused, const RTPVideoHeader* header, Unused) {
+            EXPECT_EQ(kVideoRotation_90, header->rotation);
+            EXPECT_EQ(VideoContentType::SCREENSHARE, header->content_type);
+            EXPECT_EQ(kRtpVideoVp9, header->codec);
+            EXPECT_EQ(kPictureId + 1, header->codecHeader.VP9.picture_id);
+            EXPECT_EQ(kTl0PicIdx, header->codecHeader.VP9.tl0_pic_idx);
+            EXPECT_EQ(header->codecHeader.VP9.temporal_idx,
+                      codec_info.codecSpecific.VP9.temporal_idx);
+            EXPECT_EQ(header->codecHeader.VP9.spatial_idx,
+                      codec_info.codecSpecific.VP9.spatial_idx);
+            EXPECT_EQ(header->codecHeader.VP9.num_spatial_layers,
+                      codec_info.codecSpecific.VP9.num_spatial_layers);
+            EXPECT_EQ(header->codecHeader.VP9.end_of_superframe,
+                      codec_info.codecSpecific.VP9.end_of_superframe);
+            return true;
+          }));
+  EXPECT_CALL(rtp, Sending()).WillOnce(Return(true));
+
+  EXPECT_EQ(EncodedImageCallback::Result::OK,
+            router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
+
+  // Next spatial layer.
+  codec_info.codecSpecific.VP9.first_frame_in_picture = false;
+  codec_info.codecSpecific.VP9.spatial_idx += 1;
+  codec_info.codecSpecific.VP9.end_of_superframe = true;
+
+  EXPECT_CALL(rtp, SendOutgoingData(_, _, _, _, _, _, nullptr, _, _))
+      .WillOnce(
+          Invoke([&codec_info](Unused, Unused, Unused, Unused, Unused, Unused,
+                               Unused, const RTPVideoHeader* header, Unused) {
+            EXPECT_EQ(kVideoRotation_90, header->rotation);
+            EXPECT_EQ(VideoContentType::SCREENSHARE, header->content_type);
+            EXPECT_EQ(kRtpVideoVp9, header->codec);
+            EXPECT_EQ(kPictureId + 1, header->codecHeader.VP9.picture_id);
+            EXPECT_EQ(kTl0PicIdx, header->codecHeader.VP9.tl0_pic_idx);
+            EXPECT_EQ(header->codecHeader.VP9.temporal_idx,
+                      codec_info.codecSpecific.VP9.temporal_idx);
+            EXPECT_EQ(header->codecHeader.VP9.spatial_idx,
+                      codec_info.codecSpecific.VP9.spatial_idx);
+            EXPECT_EQ(header->codecHeader.VP9.num_spatial_layers,
+                      codec_info.codecSpecific.VP9.num_spatial_layers);
+            EXPECT_EQ(header->codecHeader.VP9.end_of_superframe,
+                      codec_info.codecSpecific.VP9.end_of_superframe);
+            return true;
+          }));
+  EXPECT_CALL(rtp, Sending()).WillOnce(Return(true));
+
+  EXPECT_EQ(EncodedImageCallback::Result::OK,
+            router.OnEncodedImage(encoded_image, &codec_info, nullptr).error);
+}
+
 TEST(PayloadRouterTest, InfoMappedToRtpVideoHeader_H264) {
   NiceMock<MockRtpRtcp> rtp1;
   std::vector<RtpRtcp*> modules = {&rtp1};
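End to end, the flag travels the whole send-side pipeline that this test pins down: VP9EncoderImpl sets end_of_superframe in CodecSpecificInfoVP9, PayloadRouter's CopyCodecSpecific forwards it into RTPVideoHeaderVP9, and RtpPacketizerVp9 turns it into the RTP marker bit. A condensed sketch of that flow, using simplified stand-in structs rather than the real WebRTC types:

#include <cstdint>

// Minimal stand-ins for CodecSpecificInfoVP9 and RTPVideoHeaderVP9.
struct CodecInfoSketch {
  bool end_of_superframe;
};
struct RtpHeaderSketch {
  bool end_of_superframe;
};

// Mirrors the CopyCodecSpecific change: the router must forward the flag,
// otherwise the packetizer would only ever see the header's default value.
void CopyCodecSpecificSketch(const CodecInfoSketch& info,
                             RtpHeaderSketch* rtp) {
  rtp->end_of_superframe = info.end_of_superframe;
}

// Mirrors the RtpPacketizerVp9::NextPacket marker decision.
bool MarkerBit(const RtpHeaderSketch& hdr, bool last_packet_of_frame) {
  return last_packet_of_frame && hdr.end_of_superframe;
}

The two EXPECT_CALL blocks in the test verify precisely this forwarding: once for a lower-layer frame (flag false, so no marker downstream) and once for the superframe-ending frame (flag true).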