From 1a1c52baf9525b59a54a6f509e5a8253c8c0bfe3 Mon Sep 17 00:00:00 2001 From: Johnny Lee Date: Fri, 8 Feb 2019 14:25:40 -0500 Subject: [PATCH] H.264 temporal layers w/frame marking (PART 2/3) Bug: None Change-Id: Id1381d895377d39c3969635e1a59591214aabb71 Reviewed-on: https://webrtc-review.googlesource.com/c/86140 Commit-Queue: Sergey Silkin Reviewed-by: Stefan Holmer Reviewed-by: Sergey Silkin Cr-Commit-Position: refs/heads/master@{#26624} --- api/video_codecs/video_codec.h | 1 + api/video_codecs/video_encoder.cc | 1 + call/rtp_payload_params.cc | 17 +++++++ call/rtp_payload_params_unittest.cc | 36 +++++++++++-- media/engine/fake_webrtc_call.cc | 2 + media/engine/webrtc_video_engine.cc | 3 +- modules/rtp_rtcp/source/rtp_sender_video.cc | 41 +++++++++++---- .../source/rtp_sender_video_unittest.cc | 50 +++++++++++++++++++ modules/rtp_rtcp/source/rtp_video_header.h | 2 +- .../codecs/h264/h264_encoder_impl.cc | 31 +++++++++--- .../codecs/h264/h264_encoder_impl.h | 3 ++ .../codecs/h264/include/h264_globals.h | 2 + modules/video_coding/encoded_frame.cc | 14 ++++++ .../include/video_codec_interface.h | 3 ++ modules/video_coding/session_info.cc | 6 +++ .../video_coding/video_codec_initializer.cc | 6 +++ video/rtp_video_stream_receiver.cc | 6 +++ video/video_send_stream_tests.cc | 17 ++++++- video/video_stream_encoder.cc | 2 + 19 files changed, 222 insertions(+), 21 deletions(-) diff --git a/api/video_codecs/video_codec.h b/api/video_codecs/video_codec.h index 828bd1d94a..57bb8933de 100644 --- a/api/video_codecs/video_codec.h +++ b/api/video_codecs/video_codec.h @@ -82,6 +82,7 @@ struct VideoCodecH264 { } bool frameDroppingOn; int keyFrameInterval; + uint8_t numberOfTemporalLayers; // These are NULL/0 if not externally negotiated. const uint8_t* spsData; size_t spsLen; diff --git a/api/video_codecs/video_encoder.cc b/api/video_codecs/video_encoder.cc index 5b7d89c749..e56c597dc0 100644 --- a/api/video_codecs/video_encoder.cc +++ b/api/video_codecs/video_encoder.cc @@ -53,6 +53,7 @@ VideoCodecH264 VideoEncoder::GetDefaultH264Settings() { h264_settings.frameDroppingOn = true; h264_settings.keyFrameInterval = 3000; + h264_settings.numberOfTemporalLayers = 1; h264_settings.spsData = nullptr; h264_settings.spsLen = 0; h264_settings.ppsData = nullptr; diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc index 93ef6548e5..6042ed31b3 100644 --- a/call/rtp_payload_params.cc +++ b/call/rtp_payload_params.cc @@ -91,6 +91,15 @@ void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info, h264_header.packetization_mode = info.codecSpecific.H264.packetization_mode; rtp->simulcastIdx = spatial_index.value_or(0); + rtp->frame_marking.temporal_id = kNoTemporalIdx; + if (info.codecSpecific.H264.temporal_idx != kNoTemporalIdx) { + rtp->frame_marking.temporal_id = info.codecSpecific.H264.temporal_idx; + rtp->frame_marking.layer_id = 0; + rtp->frame_marking.independent_frame = + info.codecSpecific.H264.idr_frame; + rtp->frame_marking.base_layer_sync = + info.codecSpecific.H264.base_layer_sync; + } return; } case kVideoCodecMultiplex: @@ -222,6 +231,14 @@ void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header, vp9_header.tl0_pic_idx = state_.tl0_pic_idx; } } + if (rtp_video_header->codec == kVideoCodecH264) { + if (rtp_video_header->frame_marking.temporal_id != kNoTemporalIdx) { + if (rtp_video_header->frame_marking.temporal_id == 0) { + ++state_.tl0_pic_idx; + } + rtp_video_header->frame_marking.tl0_pic_idx = state_.tl0_pic_idx; + } + } // There are currently two generic descriptors in WebRTC. The old descriptor // can not share a picture id space between simulcast streams, so we use the // |picture_id| in this case. We let the |picture_id| tag along in |frame_id| diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc index c46d9756fd..ff3589effb 100644 --- a/call/rtp_payload_params_unittest.cc +++ b/call/rtp_payload_params_unittest.cc @@ -144,14 +144,18 @@ TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp9) { } TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_H264) { - RtpPayloadParams params(kSsrc1, {}); + RtpPayloadState state; + state.picture_id = kPictureId; + state.tl0_pic_idx = kInitialTl0PicIdx1; + RtpPayloadParams params(kSsrc1, &state); EncodedImage encoded_image; CodecSpecificInfo codec_info; + CodecSpecificInfoH264 *h264info = &codec_info.codecSpecific.H264; memset(&codec_info, 0, sizeof(CodecSpecificInfo)); codec_info.codecType = kVideoCodecH264; - codec_info.codecSpecific.H264.packetization_mode = - H264PacketizationMode::SingleNalUnit; + h264info->packetization_mode = H264PacketizationMode::SingleNalUnit; + h264info->temporal_idx = kNoTemporalIdx; RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare); @@ -160,6 +164,32 @@ TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_H264) { EXPECT_EQ(kVideoCodecH264, header.codec); const auto& h264 = absl::get(header.video_type_header); EXPECT_EQ(H264PacketizationMode::SingleNalUnit, h264.packetization_mode); + + // test temporal param 1 + h264info->temporal_idx = 1; + h264info->base_layer_sync = true; + h264info->idr_frame = false; + + header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare); + + EXPECT_EQ(kVideoCodecH264, header.codec); + EXPECT_EQ(header.frame_marking.tl0_pic_idx, kInitialTl0PicIdx1); + EXPECT_EQ(header.frame_marking.temporal_id, h264info->temporal_idx); + EXPECT_EQ(header.frame_marking.base_layer_sync, h264info->base_layer_sync); + EXPECT_EQ(header.frame_marking.independent_frame, h264info->idr_frame); + + // test temporal param 2 + h264info->temporal_idx = 0; + h264info->base_layer_sync = false; + h264info->idr_frame = true; + + header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare); + + EXPECT_EQ(kVideoCodecH264, header.codec); + EXPECT_EQ(header.frame_marking.tl0_pic_idx, kInitialTl0PicIdx1 + 1); + EXPECT_EQ(header.frame_marking.temporal_id, h264info->temporal_idx); + EXPECT_EQ(header.frame_marking.base_layer_sync, h264info->base_layer_sync); + EXPECT_EQ(header.frame_marking.independent_frame, h264info->idr_frame); } TEST(RtpPayloadParamsTest, PictureIdIsSetForVp8) { diff --git a/media/engine/fake_webrtc_call.cc b/media/engine/fake_webrtc_call.cc index 4dd3b39bb0..662c6d2a9c 100644 --- a/media/engine/fake_webrtc_call.cc +++ b/media/engine/fake_webrtc_call.cc @@ -249,6 +249,8 @@ void FakeVideoSendStream::ReconfigureVideoEncoder( } else if (config_.rtp.payload_name == "H264") { config.encoder_specific_settings->FillVideoCodecH264( &codec_specific_settings_.h264); + codec_specific_settings_.h264.numberOfTemporalLayers = + num_temporal_layers; } else { ADD_FAILURE() << "Unsupported encoder payload: " << config_.rtp.payload_name; diff --git a/media/engine/webrtc_video_engine.cc b/media/engine/webrtc_video_engine.cc index 88f900045c..e0018726f1 100644 --- a/media/engine/webrtc_video_engine.cc +++ b/media/engine/webrtc_video_engine.cc @@ -2690,7 +2690,8 @@ std::vector EncoderStreamFactory::CreateEncoderStreams( absl::EqualsIgnoreCase(codec_name_, kH264CodecName)) && is_screenshare_ && screenshare_config_explicitly_enabled_)) { const bool temporal_layers_supported = - absl::EqualsIgnoreCase(codec_name_, kVp8CodecName); + absl::EqualsIgnoreCase(codec_name_, kVp8CodecName) + || absl::EqualsIgnoreCase(codec_name_, kH264CodecName); layers = GetSimulcastConfig(encoder_config.number_of_streams, width, height, 0 /*not used*/, encoder_config.bitrate_priority, max_qp_, 0 /*not_used*/, is_screenshare_, diff --git a/modules/rtp_rtcp/source/rtp_sender_video.cc b/modules/rtp_rtcp/source/rtp_sender_video.cc index c63f0d7ea9..3adc98aece 100644 --- a/modules/rtp_rtcp/source/rtp_sender_video.cc +++ b/modules/rtp_rtcp/source/rtp_sender_video.cc @@ -58,6 +58,7 @@ void AddRtpHeaderExtensions(const RTPVideoHeader& video_header, FrameType frame_type, bool set_video_rotation, bool set_color_space, + bool set_frame_marking, bool first_packet, bool last_packet, RtpPacketToSend* packet) { @@ -84,6 +85,14 @@ void AddRtpHeaderExtensions(const RTPVideoHeader& video_header, if (playout_delay) { packet->SetExtension(*playout_delay); } + + if (set_frame_marking) { + FrameMarking frame_marking = video_header.frame_marking; + frame_marking.start_of_frame = first_packet; + frame_marking.end_of_frame = last_packet; + packet->SetExtension(frame_marking); + } + if (video_header.generic) { RtpGenericFrameDescriptor generic_descriptor; generic_descriptor.SetFirstPacketInSubFrame(first_packet); @@ -215,6 +224,12 @@ void RTPSenderVideo::RegisterPayloadType(int8_t payload_type, rtc::CritScope cs(&payload_type_crit_); payload_type_map_[payload_type] = video_type; + + // Backward compatibility for older receivers without temporal layer logic + if (video_type == kVideoCodecH264) { + rtc::CritScope cs(&crit_); + retransmission_settings_ = kRetransmitBaseLayer | kRetransmitHigherLayers; + } } void RTPSenderVideo::SendVideoPacket(std::unique_ptr packet, @@ -422,6 +437,8 @@ bool RTPSenderVideo::SendVideo(FrameType frame_type, int32_t retransmission_settings; bool set_video_rotation; bool set_color_space = false; + bool set_frame_marking = video_header->codec == kVideoCodecH264 && + video_header->frame_marking.temporal_id != kNoTemporalIdx; const absl::optional playout_delay = playout_delay_oracle_.PlayoutDelayToSend(video_header->playout_delay); @@ -489,17 +506,18 @@ bool RTPSenderVideo::SendVideo(FrameType frame_type, auto last_packet = absl::make_unique(*single_packet); // Simplest way to estimate how much extensions would occupy is to set them. AddRtpHeaderExtensions(*video_header, playout_delay, frame_type, - set_video_rotation, set_color_space, /*first=*/true, - /*last=*/true, single_packet.get()); + set_video_rotation, set_color_space, set_frame_marking, + /*first=*/true, /*last=*/true, single_packet.get()); AddRtpHeaderExtensions(*video_header, playout_delay, frame_type, - set_video_rotation, set_color_space, /*first=*/true, - /*last=*/false, first_packet.get()); + set_video_rotation, set_color_space, set_frame_marking, + /*first=*/true, /*last=*/false, first_packet.get()); AddRtpHeaderExtensions(*video_header, playout_delay, frame_type, - set_video_rotation, set_color_space, /*first=*/false, - /*last=*/false, middle_packet.get()); + set_video_rotation, set_color_space, set_frame_marking, + /*first=*/false, /*last=*/false, middle_packet.get()); AddRtpHeaderExtensions(*video_header, playout_delay, frame_type, - set_video_rotation, set_color_space, /*first=*/false, - /*last=*/true, last_packet.get()); + set_video_rotation, set_color_space, set_frame_marking, + /*first=*/false, /*last=*/true, last_packet.get()); + RTC_DCHECK_GT(packet_capacity, single_packet->headers_size()); RTC_DCHECK_GT(packet_capacity, first_packet->headers_size()); RTC_DCHECK_GT(packet_capacity, middle_packet->headers_size()); @@ -733,7 +751,12 @@ uint8_t RTPSenderVideo::GetTemporalId(const RTPVideoHeader& header) { uint8_t operator()(const RTPVideoHeaderH264&) { return kNoTemporalIdx; } uint8_t operator()(const absl::monostate&) { return kNoTemporalIdx; } }; - return absl::visit(TemporalIdGetter(), header.video_type_header); + switch (header.codec) { + case kVideoCodecH264: + return header.frame_marking.temporal_id; + default: + return absl::visit(TemporalIdGetter(), header.video_type_header); + } } bool RTPSenderVideo::UpdateConditionalRetransmit( diff --git a/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc index 85bbc80ac9..43eb4ed454 100644 --- a/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc +++ b/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc @@ -39,6 +39,7 @@ constexpr int kAbsoluteSendTimeExtensionId = 14; constexpr int kTransportSequenceNumberExtensionId = 13; constexpr int kVideoTimingExtensionId = 12; constexpr int kGenericDescriptorId = 10; +constexpr int kFrameMarkingExtensionId = 6; constexpr int kVideoRotationExtensionId = 5; constexpr int kPayload = 100; constexpr uint32_t kTimestamp = 10; @@ -63,6 +64,8 @@ class LoopbackTransportTest : public webrtc::Transport { kVideoTimingExtensionId); receivers_extensions_.Register(kRtpExtensionGenericFrameDescriptor, kGenericDescriptorId); + receivers_extensions_.Register(kRtpExtensionFrameMarking, + kFrameMarkingExtensionId); } bool SendRtp(const uint8_t* data, @@ -230,6 +233,43 @@ TEST_P(RtpSenderVideoTest, DeltaFrameHasCVOWhenNonZero) { EXPECT_EQ(kVideoRotation_90, rotation); } +TEST_P(RtpSenderVideoTest, CheckH264FrameMarking) { + uint8_t kFrame[kMaxPacketLength]; + EXPECT_EQ(0, rtp_sender_.RegisterRtpHeaderExtension( + kRtpExtensionFrameMarking, kFrameMarkingExtensionId)); + + RTPFragmentationHeader frag; + frag.VerifyAndAllocateFragmentationHeader(1); + frag.fragmentationOffset[0] = 0; + frag.fragmentationLength[0] = sizeof(kFrame); + + RTPVideoHeader hdr; + hdr.video_type_header.emplace().packetization_mode = + H264PacketizationMode::NonInterleaved; + hdr.codec = kVideoCodecH264; + hdr.frame_marking.temporal_id = kNoTemporalIdx; + hdr.frame_marking.tl0_pic_idx = 99; + hdr.frame_marking.base_layer_sync = true; + rtp_sender_video_.SendVideo(kVideoFrameDelta, kPayload, + kTimestamp, 0, kFrame, sizeof(kFrame), &frag, + &hdr, kDefaultExpectedRetransmissionTimeMs); + + FrameMarking fm; + EXPECT_FALSE( + transport_.last_sent_packet().GetExtension(&fm)); + + hdr.frame_marking.temporal_id = 0; + rtp_sender_video_.SendVideo(kVideoFrameDelta, kPayload, + kTimestamp + 1, 0, kFrame, sizeof(kFrame), &frag, + &hdr, kDefaultExpectedRetransmissionTimeMs); + + EXPECT_TRUE( + transport_.last_sent_packet().GetExtension(&fm)); + EXPECT_EQ(hdr.frame_marking.temporal_id, fm.temporal_id); + EXPECT_EQ(hdr.frame_marking.tl0_pic_idx, fm.tl0_pic_idx); + EXPECT_EQ(hdr.frame_marking.base_layer_sync, fm.base_layer_sync); +} + // Make sure rotation is parsed correctly when the Camera (C) and Flip (F) bits // are set in the CVO byte. TEST_P(RtpSenderVideoTest, SendVideoWithCameraAndFlipCVO) { @@ -275,6 +315,7 @@ TEST_P(RtpSenderVideoTest, RetransmissionTypesH264) { header.video_type_header.emplace().packetization_mode = H264PacketizationMode::NonInterleaved; header.codec = kVideoCodecH264; + header.frame_marking.temporal_id = kNoTemporalIdx; EXPECT_EQ(kDontRetransmit, rtp_sender_video_.GetStorageType( @@ -289,6 +330,15 @@ TEST_P(RtpSenderVideoTest, RetransmissionTypesH264) { rtp_sender_video_.GetStorageType( header, kConditionallyRetransmitHigherLayers, kDefaultExpectedRetransmissionTimeMs)); + + // Test higher level retransmit. + for (int tid = 0; tid <= kMaxTemporalStreams; ++tid) { + header.frame_marking.temporal_id = tid; + EXPECT_EQ(kAllowRetransmission, + rtp_sender_video_.GetStorageType( + header, kRetransmitHigherLayers | kRetransmitBaseLayer, + kDefaultExpectedRetransmissionTimeMs)); + } } TEST_P(RtpSenderVideoTest, RetransmissionTypesVP8BaseLayer) { diff --git a/modules/rtp_rtcp/source/rtp_video_header.h b/modules/rtp_rtcp/source/rtp_video_header.h index b6c43ef111..49d8c28ed2 100644 --- a/modules/rtp_rtcp/source/rtp_video_header.h +++ b/modules/rtp_rtcp/source/rtp_video_header.h @@ -63,7 +63,7 @@ struct RTPVideoHeader { PlayoutDelay playout_delay = {-1, -1}; VideoSendTiming video_timing; - FrameMarking frame_marking; + FrameMarking frame_marking = {false, false, false, false, false, 0xFF, 0, 0}; absl::optional color_space; RTPVideoTypeHeader video_type_header; }; diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc index 7aeac663dc..7e9464cc9a 100644 --- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc +++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc @@ -167,7 +167,9 @@ H264EncoderImpl::H264EncoderImpl(const cricket::VideoCodec& codec) number_of_cores_(0), encoded_image_callback_(nullptr), has_reported_init_(false), - has_reported_error_(false) { + has_reported_error_(false), + num_temporal_layers_(1), + tl0sync_limit_(0) { RTC_CHECK(absl::EqualsIgnoreCase(codec.name, cricket::kH264CodecName)); std::string packetization_mode_string; if (codec.GetParam(cricket::kH264FmtpPacketizationMode, @@ -236,13 +238,10 @@ int32_t H264EncoderImpl::InitEncode(const VideoCodec* inst, codec_.simulcastStream[0].height = codec_.height; } + num_temporal_layers_ = codec_.H264()->numberOfTemporalLayers; + for (int i = 0, idx = number_of_streams - 1; i < number_of_streams; ++i, --idx) { - // Temporal layers still not supported. - if (inst->simulcastStream[i].numberOfTemporalLayers > 1) { - Release(); - return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED; - } ISVCEncoder* openh264_encoder; // Create encoder. if (WelsCreateSVCEncoder(&openh264_encoder) != 0) { @@ -530,6 +529,22 @@ int32_t H264EncoderImpl::Encode(const VideoFrame& input_frame, codec_specific.codecType = kVideoCodecH264; codec_specific.codecSpecific.H264.packetization_mode = packetization_mode_; + codec_specific.codecSpecific.H264.temporal_idx = kNoTemporalIdx; + codec_specific.codecSpecific.H264.idr_frame = + info.eFrameType == videoFrameTypeIDR; + codec_specific.codecSpecific.H264.base_layer_sync = false; + if (num_temporal_layers_ > 1) { + const uint8_t tid = info.sLayerInfo[0].uiTemporalId; + codec_specific.codecSpecific.H264.temporal_idx = tid; + codec_specific.codecSpecific.H264.base_layer_sync = + tid > 0 && tid < tl0sync_limit_; + if (codec_specific.codecSpecific.H264.base_layer_sync) { + tl0sync_limit_ = tid; + } + if (tid == 0) { + tl0sync_limit_ = num_temporal_layers_; + } + } encoded_image_callback_->OnEncodedImage(encoded_images_[i], &codec_specific, &frag_header); } @@ -581,6 +596,10 @@ SEncParamExt H264EncoderImpl::CreateEncoderParams(size_t i) const { encoder_params.iTargetBitrate; encoder_params.sSpatialLayers[0].iMaxSpatialBitrate = encoder_params.iMaxBitrate; + encoder_params.iTemporalLayerNum = num_temporal_layers_; + if (encoder_params.iTemporalLayerNum > 1) { + encoder_params.iNumRefFrame = 1; + } RTC_LOG(INFO) << "OpenH264 version is " << OPENH264_MAJOR << "." << OPENH264_MINOR; switch (packetization_mode_) { diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.h b/modules/video_coding/codecs/h264/h264_encoder_impl.h index da32563c9b..044b6eb2eb 100644 --- a/modules/video_coding/codecs/h264/h264_encoder_impl.h +++ b/modules/video_coding/codecs/h264/h264_encoder_impl.h @@ -100,6 +100,9 @@ class H264EncoderImpl : public H264Encoder { bool has_reported_init_; bool has_reported_error_; + + int num_temporal_layers_; + uint8_t tl0sync_limit_; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/h264/include/h264_globals.h b/modules/video_coding/codecs/h264/include/h264_globals.h index e3215002d5..321a6b7739 100644 --- a/modules/video_coding/codecs/h264/include/h264_globals.h +++ b/modules/video_coding/codecs/h264/include/h264_globals.h @@ -15,6 +15,8 @@ #define MODULES_VIDEO_CODING_CODECS_H264_INCLUDE_H264_GLOBALS_H_ #include +#include "modules/video_coding/codecs/interface/common_constants.h" + #include "rtc_base/checks.h" diff --git a/modules/video_coding/encoded_frame.cc b/modules/video_coding/encoded_frame.cc index c18ef131a6..6e8e3422b1 100644 --- a/modules/video_coding/encoded_frame.cc +++ b/modules/video_coding/encoded_frame.cc @@ -135,6 +135,20 @@ void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header) { } case kVideoCodecH264: { _codecSpecificInfo.codecType = kVideoCodecH264; + + // The following H264 codec specific data are not used elsewhere. + // Instead they are read directly from the frame marking extension. + // These codec specific data structures should be removed + // when frame marking is used. + _codecSpecificInfo.codecSpecific.H264.temporal_idx = kNoTemporalIdx; + if (header->frame_marking.temporal_id != kNoTemporalIdx) { + _codecSpecificInfo.codecSpecific.H264.temporal_idx = + header->frame_marking.temporal_id; + _codecSpecificInfo.codecSpecific.H264.base_layer_sync = + header->frame_marking.base_layer_sync; + _codecSpecificInfo.codecSpecific.H264.idr_frame = + header->frame_marking.independent_frame; + } break; } default: { diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h index 052963ce76..0a6c12d1a1 100644 --- a/modules/video_coding/include/video_codec_interface.h +++ b/modules/video_coding/include/video_codec_interface.h @@ -83,6 +83,9 @@ static_assert(std::is_pod::value, ""); // Hack alert - the code assumes that thisstruct is memset when constructed. struct CodecSpecificInfoH264 { H264PacketizationMode packetization_mode; + uint8_t temporal_idx; + bool base_layer_sync; + bool idr_frame; }; static_assert(std::is_pod::value, ""); diff --git a/modules/video_coding/session_info.cc b/modules/video_coding/session_info.cc index d6b59be644..f77f6d668b 100644 --- a/modules/video_coding/session_info.cc +++ b/modules/video_coding/session_info.cc @@ -95,6 +95,8 @@ int VCMSessionInfo::TemporalId() const { return absl::get( packets_.front().video_header.video_type_header) .temporal_idx; + } else if (packets_.front().video_header.codec == kVideoCodecH264) { + return packets_.front().video_header.frame_marking.temporal_id; } else { return kNoTemporalIdx; } @@ -111,6 +113,8 @@ bool VCMSessionInfo::LayerSync() const { return absl::get( packets_.front().video_header.video_type_header) .temporal_up_switch; + } else if (packets_.front().video_header.codec == kVideoCodecH264) { + return packets_.front().video_header.frame_marking.base_layer_sync; } else { return false; } @@ -127,6 +131,8 @@ int VCMSessionInfo::Tl0PicId() const { return absl::get( packets_.front().video_header.video_type_header) .tl0_pic_idx; + } else if (packets_.front().video_header.codec == kVideoCodecH264) { + return packets_.front().video_header.frame_marking.tl0_pic_idx; } else { return kNoTl0PicIdx; } diff --git a/modules/video_coding/video_codec_initializer.cc b/modules/video_coding/video_codec_initializer.cc index d67cf526f4..a2bc27aab4 100644 --- a/modules/video_coding/video_codec_initializer.cc +++ b/modules/video_coding/video_codec_initializer.cc @@ -225,6 +225,12 @@ VideoCodec VideoCodecInitializer::VideoEncoderConfigToVideoCodec( case kVideoCodecH264: { if (!config.encoder_specific_settings) *video_codec.H264() = VideoEncoder::GetDefaultH264Settings(); + video_codec.H264()->numberOfTemporalLayers = static_cast( + streams.back().num_temporal_layers.value_or( + video_codec.H264()->numberOfTemporalLayers)); + RTC_DCHECK_GE(video_codec.H264()->numberOfTemporalLayers, 1); + RTC_DCHECK_LE(video_codec.H264()->numberOfTemporalLayers, + kMaxTemporalStreams); break; } default: diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc index 2c35cebfc2..656dd7a990 100644 --- a/video/rtp_video_stream_receiver.cc +++ b/video/rtp_video_stream_receiver.cc @@ -495,6 +495,8 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) { VideoSendTiming::kInvalid; webrtc_rtp_header.video_header().is_last_packet_in_frame = webrtc_rtp_header.header.markerBit; + webrtc_rtp_header.video_header().frame_marking.temporal_id = kNoTemporalIdx; + if (parsed_payload.video_header().codec == kVideoCodecVP9) { const RTPVideoHeaderVP9& codec_header = absl::get( parsed_payload.video_header().video_type_header); @@ -512,6 +514,9 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) { &webrtc_rtp_header.video_header().video_timing); packet.GetExtension( &webrtc_rtp_header.video_header().playout_delay); + packet.GetExtension( + &webrtc_rtp_header.video_header().frame_marking); + webrtc_rtp_header.video_header().color_space = packet.GetExtension(); if (webrtc_rtp_header.video_header().color_space || @@ -523,6 +528,7 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) { } else if (last_color_space_) { webrtc_rtp_header.video_header().color_space = last_color_space_; } + absl::optional generic_descriptor_wire; generic_descriptor_wire.emplace(); if (packet.GetExtension( diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc index bd5a03ca30..057fd9538e 100644 --- a/video/video_send_stream_tests.cc +++ b/video/video_send_stream_tests.cc @@ -2534,8 +2534,23 @@ void VideoCodecConfigObserver::InitCodecSpecifics() { template <> void VideoCodecConfigObserver::VerifyCodecSpecifics( const VideoCodec& config) const { + // Check that the number of temporal layers has propagated properly to + // VideoCodec. + EXPECT_EQ(kVideoCodecConfigObserverNumberOfTemporalLayers, + config.H264().numberOfTemporalLayers); + + for (unsigned char i = 0; i < config.numberOfSimulcastStreams; ++i) { + EXPECT_EQ(kVideoCodecConfigObserverNumberOfTemporalLayers, + config.simulcastStream[i].numberOfTemporalLayers); + } + + // Set expected temporal layers as they should have been set when + // reconfiguring the encoder and not match the set config. + VideoCodecH264 encoder_settings = encoder_settings_; + encoder_settings.numberOfTemporalLayers = + kVideoCodecConfigObserverNumberOfTemporalLayers; EXPECT_EQ( - 0, memcmp(&config.H264(), &encoder_settings_, sizeof(encoder_settings_))); + 0, memcmp(&config.H264(), &encoder_settings, sizeof(encoder_settings_))); } template <> diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc index 168b267580..fb9dd8b8f1 100644 --- a/video/video_stream_encoder.cc +++ b/video/video_stream_encoder.cc @@ -628,6 +628,8 @@ void VideoStreamEncoder::ReconfigureEncoder() { num_layers = codec.VP8()->numberOfTemporalLayers; } else if (codec.codecType == kVideoCodecVP9) { num_layers = codec.VP9()->numberOfTemporalLayers; + } else if (codec.codecType == kVideoCodecH264) { + num_layers = codec.H264()->numberOfTemporalLayers; } else if (codec.codecType == kVideoCodecGeneric && codec.numberOfSimulcastStreams > 0) { // This is mainly for unit testing, disabling frame dropping.