From 2837edce997b469a702bb96f84ac49dfb727531d Mon Sep 17 00:00:00 2001 From: philipel Date: Tue, 2 Oct 2018 13:55:47 +0200 Subject: [PATCH] Make RtpGenericFrameDescriptor available for E2EE. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL makes the RtpGenericFrameDescriptor available in RTPSenderVideo::SendVideo for encryption and in RtpVideoStreamReceiver::OnReceivedFrame for decryption. Bug: webrtc:9361 Change-Id: I5b6d10138c0874657862f103c8c9a2328e6d4a66 Reviewed-on: https://webrtc-review.googlesource.com/102720 Commit-Queue: Philip Eliasson Reviewed-by: Erik Språng Reviewed-by: Danil Chapovalov Cr-Commit-Position: refs/heads/master@{#24929} --- .../source/rtp_format_video_generic.cc | 8 --- .../source/rtp_generic_frame_descriptor.cc | 25 +++++++++ .../source/rtp_generic_frame_descriptor.h | 10 +++- modules/video_coding/BUILD.gn | 1 + modules/video_coding/frame_object.cc | 9 ++++ modules/video_coding/frame_object.h | 2 + modules/video_coding/packet.cc | 2 + modules/video_coding/packet.h | 4 ++ .../rtp_frame_reference_finder.cc | 30 +++++------ .../video_coding/rtp_frame_reference_finder.h | 6 +-- video/rtp_video_stream_receiver.cc | 54 ++++++++----------- video/rtp_video_stream_receiver.h | 14 +++-- 12 files changed, 103 insertions(+), 62 deletions(-) diff --git a/modules/rtp_rtcp/source/rtp_format_video_generic.cc b/modules/rtp_rtcp/source/rtp_format_video_generic.cc index 6ea13b7fce..edd1e3c530 100644 --- a/modules/rtp_rtcp/source/rtp_format_video_generic.cc +++ b/modules/rtp_rtcp/source/rtp_format_video_generic.cc @@ -119,14 +119,6 @@ bool RtpDepacketizerGeneric::Parse(ParsedPayload* parsed_payload, parsed_payload->video_header().generic.emplace(); parsed_payload->video_header().generic->frame_id = ((payload_data[0] & 0x7F) << 8) | payload_data[1]; - - // The old generic format (this format) does not include spatial and - // temporal layer information. To distinguish which format that was actually - // used we set the spatial and themporal layer to -1; - // TODO(bugs.webrtc.org/9772): Remove the old format. - parsed_payload->video_header().generic->spatial_index = -1; - parsed_payload->video_header().generic->temporal_index = -1; - payload_data += kExtendedHeaderLength; payload_data_length -= kExtendedHeaderLength; } diff --git a/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc b/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc index b5b8ce5199..1b931327ba 100644 --- a/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc +++ b/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.cc @@ -19,6 +19,9 @@ constexpr int RtpGenericFrameDescriptor::kMaxTemporalLayers; constexpr int RtpGenericFrameDescriptor::kMaxSpatialLayers; RtpGenericFrameDescriptor::RtpGenericFrameDescriptor() = default; +RtpGenericFrameDescriptor::RtpGenericFrameDescriptor( + const RtpGenericFrameDescriptor&) = default; +RtpGenericFrameDescriptor::~RtpGenericFrameDescriptor() = default; int RtpGenericFrameDescriptor::TemporalLayer() const { RTC_DCHECK(FirstPacketInSubFrame()); @@ -31,6 +34,17 @@ void RtpGenericFrameDescriptor::SetTemporalLayer(int temporal_layer) { temporal_layer_ = temporal_layer; } +int RtpGenericFrameDescriptor::SpatialLayer() const { + RTC_DCHECK(FirstPacketInSubFrame()); + int layer = 0; + uint8_t spatial_layers = spatial_layers_; + while (spatial_layers_ != 0 && !(spatial_layers & 1)) { + spatial_layers >>= 1; + layer++; + } + return layer; +} + uint8_t RtpGenericFrameDescriptor::SpatialLayersBitmask() const { RTC_DCHECK(FirstPacketInSubFrame()); return spatial_layers_; @@ -71,4 +85,15 @@ bool RtpGenericFrameDescriptor::AddFrameDependencyDiff(uint16_t fdiff) { return true; } +void RtpGenericFrameDescriptor::SetByteRepresentation( + rtc::ArrayView byte_representation) { + byte_representation_.assign(byte_representation.begin(), + byte_representation.end()); +} + +rtc::ArrayView +RtpGenericFrameDescriptor::GetByteRepresentation() { + return byte_representation_; +} + } // namespace webrtc diff --git a/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h b/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h index e4b775e0a8..1bde4fa56d 100644 --- a/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h +++ b/modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h @@ -12,6 +12,7 @@ #include #include +#include #include "api/array_view.h" @@ -25,6 +26,8 @@ class RtpGenericFrameDescriptor { static constexpr int kMaxSpatialLayers = 8; RtpGenericFrameDescriptor(); + RtpGenericFrameDescriptor(const RtpGenericFrameDescriptor&); + ~RtpGenericFrameDescriptor(); bool FirstPacketInSubFrame() const { return beginning_of_subframe_; } void SetFirstPacketInSubFrame(bool first) { beginning_of_subframe_ = first; } @@ -41,8 +44,9 @@ class RtpGenericFrameDescriptor { int TemporalLayer() const; void SetTemporalLayer(int temporal_layer); - // Frame might by used, possible indrectly, for spatial layer sid iff + // Frame might by used, possible indirectly, for spatial layer sid iff // (bitmask & (1 << sid)) != 0 + int SpatialLayer() const; uint8_t SpatialLayersBitmask() const; void SetSpatialLayersBitmask(uint8_t spatial_layers); @@ -54,6 +58,9 @@ class RtpGenericFrameDescriptor { // Returns false on failure, i.e. number of dependencies is too large. bool AddFrameDependencyDiff(uint16_t fdiff); + void SetByteRepresentation(rtc::ArrayView representation); + rtc::ArrayView GetByteRepresentation(); + private: bool beginning_of_subframe_ = false; bool end_of_subframe_ = false; @@ -65,6 +72,7 @@ class RtpGenericFrameDescriptor { uint8_t temporal_layer_ = 0; size_t num_frame_deps_ = 0; uint16_t frame_deps_id_diffs_[kMaxNumFrameDependencies]; + std::vector byte_representation_; }; } // namespace webrtc diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index c992da5302..a414153b6b 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -69,6 +69,7 @@ rtc_static_library("packet") { ] deps = [ "..:module_api", + "../rtp_rtcp:rtp_rtcp_format", ] } diff --git a/modules/video_coding/frame_object.cc b/modules/video_coding/frame_object.cc index 32b0cc125c..7f823da7dd 100644 --- a/modules/video_coding/frame_object.cc +++ b/modules/video_coding/frame_object.cc @@ -159,6 +159,15 @@ absl::optional RtpFrameObject::GetRtpVideoHeader() const { return packet->video_header; } +absl::optional +RtpFrameObject::GetGenericFrameDescriptor() const { + rtc::CritScope lock(&packet_buffer_->crit_); + VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_); + if (!packet) + return absl::nullopt; + return packet->generic_descriptor; +} + absl::optional RtpFrameObject::GetFrameMarking() const { rtc::CritScope lock(&packet_buffer_->crit_); VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_); diff --git a/modules/video_coding/frame_object.h b/modules/video_coding/frame_object.h index 5fb5193080..968489bfef 100644 --- a/modules/video_coding/frame_object.h +++ b/modules/video_coding/frame_object.h @@ -15,6 +15,7 @@ #include "api/video/encoded_frame.h" #include "common_types.h" // NOLINT(build/include) #include "modules/include/module_common_types.h" +#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h" namespace webrtc { namespace video_coding { @@ -41,6 +42,7 @@ class RtpFrameObject : public EncodedFrame { int64_t RenderTime() const override; bool delayed_by_retransmission() const override; absl::optional GetRtpVideoHeader() const; + absl::optional GetGenericFrameDescriptor() const; absl::optional GetFrameMarking() const; private: diff --git a/modules/video_coding/packet.cc b/modules/video_coding/packet.cc index dea72cb5ce..eec8ba3cda 100644 --- a/modules/video_coding/packet.cc +++ b/modules/video_coding/packet.cc @@ -81,4 +81,6 @@ VCMPacket::VCMPacket(const uint8_t* ptr, } } +VCMPacket::~VCMPacket() = default; + } // namespace webrtc diff --git a/modules/video_coding/packet.h b/modules/video_coding/packet.h index 09ab983e8b..bddf857021 100644 --- a/modules/video_coding/packet.h +++ b/modules/video_coding/packet.h @@ -12,6 +12,7 @@ #define MODULES_VIDEO_CODING_PACKET_H_ #include "modules/include/module_common_types.h" +#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h" namespace webrtc { @@ -22,6 +23,8 @@ class VCMPacket { const size_t size, const WebRtcRTPHeader& rtpHeader); + ~VCMPacket(); + uint8_t payloadType; uint32_t timestamp; // NTP time of the capture time in local timebase in milliseconds. @@ -43,6 +46,7 @@ class VCMPacket { int width; int height; RTPVideoHeader video_header; + absl::optional generic_descriptor; int64_t receive_time_ms; }; diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc index 67414bb356..40b16f4156 100644 --- a/modules/video_coding/rtp_frame_reference_finder.cc +++ b/modules/video_coding/rtp_frame_reference_finder.cc @@ -84,12 +84,10 @@ void RtpFrameReferenceFinder::RetryStashedFrames() { RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) { - absl::optional video_header = frame->GetRtpVideoHeader(); - // TODO(bugs.webrtc.org/9772): Remove the spatial id check when the old - // generic format has been removed. - if (video_header && video_header->generic && - video_header->generic->spatial_index != -1) { - return ManageFrameGeneric(frame, *video_header->generic); + absl::optional generic_descriptor = + frame->GetGenericFrameDescriptor(); + if (generic_descriptor) { + return ManageFrameGeneric(frame, *generic_descriptor); } switch (frame->codec_type()) { @@ -99,6 +97,7 @@ RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) { return ManageFrameVp9(frame); default: { // Use 15 first bits of frame ID as picture ID if available. + absl::optional video_header = frame->GetRtpVideoHeader(); int picture_id = kNoPictureId; if (video_header && video_header->generic) picture_id = video_header->generic->frame_id & 0x7fff; @@ -171,19 +170,20 @@ void RtpFrameReferenceFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) { RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameGeneric( RtpFrameObject* frame, - const RTPVideoHeader::GenericDescriptorInfo& descriptor) { - if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) { + const RtpGenericFrameDescriptor& descriptor) { + int64_t frame_id = generic_frame_id_unwrapper_.Unwrap(descriptor.FrameId()); + frame->id.picture_id = frame_id; + frame->id.spatial_layer = descriptor.SpatialLayer(); + + rtc::ArrayView diffs = descriptor.FrameDependenciesDiffs(); + if (EncodedFrame::kMaxFrameReferences < diffs.size()) { RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor."; return kDrop; } - int64_t frame_id = generic_frame_id_unwrapper_.Unwrap(descriptor.frame_id); - frame->id.picture_id = frame_id; - frame->id.spatial_layer = descriptor.spatial_index; - - frame->num_references = descriptor.dependencies.size(); - for (size_t i = 0; i < descriptor.dependencies.size(); ++i) - frame->references[i] = frame_id - descriptor.dependencies[i]; + frame->num_references = diffs.size(); + for (size_t i = 0; i < diffs.size(); ++i) + frame->references[i] = frame_id - diffs[i]; return kHandOff; } diff --git a/modules/video_coding/rtp_frame_reference_finder.h b/modules/video_coding/rtp_frame_reference_finder.h index eae73d2fdc..01819eac65 100644 --- a/modules/video_coding/rtp_frame_reference_finder.h +++ b/modules/video_coding/rtp_frame_reference_finder.h @@ -19,6 +19,7 @@ #include #include "modules/include/module_common_types.h" +#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h" #include "rtc_base/criticalsection.h" #include "rtc_base/numerics/sequence_number_util.h" #include "rtc_base/thread_annotations.h" @@ -88,9 +89,8 @@ class RtpFrameReferenceFinder { FrameDecision ManageFrameInternal(RtpFrameObject* frame) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_); - FrameDecision ManageFrameGeneric( - RtpFrameObject* frame, - const RTPVideoHeader::GenericDescriptorInfo& descriptor) + FrameDecision ManageFrameGeneric(RtpFrameObject* frame, + const RtpGenericFrameDescriptor& descriptor) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_); // Find references for frames with no or very limited information in the diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc index c4e7905d43..64ec544b70 100644 --- a/video/rtp_video_stream_receiver.cc +++ b/video/rtp_video_stream_receiver.cc @@ -201,6 +201,15 @@ int32_t RtpVideoStreamReceiver::OnReceivedPayloadData( const uint8_t* payload_data, size_t payload_size, const WebRtcRTPHeader* rtp_header) { + return OnReceivedPayloadData(payload_data, payload_size, rtp_header, + absl::nullopt); +} + +int32_t RtpVideoStreamReceiver::OnReceivedPayloadData( + const uint8_t* payload_data, + size_t payload_size, + const WebRtcRTPHeader* rtp_header, + const absl::optional& generic_descriptor) { WebRtcRTPHeader rtp_header_with_ntp = *rtp_header; rtp_header_with_ntp.ntp_time_ms = ntp_estimator_.Estimate(rtp_header->header.timestamp); @@ -248,6 +257,8 @@ int32_t RtpVideoStreamReceiver::OnReceivedPayloadData( packet.dataPtr = data; } + packet.generic_descriptor = generic_descriptor; + packet_buffer_->InsertPacket(&packet); return 0; } @@ -462,44 +473,25 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) { packet.GetExtension( &webrtc_rtp_header.video_header().playout_delay); - RtpGenericFrameDescriptor generic_descriptor_wire; + absl::optional generic_descriptor_wire; + generic_descriptor_wire.emplace(); if (packet.GetExtension( - &generic_descriptor_wire)) { + &generic_descriptor_wire.value())) { + generic_descriptor_wire->SetByteRepresentation( + packet.GetRawExtension()); webrtc_rtp_header.video_header().is_first_packet_in_frame = - generic_descriptor_wire.FirstSubFrameInFrame() && - generic_descriptor_wire.FirstPacketInSubFrame(); + generic_descriptor_wire->FirstSubFrameInFrame() && + generic_descriptor_wire->FirstPacketInSubFrame(); webrtc_rtp_header.video_header().is_last_packet_in_frame = webrtc_rtp_header.header.markerBit || - (generic_descriptor_wire.LastSubFrameInFrame() && - generic_descriptor_wire.LastPacketInSubFrame()); - - // For now we store the diffs in |generic_descirptor.dependencies|. They - // are later recaculated when the frame id is unwrapped. - // TODO(philipel): Remove RTPVideoHeader::GenericDescriptorInfo and use - // RtpGenericFrameDescriptor instead. - RTPVideoHeader::GenericDescriptorInfo& generic_descriptor = - webrtc_rtp_header.video_header().generic.emplace(); - if (generic_descriptor_wire.FirstPacketInSubFrame()) { - generic_descriptor.frame_id = generic_descriptor_wire.FrameId(); - for (uint16_t diff : generic_descriptor_wire.FrameDependenciesDiffs()) { - generic_descriptor.dependencies.push_back(diff); - } - - generic_descriptor.temporal_index = - generic_descriptor_wire.TemporalLayer(); - uint8_t spatial_bitmask = generic_descriptor_wire.SpatialLayersBitmask(); - while (spatial_bitmask && !(spatial_bitmask & 1)) { - spatial_bitmask >>= 1; - ++generic_descriptor.spatial_index; - } - - // Since the receiver doesn't care knowing about higher spatial layer - // frames that depend on this frame we don't parse it. - } + (generic_descriptor_wire->LastSubFrameInFrame() && + generic_descriptor_wire->LastPacketInSubFrame()); + } else { + generic_descriptor_wire.reset(); } OnReceivedPayloadData(parsed_payload.payload, parsed_payload.payload_length, - &webrtc_rtp_header); + &webrtc_rtp_header, generic_descriptor_wire); } void RtpVideoStreamReceiver::ParseAndHandleEncapsulatingHeader( diff --git a/video/rtp_video_stream_receiver.h b/video/rtp_video_stream_receiver.h index b54d38291e..72ffcc722f 100644 --- a/video/rtp_video_stream_receiver.h +++ b/video/rtp_video_stream_receiver.h @@ -51,8 +51,7 @@ class RtpPacketReceived; class Transport; class UlpfecReceiver; -class RtpVideoStreamReceiver : public RtpData, - public RecoveredPacketReceiver, +class RtpVideoStreamReceiver : public RecoveredPacketReceiver, public RtpPacketSinkInterface, public VCMFrameTypeCallback, public VCMPacketRequestCallback, @@ -95,10 +94,17 @@ class RtpVideoStreamReceiver : public RtpData, // Implements RtpPacketSinkInterface. void OnRtpPacket(const RtpPacketReceived& packet) override; - // Implements RtpData. + // TODO(philipel): Stop using VCMPacket in the new jitter buffer and then + // remove this function. int32_t OnReceivedPayloadData(const uint8_t* payload_data, size_t payload_size, - const WebRtcRTPHeader* rtp_header) override; + const WebRtcRTPHeader* rtp_header); + int32_t OnReceivedPayloadData( + const uint8_t* payload_data, + size_t payload_size, + const WebRtcRTPHeader* rtp_header, + const absl::optional& generic_descriptor); + // Implements RecoveredPacketReceiver. void OnRecoveredPacket(const uint8_t* packet, size_t packet_length) override;