Make RtpGenericFrameDescriptor available for E2EE.
This CL makes the RtpGenericFrameDescriptor available in RTPSenderVideo::SendVideo for encryption and in RtpVideoStreamReceiver::OnReceivedFrame for decryption. Bug: webrtc:9361 Change-Id: I5b6d10138c0874657862f103c8c9a2328e6d4a66 Reviewed-on: https://webrtc-review.googlesource.com/102720 Commit-Queue: Philip Eliasson <philipel@webrtc.org> Reviewed-by: Erik Språng <sprang@webrtc.org> Reviewed-by: Danil Chapovalov <danilchap@webrtc.org> Cr-Commit-Position: refs/heads/master@{#24929}
This commit is contained in:
@ -119,14 +119,6 @@ bool RtpDepacketizerGeneric::Parse(ParsedPayload* parsed_payload,
|
||||
parsed_payload->video_header().generic.emplace();
|
||||
parsed_payload->video_header().generic->frame_id =
|
||||
((payload_data[0] & 0x7F) << 8) | payload_data[1];
|
||||
|
||||
// The old generic format (this format) does not include spatial and
|
||||
// temporal layer information. To distinguish which format that was actually
|
||||
// used we set the spatial and themporal layer to -1;
|
||||
// TODO(bugs.webrtc.org/9772): Remove the old format.
|
||||
parsed_payload->video_header().generic->spatial_index = -1;
|
||||
parsed_payload->video_header().generic->temporal_index = -1;
|
||||
|
||||
payload_data += kExtendedHeaderLength;
|
||||
payload_data_length -= kExtendedHeaderLength;
|
||||
}
|
||||
|
@ -19,6 +19,9 @@ constexpr int RtpGenericFrameDescriptor::kMaxTemporalLayers;
|
||||
constexpr int RtpGenericFrameDescriptor::kMaxSpatialLayers;
|
||||
|
||||
RtpGenericFrameDescriptor::RtpGenericFrameDescriptor() = default;
|
||||
RtpGenericFrameDescriptor::RtpGenericFrameDescriptor(
|
||||
const RtpGenericFrameDescriptor&) = default;
|
||||
RtpGenericFrameDescriptor::~RtpGenericFrameDescriptor() = default;
|
||||
|
||||
int RtpGenericFrameDescriptor::TemporalLayer() const {
|
||||
RTC_DCHECK(FirstPacketInSubFrame());
|
||||
@ -31,6 +34,17 @@ void RtpGenericFrameDescriptor::SetTemporalLayer(int temporal_layer) {
|
||||
temporal_layer_ = temporal_layer;
|
||||
}
|
||||
|
||||
int RtpGenericFrameDescriptor::SpatialLayer() const {
|
||||
RTC_DCHECK(FirstPacketInSubFrame());
|
||||
int layer = 0;
|
||||
uint8_t spatial_layers = spatial_layers_;
|
||||
while (spatial_layers_ != 0 && !(spatial_layers & 1)) {
|
||||
spatial_layers >>= 1;
|
||||
layer++;
|
||||
}
|
||||
return layer;
|
||||
}
|
||||
|
||||
uint8_t RtpGenericFrameDescriptor::SpatialLayersBitmask() const {
|
||||
RTC_DCHECK(FirstPacketInSubFrame());
|
||||
return spatial_layers_;
|
||||
@ -71,4 +85,15 @@ bool RtpGenericFrameDescriptor::AddFrameDependencyDiff(uint16_t fdiff) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void RtpGenericFrameDescriptor::SetByteRepresentation(
|
||||
rtc::ArrayView<const uint8_t> byte_representation) {
|
||||
byte_representation_.assign(byte_representation.begin(),
|
||||
byte_representation.end());
|
||||
}
|
||||
|
||||
rtc::ArrayView<const uint8_t>
|
||||
RtpGenericFrameDescriptor::GetByteRepresentation() {
|
||||
return byte_representation_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
|
||||
@ -25,6 +26,8 @@ class RtpGenericFrameDescriptor {
|
||||
static constexpr int kMaxSpatialLayers = 8;
|
||||
|
||||
RtpGenericFrameDescriptor();
|
||||
RtpGenericFrameDescriptor(const RtpGenericFrameDescriptor&);
|
||||
~RtpGenericFrameDescriptor();
|
||||
|
||||
bool FirstPacketInSubFrame() const { return beginning_of_subframe_; }
|
||||
void SetFirstPacketInSubFrame(bool first) { beginning_of_subframe_ = first; }
|
||||
@ -41,8 +44,9 @@ class RtpGenericFrameDescriptor {
|
||||
int TemporalLayer() const;
|
||||
void SetTemporalLayer(int temporal_layer);
|
||||
|
||||
// Frame might by used, possible indrectly, for spatial layer sid iff
|
||||
// Frame might by used, possible indirectly, for spatial layer sid iff
|
||||
// (bitmask & (1 << sid)) != 0
|
||||
int SpatialLayer() const;
|
||||
uint8_t SpatialLayersBitmask() const;
|
||||
void SetSpatialLayersBitmask(uint8_t spatial_layers);
|
||||
|
||||
@ -54,6 +58,9 @@ class RtpGenericFrameDescriptor {
|
||||
// Returns false on failure, i.e. number of dependencies is too large.
|
||||
bool AddFrameDependencyDiff(uint16_t fdiff);
|
||||
|
||||
void SetByteRepresentation(rtc::ArrayView<const uint8_t> representation);
|
||||
rtc::ArrayView<const uint8_t> GetByteRepresentation();
|
||||
|
||||
private:
|
||||
bool beginning_of_subframe_ = false;
|
||||
bool end_of_subframe_ = false;
|
||||
@ -65,6 +72,7 @@ class RtpGenericFrameDescriptor {
|
||||
uint8_t temporal_layer_ = 0;
|
||||
size_t num_frame_deps_ = 0;
|
||||
uint16_t frame_deps_id_diffs_[kMaxNumFrameDependencies];
|
||||
std::vector<uint8_t> byte_representation_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -69,6 +69,7 @@ rtc_static_library("packet") {
|
||||
]
|
||||
deps = [
|
||||
"..:module_api",
|
||||
"../rtp_rtcp:rtp_rtcp_format",
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -159,6 +159,15 @@ absl::optional<RTPVideoHeader> RtpFrameObject::GetRtpVideoHeader() const {
|
||||
return packet->video_header;
|
||||
}
|
||||
|
||||
absl::optional<RtpGenericFrameDescriptor>
|
||||
RtpFrameObject::GetGenericFrameDescriptor() const {
|
||||
rtc::CritScope lock(&packet_buffer_->crit_);
|
||||
VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_);
|
||||
if (!packet)
|
||||
return absl::nullopt;
|
||||
return packet->generic_descriptor;
|
||||
}
|
||||
|
||||
absl::optional<FrameMarking> RtpFrameObject::GetFrameMarking() const {
|
||||
rtc::CritScope lock(&packet_buffer_->crit_);
|
||||
VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_);
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "api/video/encoded_frame.h"
|
||||
#include "common_types.h" // NOLINT(build/include)
|
||||
#include "modules/include/module_common_types.h"
|
||||
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace video_coding {
|
||||
@ -41,6 +42,7 @@ class RtpFrameObject : public EncodedFrame {
|
||||
int64_t RenderTime() const override;
|
||||
bool delayed_by_retransmission() const override;
|
||||
absl::optional<RTPVideoHeader> GetRtpVideoHeader() const;
|
||||
absl::optional<RtpGenericFrameDescriptor> GetGenericFrameDescriptor() const;
|
||||
absl::optional<FrameMarking> GetFrameMarking() const;
|
||||
|
||||
private:
|
||||
|
@ -81,4 +81,6 @@ VCMPacket::VCMPacket(const uint8_t* ptr,
|
||||
}
|
||||
}
|
||||
|
||||
VCMPacket::~VCMPacket() = default;
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -12,6 +12,7 @@
|
||||
#define MODULES_VIDEO_CODING_PACKET_H_
|
||||
|
||||
#include "modules/include/module_common_types.h"
|
||||
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -22,6 +23,8 @@ class VCMPacket {
|
||||
const size_t size,
|
||||
const WebRtcRTPHeader& rtpHeader);
|
||||
|
||||
~VCMPacket();
|
||||
|
||||
uint8_t payloadType;
|
||||
uint32_t timestamp;
|
||||
// NTP time of the capture time in local timebase in milliseconds.
|
||||
@ -43,6 +46,7 @@ class VCMPacket {
|
||||
int width;
|
||||
int height;
|
||||
RTPVideoHeader video_header;
|
||||
absl::optional<RtpGenericFrameDescriptor> generic_descriptor;
|
||||
|
||||
int64_t receive_time_ms;
|
||||
};
|
||||
|
@ -84,12 +84,10 @@ void RtpFrameReferenceFinder::RetryStashedFrames() {
|
||||
|
||||
RtpFrameReferenceFinder::FrameDecision
|
||||
RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
|
||||
absl::optional<RTPVideoHeader> video_header = frame->GetRtpVideoHeader();
|
||||
// TODO(bugs.webrtc.org/9772): Remove the spatial id check when the old
|
||||
// generic format has been removed.
|
||||
if (video_header && video_header->generic &&
|
||||
video_header->generic->spatial_index != -1) {
|
||||
return ManageFrameGeneric(frame, *video_header->generic);
|
||||
absl::optional<RtpGenericFrameDescriptor> generic_descriptor =
|
||||
frame->GetGenericFrameDescriptor();
|
||||
if (generic_descriptor) {
|
||||
return ManageFrameGeneric(frame, *generic_descriptor);
|
||||
}
|
||||
|
||||
switch (frame->codec_type()) {
|
||||
@ -99,6 +97,7 @@ RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
|
||||
return ManageFrameVp9(frame);
|
||||
default: {
|
||||
// Use 15 first bits of frame ID as picture ID if available.
|
||||
absl::optional<RTPVideoHeader> video_header = frame->GetRtpVideoHeader();
|
||||
int picture_id = kNoPictureId;
|
||||
if (video_header && video_header->generic)
|
||||
picture_id = video_header->generic->frame_id & 0x7fff;
|
||||
@ -171,19 +170,20 @@ void RtpFrameReferenceFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) {
|
||||
RtpFrameReferenceFinder::FrameDecision
|
||||
RtpFrameReferenceFinder::ManageFrameGeneric(
|
||||
RtpFrameObject* frame,
|
||||
const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
|
||||
if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
|
||||
const RtpGenericFrameDescriptor& descriptor) {
|
||||
int64_t frame_id = generic_frame_id_unwrapper_.Unwrap(descriptor.FrameId());
|
||||
frame->id.picture_id = frame_id;
|
||||
frame->id.spatial_layer = descriptor.SpatialLayer();
|
||||
|
||||
rtc::ArrayView<const uint16_t> diffs = descriptor.FrameDependenciesDiffs();
|
||||
if (EncodedFrame::kMaxFrameReferences < diffs.size()) {
|
||||
RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
|
||||
return kDrop;
|
||||
}
|
||||
|
||||
int64_t frame_id = generic_frame_id_unwrapper_.Unwrap(descriptor.frame_id);
|
||||
frame->id.picture_id = frame_id;
|
||||
frame->id.spatial_layer = descriptor.spatial_index;
|
||||
|
||||
frame->num_references = descriptor.dependencies.size();
|
||||
for (size_t i = 0; i < descriptor.dependencies.size(); ++i)
|
||||
frame->references[i] = frame_id - descriptor.dependencies[i];
|
||||
frame->num_references = diffs.size();
|
||||
for (size_t i = 0; i < diffs.size(); ++i)
|
||||
frame->references[i] = frame_id - diffs[i];
|
||||
|
||||
return kHandOff;
|
||||
}
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "modules/include/module_common_types.h"
|
||||
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
|
||||
#include "rtc_base/criticalsection.h"
|
||||
#include "rtc_base/numerics/sequence_number_util.h"
|
||||
#include "rtc_base/thread_annotations.h"
|
||||
@ -88,9 +89,8 @@ class RtpFrameReferenceFinder {
|
||||
FrameDecision ManageFrameInternal(RtpFrameObject* frame)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
|
||||
FrameDecision ManageFrameGeneric(
|
||||
RtpFrameObject* frame,
|
||||
const RTPVideoHeader::GenericDescriptorInfo& descriptor)
|
||||
FrameDecision ManageFrameGeneric(RtpFrameObject* frame,
|
||||
const RtpGenericFrameDescriptor& descriptor)
|
||||
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
|
||||
// Find references for frames with no or very limited information in the
|
||||
|
@ -201,6 +201,15 @@ int32_t RtpVideoStreamReceiver::OnReceivedPayloadData(
|
||||
const uint8_t* payload_data,
|
||||
size_t payload_size,
|
||||
const WebRtcRTPHeader* rtp_header) {
|
||||
return OnReceivedPayloadData(payload_data, payload_size, rtp_header,
|
||||
absl::nullopt);
|
||||
}
|
||||
|
||||
int32_t RtpVideoStreamReceiver::OnReceivedPayloadData(
|
||||
const uint8_t* payload_data,
|
||||
size_t payload_size,
|
||||
const WebRtcRTPHeader* rtp_header,
|
||||
const absl::optional<RtpGenericFrameDescriptor>& generic_descriptor) {
|
||||
WebRtcRTPHeader rtp_header_with_ntp = *rtp_header;
|
||||
rtp_header_with_ntp.ntp_time_ms =
|
||||
ntp_estimator_.Estimate(rtp_header->header.timestamp);
|
||||
@ -248,6 +257,8 @@ int32_t RtpVideoStreamReceiver::OnReceivedPayloadData(
|
||||
packet.dataPtr = data;
|
||||
}
|
||||
|
||||
packet.generic_descriptor = generic_descriptor;
|
||||
|
||||
packet_buffer_->InsertPacket(&packet);
|
||||
return 0;
|
||||
}
|
||||
@ -462,44 +473,25 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) {
|
||||
packet.GetExtension<PlayoutDelayLimits>(
|
||||
&webrtc_rtp_header.video_header().playout_delay);
|
||||
|
||||
RtpGenericFrameDescriptor generic_descriptor_wire;
|
||||
absl::optional<RtpGenericFrameDescriptor> generic_descriptor_wire;
|
||||
generic_descriptor_wire.emplace();
|
||||
if (packet.GetExtension<RtpGenericFrameDescriptorExtension>(
|
||||
&generic_descriptor_wire)) {
|
||||
&generic_descriptor_wire.value())) {
|
||||
generic_descriptor_wire->SetByteRepresentation(
|
||||
packet.GetRawExtension<RtpGenericFrameDescriptorExtension>());
|
||||
webrtc_rtp_header.video_header().is_first_packet_in_frame =
|
||||
generic_descriptor_wire.FirstSubFrameInFrame() &&
|
||||
generic_descriptor_wire.FirstPacketInSubFrame();
|
||||
generic_descriptor_wire->FirstSubFrameInFrame() &&
|
||||
generic_descriptor_wire->FirstPacketInSubFrame();
|
||||
webrtc_rtp_header.video_header().is_last_packet_in_frame =
|
||||
webrtc_rtp_header.header.markerBit ||
|
||||
(generic_descriptor_wire.LastSubFrameInFrame() &&
|
||||
generic_descriptor_wire.LastPacketInSubFrame());
|
||||
|
||||
// For now we store the diffs in |generic_descirptor.dependencies|. They
|
||||
// are later recaculated when the frame id is unwrapped.
|
||||
// TODO(philipel): Remove RTPVideoHeader::GenericDescriptorInfo and use
|
||||
// RtpGenericFrameDescriptor instead.
|
||||
RTPVideoHeader::GenericDescriptorInfo& generic_descriptor =
|
||||
webrtc_rtp_header.video_header().generic.emplace();
|
||||
if (generic_descriptor_wire.FirstPacketInSubFrame()) {
|
||||
generic_descriptor.frame_id = generic_descriptor_wire.FrameId();
|
||||
for (uint16_t diff : generic_descriptor_wire.FrameDependenciesDiffs()) {
|
||||
generic_descriptor.dependencies.push_back(diff);
|
||||
}
|
||||
|
||||
generic_descriptor.temporal_index =
|
||||
generic_descriptor_wire.TemporalLayer();
|
||||
uint8_t spatial_bitmask = generic_descriptor_wire.SpatialLayersBitmask();
|
||||
while (spatial_bitmask && !(spatial_bitmask & 1)) {
|
||||
spatial_bitmask >>= 1;
|
||||
++generic_descriptor.spatial_index;
|
||||
}
|
||||
|
||||
// Since the receiver doesn't care knowing about higher spatial layer
|
||||
// frames that depend on this frame we don't parse it.
|
||||
}
|
||||
(generic_descriptor_wire->LastSubFrameInFrame() &&
|
||||
generic_descriptor_wire->LastPacketInSubFrame());
|
||||
} else {
|
||||
generic_descriptor_wire.reset();
|
||||
}
|
||||
|
||||
OnReceivedPayloadData(parsed_payload.payload, parsed_payload.payload_length,
|
||||
&webrtc_rtp_header);
|
||||
&webrtc_rtp_header, generic_descriptor_wire);
|
||||
}
|
||||
|
||||
void RtpVideoStreamReceiver::ParseAndHandleEncapsulatingHeader(
|
||||
|
@ -51,8 +51,7 @@ class RtpPacketReceived;
|
||||
class Transport;
|
||||
class UlpfecReceiver;
|
||||
|
||||
class RtpVideoStreamReceiver : public RtpData,
|
||||
public RecoveredPacketReceiver,
|
||||
class RtpVideoStreamReceiver : public RecoveredPacketReceiver,
|
||||
public RtpPacketSinkInterface,
|
||||
public VCMFrameTypeCallback,
|
||||
public VCMPacketRequestCallback,
|
||||
@ -95,10 +94,17 @@ class RtpVideoStreamReceiver : public RtpData,
|
||||
// Implements RtpPacketSinkInterface.
|
||||
void OnRtpPacket(const RtpPacketReceived& packet) override;
|
||||
|
||||
// Implements RtpData.
|
||||
// TODO(philipel): Stop using VCMPacket in the new jitter buffer and then
|
||||
// remove this function.
|
||||
int32_t OnReceivedPayloadData(const uint8_t* payload_data,
|
||||
size_t payload_size,
|
||||
const WebRtcRTPHeader* rtp_header) override;
|
||||
const WebRtcRTPHeader* rtp_header);
|
||||
int32_t OnReceivedPayloadData(
|
||||
const uint8_t* payload_data,
|
||||
size_t payload_size,
|
||||
const WebRtcRTPHeader* rtp_header,
|
||||
const absl::optional<RtpGenericFrameDescriptor>& generic_descriptor);
|
||||
|
||||
// Implements RecoveredPacketReceiver.
|
||||
void OnRecoveredPacket(const uint8_t* packet, size_t packet_length) override;
|
||||
|
||||
|
Reference in New Issue
Block a user