Make RtpGenericFrameDescriptor available for E2EE.

This CL makes the RtpGenericFrameDescriptor available in
RTPSenderVideo::SendVideo for encryption and in
RtpVideoStreamReceiver::OnReceivedFrame for decryption.

Bug: webrtc:9361
Change-Id: I5b6d10138c0874657862f103c8c9a2328e6d4a66
Reviewed-on: https://webrtc-review.googlesource.com/102720
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24929}
Author: philipel
Date: 2018-10-02 13:55:47 +02:00
Committed by: Commit Bot
Parent: 3fc5a2087d
Commit: 2837edce99

12 changed files with 103 additions and 62 deletions
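
Before the per-file diffs, a minimal sketch of the kind of send-side consumer this change enables. Nothing below is part of the CL: EncryptFrame, its signature, and the placeholder cipher are purely illustrative; only RtpGenericFrameDescriptor and its GetByteRepresentation() accessor come from the code in this change.

#include <vector>

#include "api/array_view.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"

// Hypothetical E2EE hook: bind the encoded payload to the descriptor's wire
// bytes so that a tampered descriptor makes decryption fail on the far end.
bool EncryptFrame(webrtc::RtpGenericFrameDescriptor& descriptor,
                  rtc::ArrayView<const uint8_t> payload,
                  std::vector<uint8_t>* ciphertext) {
  rtc::ArrayView<const uint8_t> associated_data =
      descriptor.GetByteRepresentation();
  // A real implementation would run an AEAD cipher over |payload| with
  // |associated_data|; the copy below is only a stand-in.
  ciphertext->assign(payload.begin(), payload.end());
  (void)associated_data;
  return true;
}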

View File

@ -119,14 +119,6 @@ bool RtpDepacketizerGeneric::Parse(ParsedPayload* parsed_payload,
parsed_payload->video_header().generic.emplace();
parsed_payload->video_header().generic->frame_id =
((payload_data[0] & 0x7F) << 8) | payload_data[1];
// The old generic format (this format) does not include spatial and
// temporal layer information. To distinguish which format was actually
// used, we set the spatial and temporal layers to -1.
// TODO(bugs.webrtc.org/9772): Remove the old format.
parsed_payload->video_header().generic->spatial_index = -1;
parsed_payload->video_header().generic->temporal_index = -1;
payload_data += kExtendedHeaderLength;
payload_data_length -= kExtendedHeaderLength;
}

View File

@ -19,6 +19,9 @@ constexpr int RtpGenericFrameDescriptor::kMaxTemporalLayers;
constexpr int RtpGenericFrameDescriptor::kMaxSpatialLayers;
RtpGenericFrameDescriptor::RtpGenericFrameDescriptor() = default;
RtpGenericFrameDescriptor::RtpGenericFrameDescriptor(
const RtpGenericFrameDescriptor&) = default;
RtpGenericFrameDescriptor::~RtpGenericFrameDescriptor() = default;
int RtpGenericFrameDescriptor::TemporalLayer() const {
RTC_DCHECK(FirstPacketInSubFrame());
@ -31,6 +34,17 @@ void RtpGenericFrameDescriptor::SetTemporalLayer(int temporal_layer) {
temporal_layer_ = temporal_layer;
}
int RtpGenericFrameDescriptor::SpatialLayer() const {
RTC_DCHECK(FirstPacketInSubFrame());
int layer = 0;
uint8_t spatial_layers = spatial_layers_;
while (spatial_layers != 0 && !(spatial_layers & 1)) {
spatial_layers >>= 1;
layer++;
}
return layer;
}
uint8_t RtpGenericFrameDescriptor::SpatialLayersBitmask() const {
RTC_DCHECK(FirstPacketInSubFrame());
return spatial_layers_;
@ -71,4 +85,15 @@ bool RtpGenericFrameDescriptor::AddFrameDependencyDiff(uint16_t fdiff) {
return true;
}
void RtpGenericFrameDescriptor::SetByteRepresentation(
rtc::ArrayView<const uint8_t> byte_representation) {
byte_representation_.assign(byte_representation.begin(),
byte_representation.end());
}
rtc::ArrayView<const uint8_t>
RtpGenericFrameDescriptor::GetByteRepresentation() {
return byte_representation_;
}
} // namespace webrtc
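
A rough usage sketch of the accessors added above. raw_extension_bytes is a placeholder for the raw header-extension payload, and the RTC_DCHECK_EQ only illustrates the intended semantics of the spatial-layer bitmask.

webrtc::RtpGenericFrameDescriptor descriptor;
descriptor.SetFirstPacketInSubFrame(true);
descriptor.SetSpatialLayersBitmask(0b0100);  // Frame lives on spatial layer 2.
// Keep the exact wire bytes so later stages (e.g. an E2EE decryptor) can see
// precisely what was signaled in the header extension.
// |raw_extension_bytes| is a placeholder rtc::ArrayView<const uint8_t>.
descriptor.SetByteRepresentation(raw_extension_bytes);
RTC_DCHECK_EQ(descriptor.SpatialLayer(), 2);
// |wire_bytes| would then be handed to the E2EE layer.
rtc::ArrayView<const uint8_t> wire_bytes = descriptor.GetByteRepresentation();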

View File

@ -12,6 +12,7 @@
#include <stddef.h>
#include <stdint.h>
#include <vector>
#include "api/array_view.h"
@ -25,6 +26,8 @@ class RtpGenericFrameDescriptor {
static constexpr int kMaxSpatialLayers = 8;
RtpGenericFrameDescriptor();
RtpGenericFrameDescriptor(const RtpGenericFrameDescriptor&);
~RtpGenericFrameDescriptor();
bool FirstPacketInSubFrame() const { return beginning_of_subframe_; }
void SetFirstPacketInSubFrame(bool first) { beginning_of_subframe_ = first; }
@ -41,8 +44,9 @@ class RtpGenericFrameDescriptor {
int TemporalLayer() const;
void SetTemporalLayer(int temporal_layer);
// Frame might be used, possibly indirectly, for spatial layer sid iff
// (bitmask & (1 << sid)) != 0
int SpatialLayer() const;
uint8_t SpatialLayersBitmask() const;
void SetSpatialLayersBitmask(uint8_t spatial_layers);
@ -54,6 +58,9 @@ class RtpGenericFrameDescriptor {
// Returns false on failure, i.e. number of dependencies is too large.
bool AddFrameDependencyDiff(uint16_t fdiff);
void SetByteRepresentation(rtc::ArrayView<const uint8_t> representation);
rtc::ArrayView<const uint8_t> GetByteRepresentation();
private:
bool beginning_of_subframe_ = false;
bool end_of_subframe_ = false;
@ -65,6 +72,7 @@ class RtpGenericFrameDescriptor {
uint8_t temporal_layer_ = 0;
size_t num_frame_deps_ = 0;
uint16_t frame_deps_id_diffs_[kMaxNumFrameDependencies];
std::vector<uint8_t> byte_representation_;
};
} // namespace webrtc

View File

@ -69,6 +69,7 @@ rtc_static_library("packet") {
]
deps = [
"..:module_api",
"../rtp_rtcp:rtp_rtcp_format",
]
}

View File

@ -159,6 +159,15 @@ absl::optional<RTPVideoHeader> RtpFrameObject::GetRtpVideoHeader() const {
return packet->video_header;
}
absl::optional<RtpGenericFrameDescriptor>
RtpFrameObject::GetGenericFrameDescriptor() const {
rtc::CritScope lock(&packet_buffer_->crit_);
VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_);
if (!packet)
return absl::nullopt;
return packet->generic_descriptor;
}
absl::optional<FrameMarking> RtpFrameObject::GetFrameMarking() const {
rtc::CritScope lock(&packet_buffer_->crit_);
VCMPacket* packet = packet_buffer_->GetPacket(first_seq_num_);
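
A hedged sketch of the decryption-side consumer this getter is meant to enable; MaybeDecrypt and the E2EE hook are hypothetical, and only GetGenericFrameDescriptor() and GetByteRepresentation() come from this CL.

void MaybeDecrypt(webrtc::video_coding::RtpFrameObject* frame) {
  absl::optional<webrtc::RtpGenericFrameDescriptor> descriptor =
      frame->GetGenericFrameDescriptor();
  if (!descriptor)
    return;  // No generic descriptor was received for this frame.
  rtc::ArrayView<const uint8_t> wire_bytes =
      descriptor->GetByteRepresentation();
  // ... hand |wire_bytes| and the frame payload to the E2EE decryptor ...
  (void)wire_bytes;
}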

View File

@ -15,6 +15,7 @@
#include "api/video/encoded_frame.h"
#include "common_types.h" // NOLINT(build/include)
#include "modules/include/module_common_types.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
namespace webrtc {
namespace video_coding {
@ -41,6 +42,7 @@ class RtpFrameObject : public EncodedFrame {
int64_t RenderTime() const override;
bool delayed_by_retransmission() const override;
absl::optional<RTPVideoHeader> GetRtpVideoHeader() const;
absl::optional<RtpGenericFrameDescriptor> GetGenericFrameDescriptor() const;
absl::optional<FrameMarking> GetFrameMarking() const;
private:

View File

@ -81,4 +81,6 @@ VCMPacket::VCMPacket(const uint8_t* ptr,
}
}
VCMPacket::~VCMPacket() = default;
} // namespace webrtc

View File

@ -12,6 +12,7 @@
#define MODULES_VIDEO_CODING_PACKET_H_
#include "modules/include/module_common_types.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
namespace webrtc {
@ -22,6 +23,8 @@ class VCMPacket {
const size_t size,
const WebRtcRTPHeader& rtpHeader);
~VCMPacket();
uint8_t payloadType;
uint32_t timestamp;
// NTP time of the capture time in local timebase in milliseconds.
@ -43,6 +46,7 @@ class VCMPacket {
int width;
int height;
RTPVideoHeader video_header;
absl::optional<RtpGenericFrameDescriptor> generic_descriptor;
int64_t receive_time_ms;
};

View File

@ -84,12 +84,10 @@ void RtpFrameReferenceFinder::RetryStashedFrames() {
RtpFrameReferenceFinder::FrameDecision
RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
absl::optional<RTPVideoHeader> video_header = frame->GetRtpVideoHeader();
// TODO(bugs.webrtc.org/9772): Remove the spatial id check when the old
// generic format has been removed.
if (video_header && video_header->generic &&
video_header->generic->spatial_index != -1) {
return ManageFrameGeneric(frame, *video_header->generic);
absl::optional<RtpGenericFrameDescriptor> generic_descriptor =
frame->GetGenericFrameDescriptor();
if (generic_descriptor) {
return ManageFrameGeneric(frame, *generic_descriptor);
}
switch (frame->codec_type()) {
@ -99,6 +97,7 @@ RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) {
return ManageFrameVp9(frame);
default: {
// Use the first 15 bits of the frame ID as the picture ID if available.
absl::optional<RTPVideoHeader> video_header = frame->GetRtpVideoHeader();
int picture_id = kNoPictureId;
if (video_header && video_header->generic)
picture_id = video_header->generic->frame_id & 0x7fff;
@ -171,19 +170,20 @@ void RtpFrameReferenceFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) {
RtpFrameReferenceFinder::FrameDecision
RtpFrameReferenceFinder::ManageFrameGeneric(
RtpFrameObject* frame,
const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
const RtpGenericFrameDescriptor& descriptor) {
int64_t frame_id = generic_frame_id_unwrapper_.Unwrap(descriptor.FrameId());
frame->id.picture_id = frame_id;
frame->id.spatial_layer = descriptor.SpatialLayer();
rtc::ArrayView<const uint16_t> diffs = descriptor.FrameDependenciesDiffs();
if (EncodedFrame::kMaxFrameReferences < diffs.size()) {
RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
return kDrop;
}
int64_t frame_id = generic_frame_id_unwrapper_.Unwrap(descriptor.frame_id);
frame->id.picture_id = frame_id;
frame->id.spatial_layer = descriptor.spatial_index;
frame->num_references = descriptor.dependencies.size();
for (size_t i = 0; i < descriptor.dependencies.size(); ++i)
frame->references[i] = frame_id - descriptor.dependencies[i];
frame->num_references = diffs.size();
for (size_t i = 0; i < diffs.size(); ++i)
frame->references[i] = frame_id - diffs[i];
return kHandOff;
}
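
A concrete illustration of the reference computation above, with made-up values:

// If the unwrapped frame id is 100 and the descriptor carries the dependency
// diffs {1, 3}, the frame ends up with num_references = 2 and references
// {99, 97}, i.e. each diff is subtracted from the frame id.
int64_t frame_id = 100;
const uint16_t diffs[] = {1, 3};
int64_t references[2];
for (size_t i = 0; i < 2; ++i)
  references[i] = frame_id - diffs[i];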

View File

@ -19,6 +19,7 @@
#include <utility>
#include "modules/include/module_common_types.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
#include "rtc_base/criticalsection.h"
#include "rtc_base/numerics/sequence_number_util.h"
#include "rtc_base/thread_annotations.h"
@ -88,9 +89,8 @@ class RtpFrameReferenceFinder {
FrameDecision ManageFrameInternal(RtpFrameObject* frame)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
FrameDecision ManageFrameGeneric(
RtpFrameObject* frame,
const RTPVideoHeader::GenericDescriptorInfo& descriptor)
FrameDecision ManageFrameGeneric(RtpFrameObject* frame,
const RtpGenericFrameDescriptor& descriptor)
RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
// Find references for frames with no or very limited information in the

View File

@ -201,6 +201,15 @@ int32_t RtpVideoStreamReceiver::OnReceivedPayloadData(
const uint8_t* payload_data,
size_t payload_size,
const WebRtcRTPHeader* rtp_header) {
return OnReceivedPayloadData(payload_data, payload_size, rtp_header,
absl::nullopt);
}
int32_t RtpVideoStreamReceiver::OnReceivedPayloadData(
const uint8_t* payload_data,
size_t payload_size,
const WebRtcRTPHeader* rtp_header,
const absl::optional<RtpGenericFrameDescriptor>& generic_descriptor) {
WebRtcRTPHeader rtp_header_with_ntp = *rtp_header;
rtp_header_with_ntp.ntp_time_ms =
ntp_estimator_.Estimate(rtp_header->header.timestamp);
@ -248,6 +257,8 @@ int32_t RtpVideoStreamReceiver::OnReceivedPayloadData(
packet.dataPtr = data;
}
packet.generic_descriptor = generic_descriptor;
packet_buffer_->InsertPacket(&packet);
return 0;
}
@ -462,44 +473,25 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) {
packet.GetExtension<PlayoutDelayLimits>(
&webrtc_rtp_header.video_header().playout_delay);
RtpGenericFrameDescriptor generic_descriptor_wire;
absl::optional<RtpGenericFrameDescriptor> generic_descriptor_wire;
generic_descriptor_wire.emplace();
if (packet.GetExtension<RtpGenericFrameDescriptorExtension>(
&generic_descriptor_wire)) {
&generic_descriptor_wire.value())) {
generic_descriptor_wire->SetByteRepresentation(
packet.GetRawExtension<RtpGenericFrameDescriptorExtension>());
webrtc_rtp_header.video_header().is_first_packet_in_frame =
generic_descriptor_wire.FirstSubFrameInFrame() &&
generic_descriptor_wire.FirstPacketInSubFrame();
generic_descriptor_wire->FirstSubFrameInFrame() &&
generic_descriptor_wire->FirstPacketInSubFrame();
webrtc_rtp_header.video_header().is_last_packet_in_frame =
webrtc_rtp_header.header.markerBit ||
(generic_descriptor_wire.LastSubFrameInFrame() &&
generic_descriptor_wire.LastPacketInSubFrame());
// For now we store the diffs in |generic_descriptor.dependencies|. They
// are later recalculated when the frame id is unwrapped.
// TODO(philipel): Remove RTPVideoHeader::GenericDescriptorInfo and use
// RtpGenericFrameDescriptor instead.
RTPVideoHeader::GenericDescriptorInfo& generic_descriptor =
webrtc_rtp_header.video_header().generic.emplace();
if (generic_descriptor_wire.FirstPacketInSubFrame()) {
generic_descriptor.frame_id = generic_descriptor_wire.FrameId();
for (uint16_t diff : generic_descriptor_wire.FrameDependenciesDiffs()) {
generic_descriptor.dependencies.push_back(diff);
}
generic_descriptor.temporal_index =
generic_descriptor_wire.TemporalLayer();
uint8_t spatial_bitmask = generic_descriptor_wire.SpatialLayersBitmask();
while (spatial_bitmask && !(spatial_bitmask & 1)) {
spatial_bitmask >>= 1;
++generic_descriptor.spatial_index;
}
// Since the receiver doesn't need to know about higher spatial layer
// frames that depend on this frame, we don't parse that information.
}
(generic_descriptor_wire->LastSubFrameInFrame() &&
generic_descriptor_wire->LastPacketInSubFrame());
} else {
generic_descriptor_wire.reset();
}
OnReceivedPayloadData(parsed_payload.payload, parsed_payload.payload_length,
&webrtc_rtp_header);
&webrtc_rtp_header, generic_descriptor_wire);
}
void RtpVideoStreamReceiver::ParseAndHandleEncapsulatingHeader(
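
The optional handling in ReceivePacket() above follows a parse-or-clear pattern; a distilled version, with |packet| standing in for the received RtpPacketReceived:

absl::optional<webrtc::RtpGenericFrameDescriptor> descriptor;
descriptor.emplace();  // Give GetExtension() an object to parse into.
if (packet.GetExtension<webrtc::RtpGenericFrameDescriptorExtension>(
        &descriptor.value())) {
  // Keep the raw wire bytes alongside the parsed fields.
  descriptor->SetByteRepresentation(
      packet.GetRawExtension<webrtc::RtpGenericFrameDescriptorExtension>());
} else {
  descriptor.reset();  // Downstream then sees absl::nullopt.
}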

View File

@ -51,8 +51,7 @@ class RtpPacketReceived;
class Transport;
class UlpfecReceiver;
class RtpVideoStreamReceiver : public RtpData,
public RecoveredPacketReceiver,
class RtpVideoStreamReceiver : public RecoveredPacketReceiver,
public RtpPacketSinkInterface,
public VCMFrameTypeCallback,
public VCMPacketRequestCallback,
@ -95,10 +94,17 @@ class RtpVideoStreamReceiver : public RtpData,
// Implements RtpPacketSinkInterface.
void OnRtpPacket(const RtpPacketReceived& packet) override;
// Implements RtpData.
// TODO(philipel): Stop using VCMPacket in the new jitter buffer and then
// remove this function.
int32_t OnReceivedPayloadData(const uint8_t* payload_data,
size_t payload_size,
const WebRtcRTPHeader* rtp_header) override;
const WebRtcRTPHeader* rtp_header);
int32_t OnReceivedPayloadData(
const uint8_t* payload_data,
size_t payload_size,
const WebRtcRTPHeader* rtp_header,
const absl::optional<RtpGenericFrameDescriptor>& generic_descriptor);
// Implements RecoveredPacketReceiver.
void OnRecoveredPacket(const uint8_t* packet, size_t packet_length) override;