in RtpSenderVideo add support for writing DependencyDescriptor header extension

Bug: webrtc:10342
Change-Id: I12cca9c5e1606338bb914e58e13d268bbc6961f9
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/166532
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30427}
This commit is contained in:
Danil Chapovalov
2020-01-30 16:28:53 +01:00
committed by Commit Bot
parent 95cb56bd89
commit 670af2692e
5 changed files with 270 additions and 25 deletions

View File

@ -251,6 +251,7 @@ rtc_library("rtp_rtcp") {
"../../api/rtc_event_log",
"../../api/transport:field_trial_based_config",
"../../api/transport:webrtc_key_value_config",
"../../api/transport/rtp:dependency_descriptor",
"../../api/transport/rtp:rtp_source",
"../../api/units:data_rate",
"../../api/units:time_delta",
@ -332,6 +333,7 @@ rtc_library("rtp_video_header") {
]
deps = [
"../../:webrtc_common",
"../../api/transport/rtp:dependency_descriptor",
"../../api/video:video_frame",
"../../api/video:video_frame_type",
"../../api/video:video_rtp_headers",
@ -508,6 +510,7 @@ if (rtc_include_tests) {
"../../api:transport_api",
"../../api/rtc_event_log",
"../../api/transport:field_trial_based_config",
"../../api/transport/rtp:dependency_descriptor",
"../../api/units:timestamp",
"../../api/video:encoded_image",
"../../api/video:video_bitrate_allocation",
@ -518,6 +521,7 @@ if (rtc_include_tests) {
"../../api/video_codecs:video_codecs_api",
"../../call:rtp_receiver",
"../../common_video",
"../../common_video/generic_frame_descriptor",
"../../common_video/test:utilities",
"../../logging:mocks",
"../../rtc_base:checks",

View File

@ -18,12 +18,15 @@
#include <string>
#include <utility>
#include "absl/memory/memory.h"
#include "absl/strings/match.h"
#include "api/crypto/frame_encryptor_interface.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "modules/remote_bitrate_estimator/test/bwe_test_logging.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
#include "modules/rtp_rtcp/source/byte_io.h"
#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
#include "modules/rtp_rtcp/source/rtp_format.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
#include "modules/rtp_rtcp/source/rtp_header_extensions.h"
@ -72,6 +75,7 @@ void AddRtpHeaderExtensions(
const RTPVideoHeader& video_header,
const absl::optional<PlayoutDelay>& playout_delay,
const absl::optional<AbsoluteCaptureTime>& absolute_capture_time,
FrameDependencyStructure* video_structure,
bool set_video_rotation,
bool set_color_space,
bool set_frame_marking,
@ -115,6 +119,41 @@ void AddRtpHeaderExtensions(
}
if (video_header.generic) {
bool extension_is_set = false;
if (video_structure != nullptr) {
DependencyDescriptor descriptor;
descriptor.first_packet_in_frame = first_packet;
descriptor.last_packet_in_frame = last_packet;
descriptor.frame_number = video_header.generic->frame_id & 0xFFFF;
descriptor.frame_dependencies.spatial_id =
video_header.generic->spatial_index;
descriptor.frame_dependencies.temporal_id =
video_header.generic->temporal_index;
for (int64_t dep : video_header.generic->dependencies) {
descriptor.frame_dependencies.frame_diffs.push_back(
video_header.generic->frame_id - dep);
}
descriptor.frame_dependencies.decode_target_indications =
video_header.generic->decode_target_indications;
RTC_DCHECK_EQ(
descriptor.frame_dependencies.decode_target_indications.size(),
video_structure->num_decode_targets);
// To avoid an extra structure copy, temporarily share ownership of the
// video_structure with the dependency descriptor.
if (video_header.frame_type == VideoFrameType::kVideoFrameKey &&
first_packet) {
descriptor.attached_structure = absl::WrapUnique(video_structure);
}
extension_is_set = packet->SetExtension<RtpDependencyDescriptorExtension>(
*video_structure, descriptor);
// Remove the temporary shared ownership.
descriptor.attached_structure.release();
}
// Do not use v0/v1 generic frame descriptor when v2 is stored.
if (!extension_is_set) {
RtpGenericFrameDescriptor generic_descriptor;
generic_descriptor.SetFirstPacketInSubFrame(first_packet);
generic_descriptor.SetLastPacketInSubFrame(last_packet);
@ -131,7 +170,8 @@ void AddRtpHeaderExtensions(
uint8_t spatial_bimask = 1 << video_header.generic->spatial_index;
generic_descriptor.SetSpatialLayersBitmask(spatial_bimask);
generic_descriptor.SetTemporalLayer(video_header.generic->temporal_index);
generic_descriptor.SetTemporalLayer(
video_header.generic->temporal_index);
if (video_header.frame_type == VideoFrameType::kVideoFrameKey) {
generic_descriptor.SetResolution(video_header.width,
@ -146,6 +186,7 @@ void AddRtpHeaderExtensions(
}
}
}
}
bool MinimizeDescriptor(RTPVideoHeader* video_header) {
if (auto* vp8 =
@ -417,6 +458,38 @@ absl::optional<uint32_t> RTPSenderVideo::FlexfecSsrc() const {
return absl::nullopt;
}
// Stores a private copy of `video_structure` in `video_structure_`, or drops
// the stored copy when `video_structure` is nullptr.
//
// If a structure is already stored and compares equal to the new one, nothing
// changes. Otherwise the new copy receives a structure_id that starts right
// after the templates of the previously stored structure (wrapping at
// kMaxTemplates); the first structure ever stored gets structure_id 0.
void RTPSenderVideo::SetVideoStructure(
    const FrameDependencyStructure* video_structure) {
  RTC_DCHECK_RUNS_SERIALIZED(&send_checker_);
  if (!video_structure) {
    video_structure_.reset();
    return;
  }

  // Minimal sanity checks that the caller filled the structure in.
  RTC_DCHECK_GT(video_structure->num_decode_targets, 0);
  RTC_DCHECK_GT(video_structure->templates.size(), 0);

  int next_structure_id = 0;
  if (video_structure_ != nullptr) {
    const bool unchanged = (*video_structure_ == *video_structure);
    if (unchanged) {
      // Same structure announced again (just a new key frame): keep the
      // stored copy untouched.
      return;
    }
    // A different structure must not reuse the template ids of the previous
    // one, so shift its id past the previous structure's templates.
    static constexpr int kMaxTemplates = 64;
    next_structure_id =
        (video_structure_->structure_id + video_structure_->templates.size()) %
        kMaxTemplates;
  }

  auto structure_copy =
      std::make_unique<FrameDependencyStructure>(*video_structure);
  structure_copy->structure_id = next_structure_id;
  // TODO(bugs.webrtc.org/10342): Support chains.
  structure_copy->num_chains = 0;
  video_structure_ = std::move(structure_copy);
}
bool RTPSenderVideo::SendVideo(
int payload_type,
absl::optional<VideoCodecType> codec_type,
@ -523,16 +596,20 @@ bool RTPSenderVideo::SendVideo(
auto last_packet = std::make_unique<RtpPacketToSend>(*single_packet);
// Simplest way to estimate how much extensions would occupy is to set them.
AddRtpHeaderExtensions(video_header, playout_delay, absolute_capture_time,
set_video_rotation, set_color_space, set_frame_marking,
video_structure_.get(), set_video_rotation,
set_color_space, set_frame_marking,
/*first=*/true, /*last=*/true, single_packet.get());
AddRtpHeaderExtensions(video_header, playout_delay, absolute_capture_time,
set_video_rotation, set_color_space, set_frame_marking,
video_structure_.get(), set_video_rotation,
set_color_space, set_frame_marking,
/*first=*/true, /*last=*/false, first_packet.get());
AddRtpHeaderExtensions(video_header, playout_delay, absolute_capture_time,
set_video_rotation, set_color_space, set_frame_marking,
video_structure_.get(), set_video_rotation,
set_color_space, set_frame_marking,
/*first=*/false, /*last=*/false, middle_packet.get());
AddRtpHeaderExtensions(video_header, playout_delay, absolute_capture_time,
set_video_rotation, set_color_space, set_frame_marking,
video_structure_.get(), set_video_rotation,
set_color_space, set_frame_marking,
/*first=*/false, /*last=*/true, last_packet.get());
RTC_DCHECK_GT(packet_capacity, single_packet->headers_size());

View File

@ -18,6 +18,7 @@
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_frame_type.h"
#include "modules/include/module_common_types.h"
@ -103,6 +104,13 @@ class RTPSenderVideo {
const RTPFragmentationHeader* fragmentation,
RTPVideoHeader video_header,
absl::optional<int64_t> expected_retransmission_time_ms);
// Configures the video structure produced by the encoder that is sent using
// the dependency descriptor rtp header extension. Passing nullptr clears the
// previously configured structure. The next call to SendVideo should
// have video_header.frame_type == kVideoFrameKey.
// All calls to SendVideo after this call must use a video_header compatible
// with the video_structure.
void SetVideoStructure(const FrameDependencyStructure* video_structure);
// FlexFEC/ULPFEC.
// Set FEC rates, max frames before FEC is sent, and type of FEC masks.
// Returns false on failure.
@ -184,6 +192,8 @@ class RTPSenderVideo {
VideoRotation last_rotation_ RTC_GUARDED_BY(send_checker_);
absl::optional<ColorSpace> last_color_space_ RTC_GUARDED_BY(send_checker_);
bool transmit_color_space_next_frame_ RTC_GUARDED_BY(send_checker_);
std::unique_ptr<FrameDependencyStructure> video_structure_
RTC_GUARDED_BY(send_checker_);
// Tracks the current request for playout delay limits from application
// and decides whether the current RTP frame should include the playout

View File

@ -10,15 +10,20 @@
#include "modules/rtp_rtcp/source/rtp_sender_video.h"
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/video_codec_constants.h"
#include "api/video/video_timing.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/rtp_rtcp/include/rtp_cvo.h"
#include "modules/rtp_rtcp/include/rtp_header_extension_map.h"
#include "modules/rtp_rtcp/include/rtp_rtcp.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
#include "modules/rtp_rtcp/source/rtp_format_video_generic.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
@ -35,12 +40,15 @@ namespace webrtc {
namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::SizeIs;
enum : int { // The first valid value is 1.
kAbsoluteSendTimeExtensionId = 1,
kFrameMarkingExtensionId,
kGenericDescriptorId00,
kGenericDescriptorId01,
kGenericDescriptorId02,
kTransmissionTimeOffsetExtensionId,
kTransportSequenceNumberExtensionId,
kVideoRotationExtensionId,
@ -73,6 +81,8 @@ class LoopbackTransportTest : public webrtc::Transport {
kGenericDescriptorId00);
receivers_extensions_.Register<RtpGenericFrameDescriptorExtension01>(
kGenericDescriptorId01);
receivers_extensions_.Register<RtpDependencyDescriptorExtension>(
kGenericDescriptorId02);
receivers_extensions_.Register<FrameMarkingExtension>(
kFrameMarkingExtensionId);
receivers_extensions_.Register<AbsoluteCaptureTimeExtension>(
@ -522,6 +532,148 @@ TEST_P(RtpSenderVideoTest, ConditionalRetransmitLimit) {
EXPECT_TRUE(rtp_sender_video_.AllowRetransmission(header, kSettings, kRttMs));
}
// Checks that after SetVideoStructure() the sender writes the dependency
// descriptor extension: the key frame packet carries the attached structure,
// the following delta frame packet does not, and both report the frame
// number, layer ids, decode target indications and frame diffs taken from
// the generic part of the video header.
TEST_P(RtpSenderVideoTest, SendsDependencyDescriptorWhenVideoStructureIsSet) {
// Does not fit into 16 bits, so the frame_number expectations below compare
// against the value masked with 0xFFFF.
const int64_t kFrameId = 100000;
// Payload buffer; its contents are never inspected by this test.
uint8_t kFrame[100];
rtp_module_->RegisterRtpHeaderExtension(
RtpDependencyDescriptorExtension::kUri, kGenericDescriptorId02);
// Structure with two decode targets and three frame templates.
FrameDependencyStructure video_structure;
video_structure.num_decode_targets = 2;
video_structure.templates = {
GenericFrameInfo::Builder().S(0).T(0).Dtis("SS").Build(),
GenericFrameInfo::Builder().S(1).T(0).Dtis("-S").Build(),
GenericFrameInfo::Builder().S(1).T(1).Dtis("-D").Build(),
};
rtp_sender_video_.SetVideoStructure(&video_structure);
// Send key frame.
RTPVideoHeader hdr;
RTPVideoHeader::GenericDescriptorInfo& generic = hdr.generic.emplace();
generic.frame_id = kFrameId;
generic.temporal_index = 0;
generic.spatial_index = 0;
generic.decode_target_indications = {DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch};
hdr.frame_type = VideoFrameType::kVideoFrameKey;
rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
hdr, kDefaultExpectedRetransmissionTimeMs);
ASSERT_EQ(transport_.packets_sent(), 1);
// The key frame packet is parsed without any previously known structure
// (nullptr); the structure is expected to come attached to the descriptor.
DependencyDescriptor descriptor_key;
ASSERT_TRUE(transport_.last_sent_packet()
.GetExtension<RtpDependencyDescriptorExtension>(
nullptr, &descriptor_key));
ASSERT_TRUE(descriptor_key.attached_structure);
EXPECT_EQ(descriptor_key.attached_structure->num_decode_targets, 2);
EXPECT_THAT(descriptor_key.attached_structure->templates, SizeIs(3));
EXPECT_EQ(descriptor_key.frame_number, kFrameId & 0xFFFF);
EXPECT_EQ(descriptor_key.frame_dependencies.spatial_id, 0);
EXPECT_EQ(descriptor_key.frame_dependencies.temporal_id, 0);
EXPECT_EQ(descriptor_key.frame_dependencies.decode_target_indications,
generic.decode_target_indications);
EXPECT_THAT(descriptor_key.frame_dependencies.frame_diffs, IsEmpty());
// Send delta frame.
generic.frame_id = kFrameId + 1;
generic.temporal_index = 1;
generic.spatial_index = 1;
// Dependencies are given as absolute frame ids; the descriptor is expected
// to report them as differences from the current frame id (1 and 501).
generic.dependencies = {kFrameId, kFrameId - 500};
generic.decode_target_indications = {DecodeTargetIndication::kNotPresent,
DecodeTargetIndication::kRequired};
hdr.frame_type = VideoFrameType::kVideoFrameDelta;
rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
hdr, kDefaultExpectedRetransmissionTimeMs);
EXPECT_EQ(transport_.packets_sent(), 2);
// The delta frame packet is parsed using the structure received with the
// key frame and must not carry a structure of its own.
DependencyDescriptor descriptor_delta;
ASSERT_TRUE(
transport_.last_sent_packet()
.GetExtension<RtpDependencyDescriptorExtension>(
descriptor_key.attached_structure.get(), &descriptor_delta));
EXPECT_EQ(descriptor_delta.attached_structure, nullptr);
EXPECT_EQ(descriptor_delta.frame_number, (kFrameId + 1) & 0xFFFF);
EXPECT_EQ(descriptor_delta.frame_dependencies.spatial_id, 1);
EXPECT_EQ(descriptor_delta.frame_dependencies.temporal_id, 1);
EXPECT_EQ(descriptor_delta.frame_dependencies.decode_target_indications,
generic.decode_target_indications);
EXPECT_THAT(descriptor_delta.frame_dependencies.frame_diffs,
ElementsAre(1, 501));
}
// Checks that switching to a different video structure between two key frames
// makes a delta frame packet sent under the first structure parseable only
// with the first structure and not with the second one.
TEST_P(RtpSenderVideoTest,
SetDiffentVideoStructureAvoidsCollisionWithThePreviousStructure) {
const int64_t kFrameId = 100000;
// Payload buffer; its contents are never inspected by this test.
uint8_t kFrame[100];
rtp_module_->RegisterRtpHeaderExtension(
RtpDependencyDescriptorExtension::kUri, kGenericDescriptorId02);
// The two structures have the same number of decode targets and templates
// and differ only in the decode target indications of the second template.
FrameDependencyStructure video_structure1;
video_structure1.num_decode_targets = 2;
video_structure1.templates = {
GenericFrameInfo::Builder().S(0).T(0).Dtis("SS").Build(),
GenericFrameInfo::Builder().S(0).T(1).Dtis("D-").Build(),
};
FrameDependencyStructure video_structure2;
video_structure2.num_decode_targets = 2;
video_structure2.templates = {
GenericFrameInfo::Builder().S(0).T(0).Dtis("SS").Build(),
GenericFrameInfo::Builder().S(0).T(1).Dtis("R-").Build(),
};
// Send 1st key frame.
RTPVideoHeader hdr;
RTPVideoHeader::GenericDescriptorInfo& generic = hdr.generic.emplace();
generic.frame_id = kFrameId;
generic.decode_target_indications = {DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch};
hdr.frame_type = VideoFrameType::kVideoFrameKey;
rtp_sender_video_.SetVideoStructure(&video_structure1);
rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
hdr, kDefaultExpectedRetransmissionTimeMs);
// Parse 1st extension.
ASSERT_EQ(transport_.packets_sent(), 1);
DependencyDescriptor descriptor_key1;
ASSERT_TRUE(transport_.last_sent_packet()
.GetExtension<RtpDependencyDescriptorExtension>(
nullptr, &descriptor_key1));
ASSERT_TRUE(descriptor_key1.attached_structure);
// Send the delta frame.
generic.frame_id = kFrameId + 1;
generic.temporal_index = 1;
generic.decode_target_indications = {DecodeTargetIndication::kDiscardable,
DecodeTargetIndication::kNotPresent};
hdr.frame_type = VideoFrameType::kVideoFrameDelta;
rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
hdr, kDefaultExpectedRetransmissionTimeMs);
ASSERT_EQ(transport_.packets_sent(), 2);
// Keep a copy of the delta packet: last_sent_packet() will refer to the 2nd
// key frame after the next SendVideo call.
RtpPacket delta_packet = transport_.last_sent_packet();
// Send 2nd key frame.
generic.frame_id = kFrameId + 2;
generic.decode_target_indications = {DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch};
hdr.frame_type = VideoFrameType::kVideoFrameKey;
rtp_sender_video_.SetVideoStructure(&video_structure2);
rtp_sender_video_.SendVideo(kPayload, kType, kTimestamp, 0, kFrame, nullptr,
hdr, kDefaultExpectedRetransmissionTimeMs);
// Parse the 2nd key frame.
ASSERT_EQ(transport_.packets_sent(), 3);
DependencyDescriptor descriptor_key2;
ASSERT_TRUE(transport_.last_sent_packet()
.GetExtension<RtpDependencyDescriptorExtension>(
nullptr, &descriptor_key2));
ASSERT_TRUE(descriptor_key2.attached_structure);
// Try to parse the 1st delta frame. It should be parseable using the
// structure from the 1st key frame, but not using the structure from the 2nd
// key frame.
// NOTE(review): presumably the second parse fails because the 2nd structure
// was assigned a shifted structure_id by SetVideoStructure - confirm against
// the extension parser.
DependencyDescriptor descriptor_delta;
EXPECT_TRUE(delta_packet.GetExtension<RtpDependencyDescriptorExtension>(
descriptor_key1.attached_structure.get(), &descriptor_delta));
EXPECT_FALSE(delta_packet.GetExtension<RtpDependencyDescriptorExtension>(
descriptor_key2.attached_structure.get(), &descriptor_delta));
}
void RtpSenderVideoTest::PopulateGenericFrameDescriptor(int version) {
const absl::string_view ext_uri =
(version == 0) ? RtpGenericFrameDescriptorExtension00::kUri

View File

@ -15,6 +15,7 @@
#include "absl/container/inlined_vector.h"
#include "absl/types/optional.h"
#include "absl/types/variant.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/color_space.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_content_type.h"
@ -50,6 +51,7 @@ struct RTPVideoHeader {
int64_t frame_id = 0;
int spatial_index = 0;
int temporal_index = 0;
absl::InlinedVector<DecodeTargetIndication, 10> decode_target_indications;
absl::InlinedVector<int64_t, 5> dependencies;
bool discardable = false;
};