Make RtpPayloadParams::MinimalisticVp9Structure codec agnostic.

Bug: none
Change-Id: I97f603aad53933b09c761da954130b06ea5a5501
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/230760
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#34894}
2021-09-01 15:21:16 +02:00
parent b8a19df71c
commit 5b231de486
6 changed files with 106 additions and 25 deletions
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@ -308,13 +308,16 @@ void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id,
      rtp_video_header->generic.emplace();

  generic.frame_id = shared_frame_id;
+  generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch);

  if (is_keyframe) {
+    generic.chain_diffs.push_back(0);
    last_shared_frame_id_[0].fill(-1);
  } else {
    int64_t frame_id = last_shared_frame_id_[0][0];
    RTC_DCHECK_NE(frame_id, -1);
    RTC_DCHECK_LT(frame_id, shared_frame_id);
+    generic.chain_diffs.push_back(shared_frame_id - frame_id);
    generic.dependencies.push_back(frame_id);
  }

@ -408,10 +411,10 @@ void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
  }
 }

-FrameDependencyStructure RtpPayloadParams::MinimalisticVp9Structure(
-    const CodecSpecificInfoVP9& vp9) {
-  const int num_spatial_layers = vp9.num_spatial_layers;
-  const int num_temporal_layers = kMaxTemporalStreams;
+FrameDependencyStructure RtpPayloadParams::MinimalisticStructure(
+    int num_spatial_layers,
+    int num_temporal_layers) {
+  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
  FrameDependencyStructure structure;
  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
  structure.num_chains = num_spatial_layers;
@ -423,10 +426,10 @@ FrameDependencyStructure RtpPayloadParams::MinimalisticVp9Structure(
      a_template.temporal_id = tid;
      for (int s = 0; s < num_spatial_layers; ++s) {
        for (int t = 0; t < num_temporal_layers; ++t) {
-          // Prefer kSwitch for indication frame is part of the decode target
-          // because RtpPayloadParams::Vp9ToGeneric uses that indication more
-          // often that kRequired, increasing chance custom dti need not to
-          // use more bits in dependency descriptor on the wire.
+          // Prefer kSwitch indication for frames that are part of the decode
+          // target because dependency descriptor information generated in this
+          // class uses kSwitch indications more often than kRequired,
+          // increasing the chance of a good (or complete) template match.
          a_template.decode_target_indications.push_back(
              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
                                   : DecodeTargetIndication::kNotPresent);
@ -440,9 +443,6 @@ FrameDependencyStructure RtpPayloadParams::MinimalisticVp9Structure(

      structure.decode_target_protected_by_chain.push_back(sid);
    }
-    if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
-      structure.resolutions.emplace_back(vp9.width[sid], vp9.height[sid]);
-    }
  }
  return structure;
 }
--- a/call/rtp_payload_params.h
+++ b/call/rtp_payload_params.h
@ -42,13 +42,16 @@ class RtpPayloadParams final {
                                   const CodecSpecificInfo* codec_specific_info,
                                   int64_t shared_frame_id);

-  // Returns structure that aligns with simulated generic info for VP9.
-  // The templates allow to produce valid dependency descriptor for any vp9
-  // stream with up to 4 temporal layers. The set of the templates is not tuned
-  // for any paricular structure thus dependency descriptor would use more bytes
-  // on the wire than with tuned templates.
-  static FrameDependencyStructure MinimalisticVp9Structure(
-      const CodecSpecificInfoVP9& vp9);
+  // Returns a structure that aligns with simulated generic info. The templates
+  // allow producing a valid dependency descriptor for any stream where
+  // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
+  // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
+  // template_fdiffs()). The set of the templates is not tuned for any
+  // particular structure, thus the dependency descriptor would use more bytes
+  // on the wire than with tuned templates.
+  static FrameDependencyStructure MinimalisticStructure(
+      int num_spatial_layers,
+      int num_temporal_layers);

  uint32_t ssrc() const;

--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@ -33,6 +33,7 @@

 using ::testing::Each;
 using ::testing::ElementsAre;
+using ::testing::Eq;
 using ::testing::IsEmpty;
 using ::testing::SizeIs;

@ -302,7 +303,7 @@ TEST(RtpPayloadParamsTest, PictureIdForOldGenericFormat) {
 }

 TEST(RtpPayloadParamsTest, GenericDescriptorForGenericCodec) {
-  RtpPayloadState state{};
+  RtpPayloadState state;

  EncodedImage encoded_image;
  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
@ -313,16 +314,27 @@ TEST(RtpPayloadParamsTest, GenericDescriptorForGenericCodec) {
  RTPVideoHeader header =
      params.GetRtpVideoHeader(encoded_image, &codec_info, 0);

-  EXPECT_EQ(kVideoCodecGeneric, header.codec);
+  EXPECT_THAT(header.codec, Eq(kVideoCodecGeneric));
+
  ASSERT_TRUE(header.generic);
-  EXPECT_EQ(0, header.generic->frame_id);
+  EXPECT_THAT(header.generic->frame_id, Eq(0));
+  EXPECT_THAT(header.generic->spatial_index, Eq(0));
+  EXPECT_THAT(header.generic->temporal_index, Eq(0));
+  EXPECT_THAT(header.generic->decode_target_indications,
+              ElementsAre(DecodeTargetIndication::kSwitch));
  EXPECT_THAT(header.generic->dependencies, IsEmpty());
+  EXPECT_THAT(header.generic->chain_diffs, ElementsAre(0));

  encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
-  header = params.GetRtpVideoHeader(encoded_image, &codec_info, 1);
+  header = params.GetRtpVideoHeader(encoded_image, &codec_info, 3);
  ASSERT_TRUE(header.generic);
-  EXPECT_EQ(1, header.generic->frame_id);
+  EXPECT_THAT(header.generic->frame_id, Eq(3));
+  EXPECT_THAT(header.generic->spatial_index, Eq(0));
+  EXPECT_THAT(header.generic->temporal_index, Eq(0));
  EXPECT_THAT(header.generic->dependencies, ElementsAre(0));
+  EXPECT_THAT(header.generic->decode_target_indications,
+              ElementsAre(DecodeTargetIndication::kSwitch));
+  EXPECT_THAT(header.generic->chain_diffs, ElementsAre(3));
 }

 TEST(RtpPayloadParamsTest, SetsGenericFromGenericFrameInfo) {
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@ -370,6 +370,9 @@ RtpVideoSender::RtpVideoSender(
      simulate_vp9_structure_(!absl::StartsWith(
          field_trials_.Lookup("WebRTC-Vp9DependencyDescriptor"),
          "Disabled")),
+      simulate_generic_structure_(absl::StartsWith(
+          field_trials_.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
+          "Enabled")),
      active_(false),
      suspended_ssrcs_(std::move(suspended_ssrcs)),
      fec_controller_(std::move(fec_controller)),
@ -575,9 +578,23 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
      sender_video.SetVideoStructure(&*codec_specific_info->template_structure);
    } else if (simulate_vp9_structure_ && codec_specific_info &&
               codec_specific_info->codecType == kVideoCodecVP9) {
+      const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
+
      FrameDependencyStructure structure =
-          RtpPayloadParams::MinimalisticVp9Structure(
-              codec_specific_info->codecSpecific.VP9);
+          RtpPayloadParams::MinimalisticStructure(vp9.num_spatial_layers,
+                                                  kMaxTemporalStreams);
+      if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
+        for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
+          structure.resolutions.emplace_back(vp9.width[i], vp9.height[i]);
+        }
+      }
+      sender_video.SetVideoStructure(&structure);
+    } else if (simulate_generic_structure_ && codec_specific_info &&
+               codec_specific_info->codecType == kVideoCodecGeneric) {
+      FrameDependencyStructure structure =
+          RtpPayloadParams::MinimalisticStructure(
+              /*num_spatial_layers=*/1,
+              /*num_temporal_layers=*/1);
      sender_video.SetVideoStructure(&structure);
    } else {
      sender_video.SetVideoStructure(nullptr);
--- a/call/rtp_video_sender.h
+++ b/call/rtp_video_sender.h
@ -169,6 +169,7 @@ class RtpVideoSender : public RtpVideoSenderInterface,
  const bool use_frame_rate_for_overhead_;
  const bool has_packet_feedback_;
  const bool simulate_vp9_structure_;
+  const bool simulate_generic_structure_;

  // TODO(holmer): Remove mutex_ once RtpVideoSender runs on the
  // transport task queue.
--- a/call/rtp_video_sender_unittest.cc
+++ b/call/rtp_video_sender_unittest.cc
@ -824,6 +824,54 @@ TEST(RtpVideoSenderTest,
  EXPECT_TRUE(sent_packets[1].HasExtension<RtpDependencyDescriptorExtension>());
 }

+TEST(RtpVideoSenderTest, GenerateDependecyDescriptorForGenericCodecs) {
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-GenericCodecDependencyDescriptor/Enabled/");
+  RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
+  test.router()->SetActive(true);
+
+  RtpHeaderExtensionMap extensions;
+  extensions.Register<RtpDependencyDescriptorExtension>(
+      kDependencyDescriptorExtensionId);
+  std::vector<RtpPacket> sent_packets;
+  ON_CALL(test.transport(), SendRtp)
+      .WillByDefault([&](const uint8_t* packet, size_t length,
+                         const PacketOptions& options) {
+        sent_packets.emplace_back(&extensions);
+        EXPECT_TRUE(sent_packets.back().Parse(packet, length));
+        return true;
+      });
+
+  const uint8_t kPayload[1] = {'a'};
+  EncodedImage encoded_image;
+  encoded_image.SetTimestamp(1);
+  encoded_image.capture_time_ms_ = 2;
+  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
+  encoded_image._encodedWidth = 320;
+  encoded_image._encodedHeight = 180;
+  encoded_image.SetEncodedData(
+      EncodedImageBuffer::Create(kPayload, sizeof(kPayload)));
+
+  CodecSpecificInfo codec_specific;
+  codec_specific.codecType = VideoCodecType::kVideoCodecGeneric;
+  codec_specific.end_of_picture = true;
+
+  // Send two tiny images, each mapping to a single RTP packet.
+  EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
+            EncodedImageCallback::Result::OK);
+
+  // Send in 2nd picture.
+  encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
+  encoded_image.SetTimestamp(3000);
+  EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
+            EncodedImageCallback::Result::OK);
+
+  test.AdvanceTime(TimeDelta::Millis(33));
+  ASSERT_THAT(sent_packets, SizeIs(2));
+  EXPECT_TRUE(sent_packets[0].HasExtension<RtpDependencyDescriptorExtension>());
+  EXPECT_TRUE(sent_packets[1].HasExtension<RtpDependencyDescriptorExtension>());
+}
+
 TEST(RtpVideoSenderTest, SupportsStoppingUsingDependencyDescriptor) {
  RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
  test.router()->SetActive(true);