Save unwrapped tl0_pic_idx for stashed VP9 frames.

When stashed frames are retried, their `tl0_pic_idx` is unwrapped again, which can cause the `tl0_unwrapper_` to unwrap the `tl0_pic_idx` of newer frames backwards. Instead, unwrap the `tl0_pic_idx` only once and, if the frame has to be stashed, save the unwrapped value together with it.

In this CL
  - Only unwrap the TL0 once in ManageFrame.
  - Split ManageFrameInternal into ManageFrameFlexible and ManageFrameGof.
  - Save the unwrapped TL0 with the stashed frame.
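To illustrate the problem, here is a minimal, self-contained sketch (not WebRTC code; `ToyTl0Unwrapper` and its shortest-step, ties-forward behavior are an assumption about how the real 8-bit `tl0_unwrapper_` works). Re-unwrapping the stashed frame's older TL0PICIDX after a newer one has already been unwrapped pushes the unwrapper a full wrap ahead, so a genuinely newer frame then ends up with a smaller unwrapped value than the stash:

  #include <cstdint>
  #include <iostream>

  // Toy 8-bit unwrapper: steps to the new value by the shortest wrap-aware
  // distance, resolving ties forward (assumed to mirror `tl0_unwrapper_`).
  class ToyTl0Unwrapper {
   public:
    int64_t Unwrap(uint8_t value) {
      if (!has_last_) {
        has_last_ = true;
        last_unwrapped_ = value;
        return last_unwrapped_;
      }
      const uint8_t last = static_cast<uint8_t>(last_unwrapped_);
      const uint8_t forward = static_cast<uint8_t>(value - last);
      if (forward <= 128) {
        last_unwrapped_ += forward;  // forward (or tie) step
      } else {
        last_unwrapped_ -= static_cast<uint8_t>(last - value);  // backward step
      }
      return last_unwrapped_;
    }

   private:
    bool has_last_ = false;
    int64_t last_unwrapped_ = 0;
  };

  int main() {
    ToyTl0Unwrapper unwrapper;
    std::cout << unwrapper.Unwrap(0) << "\n";    // stashed frame arrives:  0
    std::cout << unwrapper.Unwrap(128) << "\n";  // key frame arrives:      128
    // Old behavior: retrying the stash unwraps its TL0PICIDX a second time.
    std::cout << unwrapper.Unwrap(0) << "\n";    // retried stash:          256
    // A genuinely newer frame now unwraps below the retried stashed frame.
    std::cout << unwrapper.Unwrap(129) << "\n";  // newer frame:            129
  }

With the change in this CL the stashed frame keeps the value it was unwrapped to on arrival (0), so retrying it cannot move the unwrapper, and the ordering of unwrapped values matches the actual frame order.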

Bug: none
Change-Id: I56e6b071c0082682e010c049c537d66060635567
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/253844
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36146}
philipel
2022-03-07 18:12:26 +01:00
committed by WebRTC LUCI CQ
parent 10ab697dcb
commit 773205dfb2
3 changed files with 85 additions and 47 deletions


@@ -16,17 +16,44 @@
#include "rtc_base/logging.h"
namespace webrtc {
RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
std::unique_ptr<RtpFrameObject> frame) {
FrameDecision decision = ManageFrameInternal(frame.get());
const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
frame->GetRtpVideoHeader().video_type_header);
frame->SetSpatialIndex(codec_header.spatial_idx);
frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
FrameDecision decision;
if (codec_header.temporal_idx >= kMaxTemporalLayers ||
codec_header.spatial_idx >= kMaxSpatialLayers) {
decision = kDrop;
} else if (codec_header.flexible_mode) {
decision = ManageFrameFlexible(frame.get(), codec_header);
} else {
if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
"non-flexible mode.";
decision = kDrop;
} else {
int64_t unwrapped_tl0 =
tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0);
if (decision == kStash) {
if (stashed_frames_.size() > kMaxStashedFrames) {
stashed_frames_.pop_back();
}
stashed_frames_.push_front(
{.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)});
}
}
}
RtpFrameReferenceFinder::ReturnVector res;
switch (decision) {
case kStash:
if (stashed_frames_.size() > kMaxStashedFrames)
stashed_frames_.pop_back();
stashed_frames_.push_front(std::move(frame));
return res;
case kHandOff:
res.push_back(std::move(frame));
@@ -39,43 +66,28 @@ RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame(
return res;
}
RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal(
RtpFrameObject* frame) {
const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
const RTPVideoHeaderVP9& codec_header =
absl::get<RTPVideoHeaderVP9>(video_header.video_type_header);
// Protect against corrupted packets with arbitrary large temporal idx.
if (codec_header.temporal_idx >= kMaxTemporalLayers ||
codec_header.spatial_idx >= kMaxSpatialLayers)
return kDrop;
frame->SetSpatialIndex(codec_header.spatial_idx);
frame->SetId(codec_header.picture_id & (kFrameIdLength - 1));
if (codec_header.flexible_mode) {
if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
return kDrop;
}
frame->num_references = codec_header.num_ref_pics;
for (size_t i = 0; i < frame->num_references; ++i) {
frame->references[i] =
Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
}
FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
return kHandOff;
}
if (codec_header.tl0_pic_idx == kNoTl0PicIdx) {
RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in "
"non-flexible mode.";
RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible(
RtpFrameObject* frame,
const RTPVideoHeaderVP9& codec_header) {
if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) {
return kDrop;
}
frame->num_references = codec_header.num_ref_pics;
for (size_t i = 0; i < frame->num_references; ++i) {
frame->references[i] =
Subtract<kFrameIdLength>(frame->Id(), codec_header.pid_diff[i]);
}
FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted);
return kHandOff;
}
RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof(
RtpFrameObject* frame,
const RTPVideoHeaderVP9& codec_header,
int64_t unwrapped_tl0) {
GofInfo* info;
int64_t unwrapped_tl0 =
tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF);
if (codec_header.ss_data_available) {
if (codec_header.temporal_idx != 0) {
RTC_LOG(LS_WARNING) << "Received scalability structure on a non base "
@@ -300,20 +312,23 @@ void RtpVp9RefFinder::RetryStashedFrames(
bool complete_frame = false;
do {
complete_frame = false;
for (auto frame_it = stashed_frames_.begin();
frame_it != stashed_frames_.end();) {
FrameDecision decision = ManageFrameInternal(frame_it->get());
for (auto it = stashed_frames_.begin(); it != stashed_frames_.end();) {
const RTPVideoHeaderVP9& codec_header = absl::get<RTPVideoHeaderVP9>(
it->frame->GetRtpVideoHeader().video_type_header);
RTC_DCHECK(!codec_header.flexible_mode);
FrameDecision decision =
ManageFrameGof(it->frame.get(), codec_header, it->unwrapped_tl0);
switch (decision) {
case kStash:
++frame_it;
++it;
break;
case kHandOff:
complete_frame = true;
res.push_back(std::move(*frame_it));
res.push_back(std::move(it->frame));
[[fallthrough]];
case kDrop:
frame_it = stashed_frames_.erase(frame_it);
it = stashed_frames_.erase(it);
}
}
} while (complete_frame);
@@ -339,7 +354,7 @@ void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame,
void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
auto it = stashed_frames_.begin();
while (it != stashed_frames_.end()) {
if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
if (AheadOf<uint16_t>(seq_num, it->frame->first_seq_num())) {
it = stashed_frames_.erase(it);
} else {
++it;


@@ -48,7 +48,16 @@ class RtpVp9RefFinder {
uint16_t last_picture_id;
};
FrameDecision ManageFrameInternal(RtpFrameObject* frame);
struct UnwrappedTl0Frame {
int64_t unwrapped_tl0;
std::unique_ptr<RtpFrameObject> frame;
};
FrameDecision ManageFrameFlexible(RtpFrameObject* frame,
const RTPVideoHeaderVP9& vp9_header);
FrameDecision ManageFrameGof(RtpFrameObject* frame,
const RTPVideoHeaderVP9& vp9_header,
int64_t unwrapped_tl0);
void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res);
bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info);
@@ -62,7 +71,7 @@ class RtpVp9RefFinder {
// Frames that have been fully received but didn't have all the information
// needed to determine their references.
std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
std::deque<UnwrappedTl0Frame> stashed_frames_;
// Where the current scalability structure is in the
// `scalability_structures_` array.


@@ -23,6 +23,7 @@ using ::testing::Matches;
using ::testing::MatchResultListener;
using ::testing::Pointee;
using ::testing::Property;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAreArray;
namespace webrtc {
@@ -702,4 +703,17 @@ TEST_F(RtpVp9RefFinderTest, SpatialIndex) {
Contains(Pointee(Property(&EncodedFrame::SpatialIndex, 2))));
}
TEST_F(RtpVp9RefFinderTest, StashedFramesDoNotWrapTl0Backwards) {
GofInfoVP9 ss;
ss.SetGofInfoVP9(kTemporalStructureMode1);
Insert(Frame().Pid(0).SidAndTid(0, 0).Tl0(0));
EXPECT_THAT(frames_, SizeIs(0));
Insert(Frame().Pid(128).SidAndTid(0, 0).Tl0(128).AsKeyFrame().Gof(&ss));
EXPECT_THAT(frames_, SizeIs(1));
Insert(Frame().Pid(129).SidAndTid(0, 0).Tl0(129));
EXPECT_THAT(frames_, SizeIs(2));
}
} // namespace webrtc