Add plumbing of RtpPacketInfos to each VideoFrame as input for SourceTracker.

This change adds plumbing of RtpPacketInfo from RtpVideoStreamReceiver::OnRtpPacket() to VideoReceiveStream::OnFrame() for video. It is a step towards replacing the non-spec-compliant ContributingSources, which updates itself at packet-receive time, with the spec-compliant SourceTracker, which will update itself at frame-delivery-to-track time.
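
The sketch below is illustrative only: it uses minimal stand-in types that borrow the same names to show the intended packet-to-frame-to-tracker hand-off. It is not the actual WebRTC implementation, whose classes carry considerably more state.

// Minimal, self-contained sketch of the flow this change enables. The types
// below are simplified stand-ins, NOT the real WebRTC classes; only the
// packet -> frame -> SourceTracker hand-off mirrors the actual code.
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

struct RtpPacketInfo {  // stand-in; the real class also carries csrcs, audio level, etc.
  uint32_t ssrc;
  int64_t receive_time_ms;
};
using RtpPacketInfos = std::vector<RtpPacketInfo>;  // stand-in for the shared, ref-counted wrapper

struct VideoFrame {  // stand-in; the real class gained set_packet_infos()/packet_infos()
  RtpPacketInfos packet_infos;
};

class SourceTracker {  // stand-in; updated at frame-delivery-to-track time
 public:
  void OnFrameDelivered(const RtpPacketInfos& infos) {
    for (const RtpPacketInfo& info : infos) {
      std::cout << "ssrc " << info.ssrc << " seen at " << info.receive_time_ms << " ms\n";
    }
  }
};

int main() {
  // 1) Packet-receive time: each RTP packet yields an RtpPacketInfo.
  RtpPacketInfos infos = {{0x1234, 100}, {0x1234, 105}};

  // 2) The infos travel with the frame assembled from those packets.
  VideoFrame frame;
  frame.packet_infos = std::move(infos);

  // 3) Frame-delivery time: the SourceTracker is fed from the delivered frame,
  //    which is when the spec says source information should be updated.
  SourceTracker tracker;
  tracker.OnFrameDelivered(frame.packet_infos);
  return 0;
}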

Bug: webrtc:10668
Change-Id: Ib97d430530c5a8487d3b129936c7c51e118889bd
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/139891
Reviewed-by: Stefan Holmer <stefan@webrtc.org>
Reviewed-by: Niels Moller <nisse@webrtc.org>
Commit-Queue: Chen Xing <chxg@google.com>
Cr-Commit-Position: refs/heads/master@{#28332}
Commit: f00bf42d1c
Parent: 7953ad5dab
Author: Chen Xing
Date: 2019-06-20 10:05:55 +02:00
Committed by: Commit Bot
30 changed files with 187 additions and 29 deletions

View File

@@ -262,6 +262,7 @@ void MultiplexDecoderAdapter::MergeAlphaImages(
           .set_timestamp_us(0)
           .set_rotation(decoded_image->rotation())
           .set_id(decoded_image->id())
+          .set_packet_infos(decoded_image->packet_infos())
           .build();
   decoded_complete_callback_->Decoded(merged_image, decode_time_ms, qp);
 }

View File

@@ -205,6 +205,7 @@ int MultiplexEncoderAdapter::Encode(
           .set_timestamp_ms(input_image.render_time_ms())
           .set_rotation(input_image.rotation())
           .set_id(input_image.id())
+          .set_packet_infos(input_image.packet_infos())
           .build();
   rv = encoders_[kAXXStream]->Encode(alpha_image, &adjusted_frame_types);
   return rv;

View File

@@ -54,9 +54,11 @@ class VCMEncodedFrame : protected EncodedImage {
   using EncodedImage::ColorSpace;
   using EncodedImage::data;
+  using EncodedImage::PacketInfos;
   using EncodedImage::set_size;
   using EncodedImage::SetColorSpace;
   using EncodedImage::SetEncodedData;
+  using EncodedImage::SetPacketInfos;
   using EncodedImage::SetSpatialIndex;
   using EncodedImage::SetSpatialLayerFrameSize;
   using EncodedImage::SetTimestamp;

View File

@@ -11,6 +11,7 @@
 #include "modules/video_coding/frame_object.h"

 #include <string.h>
+#include <utility>

 #include "api/video/encoded_image.h"
 #include "api/video/video_timing.h"
@@ -28,7 +29,8 @@ RtpFrameObject::RtpFrameObject(PacketBuffer* packet_buffer,
                                size_t frame_size,
                                int times_nacked,
                                int64_t first_packet_received_time,
-                               int64_t last_packet_received_time)
+                               int64_t last_packet_received_time,
+                               RtpPacketInfos packet_infos)
     : packet_buffer_(packet_buffer),
       first_seq_num_(first_seq_num),
       last_seq_num_(last_seq_num),
@@ -63,6 +65,7 @@ RtpFrameObject::RtpFrameObject(PacketBuffer* packet_buffer,
   // EncodedFrame members
   SetTimestamp(first_packet->timestamp);
+  SetPacketInfos(std::move(packet_infos));

   VCMPacket* last_packet = packet_buffer_->GetPacket(last_seq_num);
   RTC_CHECK(last_packet);

View File

@@ -29,7 +29,8 @@ class RtpFrameObject : public EncodedFrame {
                  size_t frame_size,
                  int times_nacked,
                  int64_t first_packet_received_time,
-                 int64_t last_packet_received_time);
+                 int64_t last_packet_received_time,
+                 RtpPacketInfos packet_infos);

   ~RtpFrameObject() override;
   uint16_t first_seq_num() const;

View File

@@ -84,6 +84,7 @@ void VCMDecodedFrameCallback::Decoded(VideoFrame& decodedImage,
   if (frameInfo->color_space) {
     decodedImage.set_color_space(frameInfo->color_space);
   }
+  decodedImage.set_packet_infos(frameInfo->packet_infos);
   decodedImage.set_rotation(frameInfo->rotation);

   const int64_t now_ms = _clock->TimeInMilliseconds();
@@ -211,6 +212,7 @@ int32_t VCMGenericDecoder::Decode(const VCMEncodedFrame& frame, int64_t nowMs) {
   } else {
     _frameInfos[_nextFrameInfoIdx].color_space = absl::nullopt;
   }
+  _frameInfos[_nextFrameInfoIdx].packet_infos = frame.PacketInfos();

   // Set correctly only for key frames. Thus, use latest key frame
   // content type. If the corresponding key frame was lost, decode will fail

View File

@@ -36,6 +36,7 @@ struct VCMFrameInformation {
   EncodedImage::Timing timing;
   int64_t ntp_time_ms;
   absl::optional<ColorSpace> color_space;
+  RtpPacketInfos packet_infos;
 };

 class VCMDecodedFrameCallback : public DecodedImageCallback {

View File

@@ -122,5 +122,31 @@ TEST_F(GenericDecoderTest, PassesColorSpaceForDelayedDecoders) {
   EXPECT_EQ(*decoded_color_space, color_space);
 }

+TEST_F(GenericDecoderTest, PassesPacketInfos) {
+  RtpPacketInfos packet_infos = CreatePacketInfos(3);
+  VCMEncodedFrame encoded_frame;
+  encoded_frame.SetPacketInfos(packet_infos);
+  generic_decoder_.Decode(encoded_frame, clock_.TimeInMilliseconds());
+  absl::optional<VideoFrame> decoded_frame = user_callback_.WaitForFrame(10);
+  ASSERT_TRUE(decoded_frame.has_value());
+  EXPECT_EQ(decoded_frame->packet_infos().size(), 3U);
+}
+
+TEST_F(GenericDecoderTest, PassesPacketInfosForDelayedDecoders) {
+  RtpPacketInfos packet_infos = CreatePacketInfos(3);
+  decoder_.SetDelayedDecoding(100);
+
+  {
+    // Ensure the original frame is destroyed before the decoding is completed.
+    VCMEncodedFrame encoded_frame;
+    encoded_frame.SetPacketInfos(packet_infos);
+    generic_decoder_.Decode(encoded_frame, clock_.TimeInMilliseconds());
+  }
+
+  absl::optional<VideoFrame> decoded_frame = user_callback_.WaitForFrame(200);
+  ASSERT_TRUE(decoded_frame.has_value());
+  EXPECT_EQ(decoded_frame->packet_infos().size(), 3U);
+}
+
 }  // namespace video_coding
 }  // namespace webrtc

View File

@@ -67,7 +67,8 @@ class TestBasicJitterBuffer : public ::testing::Test {
     video_header.is_first_packet_in_frame = true;
     video_header.frame_type = VideoFrameType::kVideoFrameDelta;
     packet_.reset(new VCMPacket(data_, size_, rtp_header, video_header,
-                                /*ntp_time_ms=*/0));
+                                /*ntp_time_ms=*/0,
+                                clock_->TimeInMilliseconds()));
   }

   VCMEncodedFrame* DecodeCompleteFrame() {
@@ -542,7 +543,7 @@ TEST_F(TestBasicJitterBuffer, TestReorderingWithPadding) {
   video_header.codec = kVideoCodecGeneric;
   video_header.frame_type = VideoFrameType::kEmptyFrame;
   VCMPacket empty_packet(data_, 0, rtp_header, video_header,
-                         /*ntp_time_ms=*/0);
+                         /*ntp_time_ms=*/0, clock_->TimeInMilliseconds());
   EXPECT_EQ(kOldPacket,
             jitter_buffer_->InsertPacket(empty_packet, &retransmitted));
   empty_packet.seqNum += 1;

View File

@@ -25,8 +25,7 @@ VCMPacket::VCMPacket()
       timesNacked(-1),
       completeNALU(kNaluUnset),
       insertStartCode(false),
-      video_header(),
-      receive_time_ms(0) {
+      video_header() {
   video_header.playout_delay = {-1, -1};
 }
@@ -34,7 +33,8 @@ VCMPacket::VCMPacket(const uint8_t* ptr,
                      size_t size,
                      const RTPHeader& rtp_header,
                      const RTPVideoHeader& videoHeader,
-                     int64_t ntp_time_ms)
+                     int64_t ntp_time_ms,
+                     int64_t receive_time_ms)
     : payloadType(rtp_header.payloadType),
       timestamp(rtp_header.timestamp),
      ntp_time_ms_(ntp_time_ms),
@@ -46,7 +46,8 @@ VCMPacket::VCMPacket(const uint8_t* ptr,
       completeNALU(kNaluIncomplete),
       insertStartCode(videoHeader.codec == kVideoCodecH264 &&
                       videoHeader.is_first_packet_in_frame),
-      video_header(videoHeader) {
+      video_header(videoHeader),
+      packet_info(rtp_header, receive_time_ms) {
   if (is_first_packet_in_frame() && markerBit) {
     completeNALU = kNaluComplete;
   } else if (is_first_packet_in_frame()) {

View File

@@ -16,6 +16,7 @@
 #include "absl/types/optional.h"
 #include "api/rtp_headers.h"
+#include "api/rtp_packet_info.h"
 #include "api/video/video_frame_type.h"
 #include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
 #include "modules/rtp_rtcp/source/rtp_video_header.h"
@@ -39,7 +40,8 @@ class VCMPacket {
             size_t size,
             const RTPHeader& rtp_header,
             const RTPVideoHeader& video_header,
-            int64_t ntp_time_ms);
+            int64_t ntp_time_ms,
+            int64_t receive_time_ms);

   ~VCMPacket();
@@ -70,7 +72,7 @@ class VCMPacket {
   RTPVideoHeader video_header;
   absl::optional<RtpGenericFrameDescriptor> generic_descriptor;
-  int64_t receive_time_ms;
+  RtpPacketInfo packet_info;
 };

 }  // namespace webrtc

View File

@@ -286,8 +286,9 @@ std::vector<std::unique_ptr<RtpFrameObject>> PacketBuffer::FindFrames(
       size_t frame_size = 0;
       int max_nack_count = -1;
       uint16_t start_seq_num = seq_num;
-      int64_t min_recv_time = data_buffer_[index].receive_time_ms;
-      int64_t max_recv_time = data_buffer_[index].receive_time_ms;
+      int64_t min_recv_time = data_buffer_[index].packet_info.receive_time_ms();
+      int64_t max_recv_time = data_buffer_[index].packet_info.receive_time_ms();
+      RtpPacketInfos::vector_type packet_infos;

       // Find the start index by searching backward until the packet with
       // the |frame_begin| flag is set.
@@ -310,9 +311,16 @@ std::vector<std::unique_ptr<RtpFrameObject>> PacketBuffer::FindFrames(
         sequence_buffer_[start_index].frame_created = true;

         min_recv_time =
-            std::min(min_recv_time, data_buffer_[start_index].receive_time_ms);
+            std::min(min_recv_time,
+                     data_buffer_[start_index].packet_info.receive_time_ms());
         max_recv_time =
-            std::max(max_recv_time, data_buffer_[start_index].receive_time_ms);
+            std::max(max_recv_time,
+                     data_buffer_[start_index].packet_info.receive_time_ms());
+
+        // Should use |push_front()| since the loop traverses backwards. But
+        // it's too inefficient to do so on a vector so we'll instead fix the
+        // order afterwards.
+        packet_infos.push_back(data_buffer_[start_index].packet_info);

         if (!is_h264 && sequence_buffer_[start_index].frame_begin)
           break;
@@ -359,6 +367,9 @@ std::vector<std::unique_ptr<RtpFrameObject>> PacketBuffer::FindFrames(
         --start_seq_num;
       }

+      // Fix the order since the packet-finding loop traverses backwards.
+      std::reverse(packet_infos.begin(), packet_infos.end());
+
       if (is_h264) {
         // Warn if this is an unsafe frame.
         if (has_h264_idr && (!has_h264_sps || !has_h264_pps)) {
@@ -406,7 +417,8 @@ std::vector<std::unique_ptr<RtpFrameObject>> PacketBuffer::FindFrames(
       found_frames.emplace_back(
           new RtpFrameObject(this, start_seq_num, seq_num, frame_size,
-                             max_nack_count, min_recv_time, max_recv_time));
+                             max_nack_count, min_recv_time, max_recv_time,
+                             RtpPacketInfos(std::move(packet_infos))));
     }

     ++seq_num;
   }

View File

@@ -92,7 +92,7 @@ class TestRtpFrameReferenceFinder : public ::testing::Test,
     ref_packet_buffer_->InsertPacket(&packet);

     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
@@ -126,7 +126,7 @@ class TestRtpFrameReferenceFinder : public ::testing::Test,
     }

     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
@@ -172,7 +172,7 @@ class TestRtpFrameReferenceFinder : public ::testing::Test,
     }

     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
@@ -213,7 +213,7 @@ class TestRtpFrameReferenceFinder : public ::testing::Test,
     }

     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }
@@ -243,7 +243,7 @@ class TestRtpFrameReferenceFinder : public ::testing::Test,
     }

     std::unique_ptr<RtpFrameObject> frame(new RtpFrameObject(
-        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0));
+        ref_packet_buffer_, seq_num_start, seq_num_end, 0, 0, 0, 0, {}));
     reference_finder_->ManageFrame(std::move(frame));
   }

View File

@@ -338,7 +338,8 @@ int32_t VideoReceiver::IncomingPacket(const uint8_t* incomingPayload,
   }
   // Callers don't provide any ntp time.
   const VCMPacket packet(incomingPayload, payloadLength, rtp_header,
-                         video_header, /*ntp_time_ms=*/0);
+                         video_header, /*ntp_time_ms=*/0,
+                         clock_->TimeInMilliseconds());
   int32_t ret = _receiver.InsertPacket(packet);
   // TODO(holmer): Investigate if this somehow should use the key frame