[DVQA] Add support for frames without frame id

Bug: b/234176678 Change-Id: Ibbd82e3341d7b4034173e6e5ada882e079449f8e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/264552 Commit-Queue: Artem Titov <titovartem@webrtc.org> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Reviewed-by: Rasmus Brandt <brandtr@webrtc.org> Cr-Commit-Position: refs/heads/main@{#37077}
2022-06-01 13:42:16 +02:00
parent 2a30693718
commit cff66f537c
11 changed files with 114 additions and 39 deletions
--- a/api/video/video_frame.h
+++ b/api/video/video_frame.h
@ -29,6 +29,9 @@ namespace webrtc {

 class RTC_EXPORT VideoFrame {
 public:
+  // Value used to signal that `VideoFrame::id()` is not set.
+  static constexpr uint16_t kNotSetId = 0;
+
  struct RTC_EXPORT UpdateRect {
    int offset_x;
    int offset_y;
@ -99,7 +102,7 @@ class RTC_EXPORT VideoFrame {
    Builder& set_packet_infos(RtpPacketInfos packet_infos);

   private:
-    uint16_t id_ = 0;
+    uint16_t id_ = kNotSetId;
    rtc::scoped_refptr<webrtc::VideoFrameBuffer> video_frame_buffer_;
    int64_t timestamp_us_ = 0;
    uint32_t timestamp_rtp_ = 0;
@ -134,12 +137,12 @@ class RTC_EXPORT VideoFrame {
  // Get frame size in pixels.
  uint32_t size() const;

-  // Get frame ID. Returns 0 if ID is not set. Not guaranteed to be transferred
-  // from the sender to the receiver, but preserved on the sender side. The id
-  // should be propagated between all frame modifications during its lifetime
-  // from capturing to sending as encoded image. It is intended to be unique
-  // over a time window of a few minutes for the peer connection to which the
-  // corresponding video stream belongs to.
+  // Get frame ID. Returns `kNotSetId` if ID is not set. Not guaranteed to be
+  // transferred from the sender to the receiver, but preserved on the sender
+  // side. The id should be propagated between all frame modifications during
+  // its lifetime from capturing to sending as encoded image. It is intended to
+  // be unique over a time window of a few minutes for the peer connection to
+  // which the corresponding video stream belongs to.
  uint16_t id() const { return id_; }
  void set_id(uint16_t id) { id_ = id; }

--- a/test/pc/e2e/BUILD.gn
+++ b/test/pc/e2e/BUILD.gn
@ -74,6 +74,7 @@ if (!build_with_chromium) {
    sources = [ "analyzer/video/encoded_image_data_injector.h" ]

    deps = [ "../../../api/video:encoded_image" ]
+    absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
  }

  rtc_library("single_process_encoded_image_data_injector") {
@ -673,6 +674,7 @@ if (!build_with_chromium) {
      "../../../rtc_base:checks",
      "../../../rtc_base:criticalsection",
      "../../../rtc_base:logging",
+      "../../../rtc_base:macromagic",
      "../../../rtc_base:platform_thread",
      "../../../rtc_base:rtc_event",
      "../../../rtc_base:rtc_numerics",
--- a/test/pc/e2e/analyzer/video/default_video_quality_analyzer.cc
+++ b/test/pc/e2e/analyzer/video/default_video_quality_analyzer.cc
@ -18,6 +18,7 @@
 #include "api/numerics/samples_stats_counter.h"
 #include "api/units/time_delta.h"
 #include "api/video/i420_buffer.h"
+#include "api/video/video_frame.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
 #include "rtc_base/logging.h"
 #include "rtc_base/platform_thread.h"
@ -155,14 +156,15 @@ uint16_t DefaultVideoQualityAnalyzer::OnFrameCaptured(
    const std::string& stream_label,
    const webrtc::VideoFrame& frame) {
  // `next_frame_id` is atomic, so we needn't lock here.
-  uint16_t frame_id = next_frame_id_++;
  Timestamp captured_time = Now();
  Timestamp start_time = Timestamp::MinusInfinity();
  size_t peer_index = -1;
  size_t peers_count = -1;
  size_t stream_index;
+  uint16_t frame_id = VideoFrame::kNotSetId;
  {
    MutexLock lock(&mutex_);
+    frame_id = GetNextFrameId();
    RTC_CHECK_EQ(state_, State::kActive)
        << "DefaultVideoQualityAnalyzer has to be started before use";
    // Create a local copy of `start_time_`, peer's index and total peers count
@ -338,6 +340,12 @@ void DefaultVideoQualityAnalyzer::OnFramePreDecode(

  size_t peer_index = peers_->index(peer_name);

+  if (frame_id == VideoFrame::kNotSetId) {
+    frame_counters_.received++;
+    unknown_sender_frame_counters_[std::string(peer_name)].received++;
+    return;
+  }
+
  auto it = captured_frames_in_flight_.find(frame_id);
  if (it == captured_frames_in_flight_.end() ||
      it->second.HasReceivedTime(peer_index)) {
@ -379,6 +387,12 @@ void DefaultVideoQualityAnalyzer::OnFrameDecoded(

  size_t peer_index = peers_->index(peer_name);

+  if (frame.id() == VideoFrame::kNotSetId) {
+    frame_counters_.decoded++;
+    unknown_sender_frame_counters_[std::string(peer_name)].decoded++;
+    return;
+  }
+
  auto it = captured_frames_in_flight_.find(frame.id());
  if (it == captured_frames_in_flight_.end() ||
      it->second.HasDecodeEndTime(peer_index)) {
@ -412,6 +426,12 @@ void DefaultVideoQualityAnalyzer::OnFrameRendered(

  size_t peer_index = peers_->index(peer_name);

+  if (frame.id() == VideoFrame::kNotSetId) {
+    frame_counters_.rendered++;
+    unknown_sender_frame_counters_[std::string(peer_name)].rendered++;
+    return;
+  }
+
  auto frame_it = captured_frames_in_flight_.find(frame.id());
  if (frame_it == captured_frames_in_flight_.end() ||
      frame_it->second.HasRenderedTime(peer_index) ||
@ -719,6 +739,12 @@ FrameCounters DefaultVideoQualityAnalyzer::GetGlobalCounters() const {
  return frame_counters_;
 }

+std::map<std::string, FrameCounters>
+DefaultVideoQualityAnalyzer::GetUnknownSenderFrameCounters() const {
+  MutexLock lock(&mutex_);
+  return unknown_sender_frame_counters_;
+}
+
 std::map<StatsKey, FrameCounters>
 DefaultVideoQualityAnalyzer::GetPerStreamCounters() const {
  MutexLock lock(&mutex_);
@ -743,6 +769,14 @@ AnalyzerStats DefaultVideoQualityAnalyzer::GetAnalyzerStats() const {
  return analyzer_stats_;
 }

+uint16_t DefaultVideoQualityAnalyzer::GetNextFrameId() {
+  uint16_t frame_id = next_frame_id_++;
+  if (next_frame_id_ == VideoFrame::kNotSetId) {
+    next_frame_id_ = 1;
+  }
+  return frame_id;
+}
+
 void DefaultVideoQualityAnalyzer::ReportResults() {
  using ::webrtc::test::ImproveDirection;

@ -754,10 +788,19 @@ void DefaultVideoQualityAnalyzer::ReportResults() {
  test::PrintResult("cpu_usage", "", test_label_.c_str(), GetCpuUsagePercent(),
                    "%", false, ImproveDirection::kSmallerIsBetter);
  LogFrameCounters("Global", frame_counters_);
-  for (auto& item : frames_comparator_.stream_stats()) {
-    LogFrameCounters(ToStatsKey(item.first).ToString(),
-                     stream_frame_counters_.at(item.first));
-    LogStreamInternalStats(ToStatsKey(item.first).ToString(), item.second,
+  if (!unknown_sender_frame_counters_.empty()) {
+    RTC_LOG(LS_INFO) << "Received frame counters with unknown frame id:";
+    for (const auto& [peer_name, frame_counters] :
+         unknown_sender_frame_counters_) {
+      LogFrameCounters(peer_name, frame_counters);
+    }
+  }
+  RTC_LOG(LS_INFO) << "Received frame counters per stream:";
+  for (const auto& [stats_key, stream_stats] :
+       frames_comparator_.stream_stats()) {
+    LogFrameCounters(ToStatsKey(stats_key).ToString(),
+                     stream_frame_counters_.at(stats_key));
+    LogStreamInternalStats(ToStatsKey(stats_key).ToString(), stream_stats,
                           start_time_);
  }
  if (!analyzer_stats_.comparisons_queue_size.IsEmpty()) {
--- a/test/pc/e2e/analyzer/video/default_video_quality_analyzer.h
+++ b/test/pc/e2e/analyzer/video/default_video_quality_analyzer.h
@ -12,6 +12,7 @@
 #define TEST_PC_E2E_ANALYZER_VIDEO_DEFAULT_VIDEO_QUALITY_ANALYZER_H_

 #include <atomic>
+#include <cstdint>
 #include <deque>
 #include <map>
 #include <memory>
@ -30,6 +31,7 @@
 #include "rtc_base/event.h"
 #include "rtc_base/platform_thread.h"
 #include "rtc_base/synchronization/mutex.h"
+#include "rtc_base/thread_annotations.h"
 #include "system_wrappers/include/clock.h"
 #include "test/pc/e2e/analyzer/video/default_video_quality_analyzer_cpu_measurer.h"
 #include "test/pc/e2e/analyzer/video/default_video_quality_analyzer_frames_comparator.h"
@ -86,6 +88,8 @@ class DefaultVideoQualityAnalyzer : public VideoQualityAnalyzerInterface {
  std::set<StatsKey> GetKnownVideoStreams() const;
  VideoStreamsInfo GetKnownStreams() const;
  FrameCounters GetGlobalCounters() const;
+  // Returns frame counter for frames received without frame id set.
+  std::map<std::string, FrameCounters> GetUnknownSenderFrameCounters() const;
  // Returns frame counter per stream label. Valid stream labels can be obtained
  // by calling GetKnownVideoStreams()
  std::map<StatsKey, FrameCounters> GetPerStreamCounters() const;
@ -314,6 +318,10 @@ class DefaultVideoQualityAnalyzer : public VideoQualityAnalyzerInterface {
    std::map<absl::string_view, size_t> index_;
  };

+  // Returns next frame id to use. Frame ID can't be `VideoFrame::kNotSetId`,
+  // because this value is reserved by `VideoFrame` as "ID not set".
+  uint16_t GetNextFrameId() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
  // Report results for all metrics for all streams.
  void ReportResults();
  void ReportResults(const std::string& test_case_name,
@ -337,13 +345,15 @@ class DefaultVideoQualityAnalyzer : public VideoQualityAnalyzerInterface {
  std::string ToMetricName(const InternalStatsKey& key) const
      RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);

+  static const uint16_t kStartingFrameId = 1;
+
  const DefaultVideoQualityAnalyzerOptions options_;
  webrtc::Clock* const clock_;
-  std::atomic<uint16_t> next_frame_id_{0};

  std::string test_label_;

  mutable Mutex mutex_;
+  uint16_t next_frame_id_ RTC_GUARDED_BY(mutex_) = kStartingFrameId;
  std::unique_ptr<NamesCollection> peers_ RTC_GUARDED_BY(mutex_);
  State state_ RTC_GUARDED_BY(mutex_) = State::kNew;
  Timestamp start_time_ RTC_GUARDED_BY(mutex_) = Timestamp::MinusInfinity();
@ -365,6 +375,10 @@ class DefaultVideoQualityAnalyzer : public VideoQualityAnalyzerInterface {
      RTC_GUARDED_BY(mutex_);
  // Global frames count for all video streams.
  FrameCounters frame_counters_ RTC_GUARDED_BY(mutex_);
+  // Frame counters for received frames without video frame id set.
+  // Map from peer name to the frame counters.
+  std::map<std::string, FrameCounters> unknown_sender_frame_counters_
+      RTC_GUARDED_BY(mutex_);
  // Frame counters per each stream per each receiver.
  std::map<InternalStatsKey, FrameCounters> stream_frame_counters_
      RTC_GUARDED_BY(mutex_);
--- a/test/pc/e2e/analyzer/video/encoded_image_data_injector.h
+++ b/test/pc/e2e/analyzer/video/encoded_image_data_injector.h
@ -14,6 +14,7 @@
 #include <cstdint>
 #include <utility>

+#include "absl/types/optional.h"
 #include "api/video/encoded_image.h"

 namespace webrtc {
@ -34,7 +35,7 @@ class EncodedImageDataInjector {
 };

 struct EncodedImageExtractionResult {
-  uint16_t id;
+  absl::optional<uint16_t> id;
  EncodedImage image;
  // Is true if encoded image should be discarded. It is used to filter out
  // unnecessary spatial layers and simulcast streams.
--- a/test/pc/e2e/analyzer/video/quality_analyzing_video_decoder.cc
+++ b/test/pc/e2e/analyzer/video/quality_analyzing_video_decoder.cc
@ -18,6 +18,7 @@
 #include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "api/video/i420_buffer.h"
+#include "api/video/video_frame.h"
 #include "modules/video_coding/include/video_error_codes.h"
 #include "rtc_base/logging.h"
 #include "test/pc/e2e/analyzer/video/simulcast_dummy_buffer_helper.h"
@ -75,7 +76,7 @@ int32_t QualityAnalyzingVideoDecoder::Decode(const EncodedImage& input_image,
    //
    // For more details see QualityAnalyzingVideoEncoder.
    return analyzing_callback_->IrrelevantSimulcastStreamDecoded(
-        out.id, input_image.Timestamp());
+        out.id.value_or(VideoFrame::kNotSetId), input_image.Timestamp());
  }

  EncodedImage* origin_image;
@ -86,12 +87,15 @@ int32_t QualityAnalyzingVideoDecoder::Decode(const EncodedImage& input_image,
    // Store encoded image to prevent its destruction while it is used in
    // decoder.
    origin_image = &(
-        decoding_images_.insert({out.id, std::move(out.image)}).first->second);
+        decoding_images_.insert({input_image.Timestamp(), std::move(out.image)})
+            .first->second);
  }
  // We can safely dereference `origin_image`, because it can be removed from
-  // the map only after `delegate_` Decode method will be invoked. Image will be
-  // removed inside DecodedImageCallback, which can be done on separate thread.
-  analyzer_->OnFramePreDecode(peer_name_, out.id, *origin_image);
+  // the map only after `delegate_` Decode method will be invoked. Image will
+  // be removed inside DecodedImageCallback, which can be done on separate
+  // thread.
+  analyzer_->OnFramePreDecode(
+      peer_name_, out.id.value_or(VideoFrame::kNotSetId), *origin_image);
  int32_t result =
      delegate_->Decode(*origin_image, missing_frames, render_time_ms);
  if (result != WEBRTC_VIDEO_CODEC_OK) {
@ -99,9 +103,10 @@ int32_t QualityAnalyzingVideoDecoder::Decode(const EncodedImage& input_image,
    {
      MutexLock lock(&mutex_);
      timestamp_to_frame_id_.erase(input_image.Timestamp());
-      decoding_images_.erase(out.id);
+      decoding_images_.erase(input_image.Timestamp());
    }
-    analyzer_->OnDecoderError(peer_name_, out.id, result);
+    analyzer_->OnDecoderError(peer_name_,
+                              out.id.value_or(VideoFrame::kNotSetId), result);
  }
  return result;
 }
@ -208,14 +213,14 @@ void QualityAnalyzingVideoDecoder::OnFrameDecoded(
    VideoFrame* frame,
    absl::optional<int32_t> decode_time_ms,
    absl::optional<uint8_t> qp) {
-  uint16_t frame_id;
+  absl::optional<uint16_t> frame_id;
  std::string codec_name;
  {
    MutexLock lock(&mutex_);
    auto it = timestamp_to_frame_id_.find(frame->timestamp());
    if (it == timestamp_to_frame_id_.end()) {
      // Ensure, that we have info about this frame. It can happen that for some
-      // reasons decoder response, that he failed to decode, when we were
+      // reasons decoder response, that it failed to decode, when we were
      // posting frame to it, but then call the callback for this frame.
      RTC_LOG(LS_ERROR) << "QualityAnalyzingVideoDecoder::OnFrameDecoded: No "
                           "frame id for frame for frame->timestamp()="
@ -224,12 +229,12 @@ void QualityAnalyzingVideoDecoder::OnFrameDecoded(
    }
    frame_id = it->second;
    timestamp_to_frame_id_.erase(it);
-    decoding_images_.erase(frame_id);
+    decoding_images_.erase(frame->timestamp());
    codec_name = codec_name_;
  }
  // Set frame id to the value, that was extracted from corresponding encoded
  // image.
-  frame->set_id(frame_id);
+  frame->set_id(frame_id.value_or(VideoFrame::kNotSetId));
  VideoQualityAnalyzerInterface::DecoderStats stats;
  stats.decoder_name = codec_name;
  stats.decode_time_ms = decode_time_ms;
--- a/test/pc/e2e/analyzer/video/quality_analyzing_video_decoder.h
+++ b/test/pc/e2e/analyzer/video/quality_analyzing_video_decoder.h
@ -17,6 +17,7 @@
 #include <vector>

 #include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
 #include "api/test/video_quality_analyzer_interface.h"
 #include "api/video/encoded_image.h"
 #include "api/video/video_frame.h"
@ -113,12 +114,13 @@ class QualityAnalyzingVideoDecoder : public VideoDecoder {

  // Name of the video codec type used. Ex: VP8, VP9, H264 etc.
  std::string codec_name_ RTC_GUARDED_BY(mutex_);
-  std::map<uint32_t, uint16_t> timestamp_to_frame_id_ RTC_GUARDED_BY(mutex_);
-  // Stores currently being decoded images by frame id. Because
+  std::map<uint32_t, absl::optional<uint16_t>> timestamp_to_frame_id_
+      RTC_GUARDED_BY(mutex_);
+  // Stores currently being decoded images by timestamp. Because
  // EncodedImageDataExtractor can create new copy on EncodedImage we need to
  // ensure, that this image won't be deleted during async decoding. To do it
  // all images are putted into this map and removed from here inside callback.
-  std::map<uint16_t, EncodedImage> decoding_images_ RTC_GUARDED_BY(mutex_);
+  std::map<uint32_t, EncodedImage> decoding_images_ RTC_GUARDED_BY(mutex_);
 };

 // Produces QualityAnalyzingVideoDecoder, which hold decoders, produced by
--- a/test/pc/e2e/analyzer/video/single_process_encoded_image_data_injector.h
+++ b/test/pc/e2e/analyzer/video/single_process_encoded_image_data_injector.h
@ -37,6 +37,8 @@ namespace webrtc_pc_e2e {
 // This injector won't add any extra overhead into EncodedImage payload and
 // support frames with any size of payload. Also assumes that every EncodedImage
 // payload size is greater or equals to 3 bytes
+//
+// This injector doesn't support video frames/encoded images without frame ID.
 class SingleProcessEncodedImageDataInjector
    : public EncodedImageDataPropagator {
 public:
--- a/test/pc/e2e/analyzer/video/video_frame_tracking_id_injector.cc
+++ b/test/pc/e2e/analyzer/video/video_frame_tracking_id_injector.cc
@ -29,8 +29,8 @@ EncodedImage VideoFrameTrackingIdInjector::InjectData(

 EncodedImageExtractionResult VideoFrameTrackingIdInjector::ExtractData(
    const EncodedImage& source) {
-  return EncodedImageExtractionResult{source.VideoFrameTrackingId().value_or(0),
-                                      source, /*discard=*/false};
+  return EncodedImageExtractionResult{source.VideoFrameTrackingId(), source,
+                                      /*discard=*/false};
 }

 }  // namespace webrtc_pc_e2e
--- a/test/pc/e2e/analyzer/video/video_frame_tracking_id_injector_unittest.cc
+++ b/test/pc/e2e/analyzer/video/video_frame_tracking_id_injector_unittest.cc
@ -34,7 +34,8 @@ TEST(VideoFrameTrackingIdInjectorTest, InjectExtractDiscardFalse) {
  EncodedImageExtractionResult out =
      injector.ExtractData(injector.InjectData(512, false, source));

-  EXPECT_EQ(out.id, 512);
+  ASSERT_TRUE(out.id.has_value());
+  EXPECT_EQ(*out.id, 512);
  EXPECT_FALSE(out.discard);
  EXPECT_EQ(out.image.size(), 10ul);
  for (int i = 0; i < 10; ++i) {
--- a/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.cc
+++ b/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.cc
@ -212,6 +212,7 @@ void VideoQualityAnalyzerInjectionHelper::OnFrame(absl::string_view peer_name,
  frame_copy.set_video_frame_buffer(I420Buffer::Copy(*i420_buffer));
  analyzer_->OnFrameRendered(peer_name, frame_copy);

+  if (frame.id() != VideoFrame::kNotSetId) {
    std::string stream_label = analyzer_->GetStreamLabel(frame.id());
    std::vector<std::unique_ptr<rtc::VideoSinkInterface<VideoFrame>>>* sinks =
        PopulateSinks(ReceiverStream(peer_name, stream_label));
@ -222,6 +223,7 @@ void VideoQualityAnalyzerInjectionHelper::OnFrame(absl::string_view peer_name,
      sink->OnFrame(frame);
    }
  }
+}

 std::vector<std::unique_ptr<rtc::VideoSinkInterface<VideoFrame>>>*
 VideoQualityAnalyzerInjectionHelper::PopulateSinks(