Communicate encoder resolutions via rtc::VideoSinkWants.

This will allow us to optimize the internal buffers of
webrtc::VideoFrame for the resolution(s) that we actually want to
encode.

Bug: webrtc:12469, chromium:1157072
Change-Id: If378b52b5e35aa9a9800c1f7dfe189437ce43253
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/208540
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33342}
This commit is contained in:
Henrik Boström
2021-02-25 10:30:39 +01:00
committed by Commit Bot
parent bb52bdf095
commit 1124ed1ab2
6 changed files with 208 additions and 5 deletions

View File

@ -12,6 +12,7 @@
#define API_VIDEO_VIDEO_SOURCE_INTERFACE_H_
#include <limits>
#include <vector>
#include "absl/types/optional.h"
#include "api/video/video_sink_interface.h"
@ -22,6 +23,15 @@ namespace rtc {
// VideoSinkWants is used for notifying the source of properties a video frame
// should have when it is delivered to a certain sink.
struct RTC_EXPORT VideoSinkWants {
// A single sink resolution (in pixels) advertised via VideoSinkWants.
struct FrameSize {
  FrameSize(int w, int h) : width(w), height(h) {}
  FrameSize(const FrameSize&) = default;
  ~FrameSize() = default;
  int width;
  int height;
};
VideoSinkWants();
VideoSinkWants(const VideoSinkWants&);
~VideoSinkWants();
@ -49,8 +59,34 @@ struct RTC_EXPORT VideoSinkWants {
// Note that this field is unrelated to any horizontal or vertical stride
// requirements the encoder has on the incoming video frame buffers.
int resolution_alignment = 1;
// The resolutions that sink is configured to consume. If the sink is an
// encoder this is what the encoder is configured to encode. In singlecast we
// only encode one resolution, but in simulcast and SVC this can mean multiple
// resolutions per frame.
//
// The sink is always configured to consume a subset of the
// webrtc::VideoFrame's resolution. In the case of encoding, we usually encode
// at webrtc::VideoFrame's resolution but this may not always be the case due
// to scaleResolutionDownBy or turning off simulcast or SVC layers.
//
// For example, we may capture at 720p and due to adaptation (e.g. applying
// |max_pixel_count| constraints) create webrtc::VideoFrames of size 480p, but
// if we do scaleResolutionDownBy:2 then the only resolution we end up
// encoding is 240p. In this case we still need to provide webrtc::VideoFrames
// of size 480p but we can optimize internal buffers for 240p, avoiding
// downsampling to 480p if possible.
//
// Note that the |resolutions| can change while frames are in flight and
// should only be used as a hint when constructing the webrtc::VideoFrame.
std::vector<FrameSize> resolutions;
};
// Two frame sizes are equal iff both dimensions match.
inline bool operator==(const VideoSinkWants::FrameSize& a,
                       const VideoSinkWants::FrameSize& b) {
  return a.width == b.width && a.height == b.height;
}

// Matching inequality operator: pre-C++20 the compiler does not synthesize
// operator!= from operator==, so provide it explicitly for symmetric use.
inline bool operator!=(const VideoSinkWants::FrameSize& a,
                       const VideoSinkWants::FrameSize& b) {
  return !(a == b);
}
template <typename VideoFrameT>
class VideoSourceInterface {
public:

View File

@ -561,6 +561,18 @@ TEST_F(CallPerfTest, ReceivesCpuOveruseAndUnderuse) {
// TODO(sprang): Add integration test for maintain-framerate mode?
void OnSinkWantsChanged(rtc::VideoSinkInterface<VideoFrame>* sink,
const rtc::VideoSinkWants& wants) override {
// The sink wants can change either because an adaptation happened (i.e.
// the pixels or frame rate changed) or for other reasons, such as encoded
// resolutions being communicated (happens whenever we capture a new frame
// size). In this test, we only care about adaptations.
bool did_adapt =
last_wants_.max_pixel_count != wants.max_pixel_count ||
last_wants_.target_pixel_count != wants.target_pixel_count ||
last_wants_.max_framerate_fps != wants.max_framerate_fps;
last_wants_ = wants;
if (!did_adapt) {
return;
}
// At kStart expect CPU overuse. Then expect CPU underuse when the encoder
// delay has been decreased.
switch (test_phase_) {
@ -625,6 +637,9 @@ TEST_F(CallPerfTest, ReceivesCpuOveruseAndUnderuse) {
kAdaptedDown,
kAdaptedUp
} test_phase_;
private:
rtc::VideoSinkWants last_wants_;
} test;
RunBaseTest(&test);

View File

@ -29,7 +29,14 @@ std::string WantsToString(const rtc::VideoSinkWants& wants) {
<< " max_pixel_count=" << wants.max_pixel_count << " target_pixel_count="
<< (wants.target_pixel_count.has_value()
? std::to_string(wants.target_pixel_count.value())
: "null");
: "null")
<< " resolutions={";
for (size_t i = 0; i < wants.resolutions.size(); ++i) {
if (i != 0)
ss << ",";
ss << wants.resolutions[i].width << "x" << wants.resolutions[i].height;
}
ss << "}";
return ss.Release();
}
@ -104,6 +111,12 @@ int VideoSourceSinkController::resolution_alignment() const {
return resolution_alignment_;
}
// Returns the encoder resolutions most recently stored via SetResolutions().
// Must run on the controller's sequence (checked by RTC_DCHECK_RUN_ON); the
// returned reference is only valid while on that sequence.
const std::vector<rtc::VideoSinkWants::FrameSize>&
VideoSourceSinkController::resolutions() const {
RTC_DCHECK_RUN_ON(&sequence_checker_);
return resolutions_;
}
void VideoSourceSinkController::SetRestrictions(
VideoSourceRestrictions restrictions) {
RTC_DCHECK_RUN_ON(&sequence_checker_);
@ -133,6 +146,12 @@ void VideoSourceSinkController::SetResolutionAlignment(
resolution_alignment_ = resolution_alignment;
}
// Stores the resolutions the sink (e.g. the encoder) is configured to
// consume. Only updates internal state; the new value reaches the source's
// VideoSinkWants when PushSourceSinkSettings() is subsequently called.
// Must run on the controller's sequence.
void VideoSourceSinkController::SetResolutions(
std::vector<rtc::VideoSinkWants::FrameSize> resolutions) {
RTC_DCHECK_RUN_ON(&sequence_checker_);
// Sink parameter taken by value so the caller's vector can be moved in.
resolutions_ = std::move(resolutions);
}
// RTC_EXCLUSIVE_LOCKS_REQUIRED(sequence_checker_)
rtc::VideoSinkWants VideoSourceSinkController::CurrentSettingsToSinkWants()
const {
@ -161,6 +180,7 @@ rtc::VideoSinkWants VideoSourceSinkController::CurrentSettingsToSinkWants()
frame_rate_upper_limit_.has_value()
? static_cast<int>(frame_rate_upper_limit_.value())
: std::numeric_limits<int>::max());
wants.resolutions = resolutions_;
return wants;
}

View File

@ -12,6 +12,7 @@
#define VIDEO_VIDEO_SOURCE_SINK_CONTROLLER_H_
#include <string>
#include <vector>
#include "absl/types/optional.h"
#include "api/sequence_checker.h"
@ -46,6 +47,7 @@ class VideoSourceSinkController {
absl::optional<double> frame_rate_upper_limit() const;
bool rotation_applied() const;
int resolution_alignment() const;
const std::vector<rtc::VideoSinkWants::FrameSize>& resolutions() const;
// Updates the settings stored internally. In order for these settings to be
// applied to the sink, PushSourceSinkSettings() must subsequently be called.
@ -55,6 +57,7 @@ class VideoSourceSinkController {
void SetFrameRateUpperLimit(absl::optional<double> frame_rate_upper_limit);
void SetRotationApplied(bool rotation_applied);
void SetResolutionAlignment(int resolution_alignment);
void SetResolutions(std::vector<rtc::VideoSinkWants::FrameSize> resolutions);
private:
rtc::VideoSinkWants CurrentSettingsToSinkWants() const
@ -79,6 +82,8 @@ class VideoSourceSinkController {
RTC_GUARDED_BY(&sequence_checker_);
bool rotation_applied_ RTC_GUARDED_BY(&sequence_checker_) = false;
int resolution_alignment_ RTC_GUARDED_BY(&sequence_checker_) = 1;
std::vector<rtc::VideoSinkWants::FrameSize> resolutions_
RTC_GUARDED_BY(&sequence_checker_);
};
} // namespace webrtc

View File

@ -991,14 +991,29 @@ void VideoStreamEncoder::ReconfigureEncoder() {
max_framerate = std::max(stream.max_framerate, max_framerate);
}
main_queue_->PostTask(
ToQueuedTask(task_safety_, [this, max_framerate, alignment]() {
// The resolutions that we're actually encoding with.
std::vector<rtc::VideoSinkWants::FrameSize> encoder_resolutions;
// TODO(hbos): For the case of SVC, also make use of |codec.spatialLayers|.
// For now, SVC layers are handled by the VP9 encoder.
for (const auto& simulcastStream : codec.simulcastStream) {
if (!simulcastStream.active)
continue;
encoder_resolutions.emplace_back(simulcastStream.width,
simulcastStream.height);
}
main_queue_->PostTask(ToQueuedTask(
task_safety_, [this, max_framerate, alignment,
encoder_resolutions = std::move(encoder_resolutions)]() {
RTC_DCHECK_RUN_ON(main_queue_);
if (max_framerate !=
video_source_sink_controller_.frame_rate_upper_limit() ||
alignment != video_source_sink_controller_.resolution_alignment()) {
alignment != video_source_sink_controller_.resolution_alignment() ||
encoder_resolutions !=
video_source_sink_controller_.resolutions()) {
video_source_sink_controller_.SetFrameRateUpperLimit(max_framerate);
video_source_sink_controller_.SetResolutionAlignment(alignment);
video_source_sink_controller_.SetResolutions(
std::move(encoder_resolutions));
video_source_sink_controller_.PushSourceSinkSettings();
}
}));

View File

@ -461,6 +461,10 @@ class AdaptingFrameForwarder : public test::FrameForwarder {
return adaptation_enabled_;
}
// The "last wants" is a snapshot of the previous rtc::VideoSinkWants where
// the resolution or frame rate was different than it is currently. If
// something else is modified, such as encoder resolutions, but the resolution
// and frame rate stays the same, last wants is not updated.
rtc::VideoSinkWants last_wants() const {
MutexLock lock(&mutex_);
return last_wants_;
@ -519,7 +523,14 @@ class AdaptingFrameForwarder : public test::FrameForwarder {
// Receives updated wants from the sink. Snapshots the previous wants into
// |last_wants_| only when the new wants represent an adaptation (a change in
// pixel count or frame rate); other changes — such as updated encoder
// resolutions — deliberately do not touch the snapshot (see last_wants()).
// The new wants are then forwarded to the adapter and the base forwarder.
void AddOrUpdateSink(rtc::VideoSinkInterface<VideoFrame>* sink,
const rtc::VideoSinkWants& wants) override {
MutexLock lock(&mutex_);
rtc::VideoSinkWants prev_wants = sink_wants_locked();
// An "adaptation" is defined here as any change to the pixel-count or
// frame-rate constraints relative to the previous wants.
bool did_adapt =
prev_wants.max_pixel_count != wants.max_pixel_count ||
prev_wants.target_pixel_count != wants.target_pixel_count ||
prev_wants.max_framerate_fps != wants.max_framerate_fps;
if (did_adapt) {
last_wants_ = prev_wants;
}
adapter_.OnSinkWants(wants);
// Locked variant used because |mutex_| is already held above.
test::FrameForwarder::AddOrUpdateSinkLocked(sink, wants);
}
@ -7611,4 +7622,105 @@ TEST_F(VideoStreamEncoderTest, EncoderResetAccordingToParameterChange) {
video_stream_encoder_->Stop();
}
// Verifies that in singlecast (one encoded stream) the encoder's resolution
// is exposed to the source through VideoSinkWants::resolutions, and that it
// equals the captured frame's resolution.
TEST_F(VideoStreamEncoderTest, EncoderResolutionsExposedInSinglecast) {
const int kFrameWidth = 1280;
const int kFrameHeight = 720;
SetUp();
video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
DataRate::BitsPerSec(kTargetBitrateBps),
DataRate::BitsPerSec(kTargetBitrateBps),
DataRate::BitsPerSec(kTargetBitrateBps), 0, 0, 0);
// Capturing a frame should reconfigure the encoder and expose the encoder
// resolution, which is the same as the input frame.
int64_t timestamp_ms = kFrameIntervalMs;
video_source_.IncomingCapturedFrame(
CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
WaitForEncodedFrame(timestamp_ms);
// Wants are updated asynchronously; drain the task queue before inspecting.
video_stream_encoder_->WaitUntilTaskQueueIsIdle();
EXPECT_THAT(video_source_.sink_wants().resolutions,
::testing::ElementsAreArray(
{rtc::VideoSinkWants::FrameSize(kFrameWidth, kFrameHeight)}));
video_stream_encoder_->Stop();
}
// Verifies that in simulcast the set of active layer resolutions is exposed
// through VideoSinkWants::resolutions, and that deactivating layers removes
// the corresponding entries.
TEST_F(VideoStreamEncoderTest, EncoderResolutionsExposedInSimulcast) {
// Pick downscale factors such that we never encode at full resolution - this
// is an interesting use case. The frame resolution influences the encoder
// resolutions, but if no layer has |scale_resolution_down_by| == 1 then the
// encoder should not ask for the frame resolution. This allows video frames
// to have the appearance of one resolution but optimize their internal
// buffers for what is actually encoded.
const size_t kNumSimulcastLayers = 3u;
const float kDownscaleFactors[] = {8.0, 4.0, 2.0};
const int kFrameWidth = 1280;
const int kFrameHeight = 720;
const rtc::VideoSinkWants::FrameSize kLayer0Size(
kFrameWidth / kDownscaleFactors[0], kFrameHeight / kDownscaleFactors[0]);
const rtc::VideoSinkWants::FrameSize kLayer1Size(
kFrameWidth / kDownscaleFactors[1], kFrameHeight / kDownscaleFactors[1]);
const rtc::VideoSinkWants::FrameSize kLayer2Size(
kFrameWidth / kDownscaleFactors[2], kFrameHeight / kDownscaleFactors[2]);
VideoEncoderConfig config;
test::FillEncoderConfiguration(kVideoCodecVP8, kNumSimulcastLayers, &config);
for (size_t i = 0; i < kNumSimulcastLayers; ++i) {
config.simulcast_layers[i].scale_resolution_down_by = kDownscaleFactors[i];
config.simulcast_layers[i].active = true;
}
config.video_stream_factory =
new rtc::RefCountedObject<cricket::EncoderStreamFactory>(
"VP8", /*max qp*/ 56, /*screencast*/ false,
/*screenshare enabled*/ false);
video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
DataRate::BitsPerSec(kSimulcastTargetBitrateBps),
DataRate::BitsPerSec(kSimulcastTargetBitrateBps),
DataRate::BitsPerSec(kSimulcastTargetBitrateBps), 0, 0, 0);
// Capture a frame with all layers active.
int64_t timestamp_ms = kFrameIntervalMs;
sink_.SetNumExpectedLayers(kNumSimulcastLayers);
video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
video_source_.IncomingCapturedFrame(
CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
WaitForEncodedFrame(timestamp_ms);
// Expect encoded resolutions to match the expected simulcast layers.
video_stream_encoder_->WaitUntilTaskQueueIsIdle();
EXPECT_THAT(
video_source_.sink_wants().resolutions,
::testing::ElementsAreArray({kLayer0Size, kLayer1Size, kLayer2Size}));
// Capture a frame with one of the layers inactive.
timestamp_ms += kFrameIntervalMs;
config.simulcast_layers[2].active = false;
sink_.SetNumExpectedLayers(kNumSimulcastLayers - 1);
video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
video_source_.IncomingCapturedFrame(
CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
WaitForEncodedFrame(timestamp_ms);
// Expect encoded resolutions to match the expected simulcast layers.
video_stream_encoder_->WaitUntilTaskQueueIsIdle();
EXPECT_THAT(video_source_.sink_wants().resolutions,
::testing::ElementsAreArray({kLayer0Size, kLayer1Size}));
// Capture a frame with all but one layer turned off.
timestamp_ms += kFrameIntervalMs;
config.simulcast_layers[1].active = false;
sink_.SetNumExpectedLayers(kNumSimulcastLayers - 2);
video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
video_source_.IncomingCapturedFrame(
CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
WaitForEncodedFrame(timestamp_ms);
// Expect encoded resolutions to match the expected simulcast layers.
video_stream_encoder_->WaitUntilTaskQueueIsIdle();
EXPECT_THAT(video_source_.sink_wants().resolutions,
::testing::ElementsAreArray({kLayer0Size}));
video_stream_encoder_->Stop();
}
} // namespace webrtc