Support native scaling of VideoFrameBuffers in LibvpxVp8Encoder.

This is a follow-up to the VP9 CL, fixing VP8 this time. Context again:

This CL is part of Optimized Scaling efforts. In Chromium, the native
frame buffer is getting an optimized CropAndScale() implementation. To
support HW accelerated scaling, returning pre-scaled images and skipping
unnecessary intermediate downscales, WebRTC needs to 1) use CropAndScale
instead of libyuv::XXXXScale and 2) only map buffers it actually intends
to encode.
- To achieve this, WebRTC encoders are updated to map kNative video
  buffers so that in a follow-up CL VideoStreamEncoder can stop mapping
  intermediate buffer sizes. A rough sketch of the intended pattern follows.
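
As an illustration of the pattern (a sketch, not code from this CL's diff;
|buffer| stands for a frame's VideoFrameBuffer), an encoder that supports
I420 and NV12 first asks a kNative buffer for an already-mapped frame and
only falls back to software conversion when mapping fails:

  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
      formats = {VideoFrameBuffer::Type::kI420,
                 VideoFrameBuffer::Type::kNV12};
  rtc::scoped_refptr<VideoFrameBuffer> mapped =
      buffer->type() == VideoFrameBuffer::Type::kNative
          // May return HW-backed, pre-scaled memory in a supported format.
          ? buffer->GetMappedFrameBuffer(formats)
          : buffer;  // Non-native buffers are already mapped.
  if (!mapped) {
    mapped = buffer->ToI420();  // Last resort: CPU conversion.
  }

This way the only sizes that ever get mapped are the ones actually handed to
the encoder.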

Bug: webrtc:12469, chromium:1157072
Change-Id: I026527ae77e36f66d02e149ad6fe304f6a8ccb05
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/212600
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Evan Shrubsole <eshr@google.com>
Cr-Commit-Position: refs/heads/master@{#33537}
commit 3889de1c4c (parent 6a6715042a)
Author: Henrik Boström <hbos@webrtc.org>, committed by Commit Bot
Date: 2021-03-23 09:18:28 +01:00
5 changed files with 227 additions and 97 deletions

modules/video_coding/BUILD.gn

@@ -500,7 +500,10 @@ rtc_library("webrtc_vp8") {
     "../../system_wrappers:metrics",
     "//third_party/libyuv",
   ]
-  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+  absl_deps = [
+    "//third_party/abseil-cpp/absl/algorithm:container",
+    "//third_party/abseil-cpp/absl/types:optional",
+  ]
   if (rtc_build_libvpx) {
     deps += [ rtc_libvpx_dir ]
   }
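
(The new absl/algorithm:container dependency covers the absl::c_find() call
that the rewritten encoder uses below to check whether a mapped buffer's
pixel format is one of the supported ones.)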

modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc

@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>
 
+#include "absl/algorithm/container.h"
 #include "api/scoped_refptr.h"
 #include "api/video/video_content_type.h"
 #include "api/video/video_frame_buffer.h"
@@ -160,6 +161,41 @@ void ApplyVp8EncoderConfigToVpxConfig(const Vp8EncoderConfig& encoder_config,
   }
 }
 
+void SetRawImagePlanes(vpx_image_t* raw_image, VideoFrameBuffer* buffer) {
+  switch (buffer->type()) {
+    case VideoFrameBuffer::Type::kI420:
+    case VideoFrameBuffer::Type::kI420A: {
+      const I420BufferInterface* i420_buffer = buffer->GetI420();
+      RTC_DCHECK(i420_buffer);
+      raw_image->planes[VPX_PLANE_Y] =
+          const_cast<uint8_t*>(i420_buffer->DataY());
+      raw_image->planes[VPX_PLANE_U] =
+          const_cast<uint8_t*>(i420_buffer->DataU());
+      raw_image->planes[VPX_PLANE_V] =
+          const_cast<uint8_t*>(i420_buffer->DataV());
+      raw_image->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
+      raw_image->stride[VPX_PLANE_U] = i420_buffer->StrideU();
+      raw_image->stride[VPX_PLANE_V] = i420_buffer->StrideV();
+      break;
+    }
+    case VideoFrameBuffer::Type::kNV12: {
+      const NV12BufferInterface* nv12_buffer = buffer->GetNV12();
+      RTC_DCHECK(nv12_buffer);
+      raw_image->planes[VPX_PLANE_Y] =
+          const_cast<uint8_t*>(nv12_buffer->DataY());
+      raw_image->planes[VPX_PLANE_U] =
+          const_cast<uint8_t*>(nv12_buffer->DataUV());
+      raw_image->planes[VPX_PLANE_V] = raw_image->planes[VPX_PLANE_U] + 1;
+      raw_image->stride[VPX_PLANE_Y] = nv12_buffer->StrideY();
+      raw_image->stride[VPX_PLANE_U] = nv12_buffer->StrideUV();
+      raw_image->stride[VPX_PLANE_V] = nv12_buffer->StrideUV();
+      break;
+    }
+    default:
+      RTC_NOTREACHED();
+  }
+}
+
 }  // namespace
 
 std::unique_ptr<VideoEncoder> VP8Encoder::Create() {
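
A note on the kNV12 case above: it relies on NV12 being semi-planar, which
lets libvpx read the interleaved chroma directly once the encoder is
configured with VPX_IMG_FMT_NV12 (see MaybeUpdatePixelFormat below). Roughly:

  // NV12 layout: a full-resolution Y plane followed by one half-resolution
  // plane of interleaved chroma pairs.
  //   DataY()  -> Y0 Y1 Y2 Y3 ...
  //   DataUV() -> U0 V0 U1 V1 ...
  // Hence planes[VPX_PLANE_U] = DataUV(), planes[VPX_PLANE_V] = DataUV() + 1,
  // and both chroma planes advance by the same StrideUV() per chroma row.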
@@ -929,40 +965,29 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame,
     flags[i] = send_key_frame ? VPX_EFLAG_FORCE_KF : EncodeFlags(tl_configs[i]);
   }
 
-  rtc::scoped_refptr<VideoFrameBuffer> input_image = frame.video_frame_buffer();
-  // Since we are extracting raw pointers from |input_image| to
-  // |raw_images_[0]|, the resolution of these frames must match.
-  RTC_DCHECK_EQ(input_image->width(), raw_images_[0].d_w);
-  RTC_DCHECK_EQ(input_image->height(), raw_images_[0].d_h);
-  switch (input_image->type()) {
-    case VideoFrameBuffer::Type::kI420:
-      PrepareI420Image(input_image->GetI420());
-      break;
-    case VideoFrameBuffer::Type::kNV12:
-      PrepareNV12Image(input_image->GetNV12());
-      break;
-    default: {
-      rtc::scoped_refptr<I420BufferInterface> i420_image =
-          input_image->ToI420();
-      if (!i420_image) {
-        RTC_LOG(LS_ERROR) << "Failed to convert "
-                          << VideoFrameBufferTypeToString(input_image->type())
-                          << " image to I420. Can't encode frame.";
-        return WEBRTC_VIDEO_CODEC_ERROR;
-      }
-      input_image = i420_image;
-      PrepareI420Image(i420_image);
-    }
-  }
+  // Scale and map buffers and set |raw_images_| to hold pointers to the result.
+  // Because |raw_images_| are set to hold pointers to the prepared buffers, we
+  // need to keep these buffers alive through reference counting until after
+  // encoding is complete.
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers =
+      PrepareBuffers(frame.video_frame_buffer());
+  if (prepared_buffers.empty()) {
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
   struct CleanUpOnExit {
-    explicit CleanUpOnExit(vpx_image_t& raw_image) : raw_image_(raw_image) {}
+    explicit CleanUpOnExit(
+        vpx_image_t* raw_image,
+        std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers)
+        : raw_image_(raw_image),
+          prepared_buffers_(std::move(prepared_buffers)) {}
     ~CleanUpOnExit() {
-      raw_image_.planes[VPX_PLANE_Y] = nullptr;
-      raw_image_.planes[VPX_PLANE_U] = nullptr;
-      raw_image_.planes[VPX_PLANE_V] = nullptr;
+      raw_image_->planes[VPX_PLANE_Y] = nullptr;
+      raw_image_->planes[VPX_PLANE_U] = nullptr;
+      raw_image_->planes[VPX_PLANE_V] = nullptr;
     }
-    vpx_image_t& raw_image_;
-  } clean_up_on_exit(raw_images_[0]);
+    vpx_image_t* raw_image_;
+    std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers_;
+  } clean_up_on_exit(&raw_images_[0], std::move(prepared_buffers));
 
   if (send_key_frame) {
     // Adapt the size of the key frame when in screenshare with 1 temporal
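
Design note on CleanUpOnExit: it is a scope guard. Since |raw_images_[0]| now
borrows plane pointers from refcounted buffers instead of owning a pixel
copy, the guard holds the prepared buffers alive for the duration of the
encode and nulls the borrowed pointers on every return path, so libvpx is
never left with dangling plane pointers.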
@@ -1262,61 +1287,109 @@ void LibvpxVp8Encoder::MaybeUpdatePixelFormat(vpx_img_fmt fmt) {
   }
 }
 
-void LibvpxVp8Encoder::PrepareI420Image(const I420BufferInterface* frame) {
-  RTC_DCHECK(!raw_images_.empty());
-  // Image in vpx_image_t format.
-  // Input image is const. VP8's raw image is not defined as const.
-  raw_images_[0].planes[VPX_PLANE_Y] = const_cast<uint8_t*>(frame->DataY());
-  raw_images_[0].planes[VPX_PLANE_U] = const_cast<uint8_t*>(frame->DataU());
-  raw_images_[0].planes[VPX_PLANE_V] = const_cast<uint8_t*>(frame->DataV());
-  raw_images_[0].stride[VPX_PLANE_Y] = frame->StrideY();
-  raw_images_[0].stride[VPX_PLANE_U] = frame->StrideU();
-  raw_images_[0].stride[VPX_PLANE_V] = frame->StrideV();
-  for (size_t i = 1; i < encoders_.size(); ++i) {
-    // Scale the image down a number of times by downsampling factor
-    libyuv::I420Scale(
-        raw_images_[i - 1].planes[VPX_PLANE_Y],
-        raw_images_[i - 1].stride[VPX_PLANE_Y],
-        raw_images_[i - 1].planes[VPX_PLANE_U],
-        raw_images_[i - 1].stride[VPX_PLANE_U],
-        raw_images_[i - 1].planes[VPX_PLANE_V],
-        raw_images_[i - 1].stride[VPX_PLANE_V], raw_images_[i - 1].d_w,
-        raw_images_[i - 1].d_h, raw_images_[i].planes[VPX_PLANE_Y],
-        raw_images_[i].stride[VPX_PLANE_Y], raw_images_[i].planes[VPX_PLANE_U],
-        raw_images_[i].stride[VPX_PLANE_U], raw_images_[i].planes[VPX_PLANE_V],
-        raw_images_[i].stride[VPX_PLANE_V], raw_images_[i].d_w,
-        raw_images_[i].d_h, libyuv::kFilterBilinear);
-  }
-}
-
-void LibvpxVp8Encoder::PrepareNV12Image(const NV12BufferInterface* frame) {
-  RTC_DCHECK(!raw_images_.empty());
-  // Image in vpx_image_t format.
-  // Input image is const. VP8's raw image is not defined as const.
-  raw_images_[0].planes[VPX_PLANE_Y] = const_cast<uint8_t*>(frame->DataY());
-  raw_images_[0].planes[VPX_PLANE_U] = const_cast<uint8_t*>(frame->DataUV());
-  raw_images_[0].planes[VPX_PLANE_V] = raw_images_[0].planes[VPX_PLANE_U] + 1;
-  raw_images_[0].stride[VPX_PLANE_Y] = frame->StrideY();
-  raw_images_[0].stride[VPX_PLANE_U] = frame->StrideUV();
-  raw_images_[0].stride[VPX_PLANE_V] = frame->StrideUV();
-  for (size_t i = 1; i < encoders_.size(); ++i) {
-    // Scale the image down a number of times by downsampling factor
-    libyuv::NV12Scale(
-        raw_images_[i - 1].planes[VPX_PLANE_Y],
-        raw_images_[i - 1].stride[VPX_PLANE_Y],
-        raw_images_[i - 1].planes[VPX_PLANE_U],
-        raw_images_[i - 1].stride[VPX_PLANE_U], raw_images_[i - 1].d_w,
-        raw_images_[i - 1].d_h, raw_images_[i].planes[VPX_PLANE_Y],
-        raw_images_[i].stride[VPX_PLANE_Y], raw_images_[i].planes[VPX_PLANE_U],
-        raw_images_[i].stride[VPX_PLANE_U], raw_images_[i].d_w,
-        raw_images_[i].d_h, libyuv::kFilterBilinear);
-    raw_images_[i].planes[VPX_PLANE_V] = raw_images_[i].planes[VPX_PLANE_U] + 1;
-  }
-}
+std::vector<rtc::scoped_refptr<VideoFrameBuffer>>
+LibvpxVp8Encoder::PrepareBuffers(rtc::scoped_refptr<VideoFrameBuffer> buffer) {
+  RTC_DCHECK_EQ(buffer->width(), raw_images_[0].d_w);
+  RTC_DCHECK_EQ(buffer->height(), raw_images_[0].d_h);
+  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
+      supported_formats = {VideoFrameBuffer::Type::kI420,
+                           VideoFrameBuffer::Type::kNV12};
+
+  rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer;
+  if (buffer->type() != VideoFrameBuffer::Type::kNative) {
+    // |buffer| is already mapped.
+    mapped_buffer = buffer;
+  } else {
+    // Attempt to map to one of the supported formats.
+    mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
+  }
+  if (!mapped_buffer ||
+      (absl::c_find(supported_formats, mapped_buffer->type()) ==
+           supported_formats.end() &&
+       mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
+    // Unknown pixel format or unable to map, convert to I420 and prepare that
+    // buffer instead to ensure Scale() is safe to use.
+    auto converted_buffer = buffer->ToI420();
+    if (!converted_buffer) {
+      RTC_LOG(LS_ERROR) << "Failed to convert "
+                        << VideoFrameBufferTypeToString(buffer->type())
+                        << " image to I420. Can't encode frame.";
+      return {};
+    }
+    // The buffer should now be a mapped I420 or I420A format, but some buffer
+    // implementations incorrectly return the wrong buffer format, such as
+    // kNative. As a workaround to this, we perform ToI420() a second time.
+    // TODO(https://crbug.com/webrtc/12602): When Android buffers have a
+    // correct ToI420() implementation, remove this workaround.
+    if (converted_buffer->type() != VideoFrameBuffer::Type::kI420 &&
+        converted_buffer->type() != VideoFrameBuffer::Type::kI420A) {
+      converted_buffer = converted_buffer->ToI420();
+      RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
+                converted_buffer->type() == VideoFrameBuffer::Type::kI420A);
+    }
+    // Because |buffer| had to be converted, use |converted_buffer| instead.
+    buffer = mapped_buffer = converted_buffer;
+  }
+
+  // Maybe update pixel format.
+  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
+      mapped_type = {mapped_buffer->type()};
+  switch (mapped_buffer->type()) {
+    case VideoFrameBuffer::Type::kI420:
+    case VideoFrameBuffer::Type::kI420A:
+      MaybeUpdatePixelFormat(VPX_IMG_FMT_I420);
+      break;
+    case VideoFrameBuffer::Type::kNV12:
+      MaybeUpdatePixelFormat(VPX_IMG_FMT_NV12);
+      break;
+    default:
+      RTC_NOTREACHED();
+  }
+
+  // Prepare |raw_images_| from |mapped_buffer| and, if simulcast, scaled
+  // versions of |buffer|.
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers;
+  SetRawImagePlanes(&raw_images_[0], mapped_buffer);
+  prepared_buffers.push_back(mapped_buffer);
+  for (size_t i = 1; i < encoders_.size(); ++i) {
+    // Native buffers should implement optimized scaling and are the preferred
+    // buffers to scale. But if the buffer isn't native, it should be cheaper
+    // to scale from the previously prepared buffer which is smaller than
+    // |buffer|.
+    VideoFrameBuffer* buffer_to_scale =
+        buffer->type() == VideoFrameBuffer::Type::kNative
+            ? buffer.get()
+            : prepared_buffers.back().get();
+    auto scaled_buffer =
+        buffer_to_scale->Scale(raw_images_[i].d_w, raw_images_[i].d_h);
+    if (scaled_buffer->type() == VideoFrameBuffer::Type::kNative) {
+      auto mapped_scaled_buffer =
+          scaled_buffer->GetMappedFrameBuffer(mapped_type);
+      RTC_DCHECK(mapped_scaled_buffer) << "Unable to map the scaled buffer.";
+      if (!mapped_scaled_buffer) {
+        RTC_LOG(LS_ERROR) << "Failed to map scaled "
+                          << VideoFrameBufferTypeToString(scaled_buffer->type())
+                          << " image to "
+                          << VideoFrameBufferTypeToString(mapped_buffer->type())
+                          << ". Can't encode frame.";
+        return {};
+      }
+      scaled_buffer = mapped_scaled_buffer;
+    }
+    RTC_DCHECK_EQ(scaled_buffer->type(), mapped_buffer->type())
+        << "Scaled frames must have the same type as the mapped frame.";
+    if (scaled_buffer->type() != mapped_buffer->type()) {
+      RTC_LOG(LS_ERROR) << "When scaling "
+                        << VideoFrameBufferTypeToString(buffer_to_scale->type())
+                        << ", the image was unexpectedly converted to "
+                        << VideoFrameBufferTypeToString(scaled_buffer->type())
+                        << ". Can't encode frame.";
+      return {};
+    }
+    SetRawImagePlanes(&raw_images_[i], scaled_buffer);
+    prepared_buffers.push_back(scaled_buffer);
+  }
+  return prepared_buffers;
+}
 
 // static
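
A note on the scaling order in PrepareBuffers(): for non-native input, each
layer is scaled from the previously prepared, already smaller buffer, so the
per-layer cost shrinks as the cascade proceeds. For example, with 1280x720
input and 2x downscaling per layer, layer 1 reads 1280x720 and writes
640x360, and layer 2 then only reads 640x360 to write 320x180 instead of
rereading the full frame. Native buffers are scaled from the original
|buffer| instead, since a hardware Scale()/CropAndScale() implementation can
produce any target size directly and may already have pre-scaled images
available.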

modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h

@@ -95,8 +95,13 @@ class LibvpxVp8Encoder : public VideoEncoder {
 
   bool UpdateVpxConfiguration(size_t stream_index);
 
   void MaybeUpdatePixelFormat(vpx_img_fmt fmt);
-  void PrepareI420Image(const I420BufferInterface* frame);
-  void PrepareNV12Image(const NV12BufferInterface* frame);
+  // Prepares |raw_image_| to reference image data of |buffer|, or of mapped or
+  // scaled versions of |buffer|. Returns a list of buffers that got referenced
+  // as a result, allowing the caller to keep references to them until after
+  // encoding has finished. On failure to convert the buffer, an empty list is
+  // returned.
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> PrepareBuffers(
+      rtc::scoped_refptr<VideoFrameBuffer> buffer);
 
   const std::unique_ptr<LibvpxInterface> libvpx_;

modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc

@@ -27,6 +27,7 @@
 #include "modules/video_coding/utility/vp8_header_parser.h"
 #include "rtc_base/time_utils.h"
 #include "test/field_trial.h"
+#include "test/mappable_native_buffer.h"
 #include "test/video_codec_settings.h"
 
 namespace webrtc {
@@ -715,4 +716,61 @@ TEST_F(TestVp8Impl, GetEncoderInfoFpsAllocationSimulcastVideo) {
               ::testing::ElementsAreArray(expected_fps_allocation));
 }
 
+class TestVp8ImplForPixelFormat
+    : public TestVp8Impl,
+      public ::testing::WithParamInterface<VideoFrameBuffer::Type> {
+ public:
+  TestVp8ImplForPixelFormat() : TestVp8Impl(), mappable_type_(GetParam()) {}
+
+ protected:
+  VideoFrameBuffer::Type mappable_type_;
+};
+
+TEST_P(TestVp8ImplForPixelFormat, EncodeNativeFrameSimulcast) {
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release());
+
+  // Configure simulcast.
+  codec_settings_.numberOfSimulcastStreams = 3;
+  codec_settings_.simulcastStream[0] = {
+      kWidth / 4, kHeight / 4, kFramerateFps, 1, 4000, 3000, 2000, 80, true};
+  codec_settings_.simulcastStream[1] = {
+      kWidth / 2, kHeight / 2, kFramerateFps, 1, 4000, 3000, 2000, 80, true};
+  codec_settings_.simulcastStream[2] = {
+      kWidth, kHeight, kFramerateFps, 1, 4000, 3000, 2000, 80, true};
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, kSettings));
+
+  // Create a zero-conversion NV12 frame (calling ToI420 on it crashes).
+  VideoFrame input_frame =
+      test::CreateMappableNativeFrame(1, mappable_type_, kWidth, kHeight);
+
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  EncodeAndWaitForFrame(input_frame, &encoded_frame, &codec_specific_info);
+
+  // After encoding, we expect one mapping per simulcast layer.
+  rtc::scoped_refptr<test::MappableNativeBuffer> mappable_buffer =
+      test::GetMappableNativeBufferFromVideoFrame(input_frame);
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> mapped_buffers =
+      mappable_buffer->GetMappedFramedBuffers();
+  ASSERT_EQ(mapped_buffers.size(), 3u);
+  EXPECT_EQ(mapped_buffers[0]->type(), mappable_type_);
+  EXPECT_EQ(mapped_buffers[0]->width(), kWidth);
+  EXPECT_EQ(mapped_buffers[0]->height(), kHeight);
+  EXPECT_EQ(mapped_buffers[1]->type(), mappable_type_);
+  EXPECT_EQ(mapped_buffers[1]->width(), kWidth / 2);
+  EXPECT_EQ(mapped_buffers[1]->height(), kHeight / 2);
+  EXPECT_EQ(mapped_buffers[2]->type(), mappable_type_);
+  EXPECT_EQ(mapped_buffers[2]->width(), kWidth / 4);
+  EXPECT_EQ(mapped_buffers[2]->height(), kHeight / 4);
+  EXPECT_FALSE(mappable_buffer->DidConvertToI420());
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release());
+}
+
+INSTANTIATE_TEST_SUITE_P(All,
+                         TestVp8ImplForPixelFormat,
+                         ::testing::Values(VideoFrameBuffer::Type::kI420,
+                                           VideoFrameBuffer::Type::kNV12));
+
 }  // namespace webrtc
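
The key assertion is EXPECT_FALSE(mappable_buffer->DidConvertToI420()): all
three simulcast layers must be fed from mapped (or mapped-and-scaled) native
buffers, never through the I420 conversion fallback.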

modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc

@@ -1893,7 +1893,7 @@ rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
        mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
     // Unknown pixel format or unable to map, convert to I420 and prepare that
     // buffer instead to ensure Scale() is safe to use.
-    rtc::scoped_refptr<VideoFrameBuffer> converted_buffer = buffer->ToI420();
+    auto converted_buffer = buffer->ToI420();
     if (!converted_buffer) {
       RTC_LOG(LS_ERROR) << "Failed to convert "
                         << VideoFrameBufferTypeToString(buffer->type())
@@ -1902,21 +1902,12 @@ rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
     }
     // The buffer should now be a mapped I420 or I420A format, but some buffer
     // implementations incorrectly return the wrong buffer format, such as
-    // kNative. As a workaround to this, we retry GetMappedFrameBuffer+ToI420.
+    // kNative. As a workaround to this, we perform ToI420() a second time.
     // TODO(https://crbug.com/webrtc/12602): When Android buffers have a
     // correct ToI420() implementation, remove this workaround.
     if (converted_buffer->type() != VideoFrameBuffer::Type::kI420 &&
         converted_buffer->type() != VideoFrameBuffer::Type::kI420A) {
-      if (converted_buffer->type() == VideoFrameBuffer::Type::kNative) {
-        auto mapped_converted_buffer =
-            converted_buffer->GetMappedFrameBuffer(supported_formats);
-        if (mapped_converted_buffer)
-          converted_buffer = mapped_converted_buffer;
-      }
-      if (converted_buffer->type() != VideoFrameBuffer::Type::kI420 &&
-          converted_buffer->type() != VideoFrameBuffer::Type::kI420A) {
-        converted_buffer = converted_buffer->ToI420();
-      }
+      converted_buffer = converted_buffer->ToI420();
       RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
                 converted_buffer->type() == VideoFrameBuffer::Type::kI420A);
     }