Support native scaling of VideoFrameBuffers in LibvpxVp9Encoder.

This CL is part of Optimized Scaling efforts. In Chromium, the native frame buffer is getting an optimized CropAndScale() implementation. To support HW accelerated scaling, returning pre-scaled images and skipping unnecessary intermediate downscales, WebRTC needs to 1) use CropAndScale instead of libyuv::XXXXScale and 2) only map buffers it actually intends to encode. To achieve this, WebRTC encoders are updated to map kNative video buffers so that in a follow-up CL VideoStreamEncoder can stop mapping intermediate buffer sizes. In this CL, LibvpxVp9Encoder is updated to map kNative buffers of pixel formats it supports and convert ToI420() if the kNative buffer is something else. A fake native buffer that keeps track of which resolutions were mapped, MappableNativeBuffer, is added. Because VP9 is currently an SVC encoder and not a simulcast encoder, it does not need to invoke CropAndScale. This CL also fixes MultiplexEncoderAdapter, but because it simply forwards frames it only cares about the pixel format when |supports_augmented_data_| is true, so this is the only time we map it. Because this encoder is not used with kNative in practice, we don't care to make this path optimal. Bug: webrtc:12469, chromium:1157072 Change-Id: I74edf85b18eccd0d250776bbade7a6444478efce Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/212580 Commit-Queue: Henrik Boström <hbos@webrtc.org> Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org> Reviewed-by: Evan Shrubsole <eshr@google.com> Cr-Commit-Position: refs/heads/master@{#33526}
2021-03-22 12:24:30 +01:00
parent 2ff25db72a
commit bd9e4a95eb
10 changed files with 488 additions and 42 deletions
--- a/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc
+++ b/modules/video_coding/codecs/multiplex/augmented_video_frame_buffer.cc
@ -54,4 +54,12 @@ int AugmentedVideoFrameBuffer::height() const {
 rtc::scoped_refptr<I420BufferInterface> AugmentedVideoFrameBuffer::ToI420() {
  return video_frame_buffer_->ToI420();
 }
+
+const I420BufferInterface* AugmentedVideoFrameBuffer::GetI420() const {
+  // Forward to the wrapped buffer, because this class is not itself the type
+  // that is returned by type().
+  // TODO(https://crbug.com/webrtc/12021): Stop overriding GetI420() once this
+  // class implements the buffer interfaces of relevant VideoFrameBuffer::Types.
+  return video_frame_buffer_->GetI420();
+}
 }  // namespace webrtc
--- a/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h
+++ b/modules/video_coding/codecs/multiplex/include/augmented_video_frame_buffer.h
@ -45,6 +45,12 @@ class AugmentedVideoFrameBuffer : public VideoFrameBuffer {

  // Get the I420 buffer from the underlying frame buffer
  rtc::scoped_refptr<I420BufferInterface> ToI420() final;
+  // Returns GetI420() of the underlying VideoFrameBuffer.
+  // TODO(hbos): AugmentedVideoFrameBuffer should not return a type (such as
+  // kI420) without also implementing that type's interface (i.e.
+  // I420BufferInterface). Either implement all possible Type's interfaces or
+  // return kNative.
+  const I420BufferInterface* GetI420() const final;

 private:
  uint16_t augmenting_data_size_;
--- a/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
+++ b/modules/video_coding/codecs/multiplex/multiplex_encoder_adapter.cc
@ -157,20 +157,38 @@ int MultiplexEncoderAdapter::Encode(
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }

+  // The input image is forwarded as-is, unless it is a native buffer and
+  // |supports_augmented_data_| is true in which case we need to map it in order
+  // to access the underlying AugmentedVideoFrameBuffer.
+  VideoFrame forwarded_image = input_image;
+  if (supports_augmented_data_ &&
+      forwarded_image.video_frame_buffer()->type() ==
+          VideoFrameBuffer::Type::kNative) {
+    auto info = GetEncoderInfo();
+    rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer =
+        forwarded_image.video_frame_buffer()->GetMappedFrameBuffer(
+            info.preferred_pixel_formats);
+    if (!mapped_buffer) {
+      // Unable to map the buffer.
+      return WEBRTC_VIDEO_CODEC_ERROR;
+    }
+    forwarded_image.set_video_frame_buffer(std::move(mapped_buffer));
+  }
+
  std::vector<VideoFrameType> adjusted_frame_types;
  if (key_frame_interval_ > 0 && picture_index_ % key_frame_interval_ == 0) {
    adjusted_frame_types.push_back(VideoFrameType::kVideoFrameKey);
  } else {
    adjusted_frame_types.push_back(VideoFrameType::kVideoFrameDelta);
  }
-  const bool has_alpha = input_image.video_frame_buffer()->type() ==
+  const bool has_alpha = forwarded_image.video_frame_buffer()->type() ==
                         VideoFrameBuffer::Type::kI420A;
  std::unique_ptr<uint8_t[]> augmenting_data = nullptr;
  uint16_t augmenting_data_length = 0;
  AugmentedVideoFrameBuffer* augmented_video_frame_buffer = nullptr;
  if (supports_augmented_data_) {
    augmented_video_frame_buffer = static_cast<AugmentedVideoFrameBuffer*>(
-        input_image.video_frame_buffer().get());
+        forwarded_image.video_frame_buffer().get());
    augmenting_data_length =
        augmented_video_frame_buffer->GetAugmentingDataSize();
    augmenting_data =
@ -185,7 +203,7 @@ int MultiplexEncoderAdapter::Encode(
    MutexLock lock(&mutex_);
    stashed_images_.emplace(
        std::piecewise_construct,
-        std::forward_as_tuple(input_image.timestamp()),
+        std::forward_as_tuple(forwarded_image.timestamp()),
        std::forward_as_tuple(
            picture_index_, has_alpha ? kAlphaCodecStreams : 1,
            std::move(augmenting_data), augmenting_data_length));
@ -194,7 +212,8 @@ int MultiplexEncoderAdapter::Encode(
  ++picture_index_;

  // Encode YUV
-  int rv = encoders_[kYUVStream]->Encode(input_image, &adjusted_frame_types);
+  int rv =
+      encoders_[kYUVStream]->Encode(forwarded_image, &adjusted_frame_types);

  // If we do not receive an alpha frame, we send a single frame for this
  // |picture_index_|. The receiver will receive |frame_count| as 1 which
@ -206,23 +225,24 @@ int MultiplexEncoderAdapter::Encode(
  rtc::scoped_refptr<VideoFrameBuffer> frame_buffer =
      supports_augmented_data_
          ? augmented_video_frame_buffer->GetVideoFrameBuffer()
-          : input_image.video_frame_buffer();
+          : forwarded_image.video_frame_buffer();
  const I420ABufferInterface* yuva_buffer = frame_buffer->GetI420A();
  rtc::scoped_refptr<I420BufferInterface> alpha_buffer =
-      WrapI420Buffer(input_image.width(), input_image.height(),
+      WrapI420Buffer(forwarded_image.width(), forwarded_image.height(),
                     yuva_buffer->DataA(), yuva_buffer->StrideA(),
                     multiplex_dummy_planes_.data(), yuva_buffer->StrideU(),
                     multiplex_dummy_planes_.data(), yuva_buffer->StrideV(),
                     // To keep reference alive.
                     [frame_buffer] {});
-  VideoFrame alpha_image = VideoFrame::Builder()
-                               .set_video_frame_buffer(alpha_buffer)
-                               .set_timestamp_rtp(input_image.timestamp())
-                               .set_timestamp_ms(input_image.render_time_ms())
-                               .set_rotation(input_image.rotation())
-                               .set_id(input_image.id())
-                               .set_packet_infos(input_image.packet_infos())
-                               .build();
+  VideoFrame alpha_image =
+      VideoFrame::Builder()
+          .set_video_frame_buffer(alpha_buffer)
+          .set_timestamp_rtp(forwarded_image.timestamp())
+          .set_timestamp_ms(forwarded_image.render_time_ms())
+          .set_rotation(forwarded_image.rotation())
+          .set_id(forwarded_image.id())
+          .set_packet_infos(forwarded_image.packet_infos())
+          .build();
  rv = encoders_[kAXXStream]->Encode(alpha_image, &adjusted_frame_types);
  return rv;
 }
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@ -18,6 +18,7 @@
 #include <utility>
 #include <vector>

+#include "absl/algorithm/container.h"
 #include "absl/memory/memory.h"
 #include "absl/strings/match.h"
 #include "api/video/color_space.h"
@ -1040,37 +1041,17 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
  // doing this.
  input_image_ = &input_image;

-  // Keep reference to buffer until encode completes.
-  rtc::scoped_refptr<const VideoFrameBuffer> video_frame_buffer;
+  // In case we need to map the buffer, |mapped_buffer| is used to keep it alive
+  // through reference counting until after encoding has finished.
+  rtc::scoped_refptr<const VideoFrameBuffer> mapped_buffer;
  const I010BufferInterface* i010_buffer;
  rtc::scoped_refptr<const I010BufferInterface> i010_copy;
  switch (profile_) {
    case VP9Profile::kProfile0: {
-      if (input_image.video_frame_buffer()->type() ==
-          VideoFrameBuffer::Type::kNV12) {
-        const NV12BufferInterface* nv12_buffer =
-            input_image.video_frame_buffer()->GetNV12();
-        video_frame_buffer = nv12_buffer;
-        MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12);
-        raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY());
-        raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV());
-        raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1;
-        raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY();
-        raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV();
-        raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV();
-      } else {
-        rtc::scoped_refptr<I420BufferInterface> i420_buffer =
-            input_image.video_frame_buffer()->ToI420();
-        video_frame_buffer = i420_buffer;
-        MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420);
-        // Image in vpx_image_t format.
-        // Input image is const. VPX's raw image is not defined as const.
-        raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY());
-        raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU());
-        raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV());
-        raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
-        raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU();
-        raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV();
+      mapped_buffer =
+          PrepareBufferForProfile0(input_image.video_frame_buffer());
+      if (!mapped_buffer) {
+        return WEBRTC_VIDEO_CODEC_ERROR;
      }
      break;
    }
@ -1892,6 +1873,90 @@ void LibvpxVp9Encoder::MaybeRewrapRawWithFormat(const vpx_img_fmt fmt) {
  // else no-op since the image is already in the right format.
 }

+rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
+    rtc::scoped_refptr<VideoFrameBuffer> buffer) {
+  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
+      supported_formats = {VideoFrameBuffer::Type::kI420,
+                           VideoFrameBuffer::Type::kNV12};
+
+  rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer;
+  if (buffer->type() != VideoFrameBuffer::Type::kNative) {
+    // Non-native buffers are already mapped and are used as-is.
+    mapped_buffer = buffer;
+  } else {
+    // Native buffer: attempt to map it to one of the supported formats.
+    mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
+  }
+  if (!mapped_buffer ||
+      (absl::c_find(supported_formats, mapped_buffer->type()) ==
+           supported_formats.end() &&
+       mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
+    // Unable to map, or the mapped pixel format is not one handled below:
+    // convert to I420 and prepare that buffer instead.
+    rtc::scoped_refptr<VideoFrameBuffer> converted_buffer = buffer->ToI420();
+    if (!converted_buffer) {
+      RTC_LOG(LS_ERROR) << "Failed to convert "
+                        << VideoFrameBufferTypeToString(buffer->type())
+                        << " image to I420. Can't encode frame.";
+      return {};
+    }
+    // ToI420() should yield I420 or I420A, but some implementations return the
+    // wrong format (e.g. kNative), so retry GetMappedFrameBuffer+ToI420.
+    // NOTE(review): the retried ToI420() result is not null-checked - confirm.
+    // TODO(https://crbug.com/webrtc/12602): When Android buffers have a correct
+    // ToI420() implementation, remove this workaround.
+    if (converted_buffer->type() != VideoFrameBuffer::Type::kI420 &&
+        converted_buffer->type() != VideoFrameBuffer::Type::kI420A) {
+      if (converted_buffer->type() == VideoFrameBuffer::Type::kNative) {
+        auto mapped_converted_buffer =
+            converted_buffer->GetMappedFrameBuffer(supported_formats);
+        if (mapped_converted_buffer)
+          converted_buffer = mapped_converted_buffer;
+      }
+      if (converted_buffer->type() != VideoFrameBuffer::Type::kI420 &&
+          converted_buffer->type() != VideoFrameBuffer::Type::kI420A) {
+        converted_buffer = converted_buffer->ToI420();
+      }
+      RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
+                converted_buffer->type() == VideoFrameBuffer::Type::kI420A);
+    }
+    // From here on |converted_buffer| stands in for |buffer| and its mapping.
+    buffer = mapped_buffer = converted_buffer;
+  }
+
+  // Point the planes and strides of |raw_| at the data of |mapped_buffer|.
+  switch (mapped_buffer->type()) {
+    case VideoFrameBuffer::Type::kI420:
+    case VideoFrameBuffer::Type::kI420A: {
+      MaybeRewrapRawWithFormat(VPX_IMG_FMT_I420);
+      const I420BufferInterface* i420_buffer = mapped_buffer->GetI420();
+      RTC_DCHECK(i420_buffer);
+      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(i420_buffer->DataY());
+      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(i420_buffer->DataU());
+      raw_->planes[VPX_PLANE_V] = const_cast<uint8_t*>(i420_buffer->DataV());
+      raw_->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
+      raw_->stride[VPX_PLANE_U] = i420_buffer->StrideU();
+      raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV();
+      break;
+    }
+    case VideoFrameBuffer::Type::kNV12: {
+      MaybeRewrapRawWithFormat(VPX_IMG_FMT_NV12);
+      const NV12BufferInterface* nv12_buffer = mapped_buffer->GetNV12();
+      RTC_DCHECK(nv12_buffer);
+      raw_->planes[VPX_PLANE_Y] = const_cast<uint8_t*>(nv12_buffer->DataY());
+      raw_->planes[VPX_PLANE_U] = const_cast<uint8_t*>(nv12_buffer->DataUV());
+      raw_->planes[VPX_PLANE_V] = raw_->planes[VPX_PLANE_U] + 1;  // V follows U.
+      raw_->stride[VPX_PLANE_Y] = nv12_buffer->StrideY();
+      raw_->stride[VPX_PLANE_U] = nv12_buffer->StrideUV();
+      raw_->stride[VPX_PLANE_V] = nv12_buffer->StrideUV();
+      break;
+    }
+    default:
+      RTC_NOTREACHED();  // Every other type was converted or rejected above.
+  }
+  return mapped_buffer;
+}
+
 }  // namespace webrtc

 #endif  // RTC_ENABLE_VP9
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
@ -103,6 +103,12 @@ class LibvpxVp9Encoder : public VP9Encoder {
  size_t SteadyStateSize(int sid, int tid);

  void MaybeRewrapRawWithFormat(const vpx_img_fmt fmt);
+  // Prepares |raw_| to reference image data of |buffer|, or of a mapped or
+  // I420-converted version of |buffer|. Returns the buffer that got referenced
+  // as a result, allowing the caller to keep a reference to it until after
+  // encoding has finished. On failure to convert the buffer, null is returned.
+  rtc::scoped_refptr<VideoFrameBuffer> PrepareBufferForProfile0(
+      rtc::scoped_refptr<VideoFrameBuffer> buffer);

  const std::unique_ptr<LibvpxInterface> libvpx_;
  EncodedImage encoded_image_;
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@ -30,6 +30,7 @@
 #include "test/field_trial.h"
 #include "test/gmock.h"
 #include "test/gtest.h"
+#include "test/mappable_native_buffer.h"
 #include "test/video_codec_settings.h"

 namespace webrtc {
@ -158,6 +159,31 @@ TEST_P(TestVp9ImplForPixelFormat, EncodeDecode) {
            color_space.chroma_siting_vertical());
 }

+TEST_P(TestVp9ImplForPixelFormat, EncodeNativeBuffer) {
+  VideoFrame input_frame = NextInputFrame();
+  // Swap the input frame for a fake native buffer of the same size and
+  // underlying pixel format, so that the encoder has to map it. Do not allow
+  // ToI420() for non-I420 buffers, ensuring zero-conversion.
+  input_frame = test::CreateMappableNativeFrame(
+      input_frame.ntp_time_ms(), input_frame.video_frame_buffer()->type(),
+      input_frame.width(), input_frame.height());
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(input_frame, nullptr));
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+
+  // Expect exactly one mapping, with the input's pixel format and size.
+  rtc::scoped_refptr<test::MappableNativeBuffer> mappable_buffer =
+      test::GetMappableNativeBufferFromVideoFrame(input_frame);
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> mapped_buffers =
+      mappable_buffer->GetMappedFramedBuffers();
+  ASSERT_EQ(mapped_buffers.size(), 1u);
+  EXPECT_EQ(mapped_buffers[0]->type(), mappable_buffer->mappable_type());
+  EXPECT_EQ(mapped_buffers[0]->width(), input_frame.width());
+  EXPECT_EQ(mapped_buffers[0]->height(), input_frame.height());
+  EXPECT_FALSE(mappable_buffer->DidConvertToI420());  // No ToI420() fallback.
+}
+
 TEST_P(TestVp9ImplForPixelFormat, DecodedColorSpaceFromBitstream) {
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(NextInputFrame(), nullptr));
  EncodedImage encoded_frame;