Add NV12 to libvpx wrappers output

Bug: webrtc:11956
Change-Id: Id8734b8f0fd87ac9b849d70b0c5764bf1ffd9c75
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/185300
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Henrik Boström <hbos@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32225}
This commit is contained in:
Ilya Nikolaevskiy
2020-09-29 10:37:32 +02:00
committed by Commit Bot
parent de95329daa
commit b6f002b55f
7 changed files with 120 additions and 35 deletions

View File

@@ -36,6 +36,7 @@ extern "C" {
#include "rtc_base/logging.h"
#include "system_wrappers/include/field_trial.h"
#include "system_wrappers/include/metrics.h"
#include "third_party/libyuv/include/libyuv/convert.h"
namespace webrtc {
@@ -103,7 +104,7 @@ int H264DecoderImpl::AVGetBuffer2(AVCodecContext* context,
// TODO(nisse): Delete that feature from the video pool, instead add
// an explicit call to InitializeData here.
rtc::scoped_refptr<I420Buffer> frame_buffer =
decoder->pool_.CreateI420Buffer(width, height);
decoder->ffmpeg_buffer_pool_.CreateI420Buffer(width, height);
int y_size = width * height;
int uv_size = frame_buffer->ChromaWidth() * frame_buffer->ChromaHeight();
@@ -150,10 +151,13 @@ void H264DecoderImpl::AVFreeBuffer2(void* opaque, uint8_t* data) {
}
H264DecoderImpl::H264DecoderImpl()
: pool_(true),
: ffmpeg_buffer_pool_(true),
decoded_image_callback_(nullptr),
has_reported_init_(false),
has_reported_error_(false) {}
has_reported_error_(false),
preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
? VideoFrameBuffer::Type::kNV12
: VideoFrameBuffer::Type::kI420) {}
H264DecoderImpl::~H264DecoderImpl() {
Release();
@@ -219,7 +223,8 @@ int32_t H264DecoderImpl::InitDecode(const VideoCodec* codec_settings,
av_frame_.reset(av_frame_alloc());
if (codec_settings && codec_settings->buffer_pool_size) {
if (!pool_.Resize(*codec_settings->buffer_pool_size)) {
if (!ffmpeg_buffer_pool_.Resize(*codec_settings->buffer_pool_size) ||
!output_buffer_pool_.Resize(*codec_settings->buffer_pool_size)) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
}
@@ -325,12 +330,25 @@ int32_t H264DecoderImpl::Decode(const EncodedImage& input_image,
i420_buffer->DataV() +
i420_buffer->StrideV() * i420_buffer->height() / 2);
auto cropped_buffer = WrapI420Buffer(
rtc::scoped_refptr<webrtc::VideoFrameBuffer> cropped_buffer = WrapI420Buffer(
av_frame_->width, av_frame_->height, av_frame_->data[kYPlaneIndex],
av_frame_->linesize[kYPlaneIndex], av_frame_->data[kUPlaneIndex],
av_frame_->linesize[kUPlaneIndex], av_frame_->data[kVPlaneIndex],
av_frame_->linesize[kVPlaneIndex], rtc::KeepRefUntilDone(i420_buffer));
if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
const I420BufferInterface* cropped_i420 = cropped_buffer->GetI420();
auto nv12_buffer = output_buffer_pool_.CreateNV12Buffer(
cropped_i420->width(), cropped_i420->height());
libyuv::I420ToNV12(cropped_i420->DataY(), cropped_i420->StrideY(),
cropped_i420->DataU(), cropped_i420->StrideU(),
cropped_i420->DataV(), cropped_i420->StrideV(),
nv12_buffer->MutableDataY(), nv12_buffer->StrideY(),
nv12_buffer->MutableDataUV(), nv12_buffer->StrideUV(),
i420_buffer->width(), i420_buffer->height());
cropped_buffer = nv12_buffer;
}
// Pass on color space from input frame if explicitly specified.
const ColorSpace& color_space =
input_image.ColorSpace() ? *input_image.ColorSpace()

View File

@@ -92,7 +92,10 @@ class H264DecoderImpl : public H264Decoder {
void ReportInit();
void ReportError();
VideoFrameBufferPool pool_;
// Used by ffmpeg via |AVGetBuffer2()| to allocate I420 images.
VideoFrameBufferPool ffmpeg_buffer_pool_;
// Used to allocate NV12 images if NV12 output is preferred.
VideoFrameBufferPool output_buffer_pool_;
std::unique_ptr<AVCodecContext, AVCodecContextDeleter> av_context_;
std::unique_ptr<AVFrame, AVFrameDeleter> av_frame_;
@@ -102,6 +105,9 @@ class H264DecoderImpl : public H264Decoder {
bool has_reported_error_;
webrtc::H264BitstreamParser h264_bitstream_parser_;
// Decoder should produce this format if possible.
const VideoFrameBuffer::Type preferred_output_format_;
};
} // namespace webrtc

View File

@@ -132,7 +132,10 @@ LibvpxVp8Decoder::LibvpxVp8Decoder()
key_frame_required_(true),
deblock_params_(use_postproc_ ? GetPostProcParamsFromFieldTrialGroup()
: absl::nullopt),
qp_smoother_(use_postproc_ ? new QpSmoother() : nullptr) {}
qp_smoother_(use_postproc_ ? new QpSmoother() : nullptr),
preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
? VideoFrameBuffer::Type::kNV12
: VideoFrameBuffer::Type::kI420) {}
LibvpxVp8Decoder::~LibvpxVp8Decoder() {
inited_ = true; // in order to do the actual release
@@ -328,8 +331,38 @@ int LibvpxVp8Decoder::ReturnFrame(
last_frame_width_ = img->d_w;
last_frame_height_ = img->d_h;
// Allocate memory for decoded image.
rtc::scoped_refptr<I420Buffer> buffer =
buffer_pool_.CreateI420Buffer(img->d_w, img->d_h);
rtc::scoped_refptr<VideoFrameBuffer> buffer;
if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
// Convert instead of making a copy.
// Note: libvpx doesn't support creating NV12 image directly.
// Due to the bitstream structure such a change would just hide the
// conversion operation inside the decode call.
rtc::scoped_refptr<NV12Buffer> nv12_buffer =
buffer_pool_.CreateNV12Buffer(img->d_w, img->d_h);
buffer = nv12_buffer;
if (nv12_buffer.get()) {
libyuv::I420ToNV12(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
nv12_buffer->MutableDataY(), nv12_buffer->StrideY(),
nv12_buffer->MutableDataUV(), nv12_buffer->StrideUV(),
img->d_w, img->d_h);
}
} else {
rtc::scoped_refptr<I420Buffer> i420_buffer =
buffer_pool_.CreateI420Buffer(img->d_w, img->d_h);
buffer = i420_buffer;
if (i420_buffer.get()) {
libyuv::I420Copy(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
i420_buffer->MutableDataY(), i420_buffer->StrideY(),
i420_buffer->MutableDataU(), i420_buffer->StrideU(),
i420_buffer->MutableDataV(), i420_buffer->StrideV(),
img->d_w, img->d_h);
}
}
if (!buffer.get()) {
// Pool has too many pending frames.
@@ -338,14 +371,6 @@ int LibvpxVp8Decoder::ReturnFrame(
return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
}
libyuv::I420Copy(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
buffer->MutableDataY(), buffer->StrideY(),
buffer->MutableDataU(), buffer->StrideU(),
buffer->MutableDataV(), buffer->StrideV(), img->d_w,
img->d_h);
VideoFrame decoded_image = VideoFrame::Builder()
.set_video_frame_buffer(buffer)
.set_timestamp_rtp(timestamp)

View File

@@ -64,6 +64,9 @@ class LibvpxVp8Decoder : public VideoDecoder {
bool key_frame_required_;
const absl::optional<DeblockParams> deblock_params_;
const std::unique_ptr<QpSmoother> qp_smoother_;
// Decoder should produce this format if possible.
const VideoFrameBuffer::Type preferred_output_format_;
};
} // namespace webrtc

View File

@@ -33,6 +33,7 @@
#include "rtc_base/time_utils.h"
#include "rtc_base/trace_event.h"
#include "system_wrappers/include/field_trial.h"
#include "third_party/libyuv/include/libyuv/convert.h"
#include "vpx/vp8cx.h"
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
@@ -1692,12 +1693,15 @@ VP9DecoderImpl::VP9DecoderImpl()
: decode_complete_callback_(nullptr),
inited_(false),
decoder_(nullptr),
key_frame_required_(true) {}
key_frame_required_(true),
preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
? VideoFrameBuffer::Type::kNV12
: VideoFrameBuffer::Type::kI420) {}
VP9DecoderImpl::~VP9DecoderImpl() {
inited_ = true; // in order to do the actual release
Release();
int num_buffers_in_use = frame_buffer_pool_.GetNumBuffersInUse();
int num_buffers_in_use = libvpx_buffer_pool_.GetNumBuffersInUse();
if (num_buffers_in_use > 0) {
// The frame buffers are reference counted and frames are exposed after
// decoding. There may be valid usage cases where previous frames are still
@@ -1758,7 +1762,7 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
return WEBRTC_VIDEO_CODEC_MEMORY;
}
if (!frame_buffer_pool_.InitializeVpxUsePool(decoder_)) {
if (!libvpx_buffer_pool_.InitializeVpxUsePool(decoder_)) {
return WEBRTC_VIDEO_CODEC_MEMORY;
}
@@ -1766,7 +1770,8 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
// Always start with a complete key frame.
key_frame_required_ = true;
if (inst && inst->buffer_pool_size) {
if (!frame_buffer_pool_.Resize(*inst->buffer_pool_size)) {
if (!libvpx_buffer_pool_.Resize(*inst->buffer_pool_size) ||
!output_buffer_pool_.Resize(*inst->buffer_pool_size)) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
}
@@ -1831,8 +1836,9 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image,
if (input_image.size() == 0) {
buffer = nullptr; // Triggers full frame concealment.
}
// During decode libvpx may get and release buffers from |frame_buffer_pool_|.
// In practice libvpx keeps a few (~3-4) buffers alive at a time.
// During decode libvpx may get and release buffers from
// |libvpx_buffer_pool_|. In practice libvpx keeps a few (~3-4) buffers alive
// at a time.
if (vpx_codec_decode(decoder_, buffer,
static_cast<unsigned int>(input_image.size()), 0,
VPX_DL_REALTIME)) {
@@ -1876,15 +1882,34 @@ int VP9DecoderImpl::ReturnFrame(
switch (img->bit_depth) {
case 8:
if (img->fmt == VPX_IMG_FMT_I420) {
img_wrapped_buffer = WrapI420Buffer(
img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
img->stride[VPX_PLANE_V],
// WrappedI420Buffer's mechanism for allowing the release of its
// frame buffer is through a callback function. This is where we
// should release |img_buffer|.
rtc::KeepRefUntilDone(img_buffer));
if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
rtc::scoped_refptr<NV12Buffer> nv12_buffer =
output_buffer_pool_.CreateNV12Buffer(img->d_w, img->d_h);
if (!nv12_buffer.get()) {
// Buffer pool is full.
return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
}
img_wrapped_buffer = nv12_buffer;
libyuv::I420ToNV12(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
nv12_buffer->MutableDataY(),
nv12_buffer->StrideY(),
nv12_buffer->MutableDataUV(),
nv12_buffer->StrideUV(), img->d_w, img->d_h);
// No holding onto img_buffer as it's no longer needed and can be
// reused.
} else {
img_wrapped_buffer = WrapI420Buffer(
img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
img->stride[VPX_PLANE_V],
// WrappedI420Buffer's mechanism for allowing the release of its
// frame buffer is through a callback function. This is where we
// should release |img_buffer|.
rtc::KeepRefUntilDone(img_buffer));
}
} else if (img->fmt == VPX_IMG_FMT_I444) {
img_wrapped_buffer = WrapI444Buffer(
img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
@@ -1945,7 +1970,7 @@ int VP9DecoderImpl::Release() {
if (decoder_ != nullptr) {
if (inited_) {
// When a codec is destroyed libvpx will release any buffers of
// |frame_buffer_pool_| it is currently using.
// |libvpx_buffer_pool_| it is currently using.
if (vpx_codec_destroy(decoder_)) {
ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
}
@@ -1956,7 +1981,8 @@ int VP9DecoderImpl::Release() {
// Releases buffers from the pool. Any buffers not in use are deleted. Buffers
// still referenced externally are deleted once fully released, not returning
// to the pool.
frame_buffer_pool_.ClearPool();
libvpx_buffer_pool_.ClearPool();
output_buffer_pool_.Release();
inited_ = false;
return ret_val;
}

View File

@@ -21,6 +21,7 @@
#include "api/fec_controller_override.h"
#include "api/video_codecs/video_encoder.h"
#include "common_video/include/video_frame_buffer_pool.h"
#include "media/base/vp9_profile.h"
#include "modules/video_coding/codecs/vp9/include/vp9.h"
#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
@@ -216,13 +217,18 @@ class VP9DecoderImpl : public VP9Decoder {
const webrtc::ColorSpace* explicit_color_space);
// Memory pool used to share buffers between libvpx and webrtc.
Vp9FrameBufferPool frame_buffer_pool_;
Vp9FrameBufferPool libvpx_buffer_pool_;
// Buffer pool used to allocate additionally needed NV12 buffers.
VideoFrameBufferPool output_buffer_pool_;
DecodedImageCallback* decode_complete_callback_;
bool inited_;
vpx_codec_ctx_t* decoder_;
bool key_frame_required_;
VideoCodec current_codec_;
int num_cores_;
// Decoder should produce this format if possible.
const VideoFrameBuffer::Type preferred_output_format_;
};
} // namespace webrtc