H264: Improve FFmpeg decoder performance by using I420BufferPool.
Had to update I420BufferPool to allow zero-initializing buffers.

BUG=chromium:500605, chromium:468365, webrtc:5428

Review URL: https://codereview.webrtc.org/1645543003

Cr-Commit-Position: refs/heads/master@{#11505}
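For context, a minimal sketch (not part of this change) of how a caller is expected to use the updated pool API; the function name and the |width|/|height| parameters are placeholders:

#include "webrtc/common_video/include/i420_buffer_pool.h"

// Hypothetical usage; |width| and |height| would come from the decoded frame.
void ExampleUsage(int width, int height) {
  webrtc::I420BufferPool pool(true);  // zero-initialize newly allocated buffers
  rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer =
      pool.CreateBuffer(width, height);
  // When the last reference to |buffer| is released, the underlying I420Buffer
  // returns to the pool and is handed out again without being re-zeroed.
}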
@@ -48,7 +48,6 @@ source_set("common_video") {
   deps = [
     "..:webrtc_common",
-    "../modules/video_coding:webrtc_h264",
     "../system_wrappers",
   ]

@@ -19,7 +19,6 @@
       ],
       'dependencies': [
         '<(webrtc_root)/common.gyp:webrtc_common',
-        '<(webrtc_root)/modules/modules.gyp:webrtc_h264',
         '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers',
       ],
       'direct_dependent_settings': {

@@ -53,7 +53,8 @@ class PooledI420Buffer : public webrtc::VideoFrameBuffer {
 
 namespace webrtc {
 
-I420BufferPool::I420BufferPool() {
+I420BufferPool::I420BufferPool(bool zero_initialize)
+    : zero_initialize_(zero_initialize) {
   Release();
 }

@@ -83,7 +84,11 @@ rtc::scoped_refptr<VideoFrameBuffer> I420BufferPool::CreateBuffer(int width,
       return new rtc::RefCountedObject<PooledI420Buffer>(buffer);
   }
   // Allocate new buffer.
-  buffers_.push_back(new rtc::RefCountedObject<I420Buffer>(width, height));
+  rtc::scoped_refptr<I420Buffer> buffer = new rtc::RefCountedObject<I420Buffer>(
+      width, height);
+  if (zero_initialize_)
+    buffer->InitializeData();
+  buffers_.push_back(buffer);
   return new rtc::RefCountedObject<PooledI420Buffer>(buffers_.back());
 }

@@ -25,7 +25,9 @@ namespace webrtc {
 // changes, old buffers will be purged from the pool.
 class I420BufferPool {
  public:
-  I420BufferPool();
+  I420BufferPool() : I420BufferPool(false) {}
+  explicit I420BufferPool(bool zero_initialize);
 
   // Returns a buffer from the pool, or creates a new buffer if no suitable
   // buffer exists in the pool.
   rtc::scoped_refptr<VideoFrameBuffer> CreateBuffer(int width, int height);

@@ -36,6 +38,12 @@ class I420BufferPool {
  private:
   rtc::ThreadChecker thread_checker_;
   std::list<rtc::scoped_refptr<I420Buffer>> buffers_;
+  // If true, newly allocated buffers are zero-initialized. Note that recycled
+  // buffers are not zero'd before reuse. This is required of buffers used by
+  // FFmpeg according to http://crbug.com/390941, which only requires it for the
+  // initial allocation (as shown by FFmpeg's own buffer allocation code). It
+  // has to do with "Use-of-uninitialized-value" on "Linux_msan_chrome".
+  bool zero_initialize_;
 };
 
 }  // namespace webrtc

@@ -66,6 +66,7 @@ class I420Buffer : public VideoFrameBuffer {
  public:
   I420Buffer(int width, int height);
   I420Buffer(int width, int height, int stride_y, int stride_u, int stride_v);
+  void InitializeData();
 
   int width() const override;
   int height() const override;

@@ -18,6 +18,14 @@ static const int kBufferAlignment = 64;
 
 namespace webrtc {
 
+namespace {
+
+int I420DataSize(int height, int stride_y, int stride_u, int stride_v) {
+  return stride_y * height + (stride_u + stride_v) * ((height + 1) / 2);
+}
+
+}  // namespace
+
 uint8_t* VideoFrameBuffer::MutableData(PlaneType type) {
   RTC_NOTREACHED();
   return nullptr;

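As a quick sanity check of the size formula above (an illustrative calculation, not part of the change), consider a 1280x720 frame with stride_y = width and stride_u = stride_v = (width + 1) / 2 = 640:

// 1280 * 720 + (640 + 640) * ((720 + 1) / 2)
//   = 921600 + 1280 * 360
//   = 1382400 bytes, i.e. width * height * 3 / 2, the expected I420 size.
static_assert(1280 * 720 + (640 + 640) * ((720 + 1) / 2) == 1280 * 720 * 3 / 2,
              "I420DataSize example");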
@@ -40,7 +48,7 @@ I420Buffer::I420Buffer(int width,
       stride_u_(stride_u),
       stride_v_(stride_v),
       data_(static_cast<uint8_t*>(AlignedMalloc(
-          stride_y * height + (stride_u + stride_v) * ((height + 1) / 2),
+          I420DataSize(height, stride_y, stride_u, stride_v),
           kBufferAlignment))) {
   RTC_DCHECK_GT(width, 0);
   RTC_DCHECK_GT(height, 0);

@@ -52,6 +60,11 @@ I420Buffer::I420Buffer(int width,
 I420Buffer::~I420Buffer() {
 }
 
+void I420Buffer::InitializeData() {
+  memset(data_.get(), 0,
+         I420DataSize(height_, stride_y_, stride_u_, stride_v_));
+}
+
 int I420Buffer::width() const {
   return width_;
 }

@@ -152,6 +152,7 @@ source_set("webrtc_h264") {
     deps += [
       "//third_party/ffmpeg:ffmpeg",
       "//third_party/openh264:encoder",
+      "//webrtc/common_video",
     ]
   }
 }

@@ -40,6 +40,7 @@
       'dependencies': [
         '<(DEPTH)/third_party/ffmpeg/ffmpeg.gyp:ffmpeg',
         '<(DEPTH)/third_party/openh264/openh264.gyp:openh264_encoder',
+        '<(DEPTH)/webrtc/common_video/common_video.gyp:common_video',
       ],
       'sources': [
         'h264_decoder_impl.cc',

@@ -76,23 +76,18 @@ void InitializeFFmpeg() {
 
 #endif  // defined(WEBRTC_INITIALIZE_FFMPEG)
 
-// Called by FFmpeg when it is done with a frame buffer, see AVGetBuffer2.
-void AVFreeBuffer2(void* opaque, uint8_t* data) {
-  VideoFrame* video_frame = static_cast<VideoFrame*>(opaque);
-  delete video_frame;
-}
+}  // namespace
 
-// Called by FFmpeg when it needs a frame buffer to store decoded frames in.
-// The VideoFrames returned by FFmpeg at |Decode| originate from here. They are
-// reference counted and freed by FFmpeg using |AVFreeBuffer2|.
-// TODO(hbos): Use a frame pool for better performance instead of create/free.
-// Could be owned by decoder, |static_cast<H264DecoderImpl*>(context->opaque)|.
-// Consider verifying that the buffer was allocated by us to avoid unsafe type
-// cast. See https://bugs.chromium.org/p/webrtc/issues/detail?id=5428.
-int AVGetBuffer2(AVCodecContext* context, AVFrame* av_frame, int flags) {
-  RTC_CHECK_EQ(context->pix_fmt, kPixelFormat);  // Same as in InitDecode.
+int H264DecoderImpl::AVGetBuffer2(
+    AVCodecContext* context, AVFrame* av_frame, int flags) {
+  // Set in |InitDecode|.
+  H264DecoderImpl* decoder = static_cast<H264DecoderImpl*>(context->opaque);
+  // DCHECK values set in |InitDecode|.
+  RTC_DCHECK(decoder);
+  RTC_DCHECK_EQ(context->pix_fmt, kPixelFormat);
   // Necessary capability to be allowed to provide our own buffers.
-  RTC_CHECK(context->codec->capabilities | AV_CODEC_CAP_DR1);
+  RTC_DCHECK(context->codec->capabilities | AV_CODEC_CAP_DR1);
 
   // |av_frame->width| and |av_frame->height| are set by FFmpeg. These are the
   // actual image's dimensions and may be different from |context->width| and
   // |context->coded_width| due to reordering.

@@ -120,25 +115,18 @@ int AVGetBuffer2(AVCodecContext* context, AVFrame* av_frame, int flags) {
   // The video frame is stored in |video_frame|. |av_frame| is FFmpeg's version
   // of a video frame and will be set up to reference |video_frame|'s buffers.
   VideoFrame* video_frame = new VideoFrame();
-  int stride_y = width;
-  int stride_uv = (width + 1) / 2;
-  RTC_CHECK_EQ(0, video_frame->CreateEmptyFrame(
-      width, height, stride_y, stride_uv, stride_uv));
-  int total_size = video_frame->allocated_size(kYPlane) +
-                   video_frame->allocated_size(kUPlane) +
-                   video_frame->allocated_size(kVPlane);
-  RTC_DCHECK_EQ(total_size, stride_y * height +
-                (stride_uv + stride_uv) * ((height + 1) / 2));
-
   // FFmpeg expects the initial allocation to be zero-initialized according to
-  // http://crbug.com/390941.
-  // Using a single |av_frame->buf| - YUV is required to be a continuous blob of
-  // memory. We can zero-initialize with one memset operation for all planes.
+  // http://crbug.com/390941. Our pool is set up to zero-initialize new buffers.
+  video_frame->set_video_frame_buffer(
+      decoder->pool_.CreateBuffer(width, height));
+  // DCHECK that we have a continuous buffer as is required.
   RTC_DCHECK_EQ(video_frame->buffer(kUPlane),
       video_frame->buffer(kYPlane) + video_frame->allocated_size(kYPlane));
   RTC_DCHECK_EQ(video_frame->buffer(kVPlane),
       video_frame->buffer(kUPlane) + video_frame->allocated_size(kUPlane));
-  memset(video_frame->buffer(kYPlane), 0, total_size);
+  int total_size = video_frame->allocated_size(kYPlane) +
+                   video_frame->allocated_size(kUPlane) +
+                   video_frame->allocated_size(kVPlane);
 
   av_frame->format = context->pix_fmt;
   av_frame->reordered_opaque = context->reordered_opaque;

@@ -161,10 +149,16 @@ int AVGetBuffer2(AVCodecContext* context, AVFrame* av_frame, int flags) {
   return 0;
 }
 
-}  // namespace
+void H264DecoderImpl::AVFreeBuffer2(void* opaque, uint8_t* data) {
+  // The buffer pool recycles the buffer used by |video_frame| when there are no
+  // more references to it. |video_frame| is a thin buffer holder and is not
+  // recycled.
+  VideoFrame* video_frame = static_cast<VideoFrame*>(opaque);
+  delete video_frame;
+}
 
-H264DecoderImpl::H264DecoderImpl()
-    : decoded_image_callback_(nullptr) {
+H264DecoderImpl::H264DecoderImpl() : pool_(true),
+                                     decoded_image_callback_(nullptr) {
 }
 
 H264DecoderImpl::~H264DecoderImpl() {

@@ -209,13 +203,15 @@ int32_t H264DecoderImpl::InitDecode(const VideoCodec* codec_settings,
   av_context_->extradata = nullptr;
   av_context_->extradata_size = 0;
 
+  // If this is ever increased, look at |av_context_->thread_safe_callbacks| and
+  // make it possible to disable the thread checker in the frame buffer pool.
   av_context_->thread_count = 1;
   av_context_->thread_type = FF_THREAD_SLICE;
 
-  // FFmpeg will get video buffers from our AVGetBuffer2, memory managed by us.
+  // Function used by FFmpeg to get buffers to store decoded frames in.
   av_context_->get_buffer2 = AVGetBuffer2;
-  // get_buffer2 is called with the context, there |opaque| can be used to get a
-  // pointer |this|.
+  // |get_buffer2| is called with the context, there |opaque| can be used to get
+  // a pointer |this|.
   av_context_->opaque = this;
   // Use ref counted frames (av_frame_unref).
   av_context_->refcounted_frames = 1;  // true

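To make the allocator hand-off above easier to follow, here is a self-contained sketch of the get_buffer2 contract the decoder relies on. It is an illustration using plain FFmpeg APIs (av_malloc, av_image_fill_arrays, av_buffer_create) with hypothetical names, not the pooled implementation from this change:

extern "C" {
#include "libavcodec/avcodec.h"
#include "libavutil/imgutils.h"
#include "libavutil/mem.h"
}

// Free callback invoked by FFmpeg when it drops its last reference to the
// AVBufferRef created below. In this CL, this is where the VideoFrame holder
// is deleted so the pooled buffer can be recycled.
static void ExampleFreeBuffer2(void* opaque, uint8_t* data) {
  av_free(data);
}

// get_buffer2 callback: FFmpeg calls it once per decoded frame, with
// |av_frame->width|, |av_frame->height| and |context->pix_fmt| already set.
static int ExampleGetBuffer2(AVCodecContext* context, AVFrame* av_frame,
                             int flags) {
  int size = av_image_get_buffer_size(context->pix_fmt, av_frame->width,
                                      av_frame->height, 1);
  uint8_t* data = static_cast<uint8_t*>(av_malloc(size));
  if (!data)
    return AVERROR(ENOMEM);
  // Point av_frame->data[]/linesize[] at the planes inside the single blob.
  av_image_fill_arrays(av_frame->data, av_frame->linesize, data,
                       context->pix_fmt, av_frame->width, av_frame->height, 1);
  // Hand ownership to FFmpeg; ExampleFreeBuffer2 runs when the last ref goes.
  av_frame->buf[0] = av_buffer_create(data, size, &ExampleFreeBuffer2,
                                      /*opaque=*/nullptr, 0);
  if (!av_frame->buf[0]) {
    av_free(data);
    return AVERROR(ENOMEM);
  }
  return 0;
}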
@@ -19,6 +19,7 @@ extern "C" {
 }  // extern "C"
 
 #include "webrtc/base/scoped_ptr.h"
+#include "webrtc/common_video/include/i420_buffer_pool.h"
 
 namespace webrtc {
 
@@ -52,8 +53,17 @@ class H264DecoderImpl : public H264Decoder {
                  int64_t render_time_ms = -1) override;
 
  private:
+  // Called by FFmpeg when it needs a frame buffer to store decoded frames in.
+  // The |VideoFrame| returned by FFmpeg at |Decode| originate from here. Their
+  // buffers are reference counted and freed by FFmpeg using |AVFreeBuffer2|.
+  static int AVGetBuffer2(
+      AVCodecContext* context, AVFrame* av_frame, int flags);
+  // Called by FFmpeg when it is done with a video frame, see |AVGetBuffer2|.
+  static void AVFreeBuffer2(void* opaque, uint8_t* data);
+
   bool IsInitialized() const;
 
+  I420BufferPool pool_;
   rtc::scoped_ptr<AVCodecContext, AVCodecContextDeleter> av_context_;
   rtc::scoped_ptr<AVFrame, AVFrameDeleter> av_frame_;