Support native scaling of VideoFrameBuffers in LibvpxVp8Encoder.

This is a follow-up to the VP9 CL, fixing VP8 this time. Context again:

This CL is part of Optimized Scaling efforts. In Chromium, the native
frame buffer is getting an optimized CropAndScale() implementation. To
support HW accelerated scaling, returning pre-scaled images and skipping
unnecessary intermediate downscales, WebRTC needs to 1) use CropAndScale
instead of libyuv::XXXXScale and 2) only map buffers it actually intends
to encode.
- To achieve this, WebRTC encoders are updated to map kNative video
  buffers so that in a follow-up CL VideoStreamEncoder can stop mapping
  intermediate buffer sizes. A rough sketch of the intended pattern follows.
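
As an illustration of the pattern (a sketch, not code from this CL's diff;
|buffer| stands for a frame's VideoFrameBuffer), an encoder that supports
I420 and NV12 first asks a kNative buffer for an already-mapped frame and
only falls back to software conversion when mapping fails:

  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
      formats = {VideoFrameBuffer::Type::kI420,
                 VideoFrameBuffer::Type::kNV12};
  rtc::scoped_refptr<VideoFrameBuffer> mapped =
      buffer->type() == VideoFrameBuffer::Type::kNative
          // May return HW-backed, pre-scaled memory in a supported format.
          ? buffer->GetMappedFrameBuffer(formats)
          : buffer;  // Non-native buffers are already mapped.
  if (!mapped) {
    mapped = buffer->ToI420();  // Last resort: CPU conversion.
  }

This way the only sizes that ever get mapped are the ones actually handed to
the encoder.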

Bug: webrtc:12469, chromium:1157072
Change-Id: I026527ae77e36f66d02e149ad6fe304f6a8ccb05
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/212600
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Evan Shrubsole <eshr@google.com>
Cr-Commit-Position: refs/heads/master@{#33537}
commit 3889de1c4c (parent 6a6715042a)
Author: Henrik Boström <hbos@webrtc.org>, committed by Commit Bot
Date: 2021-03-23 09:18:28 +01:00
5 changed files with 227 additions and 97 deletions

modules/video_coding/BUILD.gn

@@ -500,7 +500,10 @@ rtc_library("webrtc_vp8") {
     "../../system_wrappers:metrics",
     "//third_party/libyuv",
   ]
-  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+  absl_deps = [
+    "//third_party/abseil-cpp/absl/algorithm:container",
+    "//third_party/abseil-cpp/absl/types:optional",
+  ]
   if (rtc_build_libvpx) {
     deps += [ rtc_libvpx_dir ]
   }
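
(The new absl/algorithm:container dependency covers the absl::c_find() call
that the rewritten encoder uses below to check whether a mapped buffer's
pixel format is one of the supported ones.)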

modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc

@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>
 
+#include "absl/algorithm/container.h"
 #include "api/scoped_refptr.h"
 #include "api/video/video_content_type.h"
 #include "api/video/video_frame_buffer.h"
@@ -160,6 +161,41 @@ void ApplyVp8EncoderConfigToVpxConfig(const Vp8EncoderConfig& encoder_config,
   }
 }
 
+void SetRawImagePlanes(vpx_image_t* raw_image, VideoFrameBuffer* buffer) {
+  switch (buffer->type()) {
+    case VideoFrameBuffer::Type::kI420:
+    case VideoFrameBuffer::Type::kI420A: {
+      const I420BufferInterface* i420_buffer = buffer->GetI420();
+      RTC_DCHECK(i420_buffer);
+      raw_image->planes[VPX_PLANE_Y] =
+          const_cast<uint8_t*>(i420_buffer->DataY());
+      raw_image->planes[VPX_PLANE_U] =
+          const_cast<uint8_t*>(i420_buffer->DataU());
+      raw_image->planes[VPX_PLANE_V] =
+          const_cast<uint8_t*>(i420_buffer->DataV());
+      raw_image->stride[VPX_PLANE_Y] = i420_buffer->StrideY();
+      raw_image->stride[VPX_PLANE_U] = i420_buffer->StrideU();
+      raw_image->stride[VPX_PLANE_V] = i420_buffer->StrideV();
+      break;
+    }
+    case VideoFrameBuffer::Type::kNV12: {
+      const NV12BufferInterface* nv12_buffer = buffer->GetNV12();
+      RTC_DCHECK(nv12_buffer);
+      raw_image->planes[VPX_PLANE_Y] =
+          const_cast<uint8_t*>(nv12_buffer->DataY());
+      raw_image->planes[VPX_PLANE_U] =
+          const_cast<uint8_t*>(nv12_buffer->DataUV());
+      raw_image->planes[VPX_PLANE_V] = raw_image->planes[VPX_PLANE_U] + 1;
+      raw_image->stride[VPX_PLANE_Y] = nv12_buffer->StrideY();
+      raw_image->stride[VPX_PLANE_U] = nv12_buffer->StrideUV();
+      raw_image->stride[VPX_PLANE_V] = nv12_buffer->StrideUV();
+      break;
+    }
+    default:
+      RTC_NOTREACHED();
+  }
+}
+
 }  // namespace
 
 std::unique_ptr<VideoEncoder> VP8Encoder::Create() {
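
A note on the kNV12 case above: it relies on NV12 being semi-planar, which
lets libvpx read the interleaved chroma directly once the encoder is
configured with VPX_IMG_FMT_NV12 (see MaybeUpdatePixelFormat below). Roughly:

  // NV12 layout: a full-resolution Y plane followed by one half-resolution
  // plane of interleaved chroma pairs.
  //   DataY()  -> Y0 Y1 Y2 Y3 ...
  //   DataUV() -> U0 V0 U1 V1 ...
  // Hence planes[VPX_PLANE_U] = DataUV(), planes[VPX_PLANE_V] = DataUV() + 1,
  // and both chroma planes advance by the same StrideUV() per chroma row.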
@@ -929,40 +965,29 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame,
     flags[i] = send_key_frame ? VPX_EFLAG_FORCE_KF : EncodeFlags(tl_configs[i]);
   }
 
-  rtc::scoped_refptr<VideoFrameBuffer> input_image = frame.video_frame_buffer();
-  // Since we are extracting raw pointers from |input_image| to
-  // |raw_images_[0]|, the resolution of these frames must match.
-  RTC_DCHECK_EQ(input_image->width(), raw_images_[0].d_w);
-  RTC_DCHECK_EQ(input_image->height(), raw_images_[0].d_h);
-  switch (input_image->type()) {
-    case VideoFrameBuffer::Type::kI420:
-      PrepareI420Image(input_image->GetI420());
-      break;
-    case VideoFrameBuffer::Type::kNV12:
-      PrepareNV12Image(input_image->GetNV12());
-      break;
-    default: {
-      rtc::scoped_refptr<I420BufferInterface> i420_image =
-          input_image->ToI420();
-      if (!i420_image) {
-        RTC_LOG(LS_ERROR) << "Failed to convert "
-                          << VideoFrameBufferTypeToString(input_image->type())
-                          << " image to I420. Can't encode frame.";
-        return WEBRTC_VIDEO_CODEC_ERROR;
-      }
-      input_image = i420_image;
-      PrepareI420Image(i420_image);
-    }
-  }
+  // Scale and map buffers and set |raw_images_| to hold pointers to the result.
+  // Because |raw_images_| are set to hold pointers to the prepared buffers, we
+  // need to keep these buffers alive through reference counting until after
+  // encoding is complete.
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers =
+      PrepareBuffers(frame.video_frame_buffer());
+  if (prepared_buffers.empty()) {
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
   struct CleanUpOnExit {
-    explicit CleanUpOnExit(vpx_image_t& raw_image) : raw_image_(raw_image) {}
+    explicit CleanUpOnExit(
+        vpx_image_t* raw_image,
+        std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers)
+        : raw_image_(raw_image),
+          prepared_buffers_(std::move(prepared_buffers)) {}
     ~CleanUpOnExit() {
-      raw_image_.planes[VPX_PLANE_Y] = nullptr;
-      raw_image_.planes[VPX_PLANE_U] = nullptr;
-      raw_image_.planes[VPX_PLANE_V] = nullptr;
+      raw_image_->planes[VPX_PLANE_Y] = nullptr;
+      raw_image_->planes[VPX_PLANE_U] = nullptr;
+      raw_image_->planes[VPX_PLANE_V] = nullptr;
     }
-    vpx_image_t& raw_image_;
-  } clean_up_on_exit(raw_images_[0]);
+    vpx_image_t* raw_image_;
+    std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers_;
+  } clean_up_on_exit(&raw_images_[0], std::move(prepared_buffers));
 
   if (send_key_frame) {
     // Adapt the size of the key frame when in screenshare with 1 temporal
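
Design note on CleanUpOnExit: it is a scope guard. Since |raw_images_[0]| now
borrows plane pointers from refcounted buffers instead of owning a pixel
copy, the guard holds the prepared buffers alive for the duration of the
encode and nulls the borrowed pointers on every return path, so libvpx is
never left with dangling plane pointers.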
@@ -1262,61 +1287,109 @@ void LibvpxVp8Encoder::MaybeUpdatePixelFormat(vpx_img_fmt fmt) {
   }
 }
 
-void LibvpxVp8Encoder::PrepareI420Image(const I420BufferInterface* frame) {
-  RTC_DCHECK(!raw_images_.empty());
-  // Image in vpx_image_t format.
-  // Input image is const. VP8's raw image is not defined as const.
-  raw_images_[0].planes[VPX_PLANE_Y] = const_cast<uint8_t*>(frame->DataY());
-  raw_images_[0].planes[VPX_PLANE_U] = const_cast<uint8_t*>(frame->DataU());
-  raw_images_[0].planes[VPX_PLANE_V] = const_cast<uint8_t*>(frame->DataV());
-  raw_images_[0].stride[VPX_PLANE_Y] = frame->StrideY();
-  raw_images_[0].stride[VPX_PLANE_U] = frame->StrideU();
-  raw_images_[0].stride[VPX_PLANE_V] = frame->StrideV();
-  for (size_t i = 1; i < encoders_.size(); ++i) {
-    // Scale the image down a number of times by downsampling factor
-    libyuv::I420Scale(
-        raw_images_[i - 1].planes[VPX_PLANE_Y],
-        raw_images_[i - 1].stride[VPX_PLANE_Y],
-        raw_images_[i - 1].planes[VPX_PLANE_U],
-        raw_images_[i - 1].stride[VPX_PLANE_U],
-        raw_images_[i - 1].planes[VPX_PLANE_V],
-        raw_images_[i - 1].stride[VPX_PLANE_V], raw_images_[i - 1].d_w,
-        raw_images_[i - 1].d_h, raw_images_[i].planes[VPX_PLANE_Y],
-        raw_images_[i].stride[VPX_PLANE_Y], raw_images_[i].planes[VPX_PLANE_U],
-        raw_images_[i].stride[VPX_PLANE_U], raw_images_[i].planes[VPX_PLANE_V],
-        raw_images_[i].stride[VPX_PLANE_V], raw_images_[i].d_w,
-        raw_images_[i].d_h, libyuv::kFilterBilinear);
-  }
-}
-
-void LibvpxVp8Encoder::PrepareNV12Image(const NV12BufferInterface* frame) {
-  RTC_DCHECK(!raw_images_.empty());
-  // Image in vpx_image_t format.
-  // Input image is const. VP8's raw image is not defined as const.
-  raw_images_[0].planes[VPX_PLANE_Y] = const_cast<uint8_t*>(frame->DataY());
-  raw_images_[0].planes[VPX_PLANE_U] = const_cast<uint8_t*>(frame->DataUV());
-  raw_images_[0].planes[VPX_PLANE_V] = raw_images_[0].planes[VPX_PLANE_U] + 1;
-  raw_images_[0].stride[VPX_PLANE_Y] = frame->StrideY();
-  raw_images_[0].stride[VPX_PLANE_U] = frame->StrideUV();
-  raw_images_[0].stride[VPX_PLANE_V] = frame->StrideUV();
-  for (size_t i = 1; i < encoders_.size(); ++i) {
-    // Scale the image down a number of times by downsampling factor
-    libyuv::NV12Scale(
-        raw_images_[i - 1].planes[VPX_PLANE_Y],
-        raw_images_[i - 1].stride[VPX_PLANE_Y],
-        raw_images_[i - 1].planes[VPX_PLANE_U],
-        raw_images_[i - 1].stride[VPX_PLANE_U], raw_images_[i - 1].d_w,
-        raw_images_[i - 1].d_h, raw_images_[i].planes[VPX_PLANE_Y],
-        raw_images_[i].stride[VPX_PLANE_Y], raw_images_[i].planes[VPX_PLANE_U],
-        raw_images_[i].stride[VPX_PLANE_U], raw_images_[i].d_w,
-        raw_images_[i].d_h, libyuv::kFilterBilinear);
-    raw_images_[i].planes[VPX_PLANE_V] = raw_images_[i].planes[VPX_PLANE_U] + 1;
-  }
-}
+std::vector<rtc::scoped_refptr<VideoFrameBuffer>>
+LibvpxVp8Encoder::PrepareBuffers(rtc::scoped_refptr<VideoFrameBuffer> buffer) {
+  RTC_DCHECK_EQ(buffer->width(), raw_images_[0].d_w);
+  RTC_DCHECK_EQ(buffer->height(), raw_images_[0].d_h);
+  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
+      supported_formats = {VideoFrameBuffer::Type::kI420,
+                           VideoFrameBuffer::Type::kNV12};
+
+  rtc::scoped_refptr<VideoFrameBuffer> mapped_buffer;
+  if (buffer->type() != VideoFrameBuffer::Type::kNative) {
+    // |buffer| is already mapped.
+    mapped_buffer = buffer;
+  } else {
+    // Attempt to map to one of the supported formats.
+    mapped_buffer = buffer->GetMappedFrameBuffer(supported_formats);
+  }
+  if (!mapped_buffer ||
+      (absl::c_find(supported_formats, mapped_buffer->type()) ==
+           supported_formats.end() &&
+       mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
+    // Unknown pixel format or unable to map, convert to I420 and prepare that
+    // buffer instead to ensure Scale() is safe to use.
+    auto converted_buffer = buffer->ToI420();
+    if (!converted_buffer) {
+      RTC_LOG(LS_ERROR) << "Failed to convert "
+                        << VideoFrameBufferTypeToString(buffer->type())
+                        << " image to I420. Can't encode frame.";
+      return {};
+    }
+    // The buffer should now be a mapped I420 or I420A format, but some buffer
+    // implementations incorrectly return the wrong buffer format, such as
+    // kNative. As a workaround to this, we perform ToI420() a second time.
+    // TODO(https://crbug.com/webrtc/12602): When Android buffers have a
+    // correct ToI420() implementation, remove this workaround.
+    if (converted_buffer->type() != VideoFrameBuffer::Type::kI420 &&
+        converted_buffer->type() != VideoFrameBuffer::Type::kI420A) {
+      converted_buffer = converted_buffer->ToI420();
+      RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
+                converted_buffer->type() == VideoFrameBuffer::Type::kI420A);
+    }
+    // Because |buffer| had to be converted, use |converted_buffer| instead.
+    buffer = mapped_buffer = converted_buffer;
+  }
+
+  // Maybe update pixel format.
+  absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
+      mapped_type = {mapped_buffer->type()};
+  switch (mapped_buffer->type()) {
+    case VideoFrameBuffer::Type::kI420:
+    case VideoFrameBuffer::Type::kI420A:
+      MaybeUpdatePixelFormat(VPX_IMG_FMT_I420);
+      break;
+    case VideoFrameBuffer::Type::kNV12:
+      MaybeUpdatePixelFormat(VPX_IMG_FMT_NV12);
+      break;
+    default:
+      RTC_NOTREACHED();
+  }
+
+  // Prepare |raw_images_| from |mapped_buffer| and, if simulcast, scaled
+  // versions of |buffer|.
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> prepared_buffers;
+  SetRawImagePlanes(&raw_images_[0], mapped_buffer);
+  prepared_buffers.push_back(mapped_buffer);
+  for (size_t i = 1; i < encoders_.size(); ++i) {
+    // Native buffers should implement optimized scaling and are the preferred
+    // buffers to scale. But if the buffer isn't native, it should be cheaper
+    // to scale from the previously prepared buffer which is smaller than
+    // |buffer|.
+    VideoFrameBuffer* buffer_to_scale =
+        buffer->type() == VideoFrameBuffer::Type::kNative
+            ? buffer.get()
+            : prepared_buffers.back().get();
+    auto scaled_buffer =
+        buffer_to_scale->Scale(raw_images_[i].d_w, raw_images_[i].d_h);
+    if (scaled_buffer->type() == VideoFrameBuffer::Type::kNative) {
+      auto mapped_scaled_buffer =
+          scaled_buffer->GetMappedFrameBuffer(mapped_type);
+      RTC_DCHECK(mapped_scaled_buffer) << "Unable to map the scaled buffer.";
+      if (!mapped_scaled_buffer) {
+        RTC_LOG(LS_ERROR) << "Failed to map scaled "
+                          << VideoFrameBufferTypeToString(scaled_buffer->type())
+                          << " image to "
+                          << VideoFrameBufferTypeToString(mapped_buffer->type())
+                          << ". Can't encode frame.";
+        return {};
+      }
+      scaled_buffer = mapped_scaled_buffer;
+    }
+    RTC_DCHECK_EQ(scaled_buffer->type(), mapped_buffer->type())
+        << "Scaled frames must have the same type as the mapped frame.";
+    if (scaled_buffer->type() != mapped_buffer->type()) {
+      RTC_LOG(LS_ERROR) << "When scaling "
+                        << VideoFrameBufferTypeToString(buffer_to_scale->type())
+                        << ", the image was unexpectedly converted to "
+                        << VideoFrameBufferTypeToString(scaled_buffer->type())
+                        << ". Can't encode frame.";
+      return {};
+    }
+    SetRawImagePlanes(&raw_images_[i], scaled_buffer);
+    prepared_buffers.push_back(scaled_buffer);
+  }
+  return prepared_buffers;
+}
 
 // static
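
A note on the scaling order in PrepareBuffers(): for non-native input, each
layer is scaled from the previously prepared, already smaller buffer, so the
per-layer cost shrinks as the cascade proceeds. For example, with 1280x720
input and 2x downscaling per layer, layer 1 reads 1280x720 and writes
640x360, and layer 2 then only reads 640x360 to write 320x180 instead of
rereading the full frame. Native buffers are scaled from the original
|buffer| instead, since a hardware Scale()/CropAndScale() implementation can
produce any target size directly and may already have pre-scaled images
available.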

modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h

@@ -95,8 +95,13 @@ class LibvpxVp8Encoder : public VideoEncoder {
 
   bool UpdateVpxConfiguration(size_t stream_index);
 
   void MaybeUpdatePixelFormat(vpx_img_fmt fmt);
-  void PrepareI420Image(const I420BufferInterface* frame);
-  void PrepareNV12Image(const NV12BufferInterface* frame);
+  // Prepares |raw_image_| to reference image data of |buffer|, or of mapped or
+  // scaled versions of |buffer|. Returns a list of buffers that got referenced
+  // as a result, allowing the caller to keep references to them until after
+  // encoding has finished. On failure to convert the buffer, an empty list is
+  // returned.
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> PrepareBuffers(
+      rtc::scoped_refptr<VideoFrameBuffer> buffer);
 
   const std::unique_ptr<LibvpxInterface> libvpx_;

modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc

@@ -27,6 +27,7 @@
 #include "modules/video_coding/utility/vp8_header_parser.h"
 #include "rtc_base/time_utils.h"
 #include "test/field_trial.h"
+#include "test/mappable_native_buffer.h"
 #include "test/video_codec_settings.h"
 
 namespace webrtc {
@@ -715,4 +716,61 @@ TEST_F(TestVp8Impl, GetEncoderInfoFpsAllocationSimulcastVideo) {
               ::testing::ElementsAreArray(expected_fps_allocation));
 }
 
+class TestVp8ImplForPixelFormat
+    : public TestVp8Impl,
+      public ::testing::WithParamInterface<VideoFrameBuffer::Type> {
+ public:
+  TestVp8ImplForPixelFormat() : TestVp8Impl(), mappable_type_(GetParam()) {}
+
+ protected:
+  VideoFrameBuffer::Type mappable_type_;
+};
+
+TEST_P(TestVp8ImplForPixelFormat, EncodeNativeFrameSimulcast) {
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release());
+
+  // Configure simulcast.
+  codec_settings_.numberOfSimulcastStreams = 3;
+  codec_settings_.simulcastStream[0] = {
+      kWidth / 4, kHeight / 4, kFramerateFps, 1, 4000, 3000, 2000, 80, true};
+  codec_settings_.simulcastStream[1] = {
+      kWidth / 2, kHeight / 2, kFramerateFps, 1, 4000, 3000, 2000, 80, true};
+  codec_settings_.simulcastStream[2] = {
+      kWidth, kHeight, kFramerateFps, 1, 4000, 3000, 2000, 80, true};
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, kSettings));
+
+  // Create a zero-conversion NV12 frame (calling ToI420 on it crashes).
+  VideoFrame input_frame =
+      test::CreateMappableNativeFrame(1, mappable_type_, kWidth, kHeight);
+
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  EncodeAndWaitForFrame(input_frame, &encoded_frame, &codec_specific_info);
+
+  // After encoding, we expect one mapping per simulcast layer.
+  rtc::scoped_refptr<test::MappableNativeBuffer> mappable_buffer =
+      test::GetMappableNativeBufferFromVideoFrame(input_frame);
+  std::vector<rtc::scoped_refptr<VideoFrameBuffer>> mapped_buffers =
+      mappable_buffer->GetMappedFramedBuffers();
+  ASSERT_EQ(mapped_buffers.size(), 3u);
+  EXPECT_EQ(mapped_buffers[0]->type(), mappable_type_);
+  EXPECT_EQ(mapped_buffers[0]->width(), kWidth);
+  EXPECT_EQ(mapped_buffers[0]->height(), kHeight);
+  EXPECT_EQ(mapped_buffers[1]->type(), mappable_type_);
+  EXPECT_EQ(mapped_buffers[1]->width(), kWidth / 2);
+  EXPECT_EQ(mapped_buffers[1]->height(), kHeight / 2);
+  EXPECT_EQ(mapped_buffers[2]->type(), mappable_type_);
+  EXPECT_EQ(mapped_buffers[2]->width(), kWidth / 4);
+  EXPECT_EQ(mapped_buffers[2]->height(), kHeight / 4);
+  EXPECT_FALSE(mappable_buffer->DidConvertToI420());
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release());
+}
+
+INSTANTIATE_TEST_SUITE_P(All,
+                         TestVp8ImplForPixelFormat,
+                         ::testing::Values(VideoFrameBuffer::Type::kI420,
+                                           VideoFrameBuffer::Type::kNV12));
+
 }  // namespace webrtc
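
The key assertion is EXPECT_FALSE(mappable_buffer->DidConvertToI420()): all
three simulcast layers must be fed from mapped (or mapped-and-scaled) native
buffers, never through the I420 conversion fallback.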

modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc

@@ -1893,7 +1893,7 @@ rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
        mapped_buffer->type() != VideoFrameBuffer::Type::kI420A)) {
     // Unknown pixel format or unable to map, convert to I420 and prepare that
     // buffer instead to ensure Scale() is safe to use.
-    rtc::scoped_refptr<VideoFrameBuffer> converted_buffer = buffer->ToI420();
+    auto converted_buffer = buffer->ToI420();
     if (!converted_buffer) {
       RTC_LOG(LS_ERROR) << "Failed to convert "
                         << VideoFrameBufferTypeToString(buffer->type())
@@ -1902,21 +1902,12 @@ rtc::scoped_refptr<VideoFrameBuffer> LibvpxVp9Encoder::PrepareBufferForProfile0(
     }
     // The buffer should now be a mapped I420 or I420A format, but some buffer
     // implementations incorrectly return the wrong buffer format, such as
-    // kNative. As a workaround to this, we retry GetMappedFrameBuffer+ToI420.
+    // kNative. As a workaround to this, we perform ToI420() a second time.
     // TODO(https://crbug.com/webrtc/12602): When Android buffers have a
     // correct ToI420() implementation, remove this workaround.
     if (converted_buffer->type() != VideoFrameBuffer::Type::kI420 &&
         converted_buffer->type() != VideoFrameBuffer::Type::kI420A) {
-      if (converted_buffer->type() == VideoFrameBuffer::Type::kNative) {
-        auto mapped_converted_buffer =
-            converted_buffer->GetMappedFrameBuffer(supported_formats);
-        if (mapped_converted_buffer)
-          converted_buffer = mapped_converted_buffer;
-      }
-      if (converted_buffer->type() != VideoFrameBuffer::Type::kI420 &&
-          converted_buffer->type() != VideoFrameBuffer::Type::kI420A) {
-        converted_buffer = converted_buffer->ToI420();
-      }
+      converted_buffer = converted_buffer->ToI420();
       RTC_CHECK(converted_buffer->type() == VideoFrameBuffer::Type::kI420 ||
                 converted_buffer->type() == VideoFrameBuffer::Type::kI420A);
     }