Reland "VP9 decoder: Sets thread count based on resolution, reinit on change."

This is a reland of d5925756980f6e82a55f57532c8d855e954459fb

Patchset 2 is a reland of
https://webrtc-review.googlesource.com/c/src/+/177012

Patchset 3 is a fix for a potential crash when InitDecode() is called from
VideoStreamDecoderImpl::GetDecoder(), where the decoder_settings
parameter is, a bit surprisingly, set to nullptr.

Original change's description:
> VP9 decoder: Sets thread count based on resolution, reinit on change.
>
> Previously, number of decoder threads for VP9 were always set to 8 but
> with a cap at number of cores. This was done since we "can't know" the
> resolution that will be used.
>
> With this change, we now initialize the number of threads based on
> resolution given in InitDecode(). If a resolution change happens in
> flight, it requires a keyframe. We therefore parse the header from
> any key frame and if it has a new resolution, we re-initialize the
> decoder.
>
> The number of threads used is based on pixel count. We set one thread
> as target for 1280x720, and scale up linearly from there. The 8-thread
> cap is gone, but we still limit it to the core count.
>
> This means for instance: 1 <= 720p, 2 for 1080p, 4 for 1440p, 9 for 4K.
>
> Bug: webrtc:11551
> Change-Id: I14c169a6c651c50bd1b870c4b22bc4495c8448fd
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174460
> Commit-Queue: Erik Språng <sprang@webrtc.org>
> Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#31507}

Bug: webrtc:11551
Change-Id: I2b4b146d0b8319f07ce1660202d6aa4b374eb015
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/177246
Reviewed-by: Johannes Kron <kron@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31527}
This commit is contained in:
Erik Språng
2020-06-15 16:52:13 +02:00
committed by Commit Bot
parent 9b526180c9
commit 969ccf0e12
5 changed files with 220 additions and 53 deletions

View File

@ -1733,4 +1733,12 @@ TEST_F(TestVp9Impl, ReenablingUpperLayerAfterKFWithInterlayerPredIsEnabled) {
EXPECT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameDelta);
}
TEST_F(TestVp9Impl, HandlesEmptyInitDecode) {
  // The decoder must accept being initialized without any codec settings and
  // must still release cleanly afterwards.
  std::unique_ptr<VideoDecoder> decoder = CreateDecoder();

  const auto init_status =
      decoder->InitDecode(/*codec_settings=*/nullptr, /*number_of_cores=*/1);
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, init_status);

  const auto release_status = decoder->Release();
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, release_status);
}
} // namespace webrtc

View File

@ -25,6 +25,7 @@
#include "common_video/libyuv/include/webrtc_libyuv.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/video_coding/codecs/vp9/svc_rate_allocator.h"
#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/rate_control_settings.h"
#include "rtc_base/keep_ref_until_done.h"
@ -45,8 +46,6 @@ namespace {
uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
int kMaxNumTiles4kVideo = 8;
// Maximum allowed PID difference for different per-layer frame-rate case.
const int kMaxAllowedPidDiff = 30;
@ -1668,14 +1667,32 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
// errors earlier than the multi-threads version.
// - Make peak CPU usage under control (not depending on input)
cfg.threads = 1;
(void)kMaxNumTiles4kVideo; // unused
#else
// We want to use multithreading when decoding high resolution videos. But,
// since we don't know resolution of input stream at this stage, we always
// enable it.
cfg.threads = std::min(number_of_cores, kMaxNumTiles4kVideo);
if (!inst) {
// No config provided - don't know resolution to decode yet.
// Set thread count to one in the meantime.
cfg.threads = 1;
} else {
// We want to use multithreading when decoding high resolution videos. But
// not too many in order to avoid overhead when many streams are decoded
// concurrently.
// Set 2 threads as target for 1280x720 pixel count, and then scale up
// linearly from there - but cap at physical core count.
// For common resolutions this results in:
// 1 for 360p
// 2 for 720p
// 4 for 1080p
// 8 for 1440p
// 18 for 4K
int num_threads =
std::max(1, 2 * (inst->width * inst->height) / (1280 * 720));
cfg.threads = std::min(number_of_cores, num_threads);
current_codec_ = *inst;
}
#endif
num_cores_ = number_of_cores;
vpx_codec_flags_t flags = 0;
if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
return WEBRTC_VIDEO_CODEC_MEMORY;
@ -1705,6 +1722,29 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image,
if (decode_complete_callback_ == nullptr) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
if (input_image._frameType == VideoFrameType::kVideoFrameKey) {
absl::optional<vp9::FrameInfo> frame_info =
vp9::ParseIntraFrameInfo(input_image.data(), input_image.size());
if (frame_info) {
if (frame_info->frame_width != current_codec_.width ||
frame_info->frame_height != current_codec_.height) {
// Resolution has changed, tear down and re-init a new decoder in
// order to get correct sizing.
Release();
current_codec_.width = frame_info->frame_width;
current_codec_.height = frame_info->frame_height;
int reinit_status = InitDecode(&current_codec_, num_cores_);
if (reinit_status != WEBRTC_VIDEO_CODEC_OK) {
RTC_LOG(LS_WARNING) << "Failed to re-init decoder.";
return reinit_status;
}
}
} else {
RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame.";
}
}
// Always start with a complete key frame.
if (key_frame_required_) {
if (input_image._frameType != VideoFrameType::kVideoFrameKey)

View File

@ -210,6 +210,8 @@ class VP9DecoderImpl : public VP9Decoder {
bool inited_;
vpx_codec_ctx_t* decoder_;
bool key_frame_required_;
VideoCodec current_codec_;
int num_cores_;
};
} // namespace webrtc

View File

@ -52,40 +52,65 @@ bool Vp9ReadSyncCode(rtc::BitBuffer* br) {
return true;
}
bool Vp9ReadColorConfig(rtc::BitBuffer* br, uint8_t profile) {
if (profile == 2 || profile == 3) {
// Bitdepth.
RETURN_FALSE_IF_ERROR(br->ConsumeBits(1));
bool Vp9ReadColorConfig(rtc::BitBuffer* br,
uint8_t profile,
FrameInfo* frame_info) {
if (profile == 0 || profile == 1) {
frame_info->bit_detph = BitDept::k8Bit;
} else if (profile == 2 || profile == 3) {
uint32_t ten_or_twelve_bits;
RETURN_FALSE_IF_ERROR(br->ReadBits(&ten_or_twelve_bits, 1));
frame_info->bit_detph =
ten_or_twelve_bits ? BitDept::k12Bit : BitDept::k10Bit;
}
uint32_t color_space;
RETURN_FALSE_IF_ERROR(br->ReadBits(&color_space, 3));
frame_info->color_space = static_cast<ColorSpace>(color_space);
// SRGB is 7.
if (color_space != 7) {
// YUV range flag.
RETURN_FALSE_IF_ERROR(br->ConsumeBits(1));
uint32_t color_range;
RETURN_FALSE_IF_ERROR(br->ReadBits(&color_range, 1));
frame_info->color_range =
color_range ? ColorRange::kFull : ColorRange::kStudio;
if (profile == 1 || profile == 3) {
// 1 bit: subsampling x.
// 1 bit: subsampling y.
RETURN_FALSE_IF_ERROR(br->ConsumeBits(2));
uint32_t reserved_bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
if (reserved_bit) {
RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set.";
return false;
uint32_t subsampling_x;
uint32_t subsampling_y;
RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_x, 1));
RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_y, 1));
if (subsampling_x) {
frame_info->sub_sampling =
subsampling_y ? YuvSubsampling::k420 : YuvSubsampling::k422;
} else {
frame_info->sub_sampling =
subsampling_y ? YuvSubsampling::k440 : YuvSubsampling::k444;
}
}
} else {
if (profile == 1 || profile == 3) {
uint32_t reserved_bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
if (reserved_bit) {
RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set.";
RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set.";
return false;
}
} else {
RTC_LOG(LS_WARNING) << "Failed to get QP. 4:4:4 color not supported in "
"profile 0 or 2.";
// Profile 0 or 2.
frame_info->sub_sampling = YuvSubsampling::k420;
}
} else {
// SRGB
frame_info->color_range = ColorRange::kFull;
if (profile == 1 || profile == 3) {
frame_info->sub_sampling = YuvSubsampling::k444;
uint32_t reserved_bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
if (reserved_bit) {
RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set.";
return false;
}
} else {
RTC_LOG(LS_WARNING) << "Failed to parse header. 4:4:4 color not supported"
" in profile 0 or 2.";
return false;
}
}
@ -93,24 +118,38 @@ bool Vp9ReadColorConfig(rtc::BitBuffer* br, uint8_t profile) {
return true;
}
bool Vp9ReadFrameSize(rtc::BitBuffer* br) {
// 2 bytes: frame width.
// 2 bytes: frame height.
return br->ConsumeBytes(4);
// Reads the coded frame dimensions from `br` into `frame_info`.
// Returns false if either 16-bit field cannot be read; `frame_info` is only
// written once both reads have succeeded.
bool Vp9ReadFrameSize(rtc::BitBuffer* br, FrameInfo* frame_info) {
  // Each dimension is stored as a 16-bit "value minus one" field: width
  // first, then height.
  uint16_t width_field;
  uint16_t height_field;
  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&width_field));
  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&height_field));

  // Undo the minus-one bias.
  frame_info->frame_width = width_field + 1;
  frame_info->frame_height = height_field + 1;
  return true;
}
bool Vp9ReadRenderSize(rtc::BitBuffer* br) {
uint32_t bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&bit, 1));
if (bit) {
// 2 bytes: render width.
// 2 bytes: render height.
RETURN_FALSE_IF_ERROR(br->ConsumeBytes(4));
// Reads the render dimensions from `br` into `frame_info`.
// A leading flag bit tells whether an explicit render size is present; when
// it is not, the render size is copied from the frame size already stored in
// `frame_info`. Returns false if any read from `br` fails.
bool Vp9ReadRenderSize(rtc::BitBuffer* br, FrameInfo* frame_info) {
  uint32_t has_explicit_render_size;
  RETURN_FALSE_IF_ERROR(br->ReadBits(&has_explicit_render_size, 1));

  if (!has_explicit_render_size) {
    // Render size equals frame size.
    frame_info->render_width = frame_info->frame_width;
    frame_info->render_height = frame_info->frame_height;
    return true;
  }

  // Two 16-bit "value minus one" fields: render width, then render height.
  uint16_t width_field;
  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&width_field));
  uint16_t height_field;
  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&height_field));
  frame_info->render_width = width_field + 1;
  frame_info->render_height = height_field + 1;
  return true;
}
bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br) {
bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br, FrameInfo* frame_info) {
uint32_t found_ref = 0;
for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) {
// Size in refs.
@ -120,11 +159,11 @@ bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br) {
}
if (!found_ref) {
if (!Vp9ReadFrameSize(br)) {
if (!Vp9ReadFrameSize(br, frame_info)) {
return false;
}
}
return Vp9ReadRenderSize(br);
return Vp9ReadRenderSize(br, frame_info);
}
bool Vp9ReadInterpolationFilter(rtc::BitBuffer* br) {
@ -166,14 +205,14 @@ bool Vp9ReadLoopfilter(rtc::BitBuffer* br) {
}
} // namespace
bool GetQp(const uint8_t* buf, size_t length, int* qp) {
bool Parse(const uint8_t* buf, size_t length, int* qp, FrameInfo* frame_info) {
rtc::BitBuffer br(buf, length);
// Frame marker.
uint32_t frame_marker;
RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_marker, 2));
if (frame_marker != 0x2) {
RTC_LOG(LS_WARNING) << "Failed to get QP. Frame marker should be 2.";
RTC_LOG(LS_WARNING) << "Failed to parse header. Frame marker should be 2.";
return false;
}
@ -181,6 +220,7 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
uint8_t profile;
if (!Vp9ReadProfile(&br, &profile))
return false;
frame_info->profile = profile;
// Show existing frame.
uint32_t show_existing_frame;
@ -195,18 +235,21 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_type, 1));
RETURN_FALSE_IF_ERROR(br.ReadBits(&show_frame, 1));
RETURN_FALSE_IF_ERROR(br.ReadBits(&error_resilient, 1));
frame_info->show_frame = show_frame;
frame_info->error_resilient = error_resilient;
if (!frame_type) {
if (frame_type == 0) {
// Key-frame.
if (!Vp9ReadSyncCode(&br))
return false;
if (!Vp9ReadColorConfig(&br, profile))
if (!Vp9ReadColorConfig(&br, profile, frame_info))
return false;
if (!Vp9ReadFrameSize(&br))
if (!Vp9ReadFrameSize(&br, frame_info))
return false;
if (!Vp9ReadRenderSize(&br))
if (!Vp9ReadRenderSize(&br, frame_info))
return false;
} else {
// Non-keyframe.
uint32_t intra_only = 0;
if (!show_frame)
RETURN_FALSE_IF_ERROR(br.ReadBits(&intra_only, 1));
@ -218,14 +261,14 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
return false;
if (profile > 0) {
if (!Vp9ReadColorConfig(&br, profile))
if (!Vp9ReadColorConfig(&br, profile, frame_info))
return false;
}
// Refresh frame flags.
RETURN_FALSE_IF_ERROR(br.ConsumeBits(8));
if (!Vp9ReadFrameSize(&br))
if (!Vp9ReadFrameSize(&br, frame_info))
return false;
if (!Vp9ReadRenderSize(&br))
if (!Vp9ReadRenderSize(&br, frame_info))
return false;
} else {
// Refresh frame flags.
@ -237,7 +280,7 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
RETURN_FALSE_IF_ERROR(br.ConsumeBits(4));
}
if (!Vp9ReadFrameSizeFromRefs(&br))
if (!Vp9ReadFrameSizeFromRefs(&br, frame_info))
return false;
// Allow high precision mv.
@ -267,6 +310,20 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
return true;
}
} // namespace vp9
bool GetQp(const uint8_t* buf, size_t length, int* qp) {
FrameInfo frame_info;
return Parse(buf, length, qp, &frame_info);
}
// Parses the VP9 header in `buf` (of `length` bytes) and returns the
// resulting FrameInfo. Returns nullopt if Parse() fails or if the parsed
// frame width is not positive.
absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf,
                                              size_t length) {
  FrameInfo parsed_info;
  int unused_qp = 0;  // Parse() requires a QP out-parameter; value is ignored.

  if (!Parse(buf, length, &unused_qp, &parsed_info)) {
    return absl::nullopt;
  }
  // A non-positive width means no frame size was filled in from the header.
  if (parsed_info.frame_width <= 0) {
    return absl::nullopt;
  }
  return parsed_info;
}
} // namespace vp9
} // namespace webrtc

View File

@ -13,6 +13,7 @@
#include <stddef.h>
#include <stdint.h>
#include "absl/types/optional.h"
namespace webrtc {
@ -22,6 +23,65 @@ namespace vp9 {
// Returns true on success, false otherwise.
bool GetQp(const uint8_t* buf, size_t length, int* qp);
// Bit depth per channel. Support varies by profile.
enum class BitDept : uint8_t {
  k8Bit = 8,
  k10Bit = 10,
  k12Bit = 12,
};

// Color space as signaled by the 3-bit color space field of the VP9 header.
enum class ColorSpace : uint8_t {
  CS_UNKNOWN = 0,    // Unknown (in this case the color space must be signaled
                     // outside the VP9 bitstream).
  CS_BT_601 = 1,     // CS_BT_601 Rec. ITU-R BT.601-7
  CS_BT_709 = 2,     // Rec. ITU-R BT.709-6
  CS_SMPTE_170 = 3,  // SMPTE-170
  CS_SMPTE_240 = 4,  // SMPTE-240
  CS_BT_2020 = 5,    // Rec. ITU-R BT.2020-2
  CS_RESERVED = 6,   // Reserved
  CS_RGB = 7,        // sRGB (IEC 61966-2-1)
};

// Value range of the decoded samples.
enum class ColorRange {
  kStudio,  // Studio swing:
            // For BitDepth equals 8:
            //     Y is between 16 and 235 inclusive.
            //     U and V are between 16 and 240 inclusive.
            // For BitDepth equals 10:
            //     Y is between 64 and 940 inclusive.
            //     U and V are between 64 and 960 inclusive.
            // For BitDepth equals 12:
            //     Y is between 256 and 3760.
            //     U and V are between 256 and 3840 inclusive.
  kFull     // Full swing; no restriction on Y, U, V values.
};

// Chroma subsampling layout.
enum class YuvSubsampling {
  k444,
  k440,
  k422,
  k420,
};

// Information extracted from a VP9 uncompressed frame header.
// Every member has a default so that a FrameInfo which was only partially
// filled in by the parser never exposes indeterminate values when copied.
struct FrameInfo {
  int profile = 0;  // Profile 0-3 are valid.
  bool show_frame = false;
  bool error_resilient = false;
  BitDept bit_detph = BitDept::k8Bit;
  ColorSpace color_space = ColorSpace::CS_UNKNOWN;
  // The two defaults below apply when the header carries no color config.
  // NOTE(review): studio range / 4:2:0 are assumed to be the right fallbacks
  // for profile 0 - confirm against the VP9 bitstream specification.
  ColorRange color_range = ColorRange::kStudio;
  YuvSubsampling sub_sampling = YuvSubsampling::k420;
  int frame_width = 0;
  int frame_height = 0;
  int render_width = 0;
  int render_height = 0;
};
// Parses frame information for a VP9 key-frame or all-intra frame from a
// bitstream. Returns nullopt on failure or if not a key-frame.
absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf,
size_t length);
} // namespace vp9
} // namespace webrtc