VP9 decoder: Sets thread count based on resolution, reinit on change.

Previously, the number of decoder threads for VP9 was always set to 8, but
with a cap at the number of cores. This was done since we "can't know" the
resolution that will be used.

With this change, we now initialize the number of threads based on the
resolution given in InitDecode(). If a resolution change happens in
flight, it requires a keyframe. We therefore parse the header from
any key frame and if it has a new resolution, we re-initialize the
decoder.

The number of threads used is based on pixel count. We set one thread
as target for 1280x720, and scale up linearly from there. The 8-thread
cap is gone, but we still limit it to the core count.

This means for instance: 1 <= 720p, 2 for 1080p, 4 for 1440p, 9 for 4K.

Bug: webrtc:11551
Change-Id: I14c169a6c651c50bd1b870c4b22bc4495c8448fd
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174460
Commit-Queue: Erik Språng <sprang@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31507}
This commit is contained in:
Erik Språng
2020-06-11 18:24:22 +02:00
committed by Commit Bot
parent 33c0c342f6
commit d592575698
4 changed files with 199 additions and 53 deletions

View File

@ -25,6 +25,7 @@
#include "common_video/libyuv/include/webrtc_libyuv.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/video_coding/codecs/vp9/svc_rate_allocator.h"
#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/rate_control_settings.h"
#include "rtc_base/keep_ref_until_done.h"
@ -45,8 +46,6 @@ namespace {
uint8_t kRefBufIdx[4] = {0, 0, 0, 1};
uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
int kMaxNumTiles4kVideo = 8;
// Maximum allowed PID difference for different per-layer frame-rate case.
const int kMaxAllowedPidDiff = 30;
@ -1659,13 +1658,18 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
// errors earlier than the multi-threads version.
// - Make peak CPU usage under control (not depending on input)
cfg.threads = 1;
(void)kMaxNumTiles4kVideo; // unused
#else
// We want to use multithreading when decoding high resolution videos. But,
// since we don't know resolution of input stream at this stage, we always
// enable it.
cfg.threads = std::min(number_of_cores, kMaxNumTiles4kVideo);
// We want to use multithreading when decoding high resolution videos. But not
// too many in order to avoid overhead when many streams are decoded
// concurrently.
// Set 1280x720 pixel count as target for one core, and then scale up linearly
// from there - but cap at physical core count.
// This results in 2 for 1080p, 4 for 1440p and 9 for 4K.
int num_threads = std::max(1, (inst->width * inst->height) / (1280 * 720));
cfg.threads = std::min(number_of_cores, num_threads);
#endif
current_codec_ = *inst;
num_cores_ = number_of_cores;
vpx_codec_flags_t flags = 0;
if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
@ -1696,6 +1700,29 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image,
if (decode_complete_callback_ == nullptr) {
return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
}
if (input_image._frameType == VideoFrameType::kVideoFrameKey) {
absl::optional<vp9::FrameInfo> frame_info =
vp9::ParseIntraFrameInfo(input_image.data(), input_image.size());
if (frame_info) {
if (frame_info->frame_width != current_codec_.width ||
frame_info->frame_height != current_codec_.height) {
// Resolution has changed, tear down and re-init a new decoder in
// order to get correct sizing.
Release();
current_codec_.width = frame_info->frame_width;
current_codec_.height = frame_info->frame_height;
int reinit_status = InitDecode(&current_codec_, num_cores_);
if (reinit_status != WEBRTC_VIDEO_CODEC_OK) {
RTC_LOG(LS_WARNING) << "Failed to re-init decoder.";
return reinit_status;
}
}
} else {
RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame.";
}
}
// Always start with a complete key frame.
if (key_frame_required_) {
if (input_image._frameType != VideoFrameType::kVideoFrameKey)

View File

@ -210,6 +210,8 @@ class VP9DecoderImpl : public VP9Decoder {
bool inited_;
vpx_codec_ctx_t* decoder_;
bool key_frame_required_;
VideoCodec current_codec_;
int num_cores_;
};
} // namespace webrtc

View File

@ -52,40 +52,65 @@ bool Vp9ReadSyncCode(rtc::BitBuffer* br) {
return true;
}
bool Vp9ReadColorConfig(rtc::BitBuffer* br, uint8_t profile) {
if (profile == 2 || profile == 3) {
// Bitdepth.
RETURN_FALSE_IF_ERROR(br->ConsumeBits(1));
bool Vp9ReadColorConfig(rtc::BitBuffer* br,
uint8_t profile,
FrameInfo* frame_info) {
if (profile == 0 || profile == 1) {
frame_info->bit_detph = BitDept::k8Bit;
} else if (profile == 2 || profile == 3) {
uint32_t ten_or_twelve_bits;
RETURN_FALSE_IF_ERROR(br->ReadBits(&ten_or_twelve_bits, 1));
frame_info->bit_detph =
ten_or_twelve_bits ? BitDept::k12Bit : BitDept::k10Bit;
}
uint32_t color_space;
RETURN_FALSE_IF_ERROR(br->ReadBits(&color_space, 3));
frame_info->color_space = static_cast<ColorSpace>(color_space);
// SRGB is 7.
if (color_space != 7) {
// YUV range flag.
RETURN_FALSE_IF_ERROR(br->ConsumeBits(1));
uint32_t color_range;
RETURN_FALSE_IF_ERROR(br->ReadBits(&color_range, 1));
frame_info->color_range =
color_range ? ColorRange::kFull : ColorRange::kStudio;
if (profile == 1 || profile == 3) {
// 1 bit: subsampling x.
// 1 bit: subsampling y.
RETURN_FALSE_IF_ERROR(br->ConsumeBits(2));
uint32_t reserved_bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
if (reserved_bit) {
RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set.";
return false;
uint32_t subsampling_x;
uint32_t subsampling_y;
RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_x, 1));
RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_y, 1));
if (subsampling_x) {
frame_info->sub_sampling =
subsampling_y ? YuvSubsampling::k420 : YuvSubsampling::k422;
} else {
frame_info->sub_sampling =
subsampling_y ? YuvSubsampling::k440 : YuvSubsampling::k444;
}
}
} else {
if (profile == 1 || profile == 3) {
uint32_t reserved_bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
if (reserved_bit) {
RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set.";
RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set.";
return false;
}
} else {
RTC_LOG(LS_WARNING) << "Failed to get QP. 4:4:4 color not supported in "
"profile 0 or 2.";
// Profile 0 or 2.
frame_info->sub_sampling = YuvSubsampling::k420;
}
} else {
// SRGB
frame_info->color_range = ColorRange::kFull;
if (profile == 1 || profile == 3) {
frame_info->sub_sampling = YuvSubsampling::k444;
uint32_t reserved_bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1));
if (reserved_bit) {
RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set.";
return false;
}
} else {
RTC_LOG(LS_WARNING) << "Failed to parse header. 4:4:4 color not supported"
" in profile 0 or 2.";
return false;
}
}
@ -93,24 +118,38 @@ bool Vp9ReadColorConfig(rtc::BitBuffer* br, uint8_t profile) {
return true;
}
bool Vp9ReadFrameSize(rtc::BitBuffer* br) {
// 2 bytes: frame width.
// 2 bytes: frame height.
return br->ConsumeBytes(4);
// Reads the frame size from the bitstream and stores it in |frame_info|.
// The header carries each dimension as a 16-bit "value minus one" field,
// width first and then height.
// Returns false if either field cannot be read; in that case |frame_info| is
// left untouched because both reads happen before any assignment.
bool Vp9ReadFrameSize(rtc::BitBuffer* br, FrameInfo* frame_info) {
  uint16_t coded_width = 0;   // Holds frame width - 1.
  uint16_t coded_height = 0;  // Holds frame height - 1.
  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&coded_width));
  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&coded_height));

  // Undo the "minus one" bias. The addition is done in int, so the maximum
  // coded value (0xFFFF) yields 65536 rather than wrapping around.
  frame_info->frame_width = coded_width + 1;
  frame_info->frame_height = coded_height + 1;
  return true;
}
bool Vp9ReadRenderSize(rtc::BitBuffer* br) {
uint32_t bit;
RETURN_FALSE_IF_ERROR(br->ReadBits(&bit, 1));
if (bit) {
// 2 bytes: render width.
// 2 bytes: render height.
RETURN_FALSE_IF_ERROR(br->ConsumeBytes(4));
// Reads the render size from the bitstream and stores it in |frame_info|.
// A one-bit flag tells whether an explicit render size follows. When it does
// not, the render size is copied from the frame size already present in
// |frame_info|.
// Returns false if any read from |br| fails.
bool Vp9ReadRenderSize(rtc::BitBuffer* br, FrameInfo* frame_info) {
  uint32_t has_explicit_render_size;
  RETURN_FALSE_IF_ERROR(br->ReadBits(&has_explicit_render_size, 1));

  if (!has_explicit_render_size) {
    // Render size equals frame size.
    frame_info->render_width = frame_info->frame_width;
    frame_info->render_height = frame_info->frame_height;
    return true;
  }

  // Explicit render size: two 16-bit "value minus one" fields, width first.
  uint16_t coded_render_width;
  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&coded_render_width));
  uint16_t coded_render_height;
  RETURN_FALSE_IF_ERROR(br->ReadUInt16(&coded_render_height));
  frame_info->render_width = coded_render_width + 1;
  frame_info->render_height = coded_render_height + 1;
  return true;
}
bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br) {
bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br, FrameInfo* frame_info) {
uint32_t found_ref = 0;
for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) {
// Size in refs.
@ -120,11 +159,11 @@ bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br) {
}
if (!found_ref) {
if (!Vp9ReadFrameSize(br)) {
if (!Vp9ReadFrameSize(br, frame_info)) {
return false;
}
}
return Vp9ReadRenderSize(br);
return Vp9ReadRenderSize(br, frame_info);
}
bool Vp9ReadInterpolationFilter(rtc::BitBuffer* br) {
@ -166,14 +205,14 @@ bool Vp9ReadLoopfilter(rtc::BitBuffer* br) {
}
} // namespace
bool GetQp(const uint8_t* buf, size_t length, int* qp) {
bool Parse(const uint8_t* buf, size_t length, int* qp, FrameInfo* frame_info) {
rtc::BitBuffer br(buf, length);
// Frame marker.
uint32_t frame_marker;
RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_marker, 2));
if (frame_marker != 0x2) {
RTC_LOG(LS_WARNING) << "Failed to get QP. Frame marker should be 2.";
RTC_LOG(LS_WARNING) << "Failed to parse header. Frame marker should be 2.";
return false;
}
@ -181,6 +220,7 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
uint8_t profile;
if (!Vp9ReadProfile(&br, &profile))
return false;
frame_info->profile = profile;
// Show existing frame.
uint32_t show_existing_frame;
@ -195,18 +235,21 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_type, 1));
RETURN_FALSE_IF_ERROR(br.ReadBits(&show_frame, 1));
RETURN_FALSE_IF_ERROR(br.ReadBits(&error_resilient, 1));
frame_info->show_frame = show_frame;
frame_info->error_resilient = error_resilient;
if (!frame_type) {
if (frame_type == 0) {
// Key-frame.
if (!Vp9ReadSyncCode(&br))
return false;
if (!Vp9ReadColorConfig(&br, profile))
if (!Vp9ReadColorConfig(&br, profile, frame_info))
return false;
if (!Vp9ReadFrameSize(&br))
if (!Vp9ReadFrameSize(&br, frame_info))
return false;
if (!Vp9ReadRenderSize(&br))
if (!Vp9ReadRenderSize(&br, frame_info))
return false;
} else {
// Non-keyframe.
uint32_t intra_only = 0;
if (!show_frame)
RETURN_FALSE_IF_ERROR(br.ReadBits(&intra_only, 1));
@ -218,14 +261,14 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
return false;
if (profile > 0) {
if (!Vp9ReadColorConfig(&br, profile))
if (!Vp9ReadColorConfig(&br, profile, frame_info))
return false;
}
// Refresh frame flags.
RETURN_FALSE_IF_ERROR(br.ConsumeBits(8));
if (!Vp9ReadFrameSize(&br))
if (!Vp9ReadFrameSize(&br, frame_info))
return false;
if (!Vp9ReadRenderSize(&br))
if (!Vp9ReadRenderSize(&br, frame_info))
return false;
} else {
// Refresh frame flags.
@ -237,7 +280,7 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
RETURN_FALSE_IF_ERROR(br.ConsumeBits(4));
}
if (!Vp9ReadFrameSizeFromRefs(&br))
if (!Vp9ReadFrameSizeFromRefs(&br, frame_info))
return false;
// Allow high precision mv.
@ -267,6 +310,20 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) {
return true;
}
} // namespace vp9
bool GetQp(const uint8_t* buf, size_t length, int* qp) {
FrameInfo frame_info;
return Parse(buf, length, qp, &frame_info);
}
// Parses the header in |buf| (|length| bytes) via Parse() and returns the
// resulting FrameInfo.
// Returns absl::nullopt if Parse() fails or if no positive frame width was
// obtained (frame_width keeps its default of 0 when the frame size is never
// read from the bitstream).
absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf,
                                              size_t length) {
  FrameInfo parsed_info;
  int unused_qp = 0;  // Parse() also outputs a QP value; it is not needed here.
  if (!Parse(buf, length, &unused_qp, &parsed_info)) {
    return absl::nullopt;
  }
  if (parsed_info.frame_width <= 0) {
    return absl::nullopt;
  }
  return parsed_info;
}
} // namespace vp9
} // namespace webrtc

View File

@ -13,6 +13,7 @@
#include <stddef.h>
#include <stdint.h>
#include "absl/types/optional.h"
namespace webrtc {
@ -22,6 +23,65 @@ namespace vp9 {
// Returns true on success, false otherwise.
bool GetQp(const uint8_t* buf, size_t length, int* qp);
// Number of bits used per color channel. Which depths are available depends
// on the profile. Each enumerator's numeric value equals the bit count it
// names.
enum class BitDept : uint8_t {
  // 8 bits per channel.
  k8Bit = 8,
  // 10 bits per channel.
  k10Bit = 10,
  // 12 bits per channel.
  k12Bit = 12,
};
// Color space of a VP9 frame. The numeric values are significant: the parser
// casts the raw color-space field read from the bitstream directly to this
// type.
enum class ColorSpace : uint8_t {
  // Unknown (in this case the color space must be signaled outside the VP9
  // bitstream).
  CS_UNKNOWN = 0,
  // Rec. ITU-R BT.601-7
  CS_BT_601 = 1,
  // Rec. ITU-R BT.709-6
  CS_BT_709 = 2,
  // SMPTE-170
  CS_SMPTE_170 = 3,
  // SMPTE-240
  CS_SMPTE_240 = 4,
  // Rec. ITU-R BT.2020-2
  CS_BT_2020 = 5,
  // Reserved
  CS_RESERVED = 6,
  // sRGB (IEC 61966-2-1)
  CS_RGB = 7,
};
// Value range ("swing") of the Y, U and V samples.
enum class ColorRange {
  // Studio swing:
  //   BitDepth 8:  Y in [16, 235],   U and V in [16, 240].
  //   BitDepth 10: Y in [64, 940],   U and V in [64, 960].
  //   BitDepth 12: Y in [256, 3760], U and V in [256, 3840].
  // All bounds are inclusive.
  kStudio = 0,
  // Full swing; no restriction on Y, U, V values.
  kFull = 1,
};
// Chroma subsampling format. The (subsampling_x, subsampling_y) pairs noted
// below are the header flag combinations the parser maps to each value.
enum class YuvSubsampling {
  k444 = 0,  // subsampling_x = 0, subsampling_y = 0.
  k440 = 1,  // subsampling_x = 0, subsampling_y = 1.
  k422 = 2,  // subsampling_x = 1, subsampling_y = 0.
  k420 = 3,  // subsampling_x = 1, subsampling_y = 1.
};
// Information extracted from a VP9 uncompressed frame header.
//
// Every member has a default initializer. The parser does not reach every
// header field on every code path, and a default-constructed FrameInfo is
// copied into the absl::optional returned by ParseIntraFrameInfo(). Leaving a
// member uninitialized would therefore expose an indeterminate value to
// callers.
struct FrameInfo {
  int profile = 0;  // Profile 0-3 are valid.
  bool show_frame = false;
  bool error_resilient = false;
  // NOTE: the member name keeps its historical spelling ("detph") because
  // other code refers to it by this name.
  BitDept bit_detph = BitDept::k8Bit;
  ColorSpace color_space = ColorSpace::CS_UNKNOWN;
  // Defaults to studio swing, the value the parser selects when the
  // color-range bit is 0.
  ColorRange color_range = ColorRange::kStudio;
  // Defaults to 4:2:0, the format the parser assigns for profiles 0 and 2
  // with a non-sRGB color space.
  YuvSubsampling sub_sampling = YuvSubsampling::k420;
  // Frame and render dimensions; 0 means "not parsed".
  int frame_width = 0;
  int frame_height = 0;
  int render_width = 0;
  int render_height = 0;
};
// Parses frame information for a VP9 key-frame or all-intra frame from a
// bitstream. Returns nullopt on failure or if not a key-frame.
absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf,
size_t length);
} // namespace vp9
} // namespace webrtc