diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc index 3221c55725..4d5b8497d1 100644 --- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc +++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -1733,4 +1733,12 @@ TEST_F(TestVp9Impl, ReenablingUpperLayerAfterKFWithInterlayerPredIsEnabled) { EXPECT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameDelta); } +TEST_F(TestVp9Impl, HandlesEmptyInitDecode) { + std::unique_ptr<VideoDecoder> decoder = CreateDecoder(); + // Check that nullptr settings are ok for decoder. + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + decoder->InitDecode(/*codec_settings=*/nullptr, 1)); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder->Release()); +} + } // namespace webrtc diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc index 568f13336e..f557594b15 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -25,6 +25,7 @@ #include "common_video/libyuv/include/webrtc_libyuv.h" #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h" #include "modules/video_coding/codecs/vp9/svc_rate_allocator.h" +#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h" #include "rtc_base/checks.h" #include "rtc_base/experiments/rate_control_settings.h" #include "rtc_base/keep_ref_until_done.h" @@ -45,8 +46,6 @@ namespace { uint8_t kRefBufIdx[4] = {0, 0, 0, 1}; uint8_t kUpdBufIdx[4] = {0, 0, 1, 0}; -int kMaxNumTiles4kVideo = 8; - // Maximum allowed PID difference for differnet per-layer frame-rate case. const int kMaxAllowedPidDiff = 30; @@ -1668,14 +1667,32 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) { // errors earlier than the multi-threads version. 
// - Make peak CPU usage under control (not depending on input) cfg.threads = 1; - (void)kMaxNumTiles4kVideo; // unused #else - // We want to use multithreading when decoding high resolution videos. But, - // since we don't know resolution of input stream at this stage, we always - // enable it. - cfg.threads = std::min(number_of_cores, kMaxNumTiles4kVideo); + if (!inst) { + // No config provided - don't know resolution to decode yet. + // Set thread count to one in the meantime. + cfg.threads = 1; + } else { + // We want to use multithreading when decoding high resolution videos. But + // not too many in order to avoid overhead when many streams are decoded + // concurrently. + // Set 2 threads as target for 1280x720 pixel count, and then scale up + // linearly from there - but cap at physical core count. + // For common resolutions this results in: + // 1 for 360p + // 2 for 720p + // 4 for 1080p + // 8 for 1440p + // 18 for 4K + int num_threads = + std::max(1, 2 * (inst->width * inst->height) / (1280 * 720)); + cfg.threads = std::min(number_of_cores, num_threads); + current_codec_ = *inst; + } #endif + num_cores_ = number_of_cores; + vpx_codec_flags_t flags = 0; if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) { return WEBRTC_VIDEO_CODEC_MEMORY; @@ -1705,6 +1722,29 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image, if (decode_complete_callback_ == nullptr) { return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } + + if (input_image._frameType == VideoFrameType::kVideoFrameKey) { + absl::optional<vp9::FrameInfo> frame_info = + vp9::ParseIntraFrameInfo(input_image.data(), input_image.size()); + if (frame_info) { + if (frame_info->frame_width != current_codec_.width || + frame_info->frame_height != current_codec_.height) { + // Resolution has changed, tear down and re-init a new decoder in + // order to get correct sizing. 
+ Release(); + current_codec_.width = frame_info->frame_width; + current_codec_.height = frame_info->frame_height; + int reinit_status = InitDecode(&current_codec_, num_cores_); + if (reinit_status != WEBRTC_VIDEO_CODEC_OK) { + RTC_LOG(LS_WARNING) << "Failed to re-init decoder."; + return reinit_status; + } + } + } else { + RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame."; + } + } + // Always start with a complete key frame. if (key_frame_required_) { if (input_image._frameType != VideoFrameType::kVideoFrameKey) diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h index 2126044dcc..066ce20a6a 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.h +++ b/modules/video_coding/codecs/vp9/vp9_impl.h @@ -210,6 +210,8 @@ class VP9DecoderImpl : public VP9Decoder { bool inited_; vpx_codec_ctx_t* decoder_; bool key_frame_required_; + VideoCodec current_codec_; + int num_cores_; }; } // namespace webrtc diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc index 9c89235fe2..f8ddd4db41 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc @@ -52,40 +52,65 @@ bool Vp9ReadSyncCode(rtc::BitBuffer* br) { return true; } -bool Vp9ReadColorConfig(rtc::BitBuffer* br, uint8_t profile) { - if (profile == 2 || profile == 3) { - // Bitdepth. - RETURN_FALSE_IF_ERROR(br->ConsumeBits(1)); +bool Vp9ReadColorConfig(rtc::BitBuffer* br, + uint8_t profile, + FrameInfo* frame_info) { + if (profile == 0 || profile == 1) { + frame_info->bit_detph = BitDept::k8Bit; + } else if (profile == 2 || profile == 3) { + uint32_t ten_or_twelve_bits; + RETURN_FALSE_IF_ERROR(br->ReadBits(&ten_or_twelve_bits, 1)); + frame_info->bit_detph = + ten_or_twelve_bits ? 
BitDept::k12Bit : BitDept::k10Bit; } uint32_t color_space; RETURN_FALSE_IF_ERROR(br->ReadBits(&color_space, 3)); + frame_info->color_space = static_cast<ColorSpace>(color_space); // SRGB is 7. if (color_space != 7) { - // YUV range flag. - RETURN_FALSE_IF_ERROR(br->ConsumeBits(1)); + uint32_t color_range; + RETURN_FALSE_IF_ERROR(br->ReadBits(&color_range, 1)); + frame_info->color_range = + color_range ? ColorRange::kFull : ColorRange::kStudio; + if (profile == 1 || profile == 3) { - // 1 bit: subsampling x. - // 1 bit: subsampling y. - RETURN_FALSE_IF_ERROR(br->ConsumeBits(2)); - uint32_t reserved_bit; - RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1)); - if (reserved_bit) { - RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set."; - return false; + uint32_t subsampling_x; + uint32_t subsampling_y; + RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_x, 1)); + RETURN_FALSE_IF_ERROR(br->ReadBits(&subsampling_y, 1)); + if (subsampling_x) { + frame_info->sub_sampling = + subsampling_y ? YuvSubsampling::k420 : YuvSubsampling::k422; + } else { + frame_info->sub_sampling = + subsampling_y ? YuvSubsampling::k440 : YuvSubsampling::k444; } - } - } else { - if (profile == 1 || profile == 3) { + uint32_t reserved_bit; RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1)); if (reserved_bit) { - RTC_LOG(LS_WARNING) << "Failed to get QP. Reserved bit set."; + RTC_LOG(LS_WARNING) << "Failed to parse header. Reserved bit set."; return false; } } else { - RTC_LOG(LS_WARNING) << "Failed to get QP. 4:4:4 color not supported in " - "profile 0 or 2."; + // Profile 0 or 2. + frame_info->sub_sampling = YuvSubsampling::k420; + } + } else { + // SRGB + frame_info->color_range = ColorRange::kFull; + if (profile == 1 || profile == 3) { + frame_info->sub_sampling = YuvSubsampling::k444; + uint32_t reserved_bit; + RETURN_FALSE_IF_ERROR(br->ReadBits(&reserved_bit, 1)); + if (reserved_bit) { + RTC_LOG(LS_WARNING) << "Failed to parse header. 
Reserved bit set."; + return false; + } + } else { + RTC_LOG(LS_WARNING) << "Failed to parse header. 4:4:4 color not supported" + " in profile 0 or 2."; return false; } } @@ -93,24 +118,38 @@ bool Vp9ReadColorConfig(rtc::BitBuffer* br, uint8_t profile) { return true; } -bool Vp9ReadFrameSize(rtc::BitBuffer* br) { - // 2 bytes: frame width. - // 2 bytes: frame height. - return br->ConsumeBytes(4); +bool Vp9ReadFrameSize(rtc::BitBuffer* br, FrameInfo* frame_info) { + // 16 bits: frame width - 1. + uint16_t frame_width_minus_one; + RETURN_FALSE_IF_ERROR(br->ReadUInt16(&frame_width_minus_one)); + // 16 bits: frame height - 1. + uint16_t frame_height_minus_one; + RETURN_FALSE_IF_ERROR(br->ReadUInt16(&frame_height_minus_one)); + frame_info->frame_width = frame_width_minus_one + 1; + frame_info->frame_height = frame_height_minus_one + 1; + return true; } -bool Vp9ReadRenderSize(rtc::BitBuffer* br) { - uint32_t bit; - RETURN_FALSE_IF_ERROR(br->ReadBits(&bit, 1)); - if (bit) { - // 2 bytes: render width. - // 2 bytes: render height. - RETURN_FALSE_IF_ERROR(br->ConsumeBytes(4)); +bool Vp9ReadRenderSize(rtc::BitBuffer* br, FrameInfo* frame_info) { + uint32_t render_and_frame_size_different; + RETURN_FALSE_IF_ERROR(br->ReadBits(&render_and_frame_size_different, 1)); + if (render_and_frame_size_different) { + // 16 bits: render width - 1. + uint16_t render_width_minus_one; + RETURN_FALSE_IF_ERROR(br->ReadUInt16(&render_width_minus_one)); + // 16 bits: render height - 1. 
+ uint16_t render_height_minus_one; + RETURN_FALSE_IF_ERROR(br->ReadUInt16(&render_height_minus_one)); + frame_info->render_width = render_width_minus_one + 1; + frame_info->render_height = render_height_minus_one + 1; + } else { + frame_info->render_width = frame_info->frame_width; + frame_info->render_height = frame_info->frame_height; } return true; } -bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br) { +bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br, FrameInfo* frame_info) { uint32_t found_ref = 0; for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) { // Size in refs. @@ -120,11 +159,11 @@ bool Vp9ReadFrameSizeFromRefs(rtc::BitBuffer* br) { } if (!found_ref) { - if (!Vp9ReadFrameSize(br)) { + if (!Vp9ReadFrameSize(br, frame_info)) { return false; } } - return Vp9ReadRenderSize(br); + return Vp9ReadRenderSize(br, frame_info); } bool Vp9ReadInterpolationFilter(rtc::BitBuffer* br) { @@ -166,14 +205,14 @@ bool Vp9ReadLoopfilter(rtc::BitBuffer* br) { } } // namespace -bool GetQp(const uint8_t* buf, size_t length, int* qp) { +bool Parse(const uint8_t* buf, size_t length, int* qp, FrameInfo* frame_info) { rtc::BitBuffer br(buf, length); // Frame marker. uint32_t frame_marker; RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_marker, 2)); if (frame_marker != 0x2) { - RTC_LOG(LS_WARNING) << "Failed to get QP. Frame marker should be 2."; + RTC_LOG(LS_WARNING) << "Failed to parse header. Frame marker should be 2."; return false; } @@ -181,6 +220,7 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) { uint8_t profile; if (!Vp9ReadProfile(&br, &profile)) return false; + frame_info->profile = profile; // Show existing frame. 
uint32_t show_existing_frame; @@ -195,18 +235,21 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) { RETURN_FALSE_IF_ERROR(br.ReadBits(&frame_type, 1)); RETURN_FALSE_IF_ERROR(br.ReadBits(&show_frame, 1)); RETURN_FALSE_IF_ERROR(br.ReadBits(&error_resilient, 1)); + frame_info->show_frame = show_frame; + frame_info->error_resilient = error_resilient; - if (!frame_type) { + if (frame_type == 0) { + // Key-frame. if (!Vp9ReadSyncCode(&br)) return false; - if (!Vp9ReadColorConfig(&br, profile)) + if (!Vp9ReadColorConfig(&br, profile, frame_info)) return false; - if (!Vp9ReadFrameSize(&br)) + if (!Vp9ReadFrameSize(&br, frame_info)) return false; - if (!Vp9ReadRenderSize(&br)) + if (!Vp9ReadRenderSize(&br, frame_info)) return false; - } else { + // Non-keyframe. uint32_t intra_only = 0; if (!show_frame) RETURN_FALSE_IF_ERROR(br.ReadBits(&intra_only, 1)); @@ -218,14 +261,14 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) { return false; if (profile > 0) { - if (!Vp9ReadColorConfig(&br, profile)) + if (!Vp9ReadColorConfig(&br, profile, frame_info)) return false; } // Refresh frame flags. RETURN_FALSE_IF_ERROR(br.ConsumeBits(8)); - if (!Vp9ReadFrameSize(&br)) + if (!Vp9ReadFrameSize(&br, frame_info)) return false; - if (!Vp9ReadRenderSize(&br)) + if (!Vp9ReadRenderSize(&br, frame_info)) return false; } else { // Refresh frame flags. @@ -237,7 +280,7 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) { RETURN_FALSE_IF_ERROR(br.ConsumeBits(4)); } - if (!Vp9ReadFrameSizeFromRefs(&br)) + if (!Vp9ReadFrameSizeFromRefs(&br, frame_info)) return false; // Allow high precision mv. 
@@ -267,6 +310,20 @@ bool GetQp(const uint8_t* buf, size_t length, int* qp) { return true; } -} // namespace vp9 +bool GetQp(const uint8_t* buf, size_t length, int* qp) { + FrameInfo frame_info; + return Parse(buf, length, qp, &frame_info); +} +absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf, + size_t length) { + int qp = 0; + FrameInfo frame_info; + if (Parse(buf, length, &qp, &frame_info) && frame_info.frame_width > 0) { + return frame_info; + } + return absl::nullopt; +} + +} // namespace vp9 } // namespace webrtc diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.h b/modules/video_coding/utility/vp9_uncompressed_header_parser.h index 69e8de87df..a7f04670d2 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser.h +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.h @@ -13,6 +13,7 @@ #include <stddef.h> #include <stdint.h> +#include "absl/types/optional.h" namespace webrtc { @@ -22,6 +23,65 @@ namespace vp9 { // Returns true on success, false otherwise. bool GetQp(const uint8_t* buf, size_t length, int* qp); +// Bit depth per channel. Support varies by profile. +enum class BitDept : uint8_t { + k8Bit = 8, + k10Bit = 10, + k12Bit = 12, +}; + +enum class ColorSpace : uint8_t { + CS_UNKNOWN = 0, // Unknown (in this case the color space must be signaled + // outside the VP9 bitstream). + CS_BT_601 = 1, // CS_BT_601 Rec. ITU-R BT.601-7 + CS_BT_709 = 2, // Rec. ITU-R BT.709-6 + CS_SMPTE_170 = 3, // SMPTE-170 + CS_SMPTE_240 = 4, // SMPTE-240 + CS_BT_2020 = 5, // Rec. ITU-R BT.2020-2 + CS_RESERVED = 6, // Reserved + CS_RGB = 7, // sRGB (IEC 61966-2-1) +}; + +enum class ColorRange { + kStudio, // Studio swing: + // For BitDepth equals 8: + // Y is between 16 and 235 inclusive. + // U and V are between 16 and 240 inclusive. + // For BitDepth equals 10: + // Y is between 64 and 940 inclusive. + // U and V are between 64 and 960 inclusive. + // For BitDepth equals 12: + // Y is between 256 and 3760. 
+ // U and V are between 256 and 3840 inclusive. + kFull // Full swing; no restriction on Y, U, V values. +}; + +enum class YuvSubsampling { + k444, + k440, + k422, + k420, +}; + +struct FrameInfo { + int profile = 0; // Profile 0-3 are valid. + bool show_frame = false; + bool error_resilient = false; + BitDept bit_detph = BitDept::k8Bit; + ColorSpace color_space = ColorSpace::CS_UNKNOWN; + ColorRange color_range; + YuvSubsampling sub_sampling; + int frame_width = 0; + int frame_height = 0; + int render_width = 0; + int render_height = 0; +}; + +// Parses frame information for a VP9 key-frame or all-intra frame from a +// bitstream. Returns nullopt on failure or if not a key-frame. +absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf, + size_t length); + } // namespace vp9 } // namespace webrtc