From 3097008de03b6260da5cfabb5cbac6f6a64ca810 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Spr=C3=A5ng?= Date: Thu, 15 Jul 2021 11:29:30 +0200 Subject: [PATCH] Rename vp9::FrameInfo to vp9::UncompressedHeader and add more fields. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These fields will be used for bitstream validation in upcoming CLs. A new vp9_constants.h file is also added, containing common constants defined by the bitstream spec. Bug: webrtc:12354 Change-Id: If04256d83409069c8bee43ad41aed41c3707dfd3 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/226060 Commit-Queue: Erik Språng Reviewed-by: Philip Eliasson Cr-Commit-Position: refs/heads/master@{#34476} --- modules/video_coding/BUILD.gn | 1 + .../codecs/vp9/libvpx_vp9_decoder.cc | 4 +- modules/video_coding/utility/vp9_constants.h | 200 +++++++++ .../utility/vp9_uncompressed_header_parser.cc | 391 ++++++++++++++---- .../utility/vp9_uncompressed_header_parser.h | 83 +++- ...vp9_uncompressed_header_parser_unittest.cc | 46 ++- 6 files changed, 631 insertions(+), 94 deletions(-) create mode 100644 modules/video_coding/utility/vp9_constants.h diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 50f2e8d836..5154b51933 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -334,6 +334,7 @@ rtc_library("video_coding_utility") { "utility/simulcast_utility.h", "utility/vp8_header_parser.cc", "utility/vp8_header_parser.h", + "utility/vp9_constants.h", "utility/vp9_uncompressed_header_parser.cc", "utility/vp9_uncompressed_header_parser.h", ] diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc index 3500ef5919..d10d8d10d8 100644 --- a/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc +++ b/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc @@ -206,8 +206,8 @@ int LibvpxVp9Decoder::Decode(const EncodedImage& input_image, } if 
(input_image._frameType == VideoFrameType::kVideoFrameKey) { - absl::optional frame_info = - vp9::ParseIntraFrameInfo(input_image.data(), input_image.size()); + absl::optional frame_info = + vp9::ParseUncompressedHeader(input_image.data(), input_image.size()); if (frame_info) { if (frame_info->frame_width != current_codec_.width || frame_info->frame_height != current_codec_.height) { diff --git a/modules/video_coding/utility/vp9_constants.h b/modules/video_coding/utility/vp9_constants.h new file mode 100644 index 0000000000..999e780cb7 --- /dev/null +++ b/modules/video_coding/utility/vp9_constants.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ +#define MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ + +#include +#include + +#include + +namespace webrtc { +namespace vp9 { + +// Number of frames that can be stored for future reference. +static constexpr size_t kNumRefFrames = 8; +// Number of frame contexts that can be stored for future reference. +static constexpr size_t kNumFrameContexts = 4; +// Each inter frame can use up to 3 frames for reference. +constexpr size_t kRefsPerFrame = 3; +// Number of values that can be decoded for mv_fr. +constexpr size_t kMvFrSize = 4; +// Number of positions to search in motion vector prediction. +constexpr size_t kMvrefNeighbours = 8; +// Number of contexts when decoding intra_mode. +constexpr size_t kBlockSizeGroups = 4; +// Number of different block sizes used. +constexpr size_t kBlockSizes = 13; +// Sentinel value to mark partition choices that are illegal.
+constexpr size_t kBlockInvalid = 14; +// Number of contexts when decoding partition. +constexpr size_t kPartitionContexts = 16; +// Smallest size of a mode info block. +constexpr size_t kMiSize = 8; +// Minimum width of a tile in units of superblocks (although tiles on +// the right hand edge can be narrower). +constexpr size_t kMinTileWidth_B64 = 4; +// Maximum width of a tile in units of superblocks. +constexpr size_t kMaxTileWidth_B64 = 64; +// Number of motion vectors returned by find_mv_refs process. +constexpr size_t kMaxMvRefCandidates = 2; +// Number of values that can be derived for ref_frame. +constexpr size_t kMaxRefFrames = 4; +// Number of contexts for is_inter. +constexpr size_t kIsInterContexts = 4; +// Number of contexts for comp_mode. +constexpr size_t kCompModeContexts = 5; +// Number of contexts for single_ref and comp_ref. +constexpr size_t kRefContexts = 5; +// Number of segments allowed in segmentation map. +constexpr size_t kMaxSegments = 8; +// Index for quantizer segment feature. +constexpr size_t kSegLvlAlt_Q = 0; +// Index for loop filter segment feature. +constexpr size_t kSegLvlAlt_L = 1; +// Index for reference frame segment feature. +constexpr size_t kSegLvlRefFrame = 2; +// Index for skip segment feature. +constexpr size_t kSegLvlSkip = 3; +// Number of segment features. +constexpr size_t kSegLvlMax = 4; +// Number of different plane types (Y or UV). +constexpr size_t kBlockTypes = 2; +// Number of different prediction types (intra or inter). +constexpr size_t kRefTypes = 2; +// Number of coefficient bands. +constexpr size_t kCoefBands = 6; +// Number of contexts for decoding coefficients. +constexpr size_t kPrevCoefContexts = 6; +// Number of coefficient probabilities that are directly transmitted. +constexpr size_t kUnconstrainedNodes = 3; +// Number of contexts for transform size. +constexpr size_t kTxSizeContexts = 2; +// Number of values for interp_filter. 
+constexpr size_t kSwitchableFilters = 3; +// Number of contexts for interp_filter. +constexpr size_t kInterpFilterContexts = 4; +// Number of contexts for decoding skip. +constexpr size_t kSkipContexts = 3; +// Number of values for partition. +constexpr size_t kPartitionTypes = 4; +// Number of values for tx_size. +constexpr size_t kTxSizes = 4; +// Number of values for tx_mode. +constexpr size_t kTxModes = 5; +// Inverse transform rows with DCT and columns with DCT. +constexpr size_t kDctDct = 0; +// Inverse transform rows with DCT and columns with ADST. +constexpr size_t kAdstDct = 1; +// Inverse transform rows with ADST and columns with DCT. +constexpr size_t kDctAdst = 2; +// Inverse transform rows with ADST and columns with ADST. +constexpr size_t kAdstAdst = 3; +// Number of values for y_mode. +constexpr size_t kMbModeCount = 14; +// Number of values for intra_mode. +constexpr size_t kIntraModes = 10; +// Number of values for inter_mode. +constexpr size_t kInterModes = 4; +// Number of contexts for inter_mode. +constexpr size_t kInterModeContexts = 7; +// Number of values for mv_joint. +constexpr size_t kMvJoints = 4; +// Number of values for mv_class. +constexpr size_t kMvClasses = 11; +// Number of values for mv_class0_bit. +constexpr size_t kClass0Size = 2; +// Maximum number of bits for decoding motion vectors. +constexpr size_t kMvOffsetBits = 10; +// Number of values allowed for a probability adjustment. +constexpr size_t kMaxProb = 255; +// Number of different mode types for loop filtering. +constexpr size_t kMaxModeLfDeltas = 2; +// Threshold at which motion vectors are considered large. +constexpr size_t kCompandedMvrefThresh = 8; +// Maximum value used for loop filtering. +constexpr size_t kMaxLoopFilter = 63; +// Number of bits of precision when scaling reference frames. +constexpr size_t kRefScaleShift = 14; +// Number of bits of precision when performing inter prediction. +constexpr size_t kSubpelBits = 4; +// 1 << kSubpelBits. 
+constexpr size_t kSubpelShifts = 16; +// kSubpelShifts - 1. +constexpr size_t kSubpelMask = 15; +// Value used when clipping motion vectors. +constexpr size_t kMvBorder = 128; +// Value used when clipping motion vectors. +constexpr size_t kInterpExtend = 4; +// Value used when clipping motion vectors. +constexpr size_t kBorderinpixels = 160; +// Value used in adapting probabilities. +constexpr size_t kMaxUpdateFactor = 128; +// Value used in adapting probabilities. +constexpr size_t kCountSat = 20; +// Both candidates use ZEROMV. +constexpr size_t kBothZero = 0; +// One candidate uses ZEROMV, one uses NEARMV or NEARESTMV. +constexpr size_t kZeroPlusPredicted = 1; +// Both candidates use NEARMV or NEARESTMV. +constexpr size_t kBothPredicted = 2; +// One candidate uses NEWMV, one uses ZEROMV. +constexpr size_t kNewPlusNonIntra = 3; +// Both candidates use NEWMV. +constexpr size_t kBothNew = 4; +// One candidate uses intra prediction, one uses inter prediction. +constexpr size_t kIntraPlusNonIntra = 5; +// Both candidates use intra prediction. +constexpr size_t kBothIntra = 6; +// Sentinel value marking a case that can never occur. 
+constexpr size_t kInvalidCase = 9; + +enum class TxMode : uint8_t { + kOnly4X4 = 0, + kAllow8X8 = 1, + kAllow16x16 = 2, + kAllow32x32 = 3, + kTxModeSelect = 4 +}; + +enum BlockSize : uint8_t { + kBlock4X4 = 0, + kBlock4X8 = 1, + kBlock8X4 = 2, + kBlock8X8 = 3, + kBlock8X16 = 4, + kBlock16X8 = 5, + kBlock16X16 = 6, + kBlock16X32 = 7, + kBlock32X16 = 8, + kBlock32X32 = 9, + kBlock32X64 = 10, + kBlock64X32 = 11, + kBlock64X64 = 12 +}; + +enum Partition : uint8_t { + kPartitionNone = 0, + kPartitionHorizontal = 1, + kPartitionVertical = 2, + kPartitionSplit = 3 +}; + +enum class ReferenceMode : uint8_t { + kSingleReference = 0, + kCompoundReference = 1, + kReferenceModeSelect = 2, +}; + +} // namespace vp9 +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc index 07ba3255c6..7fbdd58951 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc @@ -12,6 +12,7 @@ #include "absl/strings/string_view.h" #include "rtc_base/bit_buffer.h" #include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" namespace webrtc { @@ -152,11 +153,15 @@ class BitstreamReader { // Returns true if full number of bits were read, false otherwise. 
bool ConsumeBits(int bits) { return buffer_->ConsumeBits(bits); } + void GetPosition(size_t* out_byte_offset, size_t* out_bit_offset) const { + buffer_->GetCurrentOffset(out_byte_offset, out_bit_offset); + } + private: rtc::BitBuffer* buffer_; }; -bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadColorConfig(BitstreamReader* br, UncompressedHeader* frame_info) { if (frame_info->profile == 2 || frame_info->profile == 3) { READ_OR_RETURN(br->ReadBoolean(), [frame_info](bool ten_or_twelve_bits) { frame_info->bit_detph = @@ -219,7 +224,18 @@ bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) { return true; } -bool Vp9ReadFrameSize(BitstreamReader* br, FrameInfo* frame_info) { +bool ReadRefreshFrameFlags(BitstreamReader* br, + UncompressedHeader* frame_info) { + // Refresh frame flags. + READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint8_t flags) { + for (int i = 0; i < 8; ++i) { + frame_info->updated_buffers.set(i, (flags & (0x01 << (7 - i))) != 0); + } + }); + return true; +} + +bool Vp9ReadFrameSize(BitstreamReader* br, UncompressedHeader* frame_info) { // 16 bits: frame (width|height) - 1. READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint16_t width) { frame_info->frame_width = width + 1; @@ -230,10 +246,12 @@ bool Vp9ReadFrameSize(BitstreamReader* br, FrameInfo* frame_info) { return true; } -bool Vp9ReadRenderSize(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadRenderSize(BitstreamReader* br, UncompressedHeader* frame_info) { // render_and_frame_size_different return br->IfNextBoolean( [&] { + auto& pos = frame_info->render_size_position.emplace(); + br->GetPosition(&pos.byte_offset, &pos.bit_offset); // 16 bits: render (width|height) - 1. 
READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint16_t width) { @@ -253,11 +271,16 @@ bool Vp9ReadRenderSize(BitstreamReader* br, FrameInfo* frame_info) { }); } -bool Vp9ReadFrameSizeFromRefs(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadFrameSizeFromRefs(BitstreamReader* br, + UncompressedHeader* frame_info) { bool found_ref = false; for (size_t i = 0; !found_ref && i < kVp9NumRefsPerFrame; i++) { // Size in refs. - READ_OR_RETURN(br->ReadBoolean(), [&](bool ref) { found_ref = ref; }); + RETURN_IF_FALSE(br->IfNextBoolean([&] { // Propagate read failures, as the removed READ_OR_RETURN did. + frame_info->infer_size_from_reference = frame_info->reference_buffers[i]; + found_ref = true; + return true; + })); } if (!found_ref) { @@ -286,58 +309,104 @@ bool Vp9ReadLoopfilter(BitstreamReader* br) { }); } -bool Vp9ReadQp(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadQp(BitstreamReader* br, UncompressedHeader* frame_info) { READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint8_t qp) { frame_info->base_qp = qp; }); // yuv offsets + frame_info->is_lossless = frame_info->base_qp == 0; for (int i = 0; i < 3; ++i) { - RETURN_IF_FALSE(br->IfNextBoolean([br] { // if delta_coded - return br->ConsumeBits(5); + RETURN_IF_FALSE(br->IfNextBoolean([&] { // if delta_coded + READ_OR_RETURN(br->ReadUnsigned(5), [&](int delta) { // delta_q: 4 magnitude bits followed by 1 sign bit. + if ((delta >> 1) != 0) { // Drop the sign bit so that "-0" still counts as zero. + frame_info->is_lossless = false; + } + }); + return true; })); } return true; } -bool Vp9ReadSegmentationParams(BitstreamReader* br) { - constexpr int kVp9MaxSegments = 8; - constexpr int kVp9SegLvlMax = 4; - constexpr int kSegmentationFeatureBits[kVp9SegLvlMax] = {8, 6, 2, 0}; - constexpr bool kSegmentationFeatureSigned[kVp9SegLvlMax] = {1, 1, 0, 0}; +bool Vp9ReadSegmentationParams(BitstreamReader* br, + UncompressedHeader* frame_info) { + constexpr int kSegmentationFeatureBits[kSegLvlMax] = {8, 6, 2, 0}; + constexpr bool kSegmentationFeatureSigned[kSegLvlMax] = {1, 1, 0, 0}; - RETURN_IF_FALSE(br->IfNextBoolean([&] { // segmentation_enabled - return br->IfNextBoolean([&] { // update_map - //
Consume probs. + return br->IfNextBoolean([&] { // segmentation_enabled + frame_info->segmentation_enabled = true; + RETURN_IF_FALSE(br->IfNextBoolean([&] { // update_map + frame_info->segmentation_tree_probs.emplace(); for (int i = 0; i < 7; ++i) { - RETURN_IF_FALSE(br->IfNextBoolean([br] { return br->ConsumeBits(7); })); + RETURN_IF_FALSE(br->IfNextBoolean( + [&] { + READ_OR_RETURN(br->ReadUnsigned(), [&](uint8_t prob) { + (*frame_info->segmentation_tree_probs)[i] = prob; + }); + return true; + }, + [&] { + (*frame_info->segmentation_tree_probs)[i] = 255; + return true; + })); } - return br->IfNextBoolean([&] { // temporal_update - // Consume probs. - for (int i = 0; i < 3; ++i) { - RETURN_IF_FALSE( - br->IfNextBoolean([br] { return br->ConsumeBits(7); })); - } + // temporal_update + return br->IfNextBoolean( + [&] { + frame_info->segmentation_pred_prob.emplace(); + for (int i = 0; i < 3; ++i) { + RETURN_IF_FALSE(br->IfNextBoolean( + [&] { + READ_OR_RETURN( + br->ReadUnsigned(), [&](uint8_t prob) { + (*frame_info->segmentation_pred_prob)[i] = prob; + }); + return true; + }, + [&] { + (*frame_info->segmentation_pred_prob)[i] = 255; + return true; + })); + } + return true; + }, + [&] { + frame_info->segmentation_pred_prob.emplace().fill(255); // The optional is still empty on this branch; engage it before filling. + return true; + }); + })); + + return br->IfNextBoolean([&] { // segmentation_update_data + RETURN_IF_FALSE(br->IfNextBoolean([&] { + frame_info->segmentation_is_delta = true; return true; - }); - }); - })); + })); - return br->IfNextBoolean([&] { - RETURN_IF_FALSE(br->ConsumeBits(1)); // abs_or_delta - for (int i = 0; i < kVp9MaxSegments; ++i) { - for (int j = 0; j < kVp9SegLvlMax; ++j) { - RETURN_IF_FALSE(br->IfNextBoolean([&] { // feature_enabled - return br->ConsumeBits(kSegmentationFeatureBits[j] + - kSegmentationFeatureSigned[j]); - })); + for (size_t i = 0; i < kMaxSegments; ++i) { + for (size_t j = 0; j < kSegLvlMax; ++j) { + RETURN_IF_FALSE(br->IfNextBoolean([&] { // feature_enabled + READ_OR_RETURN( +
br->ReadUnsigned(kSegmentationFeatureBits[j]), + [&](uint8_t feature_value) { + frame_info->segmentation_features[i][j] = feature_value; + }); + if (kSegmentationFeatureSigned[j]) { + RETURN_IF_FALSE(br->IfNextBoolean([&] { + (*frame_info->segmentation_features[i][j]) *= -1; + return true; + })); + } + return true; + })); + } } - } - return true; + return true; + }); }); } -bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadTileInfo(BitstreamReader* br, UncompressedHeader* frame_info) { size_t mi_cols = (frame_info->frame_width + 7) >> 3; size_t sb64_cols = (mi_cols + 7) >> 3; @@ -352,12 +421,12 @@ bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) { } --max_log2; - size_t cols_log2 = min_log2; + frame_info->tile_cols_log2 = min_log2; bool done = false; - while (!done && cols_log2 < max_log2) { + while (!done && frame_info->tile_cols_log2 < max_log2) { RETURN_IF_FALSE(br->IfNextBoolean( [&] { - ++cols_log2; + ++frame_info->tile_cols_log2; return true; }, [&] { @@ -365,13 +434,157 @@ bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) { return true; })); } - - // rows_log2; - return br->IfNextBoolean([&] { return br->ConsumeBits(1); }); + frame_info->tile_rows_log2 = 0; + RETURN_IF_FALSE(br->IfNextBoolean([&] { + ++frame_info->tile_rows_log2; + return br->IfNextBoolean([&] { + ++frame_info->tile_rows_log2; + return true; + }); + })); + return true; } + +const InterpolationFilter kLiteralToType[4] = { + InterpolationFilter::kEightTapSmooth, InterpolationFilter::kEightTap, + InterpolationFilter::kEightTapSharp, InterpolationFilter::kBilinear}; } // namespace -bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { +std::string UncompressedHeader::ToString() const { + char buf[1024]; + rtc::SimpleStringBuilder oss(buf); + + oss << "Vp9UncompressedHeader { " + << "profile = " << profile; + + if (show_existing_frame) { + oss << ", show_existing_frame = " << *show_existing_frame << " }"; + return 
oss.str(); + } + + oss << ", frame type = " << (is_keyframe ? "key" : "delta") + << ", show_frame = " << (show_frame ? "true" : "false") + << ", error_resilient = " << (error_resilient ? "true" : "false"); + + oss << ", bit_depth = "; + switch (bit_detph) { + case BitDept::k8Bit: + oss << "8bit"; + break; + case BitDept::k10Bit: + oss << "10bit"; + break; + case BitDept::k12Bit: + oss << "12bit"; + break; + } + + if (color_space) { + oss << ", color_space = "; + switch (*color_space) { + case ColorSpace::CS_UNKNOWN: + oss << "unknown"; + break; + case ColorSpace::CS_BT_601: + oss << "CS_BT_601 Rec. ITU-R BT.601-7"; + break; + case ColorSpace::CS_BT_709: + oss << "Rec. ITU-R BT.709-6"; + break; + case ColorSpace::CS_SMPTE_170: + oss << "SMPTE-170"; + break; + case ColorSpace::CS_SMPTE_240: + oss << "SMPTE-240"; + break; + case ColorSpace::CS_BT_2020: + oss << "Rec. ITU-R BT.2020-2"; + break; + case ColorSpace::CS_RESERVED: + oss << "Reserved"; + break; + case ColorSpace::CS_RGB: + oss << "sRGB (IEC 61966-2-1)"; + break; + } + } + + if (color_range) { + oss << ", color_range = "; + switch (*color_range) { + case ColorRange::kFull: + oss << "full"; + break; + case ColorRange::kStudio: + oss << "studio"; + break; + } + } + + if (sub_sampling) { + oss << ", sub_sampling = "; + switch (*sub_sampling) { + case YuvSubsampling::k444: + oss << "444"; + break; + case YuvSubsampling::k440: + oss << "440"; + break; + case YuvSubsampling::k422: + oss << "422"; + break; + case YuvSubsampling::k420: + oss << "420"; + break; + } + } + + if (infer_size_from_reference) { + oss << ", infer_frame_resolution_from = " << *infer_size_from_reference; + } else { + oss << ", frame_width = " << frame_width + << ", frame_height = " << frame_height; + } + if (render_width != 0 && render_height != 0) { + oss << ", render_width = " << render_width + << ", render_height = " << render_height; + } + + oss << ", base qp = " << base_qp; + if (reference_buffers[0] != -1) { + oss << ", last_buffer = " 
<< reference_buffers[0]; + } + if (reference_buffers[1] != -1) { + oss << ", golden_buffer = " << reference_buffers[1]; + } + if (reference_buffers[2] != -1) { + oss << ", altref_buffer = " << reference_buffers[2]; + } + + oss << ", updated buffers = { "; + bool first = true; + for (int i = 0; i < 8; ++i) { + if (updated_buffers.test(i)) { + if (first) { + first = false; + } else { + oss << ", "; + } + oss << i; + } + } + oss << " }"; + + oss << ", compressed_header_size_bytes = " << compressed_header_size; + + oss << " }"; + return oss.str(); +} + +bool Parse(const uint8_t* buf, + size_t length, + UncompressedHeader* frame_info, + bool qp_only) { rtc::BitBuffer bit_buffer(buf, length); BitstreamReader br(&bit_buffer); @@ -423,6 +636,9 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { return false; if (!Vp9ReadRenderSize(&br, frame_info)) return false; + + // Key-frames implicitly update all buffers. + frame_info->updated_buffers.set(); } else { // Non-keyframe. bool is_intra_only = false; @@ -441,31 +657,49 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { if (frame_info->profile > 0) { if (!Vp9ReadColorConfig(&br, frame_info)) return false; + } else { + frame_info->color_space = ColorSpace::CS_BT_601; + frame_info->sub_sampling = YuvSubsampling::k420; + frame_info->bit_detph = BitDept::k8Bit; } - // Refresh frame flags. - RETURN_IF_FALSE(br.ConsumeBits(8)); - if (!Vp9ReadFrameSize(&br, frame_info)) - return false; - if (!Vp9ReadRenderSize(&br, frame_info)) - return false; + frame_info->reference_buffers.fill(-1); + RETURN_IF_FALSE(ReadRefreshFrameFlags(&br, frame_info)); + RETURN_IF_FALSE(Vp9ReadFrameSize(&br, frame_info)); + RETURN_IF_FALSE(Vp9ReadRenderSize(&br, frame_info)); } else { - // Refresh frame flags. 
- RETURN_IF_FALSE(br.ConsumeBits(8)); + RETURN_IF_FALSE(ReadRefreshFrameFlags(&br, frame_info)); + frame_info->reference_buffers_sign_bias[0] = false; for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) { - // 3 bits: Ref frame index. - // 1 bit: Ref frame sign biases. - RETURN_IF_FALSE(br.ConsumeBits(4)); + READ_OR_RETURN(br.ReadUnsigned(3), [&](uint8_t idx) { + frame_info->reference_buffers[i] = idx; + }); + READ_OR_RETURN(br.ReadBoolean(), [&](bool sign_bias) { + frame_info->reference_buffers_sign_bias[ReferenceFrame::kLast + i] = + sign_bias; + }); } if (!Vp9ReadFrameSizeFromRefs(&br, frame_info)) return false; - // Allow high precision mv. - RETURN_IF_FALSE(br.ConsumeBits(1)); + READ_OR_RETURN(br.ReadBoolean(), [&](bool allow_high_precision_mv) { + frame_info->allow_high_precision_mv = allow_high_precision_mv; + }); + // Interpolation filter. - RETURN_IF_FALSE(br.IfNextBoolean([] { return true; }, - [&br] { return br.ConsumeBits(2); })); + RETURN_IF_FALSE(br.IfNextBoolean( + [frame_info] { + frame_info->interpolation_filter = InterpolationFilter::kSwitchable; + return true; + }, + [&] { + READ_OR_RETURN( + br.ReadUnsigned(2), [frame_info](uint8_t filter) { + frame_info->interpolation_filter = kLiteralToType[filter]; + }); + return true; + })); } } @@ -476,7 +710,8 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { } // Frame context index. - RETURN_IF_FALSE(br.ConsumeBits(2)); + READ_OR_RETURN(br.ReadUnsigned(2), + [&](uint8_t idx) { frame_info->frame_context_idx = idx; }); if (!Vp9ReadLoopfilter(&br)) return false; @@ -484,33 +719,39 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { // Read base QP. RETURN_IF_FALSE(Vp9ReadQp(&br, frame_info)); - const bool kParseFullHeader = false; - if (kParseFullHeader) { - // Currently not used, but will be needed when parsing beyond the - // uncompressed header. 
- RETURN_IF_FALSE(Vp9ReadSegmentationParams(&br)); - - RETURN_IF_FALSE(Vp9ReadTileInfo(&br, frame_info)); - - RETURN_IF_FALSE(br.ConsumeBits(16)); // header_size_in_bytes + if (qp_only) { + // Not interested in the rest of the header, return early. + return true; } + RETURN_IF_FALSE(Vp9ReadSegmentationParams(&br, frame_info)); + RETURN_IF_FALSE(Vp9ReadTileInfo(&br, frame_info)); + READ_OR_RETURN(br.ReadUnsigned(), [frame_info](uint16_t size) { + frame_info->compressed_header_size = size; + }); + + // Trailing bits. + RETURN_IF_FALSE(br.ConsumeBits(bit_buffer.RemainingBitCount() % 8)); + frame_info->uncompressed_header_size = + length - (bit_buffer.RemainingBitCount() / 8); + return true; } bool GetQp(const uint8_t* buf, size_t length, int* qp) { - FrameInfo frame_info; - if (!Parse(buf, length, &frame_info)) { + UncompressedHeader frame_info; + if (!Parse(buf, length, &frame_info, /*qp_only=*/true)) { return false; } *qp = frame_info.base_qp; return true; } -absl::optional ParseIntraFrameInfo(const uint8_t* buf, - size_t length) { - FrameInfo frame_info; - if (Parse(buf, length, &frame_info) && frame_info.frame_width > 0) { +absl::optional ParseUncompressedHeader(const uint8_t* buf, + size_t length) { + UncompressedHeader frame_info; + if (Parse(buf, length, &frame_info, /*qp_only=*/false) && + frame_info.frame_width > 0) { return frame_info; } return absl::nullopt; diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.h b/modules/video_coding/utility/vp9_uncompressed_header_parser.h index 7a5e2c058b..67166e364b 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser.h +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.h @@ -13,7 +13,13 @@ #include #include + +#include +#include +#include + #include "absl/types/optional.h" +#include "modules/video_coding/utility/vp9_constants.h" namespace webrtc { @@ -63,27 +69,86 @@ enum class YuvSubsampling { k420, }; -struct FrameInfo { - int profile = 0; // Profile 0-3 are 
valid. +enum ReferenceFrame : int { + kNone = -1, + kIntra = 0, + kLast = 1, + kGolden = 2, + kAltref = 3, +}; + +enum class InterpolationFilter : uint8_t { + kEightTap = 0, + kEightTapSmooth = 1, + kEightTapSharp = 2, + kBilinear = 3, + kSwitchable = 4 +}; + +struct UncompressedHeader { + int profile = 0; // Profiles 0-3 are valid. absl::optional show_existing_frame; bool is_keyframe = false; bool show_frame = false; bool error_resilient = false; BitDept bit_detph = BitDept::k8Bit; - ColorSpace color_space = ColorSpace::CS_UNKNOWN; - ColorRange color_range; - YuvSubsampling sub_sampling; + absl::optional color_space; + absl::optional color_range; + absl::optional sub_sampling; int frame_width = 0; int frame_height = 0; int render_width = 0; int render_height = 0; + // Log2 of the number of tile columns/rows used for this frame. + size_t tile_cols_log2 = 0; // tile_cols = 1 << tile_cols_log2 + size_t tile_rows_log2 = 0; // tile_rows = 1 << tile_rows_log2 + struct BitstreamPosition { + size_t byte_offset = 0; + size_t bit_offset = 0; + }; + absl::optional render_size_position; + InterpolationFilter interpolation_filter = InterpolationFilter::kEightTap; + bool allow_high_precision_mv = false; int base_qp = 0; + bool is_lossless = false; + uint8_t frame_context_idx = 0; + + bool segmentation_enabled = false; + absl::optional> segmentation_tree_probs; + absl::optional> segmentation_pred_prob; + bool segmentation_is_delta = false; + absl::optional segmentation_features[kMaxSegments][kSegLvlMax]; + + // Which of the 8 reference buffers may be used as references for this frame. + // -1 indicates not used (e.g. {-1, -1, -1} for intra-only frames). + std::array reference_buffers = {-1, -1, -1}; + // Sign bias corresponding to reference buffers, where the index is a + // ReferenceFrame. + // false/0 indicates a backwards reference, true/1 indicates a forwards reference.
+ std::array reference_buffers_sign_bias = {false, false, + false, false}; + + // Indicates which reference buffer [0,7] to infer the frame size from. + absl::optional infer_size_from_reference; + // Which of the 8 reference buffers are updated by this frame. + std::bitset updated_buffers = 0; + + // Header sizes, in bytes. + uint32_t uncompressed_header_size = 0; + uint32_t compressed_header_size = 0; + + bool is_intra_only() const { + return reference_buffers[0] == -1 && reference_buffers[1] == -1 && + reference_buffers[2] == -1; + } + + std::string ToString() const; }; -// Parses frame information for a VP9 key-frame or all-intra frame from a -// bitstream. Returns nullopt on failure or if not a key-frame. -absl::optional ParseIntraFrameInfo(const uint8_t* buf, - size_t length); +// Parses the uncompressed header and populates (most) values in a +// UncompressedHeader struct. Returns nullopt on failure. +absl::optional ParseUncompressedHeader(const uint8_t* buf, + size_t length); } // namespace vp9 diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc index b69b45d5c4..672becc9e3 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc @@ -15,6 +15,11 @@ namespace webrtc { namespace vp9 { +using ::testing::AllOf; +using ::testing::ElementsAreArray; +using ::testing::Eq; +using ::testing::Field; +using ::testing::Optional; TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) { // Uncompressed header from a frame generated with libvpx. 
@@ -26,21 +31,46 @@ TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) { 0x2e, 0x73, 0xb7, 0xee, 0x22, 0x06, 0x81, 0x82, 0xd4, 0xef, 0xc3, 0x58, 0x1f, 0x12, 0xd2, 0x7b, 0x28, 0x1f, 0x80, 0xfc, 0x07, 0xe0, 0x00, 0x00}; - absl::optional frame_info = - ParseIntraFrameInfo(kHeader, sizeof(kHeader)); - // Segmentation info is not actually populated in FrameInfo struct, but it - // needs to be parsed otherwise we end up on the wrong offset. The check for - // segmentation is thus that we have a valid return value. + absl::optional frame_info = + ParseUncompressedHeader(kHeader, sizeof(kHeader)); ASSERT_TRUE(frame_info.has_value()); - EXPECT_EQ(frame_info->is_keyframe, false); - EXPECT_EQ(frame_info->error_resilient, true); - EXPECT_EQ(frame_info->show_frame, true); + EXPECT_FALSE(frame_info->is_keyframe); + EXPECT_TRUE(frame_info->error_resilient); + EXPECT_TRUE(frame_info->show_frame); + EXPECT_FALSE(frame_info->show_existing_frame); EXPECT_EQ(frame_info->base_qp, 185); EXPECT_EQ(frame_info->frame_width, 320); EXPECT_EQ(frame_info->frame_height, 240); EXPECT_EQ(frame_info->render_width, 640); EXPECT_EQ(frame_info->render_height, 480); + EXPECT_TRUE(frame_info->allow_high_precision_mv); + EXPECT_EQ(frame_info->frame_context_idx, 0u); + EXPECT_EQ(frame_info->interpolation_filter, InterpolationFilter::kSwitchable); + EXPECT_EQ(frame_info->is_lossless, false); + EXPECT_EQ(frame_info->profile, 0); + EXPECT_THAT(frame_info->reference_buffers, ElementsAreArray({0, 0, 0})); + EXPECT_THAT(frame_info->reference_buffers_sign_bias, + ElementsAreArray({false, false, false, false})); + EXPECT_EQ(frame_info->updated_buffers, 0b10000000); + EXPECT_EQ(frame_info->tile_cols_log2, 0u); + EXPECT_EQ(frame_info->tile_rows_log2, 0u); + EXPECT_THAT( + frame_info->render_size_position, + ::testing::Optional(AllOf( + Field(&UncompressedHeader::BitstreamPosition::byte_offset, 8u), + Field(&UncompressedHeader::BitstreamPosition::bit_offset, 0u)))); + 
EXPECT_EQ(frame_info->compressed_header_size, 23u); + EXPECT_EQ(frame_info->uncompressed_header_size, 37u); + + EXPECT_TRUE(frame_info->segmentation_enabled); + EXPECT_FALSE(frame_info->segmentation_is_delta); + EXPECT_THAT(frame_info->segmentation_pred_prob, + Optional(ElementsAreArray({205, 1, 1}))); + EXPECT_THAT(frame_info->segmentation_tree_probs, + Optional(ElementsAreArray({255, 255, 128, 1, 128, 128, 128}))); + EXPECT_THAT(frame_info->segmentation_features[1][kSegLvlAlt_Q], Eq(-63)); + EXPECT_THAT(frame_info->segmentation_features[2][kSegLvlAlt_Q], Eq(-81)); } } // namespace vp9