diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 5154b51933..50f2e8d836 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -334,7 +334,6 @@ rtc_library("video_coding_utility") { "utility/simulcast_utility.h", "utility/vp8_header_parser.cc", "utility/vp8_header_parser.h", - "utility/vp9_constants.h", "utility/vp9_uncompressed_header_parser.cc", "utility/vp9_uncompressed_header_parser.h", ] diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc index d10d8d10d8..3500ef5919 100644 --- a/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc +++ b/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc @@ -206,8 +206,8 @@ int LibvpxVp9Decoder::Decode(const EncodedImage& input_image, } if (input_image._frameType == VideoFrameType::kVideoFrameKey) { - absl::optional frame_info = - vp9::ParseUncompressedHeader(input_image.data(), input_image.size()); + absl::optional frame_info = + vp9::ParseIntraFrameInfo(input_image.data(), input_image.size()); if (frame_info) { if (frame_info->frame_width != current_codec_.width || frame_info->frame_height != current_codec_.height) { diff --git a/modules/video_coding/utility/vp9_constants.h b/modules/video_coding/utility/vp9_constants.h deleted file mode 100644 index 999e780cb7..0000000000 --- a/modules/video_coding/utility/vp9_constants.h +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ -#define MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ - -#include -#include - -#include - -namespace webrtc { -namespace vp9 { - -// Number of frames that can be stored for future reference. -static constexpr size_t kNumRefFrames = 8; -// Number of frame contexts that can be store for future reference. -static constexpr size_t kNumFrameContexts = 4; -// Each inter frame can use up to 3 frames for reference. -constexpr size_t kRefsPerFrame = 3; -// Number of values that can be decoded for mv_fr. -constexpr size_t kMvFrSize = 4; -// Number of positions to search in motion vector prediction. -constexpr size_t kMvrefNeighbours = 8; -// Number of contexts when decoding intra_mode . -constexpr size_t kBlockSizeGroups = 4; -// Number of different block sizes used. -constexpr size_t kBlockSizes = 13; -// Sentinel value to mark partition choices that are illegal. -constexpr size_t kBlockInvalid = 14; -// Number of contexts when decoding partition. -constexpr size_t kPartitionContexts = 16; -// Smallest size of a mode info block. -constexpr size_t kMiSize = 8; -// Minimum width of a tile in units of superblocks (although tiles on -// the right hand edge can be narrower). -constexpr size_t kMinTileWidth_B64 = 4; -// Maximum width of a tile in units of superblocks. -constexpr size_t kMaxTileWidth_B64 = 64; -// Number of motion vectors returned by find_mv_refs process. -constexpr size_t kMaxMvRefCandidates = 2; -// Number of values that can be derived for ref_frame. -constexpr size_t kMaxRefFrames = 4; -// Number of contexts for is_inter. -constexpr size_t kIsInterContexts = 4; -// Number of contexts for comp_mode. -constexpr size_t kCompModeContexts = 5; -// Number of contexts for single_ref and comp_ref. -constexpr size_t kRefContexts = 5; -// Number of segments allowed in segmentation map. -constexpr size_t kMaxSegments = 8; -// Index for quantizer segment feature. -constexpr size_t kSegLvlAlt_Q = 0; -// Index for loop filter segment feature. -constexpr size_t kSegLvlAlt_L = 1; -// Index for reference frame segment feature. -constexpr size_t kSegLvlRefFrame = 2; -// Index for skip segment feature. -constexpr size_t kSegLvlSkip = 3; -// Number of segment features. -constexpr size_t kSegLvlMax = 4; -// Number of different plane types (Y or UV). -constexpr size_t kBlockTypes = 2; -// Number of different prediction types (intra or inter). -constexpr size_t kRefTypes = 2; -// Number of coefficient bands. -constexpr size_t kCoefBands = 6; -// Number of contexts for decoding coefficients. -constexpr size_t kPrevCoefContexts = 6; -// Number of coefficient probabilities that are directly transmitted. -constexpr size_t kUnconstrainedNodes = 3; -// Number of contexts for transform size. -constexpr size_t kTxSizeContexts = 2; -// Number of values for interp_filter. -constexpr size_t kSwitchableFilters = 3; -// Number of contexts for interp_filter. -constexpr size_t kInterpFilterContexts = 4; -// Number of contexts for decoding skip. -constexpr size_t kSkipContexts = 3; -// Number of values for partition. -constexpr size_t kPartitionTypes = 4; -// Number of values for tx_size. -constexpr size_t kTxSizes = 4; -// Number of values for tx_mode. -constexpr size_t kTxModes = 5; -// Inverse transform rows with DCT and columns with DCT. -constexpr size_t kDctDct = 0; -// Inverse transform rows with DCT and columns with ADST. -constexpr size_t kAdstDct = 1; -// Inverse transform rows with ADST and columns with DCT. -constexpr size_t kDctAdst = 2; -// Inverse transform rows with ADST and columns with ADST. -constexpr size_t kAdstAdst = 3; -// Number of values for y_mode. -constexpr size_t kMbModeCount = 14; -// Number of values for intra_mode. -constexpr size_t kIntraModes = 10; -// Number of values for inter_mode. -constexpr size_t kInterModes = 4; -// Number of contexts for inter_mode. -constexpr size_t kInterModeContexts = 7; -// Number of values for mv_joint. -constexpr size_t kMvJoints = 4; -// Number of values for mv_class. -constexpr size_t kMvClasses = 11; -// Number of values for mv_class0_bit. -constexpr size_t kClass0Size = 2; -// Maximum number of bits for decoding motion vectors. -constexpr size_t kMvOffsetBits = 10; -// Number of values allowed for a probability adjustment. -constexpr size_t kMaxProb = 255; -// Number of different mode types for loop filtering. -constexpr size_t kMaxModeLfDeltas = 2; -// Threshold at which motion vectors are considered large. -constexpr size_t kCompandedMvrefThresh = 8; -// Maximum value used for loop filtering. -constexpr size_t kMaxLoopFilter = 63; -// Number of bits of precision when scaling reference frames. -constexpr size_t kRefScaleShift = 14; -// Number of bits of precision when performing inter prediction. -constexpr size_t kSubpelBits = 4; -// 1 << kSubpelBits. -constexpr size_t kSubpelShifts = 16; -// kSubpelShifts - 1. -constexpr size_t kSubpelMask = 15; -// Value used when clipping motion vectors. -constexpr size_t kMvBorder = 128; -// Value used when clipping motion vectors. -constexpr size_t kInterpExtend = 4; -// Value used when clipping motion vectors. -constexpr size_t kBorderinpixels = 160; -// Value used in adapting probabilities. -constexpr size_t kMaxUpdateFactor = 128; -// Value used in adapting probabilities. -constexpr size_t kCountSat = 20; -// Both candidates use ZEROMV. -constexpr size_t kBothZero = 0; -// One candidate uses ZEROMV, one uses NEARMV or NEARESTMV. -constexpr size_t kZeroPlusPredicted = 1; -// Both candidates use NEARMV or NEARESTMV. -constexpr size_t kBothPredicted = 2; -// One candidate uses NEWMV, one uses ZEROMV. -constexpr size_t kNewPlusNonIntra = 3; -// Both candidates use NEWMV. -constexpr size_t kBothNew = 4; -// One candidate uses intra prediction, one uses inter prediction. -constexpr size_t kIntraPlusNonIntra = 5; -// Both candidates use intra prediction. -constexpr size_t kBothIntra = 6; -// Sentinel value marking a case that can never occur. -constexpr size_t kInvalidCase = 9; - -enum class TxMode : uint8_t { - kOnly4X4 = 0, - kAllow8X8 = 1, - kAllow16x16 = 2, - kAllow32x32 = 3, - kTxModeSelect = 4 -}; - -enum BlockSize : uint8_t { - kBlock4X4 = 0, - kBlock4X8 = 1, - kBlock8X4 = 2, - kBlock8X8 = 3, - kBlock8X16 = 4, - kBlock16X8 = 5, - kBlock16X16 = 6, - kBlock16X32 = 7, - kBlock32X16 = 8, - kBlock32X32 = 9, - kBlock32X64 = 10, - kBlock64X32 = 11, - kBlock64X64 = 12 -}; - -enum Partition : uint8_t { - kPartitionNone = 0, - kPartitionHorizontal = 1, - kPartitionVertical = 2, - kPartitionSplit = 3 -}; - -enum class ReferenceMode : uint8_t { - kSingleReference = 0, - kCompoundReference = 1, - kReferenceModeSelect = 2, -}; - -} // namespace vp9 -} // namespace webrtc - -#endif // MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc index 7fbdd58951..07ba3255c6 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc @@ -12,7 +12,6 @@ #include "absl/strings/string_view.h" #include "rtc_base/bit_buffer.h" #include "rtc_base/logging.h" -#include "rtc_base/strings/string_builder.h" namespace webrtc { @@ -153,15 +152,11 @@ class BitstreamReader { // Returns true if full number of bits were read, false otherwise. bool ConsumeBits(int bits) { return buffer_->ConsumeBits(bits); } - void GetPosition(size_t* out_byte_offset, size_t* out_bit_offset) const { - buffer_->GetCurrentOffset(out_byte_offset, out_bit_offset); - } - private: rtc::BitBuffer* buffer_; }; -bool Vp9ReadColorConfig(BitstreamReader* br, UncompressedHeader* frame_info) { +bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) { if (frame_info->profile == 2 || frame_info->profile == 3) { READ_OR_RETURN(br->ReadBoolean(), [frame_info](bool ten_or_twelve_bits) { frame_info->bit_detph = @@ -224,18 +219,7 @@ bool Vp9ReadColorConfig(BitstreamReader* br, UncompressedHeader* frame_info) { return true; } -bool ReadRefreshFrameFlags(BitstreamReader* br, - UncompressedHeader* frame_info) { - // Refresh frame flags. - READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint8_t flags) { - for (int i = 0; i < 8; ++i) { - frame_info->updated_buffers.set(i, (flags & (0x01 << (7 - i))) != 0); - } - }); - return true; -} - -bool Vp9ReadFrameSize(BitstreamReader* br, UncompressedHeader* frame_info) { +bool Vp9ReadFrameSize(BitstreamReader* br, FrameInfo* frame_info) { // 16 bits: frame (width|height) - 1. READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint16_t width) { frame_info->frame_width = width + 1; @@ -246,12 +230,10 @@ bool Vp9ReadFrameSize(BitstreamReader* br, UncompressedHeader* frame_info) { return true; } -bool Vp9ReadRenderSize(BitstreamReader* br, UncompressedHeader* frame_info) { +bool Vp9ReadRenderSize(BitstreamReader* br, FrameInfo* frame_info) { // render_and_frame_size_different return br->IfNextBoolean( [&] { - auto& pos = frame_info->render_size_position.emplace(); - br->GetPosition(&pos.byte_offset, &pos.bit_offset); // 16 bits: render (width|height) - 1. READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint16_t width) { @@ -271,16 +253,11 @@ bool Vp9ReadRenderSize(BitstreamReader* br, UncompressedHeader* frame_info) { }); } -bool Vp9ReadFrameSizeFromRefs(BitstreamReader* br, - UncompressedHeader* frame_info) { +bool Vp9ReadFrameSizeFromRefs(BitstreamReader* br, FrameInfo* frame_info) { bool found_ref = false; for (size_t i = 0; !found_ref && i < kVp9NumRefsPerFrame; i++) { // Size in refs. - br->IfNextBoolean([&] { - frame_info->infer_size_from_reference = frame_info->reference_buffers[i]; - found_ref = true; - return true; - }); + READ_OR_RETURN(br->ReadBoolean(), [&](bool ref) { found_ref = ref; }); } if (!found_ref) { @@ -309,104 +286,58 @@ bool Vp9ReadLoopfilter(BitstreamReader* br) { }); } -bool Vp9ReadQp(BitstreamReader* br, UncompressedHeader* frame_info) { +bool Vp9ReadQp(BitstreamReader* br, FrameInfo* frame_info) { READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint8_t qp) { frame_info->base_qp = qp; }); // yuv offsets - frame_info->is_lossless = frame_info->base_qp == 0; for (int i = 0; i < 3; ++i) { - RETURN_IF_FALSE(br->IfNextBoolean([&] { // if delta_coded - READ_OR_RETURN(br->ReadUnsigned(4), [&](int delta) { - if (delta != 0) { - frame_info->is_lossless = false; - } - }); - return true; + RETURN_IF_FALSE(br->IfNextBoolean([br] { // if delta_coded + return br->ConsumeBits(5); })); } return true; } -bool Vp9ReadSegmentationParams(BitstreamReader* br, - UncompressedHeader* frame_info) { - constexpr int kSegmentationFeatureBits[kSegLvlMax] = {8, 6, 2, 0}; - constexpr bool kSegmentationFeatureSigned[kSegLvlMax] = {1, 1, 0, 0}; +bool Vp9ReadSegmentationParams(BitstreamReader* br) { + constexpr int kVp9MaxSegments = 8; + constexpr int kVp9SegLvlMax = 4; + constexpr int kSegmentationFeatureBits[kVp9SegLvlMax] = {8, 6, 2, 0}; + constexpr bool kSegmentationFeatureSigned[kVp9SegLvlMax] = {1, 1, 0, 0}; - return br->IfNextBoolean([&] { // segmentation_enabled - frame_info->segmentation_enabled = true; - RETURN_IF_FALSE(br->IfNextBoolean([&] { // update_map - frame_info->segmentation_tree_probs.emplace(); + RETURN_IF_FALSE(br->IfNextBoolean([&] { // segmentation_enabled + return br->IfNextBoolean([&] { // update_map + // Consume probs. for (int i = 0; i < 7; ++i) { - RETURN_IF_FALSE(br->IfNextBoolean( - [&] { - READ_OR_RETURN(br->ReadUnsigned(), [&](uint8_t prob) { - (*frame_info->segmentation_tree_probs)[i] = prob; - }); - return true; - }, - [&] { - (*frame_info->segmentation_tree_probs)[i] = 255; - return true; - })); + RETURN_IF_FALSE(br->IfNextBoolean([br] { return br->ConsumeBits(7); })); } - // temporal_update - return br->IfNextBoolean( - [&] { - frame_info->segmentation_pred_prob.emplace(); - for (int i = 0; i < 3; ++i) { - RETURN_IF_FALSE(br->IfNextBoolean( - [&] { - READ_OR_RETURN( - br->ReadUnsigned(), [&](uint8_t prob) { - (*frame_info->segmentation_pred_prob)[i] = prob; - }); - return true; - }, - [&] { - (*frame_info->segmentation_pred_prob)[i] = 255; - return true; - })); - } - return true; - }, - [&] { - frame_info->segmentation_pred_prob->fill(255); - return true; - }); - })); - - return br->IfNextBoolean([&] { // segmentation_update_data - RETURN_IF_FALSE(br->IfNextBoolean([&] { - frame_info->segmentation_is_delta = true; - return true; - })); - - for (size_t i = 0; i < kMaxSegments; ++i) { - for (size_t j = 0; j < kSegLvlMax; ++j) { - RETURN_IF_FALSE(br->IfNextBoolean([&] { // feature_enabled - READ_OR_RETURN( - br->ReadUnsigned(kSegmentationFeatureBits[j]), - [&](uint8_t feature_value) { - frame_info->segmentation_features[i][j] = feature_value; - }); - if (kSegmentationFeatureSigned[j]) { - RETURN_IF_FALSE(br->IfNextBoolean([&] { - (*frame_info->segmentation_features[i][j]) *= -1; - return true; - })); - } - return true; - })); + return br->IfNextBoolean([&] { // temporal_update + // Consume probs. + for (int i = 0; i < 3; ++i) { + RETURN_IF_FALSE( + br->IfNextBoolean([br] { return br->ConsumeBits(7); })); } - } - return true; + return true; + }); }); + })); + + return br->IfNextBoolean([&] { + RETURN_IF_FALSE(br->ConsumeBits(1)); // abs_or_delta + for (int i = 0; i < kVp9MaxSegments; ++i) { + for (int j = 0; j < kVp9SegLvlMax; ++j) { + RETURN_IF_FALSE(br->IfNextBoolean([&] { // feature_enabled + return br->ConsumeBits(kSegmentationFeatureBits[j] + + kSegmentationFeatureSigned[j]); + })); + } + } + return true; }); } -bool Vp9ReadTileInfo(BitstreamReader* br, UncompressedHeader* frame_info) { +bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) { size_t mi_cols = (frame_info->frame_width + 7) >> 3; size_t sb64_cols = (mi_cols + 7) >> 3; @@ -421,12 +352,12 @@ bool Vp9ReadTileInfo(BitstreamReader* br, UncompressedHeader* frame_info) { } --max_log2; - frame_info->tile_cols_log2 = min_log2; + size_t cols_log2 = min_log2; bool done = false; - while (!done && frame_info->tile_cols_log2 < max_log2) { + while (!done && cols_log2 < max_log2) { RETURN_IF_FALSE(br->IfNextBoolean( [&] { - ++frame_info->tile_cols_log2; + ++cols_log2; return true; }, [&] { @@ -434,157 +365,13 @@ bool Vp9ReadTileInfo(BitstreamReader* br, UncompressedHeader* frame_info) { return true; })); } - frame_info->tile_rows_log2 = 0; - RETURN_IF_FALSE(br->IfNextBoolean([&] { - ++frame_info->tile_rows_log2; - return br->IfNextBoolean([&] { - ++frame_info->tile_rows_log2; - return true; - }); - })); - return true; -} -const InterpolationFilter kLiteralToType[4] = { - InterpolationFilter::kEightTapSmooth, InterpolationFilter::kEightTap, - InterpolationFilter::kEightTapSharp, InterpolationFilter::kBilinear}; + // rows_log2; + return br->IfNextBoolean([&] { return br->ConsumeBits(1); }); +} } // namespace -std::string UncompressedHeader::ToString() const { - char buf[1024]; - rtc::SimpleStringBuilder oss(buf); - - oss << "Vp9UncompressedHeader { " - << "profile = " << profile; - - if (show_existing_frame) { - oss << ", show_existing_frame = " << *show_existing_frame << " }"; - return oss.str(); - } - - oss << ", frame type = " << (is_keyframe ? "key" : "delta") - << ", show_frame = " << (show_frame ? "true" : "false") - << ", error_resilient = " << (error_resilient ? "true" : "false"); - - oss << ", bit_depth = "; - switch (bit_detph) { - case BitDept::k8Bit: - oss << "8bit"; - break; - case BitDept::k10Bit: - oss << "10bit"; - break; - case BitDept::k12Bit: - oss << "12bit"; - break; - } - - if (color_space) { - oss << ", color_space = "; - switch (*color_space) { - case ColorSpace::CS_UNKNOWN: - oss << "unknown"; - break; - case ColorSpace::CS_BT_601: - oss << "CS_BT_601 Rec. ITU-R BT.601-7"; - break; - case ColorSpace::CS_BT_709: - oss << "Rec. ITU-R BT.709-6"; - break; - case ColorSpace::CS_SMPTE_170: - oss << "SMPTE-170"; - break; - case ColorSpace::CS_SMPTE_240: - oss << "SMPTE-240"; - break; - case ColorSpace::CS_BT_2020: - oss << "Rec. ITU-R BT.2020-2"; - break; - case ColorSpace::CS_RESERVED: - oss << "Reserved"; - break; - case ColorSpace::CS_RGB: - oss << "sRGB (IEC 61966-2-1)"; - break; - } - } - - if (color_range) { - oss << ", color_range = "; - switch (*color_range) { - case ColorRange::kFull: - oss << "full"; - break; - case ColorRange::kStudio: - oss << "studio"; - break; - } - } - - if (sub_sampling) { - oss << ", sub_sampling = "; - switch (*sub_sampling) { - case YuvSubsampling::k444: - oss << "444"; - break; - case YuvSubsampling::k440: - oss << "440"; - break; - case YuvSubsampling::k422: - oss << "422"; - break; - case YuvSubsampling::k420: - oss << "420"; - break; - } - } - - if (infer_size_from_reference) { - oss << ", infer_frame_resolution_from = " << *infer_size_from_reference; - } else { - oss << ", frame_width = " << frame_width - << ", frame_height = " << frame_height; - } - if (render_width != 0 && render_height != 0) { - oss << ", render_width = " << render_width - << ", render_height = " << render_height; - } - - oss << ", base qp = " << base_qp; - if (reference_buffers[0] != -1) { - oss << ", last_buffer = " << reference_buffers[0]; - } - if (reference_buffers[1] != -1) { - oss << ", golden_buffer = " << reference_buffers[1]; - } - if (reference_buffers[2] != -1) { - oss << ", altref_buffer = " << reference_buffers[2]; - } - - oss << ", updated buffers = { "; - bool first = true; - for (int i = 0; i < 8; ++i) { - if (updated_buffers.test(i)) { - if (first) { - first = false; - } else { - oss << ", "; - } - oss << i; - } - } - oss << " }"; - - oss << ", compressed_header_size_bytes = " << compressed_header_size; - - oss << " }"; - return oss.str(); -} - -bool Parse(const uint8_t* buf, - size_t length, - UncompressedHeader* frame_info, - bool qp_only) { +bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { rtc::BitBuffer bit_buffer(buf, length); BitstreamReader br(&bit_buffer); @@ -636,9 +423,6 @@ bool Parse(const uint8_t* buf, return false; if (!Vp9ReadRenderSize(&br, frame_info)) return false; - - // Key-frames implicitly update all buffers. - frame_info->updated_buffers.set(); } else { // Non-keyframe. bool is_intra_only = false; @@ -657,49 +441,31 @@ bool Parse(const uint8_t* buf, if (frame_info->profile > 0) { if (!Vp9ReadColorConfig(&br, frame_info)) return false; - } else { - frame_info->color_space = ColorSpace::CS_BT_601; - frame_info->sub_sampling = YuvSubsampling::k420; - frame_info->bit_detph = BitDept::k8Bit; } - frame_info->reference_buffers.fill(-1); - RETURN_IF_FALSE(ReadRefreshFrameFlags(&br, frame_info)); - RETURN_IF_FALSE(Vp9ReadFrameSize(&br, frame_info)); - RETURN_IF_FALSE(Vp9ReadRenderSize(&br, frame_info)); + // Refresh frame flags. + RETURN_IF_FALSE(br.ConsumeBits(8)); + if (!Vp9ReadFrameSize(&br, frame_info)) + return false; + if (!Vp9ReadRenderSize(&br, frame_info)) + return false; } else { - RETURN_IF_FALSE(ReadRefreshFrameFlags(&br, frame_info)); + // Refresh frame flags. + RETURN_IF_FALSE(br.ConsumeBits(8)); - frame_info->reference_buffers_sign_bias[0] = false; for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) { - READ_OR_RETURN(br.ReadUnsigned(3), [&](uint8_t idx) { - frame_info->reference_buffers[i] = idx; - }); - READ_OR_RETURN(br.ReadBoolean(), [&](bool sign_bias) { - frame_info->reference_buffers_sign_bias[ReferenceFrame::kLast + i] = - sign_bias; - }); + // 3 bits: Ref frame index. + // 1 bit: Ref frame sign biases. + RETURN_IF_FALSE(br.ConsumeBits(4)); } if (!Vp9ReadFrameSizeFromRefs(&br, frame_info)) return false; - READ_OR_RETURN(br.ReadBoolean(), [&](bool allow_high_precision_mv) { - frame_info->allow_high_precision_mv = allow_high_precision_mv; - }); - + // Allow high precision mv. + RETURN_IF_FALSE(br.ConsumeBits(1)); // Interpolation filter. - RETURN_IF_FALSE(br.IfNextBoolean( - [frame_info] { - frame_info->interpolation_filter = InterpolationFilter::kSwitchable; - return true; - }, - [&] { - READ_OR_RETURN( - br.ReadUnsigned(2), [frame_info](uint8_t filter) { - frame_info->interpolation_filter = kLiteralToType[filter]; - }); - return true; - })); + RETURN_IF_FALSE(br.IfNextBoolean([] { return true; }, + [&br] { return br.ConsumeBits(2); })); } } @@ -710,8 +476,7 @@ bool Parse(const uint8_t* buf, } // Frame context index. - READ_OR_RETURN(br.ReadUnsigned(2), - [&](uint8_t idx) { frame_info->frame_context_idx = idx; }); + RETURN_IF_FALSE(br.ConsumeBits(2)); if (!Vp9ReadLoopfilter(&br)) return false; @@ -719,39 +484,33 @@ bool Parse(const uint8_t* buf, // Read base QP. RETURN_IF_FALSE(Vp9ReadQp(&br, frame_info)); - if (qp_only) { - // Not interested in the rest of the header, return early. - return true; + const bool kParseFullHeader = false; + if (kParseFullHeader) { + // Currently not used, but will be needed when parsing beyond the + // uncompressed header. + RETURN_IF_FALSE(Vp9ReadSegmentationParams(&br)); + + RETURN_IF_FALSE(Vp9ReadTileInfo(&br, frame_info)); + + RETURN_IF_FALSE(br.ConsumeBits(16)); // header_size_in_bytes } - RETURN_IF_FALSE(Vp9ReadSegmentationParams(&br, frame_info)); - RETURN_IF_FALSE(Vp9ReadTileInfo(&br, frame_info)); - READ_OR_RETURN(br.ReadUnsigned(), [frame_info](uint16_t size) { - frame_info->compressed_header_size = size; - }); - - // Trailing bits. - RETURN_IF_FALSE(br.ConsumeBits(bit_buffer.RemainingBitCount() % 8)); - frame_info->uncompressed_header_size = - length - (bit_buffer.RemainingBitCount() / 8); - return true; } bool GetQp(const uint8_t* buf, size_t length, int* qp) { - UncompressedHeader frame_info; - if (!Parse(buf, length, &frame_info, /*qp_only=*/true)) { + FrameInfo frame_info; + if (!Parse(buf, length, &frame_info)) { return false; } *qp = frame_info.base_qp; return true; } -absl::optional ParseUncompressedHeader(const uint8_t* buf, - size_t length) { - UncompressedHeader frame_info; - if (Parse(buf, length, &frame_info, /*qp_only=*/false) && - frame_info.frame_width > 0) { +absl::optional ParseIntraFrameInfo(const uint8_t* buf, + size_t length) { + FrameInfo frame_info; + if (Parse(buf, length, &frame_info) && frame_info.frame_width > 0) { return frame_info; } return absl::nullopt; diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.h b/modules/video_coding/utility/vp9_uncompressed_header_parser.h index 67166e364b..7a5e2c058b 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser.h +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.h @@ -13,13 +13,7 @@ #include #include - -#include -#include -#include - #include "absl/types/optional.h" -#include "modules/video_coding/utility/vp9_constants.h" namespace webrtc { @@ -69,86 +63,27 @@ enum class YuvSubsampling { k420, }; -enum ReferenceFrame : int { - kNone = -1, - kIntra = 0, - kLast = 1, - kGolden = 2, - kAltref = 3, -}; - -enum class InterpolationFilter : uint8_t { - kEightTap = 0, - kEightTapSmooth = 1, - kEightTapSharp = 2, - kBilinear = 3, - kSwitchable = 4 -}; - -struct UncompressedHeader { - int profile = 0; // Profiles 0-3 are valid. +struct FrameInfo { + int profile = 0; // Profile 0-3 are valid. absl::optional show_existing_frame; bool is_keyframe = false; bool show_frame = false; bool error_resilient = false; BitDept bit_detph = BitDept::k8Bit; - absl::optional color_space; - absl::optional color_range; - absl::optional sub_sampling; + ColorSpace color_space = ColorSpace::CS_UNKNOWN; + ColorRange color_range; + YuvSubsampling sub_sampling; int frame_width = 0; int frame_height = 0; int render_width = 0; int render_height = 0; - // Width/height of the tiles used (in units of 8x8 blocks). - size_t tile_cols_log2 = 0; // tile_cols = 1 << tile_cols_log2 - size_t tile_rows_log2 = 0; // tile_rows = 1 << tile_rows_log2 - struct BitstreamPosition { - size_t byte_offset = 0; - size_t bit_offset = 0; - }; - absl::optional render_size_position; - InterpolationFilter interpolation_filter = InterpolationFilter::kEightTap; - bool allow_high_precision_mv = false; int base_qp = 0; - bool is_lossless = false; - uint8_t frame_context_idx = 0; - - bool segmentation_enabled = false; - absl::optional> segmentation_tree_probs; - absl::optional> segmentation_pred_prob; - bool segmentation_is_delta = false; - absl::optional segmentation_features[kMaxSegments][kSegLvlMax]; - - // Which of the 8 reference buffers may be used as references for this frame. - // -1 indicates not used (e.g. {-1, -1, -1} for intra-only frames). - std::array reference_buffers = {-1, -1, -1}; - // Sign bias corresponding to reference buffers, where the index is a - // ReferenceFrame. - // false/0 indidate backwards reference, true/1 indicate forwards reference). - std::array reference_buffers_sign_bias = {false, false, - false, false}; - - // Indicates which reference buffer [0,7] to infer the frame size from. - absl::optional infer_size_from_reference; - // Which of the 8 reference buffers are updated by this frame. - std::bitset updated_buffers = 0; - - // Header sizes, in bytes. - uint32_t uncompressed_header_size = 0; - uint32_t compressed_header_size = 0; - - bool is_intra_only() const { - return reference_buffers[0] == -1 && reference_buffers[1] == -1 && - reference_buffers[2] == -1; - } - - std::string ToString() const; }; -// Parses the uncompressed header and populates (most) values in a -// UncompressedHeader struct. Returns nullopt on failure. -absl::optional ParseUncompressedHeader(const uint8_t* buf, - size_t length); +// Parses frame information for a VP9 key-frame or all-intra frame from a +// bitstream. Returns nullopt on failure or if not a key-frame. +absl::optional ParseIntraFrameInfo(const uint8_t* buf, + size_t length); } // namespace vp9 diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc index 672becc9e3..b69b45d5c4 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc @@ -15,11 +15,6 @@ namespace webrtc { namespace vp9 { -using ::testing::AllOf; -using ::testing::ElementsAreArray; -using ::testing::Eq; -using ::testing::Field; -using ::testing::Optional; TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) { // Uncompressed header from a frame generated with libvpx. @@ -31,46 +26,21 @@ TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) { 0x2e, 0x73, 0xb7, 0xee, 0x22, 0x06, 0x81, 0x82, 0xd4, 0xef, 0xc3, 0x58, 0x1f, 0x12, 0xd2, 0x7b, 0x28, 0x1f, 0x80, 0xfc, 0x07, 0xe0, 0x00, 0x00}; - absl::optional frame_info = - ParseUncompressedHeader(kHeader, sizeof(kHeader)); + absl::optional frame_info = + ParseIntraFrameInfo(kHeader, sizeof(kHeader)); + // Segmentation info is not actually populated in FrameInfo struct, but it + // needs to be parsed otherwise we end up on the wrong offset. The check for + // segmentation is thus that we have a valid return value. ASSERT_TRUE(frame_info.has_value()); - EXPECT_FALSE(frame_info->is_keyframe); - EXPECT_TRUE(frame_info->error_resilient); - EXPECT_TRUE(frame_info->show_frame); - EXPECT_FALSE(frame_info->show_existing_frame); + EXPECT_EQ(frame_info->is_keyframe, false); + EXPECT_EQ(frame_info->error_resilient, true); + EXPECT_EQ(frame_info->show_frame, true); EXPECT_EQ(frame_info->base_qp, 185); EXPECT_EQ(frame_info->frame_width, 320); EXPECT_EQ(frame_info->frame_height, 240); EXPECT_EQ(frame_info->render_width, 640); EXPECT_EQ(frame_info->render_height, 480); - EXPECT_TRUE(frame_info->allow_high_precision_mv); - EXPECT_EQ(frame_info->frame_context_idx, 0u); - EXPECT_EQ(frame_info->interpolation_filter, InterpolationFilter::kSwitchable); - EXPECT_EQ(frame_info->is_lossless, false); - EXPECT_EQ(frame_info->profile, 0); - EXPECT_THAT(frame_info->reference_buffers, ElementsAreArray({0, 0, 0})); - EXPECT_THAT(frame_info->reference_buffers_sign_bias, - ElementsAreArray({false, false, false, false})); - EXPECT_EQ(frame_info->updated_buffers, 0b10000000); - EXPECT_EQ(frame_info->tile_cols_log2, 0u); - EXPECT_EQ(frame_info->tile_rows_log2, 0u); - EXPECT_THAT( - frame_info->render_size_position, - ::testing::Optional(AllOf( - Field(&UncompressedHeader::BitstreamPosition::byte_offset, 8u), - Field(&UncompressedHeader::BitstreamPosition::bit_offset, 0u)))); - EXPECT_EQ(frame_info->compressed_header_size, 23u); - EXPECT_EQ(frame_info->uncompressed_header_size, 37u); - - EXPECT_TRUE(frame_info->segmentation_enabled); - EXPECT_FALSE(frame_info->segmentation_is_delta); - EXPECT_THAT(frame_info->segmentation_pred_prob, - Optional(ElementsAreArray({205, 1, 1}))); - EXPECT_THAT(frame_info->segmentation_tree_probs, - Optional(ElementsAreArray({255, 255, 128, 1, 128, 128, 128}))); - EXPECT_THAT(frame_info->segmentation_features[1][kSegLvlAlt_Q], Eq(-63)); - EXPECT_THAT(frame_info->segmentation_features[2][kSegLvlAlt_Q], Eq(-81)); } } // namespace vp9