Rename vp9::FrameInfo to vp9::UncompressedHeader and add more fields.

These fields will be used for bitstream validation in upcoming CLs.
A new vp9_constants.h file is also added, containing common constants
defined by the bitstream spec.

Bug: webrtc:12354
Change-Id: If04256d83409069c8bee43ad41aed41c3707dfd3
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/226060
Commit-Queue: Erik Språng <sprang@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#34476}
This commit is contained in:
Erik Språng
2021-07-15 11:29:30 +02:00
committed by WebRTC LUCI CQ
parent 0d2dc1f38f
commit 3097008de0
6 changed files with 631 additions and 94 deletions

View File

@ -334,6 +334,7 @@ rtc_library("video_coding_utility") {
"utility/simulcast_utility.h", "utility/simulcast_utility.h",
"utility/vp8_header_parser.cc", "utility/vp8_header_parser.cc",
"utility/vp8_header_parser.h", "utility/vp8_header_parser.h",
"utility/vp9_constants.h",
"utility/vp9_uncompressed_header_parser.cc", "utility/vp9_uncompressed_header_parser.cc",
"utility/vp9_uncompressed_header_parser.h", "utility/vp9_uncompressed_header_parser.h",
] ]

View File

@ -206,8 +206,8 @@ int LibvpxVp9Decoder::Decode(const EncodedImage& input_image,
} }
if (input_image._frameType == VideoFrameType::kVideoFrameKey) { if (input_image._frameType == VideoFrameType::kVideoFrameKey) {
absl::optional<vp9::FrameInfo> frame_info = absl::optional<vp9::UncompressedHeader> frame_info =
vp9::ParseIntraFrameInfo(input_image.data(), input_image.size()); vp9::ParseUncompressedHeader(input_image.data(), input_image.size());
if (frame_info) { if (frame_info) {
if (frame_info->frame_width != current_codec_.width || if (frame_info->frame_width != current_codec_.width ||
frame_info->frame_height != current_codec_.height) { frame_info->frame_height != current_codec_.height) {

View File

@ -0,0 +1,200 @@
/*
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_
#define MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_
#include <stddef.h>
#include <stdint.h>
#include <string>
namespace webrtc {
namespace vp9 {
// Number of frames that can be stored for future reference.
static constexpr size_t kNumRefFrames = 8;
// Number of frame contexts that can be store for future reference.
static constexpr size_t kNumFrameContexts = 4;
// Each inter frame can use up to 3 frames for reference.
constexpr size_t kRefsPerFrame = 3;
// Number of values that can be decoded for mv_fr.
constexpr size_t kMvFrSize = 4;
// Number of positions to search in motion vector prediction.
constexpr size_t kMvrefNeighbours = 8;
// Number of contexts when decoding intra_mode .
constexpr size_t kBlockSizeGroups = 4;
// Number of different block sizes used.
constexpr size_t kBlockSizes = 13;
// Sentinel value to mark partition choices that are illegal.
constexpr size_t kBlockInvalid = 14;
// Number of contexts when decoding partition.
constexpr size_t kPartitionContexts = 16;
// Smallest size of a mode info block.
constexpr size_t kMiSize = 8;
// Minimum width of a tile in units of superblocks (although tiles on
// the right hand edge can be narrower).
constexpr size_t kMinTileWidth_B64 = 4;
// Maximum width of a tile in units of superblocks.
constexpr size_t kMaxTileWidth_B64 = 64;
// Number of motion vectors returned by find_mv_refs process.
constexpr size_t kMaxMvRefCandidates = 2;
// Number of values that can be derived for ref_frame.
constexpr size_t kMaxRefFrames = 4;
// Number of contexts for is_inter.
constexpr size_t kIsInterContexts = 4;
// Number of contexts for comp_mode.
constexpr size_t kCompModeContexts = 5;
// Number of contexts for single_ref and comp_ref.
constexpr size_t kRefContexts = 5;
// Number of segments allowed in segmentation map.
constexpr size_t kMaxSegments = 8;
// Index for quantizer segment feature.
constexpr size_t kSegLvlAlt_Q = 0;
// Index for loop filter segment feature.
constexpr size_t kSegLvlAlt_L = 1;
// Index for reference frame segment feature.
constexpr size_t kSegLvlRefFrame = 2;
// Index for skip segment feature.
constexpr size_t kSegLvlSkip = 3;
// Number of segment features.
constexpr size_t kSegLvlMax = 4;
// Number of different plane types (Y or UV).
constexpr size_t kBlockTypes = 2;
// Number of different prediction types (intra or inter).
constexpr size_t kRefTypes = 2;
// Number of coefficient bands.
constexpr size_t kCoefBands = 6;
// Number of contexts for decoding coefficients.
constexpr size_t kPrevCoefContexts = 6;
// Number of coefficient probabilities that are directly transmitted.
constexpr size_t kUnconstrainedNodes = 3;
// Number of contexts for transform size.
constexpr size_t kTxSizeContexts = 2;
// Number of values for interp_filter.
constexpr size_t kSwitchableFilters = 3;
// Number of contexts for interp_filter.
constexpr size_t kInterpFilterContexts = 4;
// Number of contexts for decoding skip.
constexpr size_t kSkipContexts = 3;
// Number of values for partition.
constexpr size_t kPartitionTypes = 4;
// Number of values for tx_size.
constexpr size_t kTxSizes = 4;
// Number of values for tx_mode.
constexpr size_t kTxModes = 5;
// Inverse transform rows with DCT and columns with DCT.
constexpr size_t kDctDct = 0;
// Inverse transform rows with DCT and columns with ADST.
constexpr size_t kAdstDct = 1;
// Inverse transform rows with ADST and columns with DCT.
constexpr size_t kDctAdst = 2;
// Inverse transform rows with ADST and columns with ADST.
constexpr size_t kAdstAdst = 3;
// Number of values for y_mode.
constexpr size_t kMbModeCount = 14;
// Number of values for intra_mode.
constexpr size_t kIntraModes = 10;
// Number of values for inter_mode.
constexpr size_t kInterModes = 4;
// Number of contexts for inter_mode.
constexpr size_t kInterModeContexts = 7;
// Number of values for mv_joint.
constexpr size_t kMvJoints = 4;
// Number of values for mv_class.
constexpr size_t kMvClasses = 11;
// Number of values for mv_class0_bit.
constexpr size_t kClass0Size = 2;
// Maximum number of bits for decoding motion vectors.
constexpr size_t kMvOffsetBits = 10;
// Number of values allowed for a probability adjustment.
constexpr size_t kMaxProb = 255;
// Number of different mode types for loop filtering.
constexpr size_t kMaxModeLfDeltas = 2;
// Threshold at which motion vectors are considered large.
constexpr size_t kCompandedMvrefThresh = 8;
// Maximum value used for loop filtering.
constexpr size_t kMaxLoopFilter = 63;
// Number of bits of precision when scaling reference frames.
constexpr size_t kRefScaleShift = 14;
// Number of bits of precision when performing inter prediction.
constexpr size_t kSubpelBits = 4;
// 1 << kSubpelBits.
constexpr size_t kSubpelShifts = 16;
// kSubpelShifts - 1.
constexpr size_t kSubpelMask = 15;
// Value used when clipping motion vectors.
constexpr size_t kMvBorder = 128;
// Value used when clipping motion vectors.
constexpr size_t kInterpExtend = 4;
// Value used when clipping motion vectors.
constexpr size_t kBorderinpixels = 160;
// Value used in adapting probabilities.
constexpr size_t kMaxUpdateFactor = 128;
// Value used in adapting probabilities.
constexpr size_t kCountSat = 20;
// Both candidates use ZEROMV.
constexpr size_t kBothZero = 0;
// One candidate uses ZEROMV, one uses NEARMV or NEARESTMV.
constexpr size_t kZeroPlusPredicted = 1;
// Both candidates use NEARMV or NEARESTMV.
constexpr size_t kBothPredicted = 2;
// One candidate uses NEWMV, one uses ZEROMV.
constexpr size_t kNewPlusNonIntra = 3;
// Both candidates use NEWMV.
constexpr size_t kBothNew = 4;
// One candidate uses intra prediction, one uses inter prediction.
constexpr size_t kIntraPlusNonIntra = 5;
// Both candidates use intra prediction.
constexpr size_t kBothIntra = 6;
// Sentinel value marking a case that can never occur.
constexpr size_t kInvalidCase = 9;
enum class TxMode : uint8_t {
kOnly4X4 = 0,
kAllow8X8 = 1,
kAllow16x16 = 2,
kAllow32x32 = 3,
kTxModeSelect = 4
};
enum BlockSize : uint8_t {
kBlock4X4 = 0,
kBlock4X8 = 1,
kBlock8X4 = 2,
kBlock8X8 = 3,
kBlock8X16 = 4,
kBlock16X8 = 5,
kBlock16X16 = 6,
kBlock16X32 = 7,
kBlock32X16 = 8,
kBlock32X32 = 9,
kBlock32X64 = 10,
kBlock64X32 = 11,
kBlock64X64 = 12
};
enum Partition : uint8_t {
kPartitionNone = 0,
kPartitionHorizontal = 1,
kPartitionVertical = 2,
kPartitionSplit = 3
};
enum class ReferenceMode : uint8_t {
kSingleReference = 0,
kCompoundReference = 1,
kReferenceModeSelect = 2,
};
} // namespace vp9
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_

View File

@ -12,6 +12,7 @@
#include "absl/strings/string_view.h" #include "absl/strings/string_view.h"
#include "rtc_base/bit_buffer.h" #include "rtc_base/bit_buffer.h"
#include "rtc_base/logging.h" #include "rtc_base/logging.h"
#include "rtc_base/strings/string_builder.h"
namespace webrtc { namespace webrtc {
@ -152,11 +153,15 @@ class BitstreamReader {
// Returns true if full number of bits were read, false otherwise. // Returns true if full number of bits were read, false otherwise.
bool ConsumeBits(int bits) { return buffer_->ConsumeBits(bits); } bool ConsumeBits(int bits) { return buffer_->ConsumeBits(bits); }
void GetPosition(size_t* out_byte_offset, size_t* out_bit_offset) const {
buffer_->GetCurrentOffset(out_byte_offset, out_bit_offset);
}
private: private:
rtc::BitBuffer* buffer_; rtc::BitBuffer* buffer_;
}; };
bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) { bool Vp9ReadColorConfig(BitstreamReader* br, UncompressedHeader* frame_info) {
if (frame_info->profile == 2 || frame_info->profile == 3) { if (frame_info->profile == 2 || frame_info->profile == 3) {
READ_OR_RETURN(br->ReadBoolean(), [frame_info](bool ten_or_twelve_bits) { READ_OR_RETURN(br->ReadBoolean(), [frame_info](bool ten_or_twelve_bits) {
frame_info->bit_detph = frame_info->bit_detph =
@ -219,7 +224,18 @@ bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) {
return true; return true;
} }
bool Vp9ReadFrameSize(BitstreamReader* br, FrameInfo* frame_info) { bool ReadRefreshFrameFlags(BitstreamReader* br,
UncompressedHeader* frame_info) {
// Refresh frame flags.
READ_OR_RETURN(br->ReadUnsigned<uint8_t>(), [frame_info](uint8_t flags) {
for (int i = 0; i < 8; ++i) {
frame_info->updated_buffers.set(i, (flags & (0x01 << (7 - i))) != 0);
}
});
return true;
}
bool Vp9ReadFrameSize(BitstreamReader* br, UncompressedHeader* frame_info) {
// 16 bits: frame (width|height) - 1. // 16 bits: frame (width|height) - 1.
READ_OR_RETURN(br->ReadUnsigned<uint16_t>(), [frame_info](uint16_t width) { READ_OR_RETURN(br->ReadUnsigned<uint16_t>(), [frame_info](uint16_t width) {
frame_info->frame_width = width + 1; frame_info->frame_width = width + 1;
@ -230,10 +246,12 @@ bool Vp9ReadFrameSize(BitstreamReader* br, FrameInfo* frame_info) {
return true; return true;
} }
bool Vp9ReadRenderSize(BitstreamReader* br, FrameInfo* frame_info) { bool Vp9ReadRenderSize(BitstreamReader* br, UncompressedHeader* frame_info) {
// render_and_frame_size_different // render_and_frame_size_different
return br->IfNextBoolean( return br->IfNextBoolean(
[&] { [&] {
auto& pos = frame_info->render_size_position.emplace();
br->GetPosition(&pos.byte_offset, &pos.bit_offset);
// 16 bits: render (width|height) - 1. // 16 bits: render (width|height) - 1.
READ_OR_RETURN(br->ReadUnsigned<uint16_t>(), READ_OR_RETURN(br->ReadUnsigned<uint16_t>(),
[frame_info](uint16_t width) { [frame_info](uint16_t width) {
@ -253,11 +271,16 @@ bool Vp9ReadRenderSize(BitstreamReader* br, FrameInfo* frame_info) {
}); });
} }
bool Vp9ReadFrameSizeFromRefs(BitstreamReader* br, FrameInfo* frame_info) { bool Vp9ReadFrameSizeFromRefs(BitstreamReader* br,
UncompressedHeader* frame_info) {
bool found_ref = false; bool found_ref = false;
for (size_t i = 0; !found_ref && i < kVp9NumRefsPerFrame; i++) { for (size_t i = 0; !found_ref && i < kVp9NumRefsPerFrame; i++) {
// Size in refs. // Size in refs.
READ_OR_RETURN(br->ReadBoolean(), [&](bool ref) { found_ref = ref; }); br->IfNextBoolean([&] {
frame_info->infer_size_from_reference = frame_info->reference_buffers[i];
found_ref = true;
return true;
});
} }
if (!found_ref) { if (!found_ref) {
@ -286,58 +309,104 @@ bool Vp9ReadLoopfilter(BitstreamReader* br) {
}); });
} }
bool Vp9ReadQp(BitstreamReader* br, FrameInfo* frame_info) { bool Vp9ReadQp(BitstreamReader* br, UncompressedHeader* frame_info) {
READ_OR_RETURN(br->ReadUnsigned<uint8_t>(), READ_OR_RETURN(br->ReadUnsigned<uint8_t>(),
[frame_info](uint8_t qp) { frame_info->base_qp = qp; }); [frame_info](uint8_t qp) { frame_info->base_qp = qp; });
// yuv offsets // yuv offsets
frame_info->is_lossless = frame_info->base_qp == 0;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
RETURN_IF_FALSE(br->IfNextBoolean([br] { // if delta_coded RETURN_IF_FALSE(br->IfNextBoolean([&] { // if delta_coded
return br->ConsumeBits(5); READ_OR_RETURN(br->ReadUnsigned<int>(4), [&](int delta) {
if (delta != 0) {
frame_info->is_lossless = false;
}
});
return true;
})); }));
} }
return true; return true;
} }
bool Vp9ReadSegmentationParams(BitstreamReader* br) { bool Vp9ReadSegmentationParams(BitstreamReader* br,
constexpr int kVp9MaxSegments = 8; UncompressedHeader* frame_info) {
constexpr int kVp9SegLvlMax = 4; constexpr int kSegmentationFeatureBits[kSegLvlMax] = {8, 6, 2, 0};
constexpr int kSegmentationFeatureBits[kVp9SegLvlMax] = {8, 6, 2, 0}; constexpr bool kSegmentationFeatureSigned[kSegLvlMax] = {1, 1, 0, 0};
constexpr bool kSegmentationFeatureSigned[kVp9SegLvlMax] = {1, 1, 0, 0};
RETURN_IF_FALSE(br->IfNextBoolean([&] { // segmentation_enabled return br->IfNextBoolean([&] { // segmentation_enabled
return br->IfNextBoolean([&] { // update_map frame_info->segmentation_enabled = true;
// Consume probs. RETURN_IF_FALSE(br->IfNextBoolean([&] { // update_map
frame_info->segmentation_tree_probs.emplace();
for (int i = 0; i < 7; ++i) { for (int i = 0; i < 7; ++i) {
RETURN_IF_FALSE(br->IfNextBoolean([br] { return br->ConsumeBits(7); })); RETURN_IF_FALSE(br->IfNextBoolean(
[&] {
READ_OR_RETURN(br->ReadUnsigned<uint8_t>(), [&](uint8_t prob) {
(*frame_info->segmentation_tree_probs)[i] = prob;
});
return true;
},
[&] {
(*frame_info->segmentation_tree_probs)[i] = 255;
return true;
}));
} }
return br->IfNextBoolean([&] { // temporal_update // temporal_update
// Consume probs. return br->IfNextBoolean(
for (int i = 0; i < 3; ++i) { [&] {
RETURN_IF_FALSE( frame_info->segmentation_pred_prob.emplace();
br->IfNextBoolean([br] { return br->ConsumeBits(7); })); for (int i = 0; i < 3; ++i) {
} RETURN_IF_FALSE(br->IfNextBoolean(
[&] {
READ_OR_RETURN(
br->ReadUnsigned<uint8_t>(), [&](uint8_t prob) {
(*frame_info->segmentation_pred_prob)[i] = prob;
});
return true;
},
[&] {
(*frame_info->segmentation_pred_prob)[i] = 255;
return true;
}));
}
return true;
},
[&] {
frame_info->segmentation_pred_prob->fill(255);
return true;
});
}));
return br->IfNextBoolean([&] { // segmentation_update_data
RETURN_IF_FALSE(br->IfNextBoolean([&] {
frame_info->segmentation_is_delta = true;
return true; return true;
}); }));
});
}));
return br->IfNextBoolean([&] { for (size_t i = 0; i < kMaxSegments; ++i) {
RETURN_IF_FALSE(br->ConsumeBits(1)); // abs_or_delta for (size_t j = 0; j < kSegLvlMax; ++j) {
for (int i = 0; i < kVp9MaxSegments; ++i) { RETURN_IF_FALSE(br->IfNextBoolean([&] { // feature_enabled
for (int j = 0; j < kVp9SegLvlMax; ++j) { READ_OR_RETURN(
RETURN_IF_FALSE(br->IfNextBoolean([&] { // feature_enabled br->ReadUnsigned<uint8_t>(kSegmentationFeatureBits[j]),
return br->ConsumeBits(kSegmentationFeatureBits[j] + [&](uint8_t feature_value) {
kSegmentationFeatureSigned[j]); frame_info->segmentation_features[i][j] = feature_value;
})); });
if (kSegmentationFeatureSigned[j]) {
RETURN_IF_FALSE(br->IfNextBoolean([&] {
(*frame_info->segmentation_features[i][j]) *= -1;
return true;
}));
}
return true;
}));
}
} }
} return true;
return true; });
}); });
} }
bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) { bool Vp9ReadTileInfo(BitstreamReader* br, UncompressedHeader* frame_info) {
size_t mi_cols = (frame_info->frame_width + 7) >> 3; size_t mi_cols = (frame_info->frame_width + 7) >> 3;
size_t sb64_cols = (mi_cols + 7) >> 3; size_t sb64_cols = (mi_cols + 7) >> 3;
@ -352,12 +421,12 @@ bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) {
} }
--max_log2; --max_log2;
size_t cols_log2 = min_log2; frame_info->tile_cols_log2 = min_log2;
bool done = false; bool done = false;
while (!done && cols_log2 < max_log2) { while (!done && frame_info->tile_cols_log2 < max_log2) {
RETURN_IF_FALSE(br->IfNextBoolean( RETURN_IF_FALSE(br->IfNextBoolean(
[&] { [&] {
++cols_log2; ++frame_info->tile_cols_log2;
return true; return true;
}, },
[&] { [&] {
@ -365,13 +434,157 @@ bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) {
return true; return true;
})); }));
} }
frame_info->tile_rows_log2 = 0;
// rows_log2; RETURN_IF_FALSE(br->IfNextBoolean([&] {
return br->IfNextBoolean([&] { return br->ConsumeBits(1); }); ++frame_info->tile_rows_log2;
return br->IfNextBoolean([&] {
++frame_info->tile_rows_log2;
return true;
});
}));
return true;
} }
const InterpolationFilter kLiteralToType[4] = {
InterpolationFilter::kEightTapSmooth, InterpolationFilter::kEightTap,
InterpolationFilter::kEightTapSharp, InterpolationFilter::kBilinear};
} // namespace } // namespace
bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { std::string UncompressedHeader::ToString() const {
char buf[1024];
rtc::SimpleStringBuilder oss(buf);
oss << "Vp9UncompressedHeader { "
<< "profile = " << profile;
if (show_existing_frame) {
oss << ", show_existing_frame = " << *show_existing_frame << " }";
return oss.str();
}
oss << ", frame type = " << (is_keyframe ? "key" : "delta")
<< ", show_frame = " << (show_frame ? "true" : "false")
<< ", error_resilient = " << (error_resilient ? "true" : "false");
oss << ", bit_depth = ";
switch (bit_detph) {
case BitDept::k8Bit:
oss << "8bit";
break;
case BitDept::k10Bit:
oss << "10bit";
break;
case BitDept::k12Bit:
oss << "12bit";
break;
}
if (color_space) {
oss << ", color_space = ";
switch (*color_space) {
case ColorSpace::CS_UNKNOWN:
oss << "unknown";
break;
case ColorSpace::CS_BT_601:
oss << "CS_BT_601 Rec. ITU-R BT.601-7";
break;
case ColorSpace::CS_BT_709:
oss << "Rec. ITU-R BT.709-6";
break;
case ColorSpace::CS_SMPTE_170:
oss << "SMPTE-170";
break;
case ColorSpace::CS_SMPTE_240:
oss << "SMPTE-240";
break;
case ColorSpace::CS_BT_2020:
oss << "Rec. ITU-R BT.2020-2";
break;
case ColorSpace::CS_RESERVED:
oss << "Reserved";
break;
case ColorSpace::CS_RGB:
oss << "sRGB (IEC 61966-2-1)";
break;
}
}
if (color_range) {
oss << ", color_range = ";
switch (*color_range) {
case ColorRange::kFull:
oss << "full";
break;
case ColorRange::kStudio:
oss << "studio";
break;
}
}
if (sub_sampling) {
oss << ", sub_sampling = ";
switch (*sub_sampling) {
case YuvSubsampling::k444:
oss << "444";
break;
case YuvSubsampling::k440:
oss << "440";
break;
case YuvSubsampling::k422:
oss << "422";
break;
case YuvSubsampling::k420:
oss << "420";
break;
}
}
if (infer_size_from_reference) {
oss << ", infer_frame_resolution_from = " << *infer_size_from_reference;
} else {
oss << ", frame_width = " << frame_width
<< ", frame_height = " << frame_height;
}
if (render_width != 0 && render_height != 0) {
oss << ", render_width = " << render_width
<< ", render_height = " << render_height;
}
oss << ", base qp = " << base_qp;
if (reference_buffers[0] != -1) {
oss << ", last_buffer = " << reference_buffers[0];
}
if (reference_buffers[1] != -1) {
oss << ", golden_buffer = " << reference_buffers[1];
}
if (reference_buffers[2] != -1) {
oss << ", altref_buffer = " << reference_buffers[2];
}
oss << ", updated buffers = { ";
bool first = true;
for (int i = 0; i < 8; ++i) {
if (updated_buffers.test(i)) {
if (first) {
first = false;
} else {
oss << ", ";
}
oss << i;
}
}
oss << " }";
oss << ", compressed_header_size_bytes = " << compressed_header_size;
oss << " }";
return oss.str();
}
bool Parse(const uint8_t* buf,
size_t length,
UncompressedHeader* frame_info,
bool qp_only) {
rtc::BitBuffer bit_buffer(buf, length); rtc::BitBuffer bit_buffer(buf, length);
BitstreamReader br(&bit_buffer); BitstreamReader br(&bit_buffer);
@ -423,6 +636,9 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) {
return false; return false;
if (!Vp9ReadRenderSize(&br, frame_info)) if (!Vp9ReadRenderSize(&br, frame_info))
return false; return false;
// Key-frames implicitly update all buffers.
frame_info->updated_buffers.set();
} else { } else {
// Non-keyframe. // Non-keyframe.
bool is_intra_only = false; bool is_intra_only = false;
@ -441,31 +657,49 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) {
if (frame_info->profile > 0) { if (frame_info->profile > 0) {
if (!Vp9ReadColorConfig(&br, frame_info)) if (!Vp9ReadColorConfig(&br, frame_info))
return false; return false;
} else {
frame_info->color_space = ColorSpace::CS_BT_601;
frame_info->sub_sampling = YuvSubsampling::k420;
frame_info->bit_detph = BitDept::k8Bit;
} }
// Refresh frame flags. frame_info->reference_buffers.fill(-1);
RETURN_IF_FALSE(br.ConsumeBits(8)); RETURN_IF_FALSE(ReadRefreshFrameFlags(&br, frame_info));
if (!Vp9ReadFrameSize(&br, frame_info)) RETURN_IF_FALSE(Vp9ReadFrameSize(&br, frame_info));
return false; RETURN_IF_FALSE(Vp9ReadRenderSize(&br, frame_info));
if (!Vp9ReadRenderSize(&br, frame_info))
return false;
} else { } else {
// Refresh frame flags. RETURN_IF_FALSE(ReadRefreshFrameFlags(&br, frame_info));
RETURN_IF_FALSE(br.ConsumeBits(8));
frame_info->reference_buffers_sign_bias[0] = false;
for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) { for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) {
// 3 bits: Ref frame index. READ_OR_RETURN(br.ReadUnsigned<uint8_t>(3), [&](uint8_t idx) {
// 1 bit: Ref frame sign biases. frame_info->reference_buffers[i] = idx;
RETURN_IF_FALSE(br.ConsumeBits(4)); });
READ_OR_RETURN(br.ReadBoolean(), [&](bool sign_bias) {
frame_info->reference_buffers_sign_bias[ReferenceFrame::kLast + i] =
sign_bias;
});
} }
if (!Vp9ReadFrameSizeFromRefs(&br, frame_info)) if (!Vp9ReadFrameSizeFromRefs(&br, frame_info))
return false; return false;
// Allow high precision mv. READ_OR_RETURN(br.ReadBoolean(), [&](bool allow_high_precision_mv) {
RETURN_IF_FALSE(br.ConsumeBits(1)); frame_info->allow_high_precision_mv = allow_high_precision_mv;
});
// Interpolation filter. // Interpolation filter.
RETURN_IF_FALSE(br.IfNextBoolean([] { return true; }, RETURN_IF_FALSE(br.IfNextBoolean(
[&br] { return br.ConsumeBits(2); })); [frame_info] {
frame_info->interpolation_filter = InterpolationFilter::kSwitchable;
return true;
},
[&] {
READ_OR_RETURN(
br.ReadUnsigned<uint8_t>(2), [frame_info](uint8_t filter) {
frame_info->interpolation_filter = kLiteralToType[filter];
});
return true;
}));
} }
} }
@ -476,7 +710,8 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) {
} }
// Frame context index. // Frame context index.
RETURN_IF_FALSE(br.ConsumeBits(2)); READ_OR_RETURN(br.ReadUnsigned<uint8_t>(2),
[&](uint8_t idx) { frame_info->frame_context_idx = idx; });
if (!Vp9ReadLoopfilter(&br)) if (!Vp9ReadLoopfilter(&br))
return false; return false;
@ -484,33 +719,39 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) {
// Read base QP. // Read base QP.
RETURN_IF_FALSE(Vp9ReadQp(&br, frame_info)); RETURN_IF_FALSE(Vp9ReadQp(&br, frame_info));
const bool kParseFullHeader = false; if (qp_only) {
if (kParseFullHeader) { // Not interested in the rest of the header, return early.
// Currently not used, but will be needed when parsing beyond the return true;
// uncompressed header.
RETURN_IF_FALSE(Vp9ReadSegmentationParams(&br));
RETURN_IF_FALSE(Vp9ReadTileInfo(&br, frame_info));
RETURN_IF_FALSE(br.ConsumeBits(16)); // header_size_in_bytes
} }
RETURN_IF_FALSE(Vp9ReadSegmentationParams(&br, frame_info));
RETURN_IF_FALSE(Vp9ReadTileInfo(&br, frame_info));
READ_OR_RETURN(br.ReadUnsigned<uint16_t>(), [frame_info](uint16_t size) {
frame_info->compressed_header_size = size;
});
// Trailing bits.
RETURN_IF_FALSE(br.ConsumeBits(bit_buffer.RemainingBitCount() % 8));
frame_info->uncompressed_header_size =
length - (bit_buffer.RemainingBitCount() / 8);
return true; return true;
} }
bool GetQp(const uint8_t* buf, size_t length, int* qp) { bool GetQp(const uint8_t* buf, size_t length, int* qp) {
FrameInfo frame_info; UncompressedHeader frame_info;
if (!Parse(buf, length, &frame_info)) { if (!Parse(buf, length, &frame_info, /*qp_only=*/true)) {
return false; return false;
} }
*qp = frame_info.base_qp; *qp = frame_info.base_qp;
return true; return true;
} }
absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf, absl::optional<UncompressedHeader> ParseUncompressedHeader(const uint8_t* buf,
size_t length) { size_t length) {
FrameInfo frame_info; UncompressedHeader frame_info;
if (Parse(buf, length, &frame_info) && frame_info.frame_width > 0) { if (Parse(buf, length, &frame_info, /*qp_only=*/false) &&
frame_info.frame_width > 0) {
return frame_info; return frame_info;
} }
return absl::nullopt; return absl::nullopt;

View File

@ -13,7 +13,13 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <array>
#include <bitset>
#include <string>
#include "absl/types/optional.h" #include "absl/types/optional.h"
#include "modules/video_coding/utility/vp9_constants.h"
namespace webrtc { namespace webrtc {
@ -63,27 +69,86 @@ enum class YuvSubsampling {
k420, k420,
}; };
struct FrameInfo { enum ReferenceFrame : int {
int profile = 0; // Profile 0-3 are valid. kNone = -1,
kIntra = 0,
kLast = 1,
kGolden = 2,
kAltref = 3,
};
enum class InterpolationFilter : uint8_t {
kEightTap = 0,
kEightTapSmooth = 1,
kEightTapSharp = 2,
kBilinear = 3,
kSwitchable = 4
};
struct UncompressedHeader {
int profile = 0; // Profiles 0-3 are valid.
absl::optional<uint8_t> show_existing_frame; absl::optional<uint8_t> show_existing_frame;
bool is_keyframe = false; bool is_keyframe = false;
bool show_frame = false; bool show_frame = false;
bool error_resilient = false; bool error_resilient = false;
BitDept bit_detph = BitDept::k8Bit; BitDept bit_detph = BitDept::k8Bit;
ColorSpace color_space = ColorSpace::CS_UNKNOWN; absl::optional<ColorSpace> color_space;
ColorRange color_range; absl::optional<ColorRange> color_range;
YuvSubsampling sub_sampling; absl::optional<YuvSubsampling> sub_sampling;
int frame_width = 0; int frame_width = 0;
int frame_height = 0; int frame_height = 0;
int render_width = 0; int render_width = 0;
int render_height = 0; int render_height = 0;
// Width/height of the tiles used (in units of 8x8 blocks).
size_t tile_cols_log2 = 0; // tile_cols = 1 << tile_cols_log2
size_t tile_rows_log2 = 0; // tile_rows = 1 << tile_rows_log2
struct BitstreamPosition {
size_t byte_offset = 0;
size_t bit_offset = 0;
};
absl::optional<BitstreamPosition> render_size_position;
InterpolationFilter interpolation_filter = InterpolationFilter::kEightTap;
bool allow_high_precision_mv = false;
int base_qp = 0; int base_qp = 0;
bool is_lossless = false;
uint8_t frame_context_idx = 0;
bool segmentation_enabled = false;
absl::optional<std::array<uint8_t, 7>> segmentation_tree_probs;
absl::optional<std::array<uint8_t, 3>> segmentation_pred_prob;
bool segmentation_is_delta = false;
absl::optional<int> segmentation_features[kMaxSegments][kSegLvlMax];
// Which of the 8 reference buffers may be used as references for this frame.
// -1 indicates not used (e.g. {-1, -1, -1} for intra-only frames).
std::array<int, kRefsPerFrame> reference_buffers = {-1, -1, -1};
// Sign bias corresponding to reference buffers, where the index is a
// ReferenceFrame.
// false/0 indidate backwards reference, true/1 indicate forwards reference).
std::array<bool, kMaxRefFrames> reference_buffers_sign_bias = {false, false,
false, false};
// Indicates which reference buffer [0,7] to infer the frame size from.
absl::optional<int> infer_size_from_reference;
// Which of the 8 reference buffers are updated by this frame.
std::bitset<kNumRefFrames> updated_buffers = 0;
// Header sizes, in bytes.
uint32_t uncompressed_header_size = 0;
uint32_t compressed_header_size = 0;
bool is_intra_only() const {
return reference_buffers[0] == -1 && reference_buffers[1] == -1 &&
reference_buffers[2] == -1;
}
std::string ToString() const;
}; };
// Parses frame information for a VP9 key-frame or all-intra frame from a // Parses the uncompressed header and populates (most) values in a
// bitstream. Returns nullopt on failure or if not a key-frame. // UncompressedHeader struct. Returns nullopt on failure.
absl::optional<FrameInfo> ParseIntraFrameInfo(const uint8_t* buf, absl::optional<UncompressedHeader> ParseUncompressedHeader(const uint8_t* buf,
size_t length); size_t length);
} // namespace vp9 } // namespace vp9

View File

@ -15,6 +15,11 @@
namespace webrtc { namespace webrtc {
namespace vp9 { namespace vp9 {
using ::testing::AllOf;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::Field;
using ::testing::Optional;
TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) { TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) {
// Uncompressed header from a frame generated with libvpx. // Uncompressed header from a frame generated with libvpx.
@ -26,21 +31,46 @@ TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) {
0x2e, 0x73, 0xb7, 0xee, 0x22, 0x06, 0x81, 0x82, 0xd4, 0xef, 0xc3, 0x58, 0x2e, 0x73, 0xb7, 0xee, 0x22, 0x06, 0x81, 0x82, 0xd4, 0xef, 0xc3, 0x58,
0x1f, 0x12, 0xd2, 0x7b, 0x28, 0x1f, 0x80, 0xfc, 0x07, 0xe0, 0x00, 0x00}; 0x1f, 0x12, 0xd2, 0x7b, 0x28, 0x1f, 0x80, 0xfc, 0x07, 0xe0, 0x00, 0x00};
absl::optional<FrameInfo> frame_info = absl::optional<UncompressedHeader> frame_info =
ParseIntraFrameInfo(kHeader, sizeof(kHeader)); ParseUncompressedHeader(kHeader, sizeof(kHeader));
// Segmentation info is not actually populated in FrameInfo struct, but it
// needs to be parsed otherwise we end up on the wrong offset. The check for
// segmentation is thus that we have a valid return value.
ASSERT_TRUE(frame_info.has_value()); ASSERT_TRUE(frame_info.has_value());
EXPECT_EQ(frame_info->is_keyframe, false); EXPECT_FALSE(frame_info->is_keyframe);
EXPECT_EQ(frame_info->error_resilient, true); EXPECT_TRUE(frame_info->error_resilient);
EXPECT_EQ(frame_info->show_frame, true); EXPECT_TRUE(frame_info->show_frame);
EXPECT_FALSE(frame_info->show_existing_frame);
EXPECT_EQ(frame_info->base_qp, 185); EXPECT_EQ(frame_info->base_qp, 185);
EXPECT_EQ(frame_info->frame_width, 320); EXPECT_EQ(frame_info->frame_width, 320);
EXPECT_EQ(frame_info->frame_height, 240); EXPECT_EQ(frame_info->frame_height, 240);
EXPECT_EQ(frame_info->render_width, 640); EXPECT_EQ(frame_info->render_width, 640);
EXPECT_EQ(frame_info->render_height, 480); EXPECT_EQ(frame_info->render_height, 480);
EXPECT_TRUE(frame_info->allow_high_precision_mv);
EXPECT_EQ(frame_info->frame_context_idx, 0u);
EXPECT_EQ(frame_info->interpolation_filter, InterpolationFilter::kSwitchable);
EXPECT_EQ(frame_info->is_lossless, false);
EXPECT_EQ(frame_info->profile, 0);
EXPECT_THAT(frame_info->reference_buffers, ElementsAreArray({0, 0, 0}));
EXPECT_THAT(frame_info->reference_buffers_sign_bias,
ElementsAreArray({false, false, false, false}));
EXPECT_EQ(frame_info->updated_buffers, 0b10000000);
EXPECT_EQ(frame_info->tile_cols_log2, 0u);
EXPECT_EQ(frame_info->tile_rows_log2, 0u);
EXPECT_THAT(
frame_info->render_size_position,
::testing::Optional(AllOf(
Field(&UncompressedHeader::BitstreamPosition::byte_offset, 8u),
Field(&UncompressedHeader::BitstreamPosition::bit_offset, 0u))));
EXPECT_EQ(frame_info->compressed_header_size, 23u);
EXPECT_EQ(frame_info->uncompressed_header_size, 37u);
EXPECT_TRUE(frame_info->segmentation_enabled);
EXPECT_FALSE(frame_info->segmentation_is_delta);
EXPECT_THAT(frame_info->segmentation_pred_prob,
Optional(ElementsAreArray({205, 1, 1})));
EXPECT_THAT(frame_info->segmentation_tree_probs,
Optional(ElementsAreArray({255, 255, 128, 1, 128, 128, 128})));
EXPECT_THAT(frame_info->segmentation_features[1][kSegLvlAlt_Q], Eq(-63));
EXPECT_THAT(frame_info->segmentation_features[2][kSegLvlAlt_Q], Eq(-81));
} }
} // namespace vp9 } // namespace vp9