Update how VP9 temporal up switch is populated

This CL updates both the static GOF pattern, with the correct flags for
temporal_up_switch, and the flexible mode logic, which now bases the flag
on dependency descriptors instead of reference buffers.

Bug: webrtc:13576
Change-Id: I578f744bec51d1f3531da5f4a89d12f05a16a6c0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/247187
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#35741}
This commit is contained in:
Erik Språng
2022-01-19 14:17:14 +01:00
committed by WebRTC LUCI CQ
parent 1ca57b9015
commit 16cbed4782
4 changed files with 58 additions and 37 deletions

View File

@ -46,14 +46,14 @@ struct GofInfoVP9 {
case kTemporalStructureMode1:
num_frames_in_gof = 1;
temporal_idx[0] = 0;
temporal_up_switch[0] = false;
temporal_up_switch[0] = true;
num_ref_pics[0] = 1;
pid_diff[0][0] = 1;
break;
case kTemporalStructureMode2:
num_frames_in_gof = 2;
temporal_idx[0] = 0;
temporal_up_switch[0] = false;
temporal_up_switch[0] = true;
num_ref_pics[0] = 1;
pid_diff[0][0] = 2;
@ -65,7 +65,7 @@ struct GofInfoVP9 {
case kTemporalStructureMode3:
num_frames_in_gof = 4;
temporal_idx[0] = 0;
temporal_up_switch[0] = false;
temporal_up_switch[0] = true;
num_ref_pics[0] = 1;
pid_diff[0][0] = 4;
@ -87,7 +87,7 @@ struct GofInfoVP9 {
case kTemporalStructureMode4:
num_frames_in_gof = 8;
temporal_idx[0] = 0;
temporal_up_switch[0] = false;
temporal_up_switch[0] = true;
num_ref_pics[0] = 1;
pid_diff[0][0] = 4;
@ -97,12 +97,12 @@ struct GofInfoVP9 {
pid_diff[1][0] = 1;
temporal_idx[2] = 1;
temporal_up_switch[2] = true;
temporal_up_switch[2] = false;
num_ref_pics[2] = 1;
pid_diff[2][0] = 2;
temporal_idx[3] = 2;
temporal_up_switch[3] = false;
temporal_up_switch[3] = true;
num_ref_pics[3] = 2;
pid_diff[3][0] = 1;
pid_diff[3][1] = 2;
@ -113,7 +113,7 @@ struct GofInfoVP9 {
pid_diff[4][0] = 4;
temporal_idx[5] = 2;
temporal_up_switch[5] = false;
temporal_up_switch[5] = true;
num_ref_pics[5] = 2;
pid_diff[5][0] = 1;
pid_diff[5][1] = 2;
@ -125,7 +125,7 @@ struct GofInfoVP9 {
pid_diff[6][1] = 4;
temporal_idx[7] = 2;
temporal_up_switch[7] = false;
temporal_up_switch[7] = true;
num_ref_pics[7] = 2;
pid_diff[7][0] = 1;
pid_diff[7][1] = 2;
@ -195,7 +195,10 @@ struct RTPVideoHeaderVP9 {
uint8_t temporal_idx; // Temporal layer index, or kNoTemporalIdx.
uint8_t spatial_idx; // Spatial layer index, or kNoSpatialIdx.
bool temporal_up_switch; // True if upswitch to higher frame rate is possible
// starting from this frame.
// meaning subsequent higher temporal layer pictures
// will not depend on any picture before the current
// picture (in coding order) with temporal layer ID
// greater than `temporal_idx` of this frame.
bool inter_layer_predicted; // Frame is dependent on directly lower spatial
// layer frame.

View File

@ -959,7 +959,7 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];
if (VideoCodecMode::kScreensharing == codec_.mode) {
if (codec_.mode == VideoCodecMode::kScreensharing) {
const uint32_t frame_timestamp_ms =
1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
@ -1212,8 +1212,7 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
absl::optional<int>* spatial_idx,
const vpx_codec_cx_pkt& pkt,
uint32_t timestamp) {
const vpx_codec_cx_pkt& pkt) {
RTC_CHECK(codec_specific != nullptr);
codec_specific->codecType = kVideoCodecVP9;
CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9);
@ -1248,9 +1247,6 @@ bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
*spatial_idx = layer_id.spatial_layer_id;
}
// TODO(asapersson): this info has to be obtained from the encoder.
vp9_info->temporal_up_switch = false;
const bool is_key_pic = (pics_since_key_ == 0);
const bool is_inter_layer_pred_allowed =
(inter_layer_pred_ == InterLayerPredMode::kOn ||
@ -1283,6 +1279,20 @@ bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
vp9_info);
if (vp9_info->flexible_mode) {
vp9_info->gof_idx = kNoGofIdx;
if (!svc_controller_) {
if (num_temporal_layers_ == 1) {
vp9_info->temporal_up_switch = true;
} else {
// In flexible mode with > 1 temporal layer but no SVC controller we
// can't technically determine if a frame is an upswitch point, so use
// gof-based data as a proxy for now.
// TODO(sprang): Remove once SVC controller is the only choice.
vp9_info->gof_idx =
static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
vp9_info->temporal_up_switch =
gof_.temporal_up_switch[vp9_info->gof_idx];
}
}
} else {
vp9_info->gof_idx =
static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
@ -1353,6 +1363,23 @@ bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
svc_params_.scaling_factor_den[sid]);
}
}
if (is_flexible_mode_) {
// Populate data for legacy temporal-upswitch state.
// We can switch up to a higher temporal layer only if all temporal layers
// higher than this (within the current spatial layer) are switch points.
vp9_info->temporal_up_switch = true;
for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_;
++i) {
// Assumes decode targets are always ordered first by spatial then by
// temporal id.
size_t dti_index =
(layer_id.spatial_layer_id * num_temporal_layers_) + i;
vp9_info->temporal_up_switch &=
(codec_specific->generic_frame_info
->decode_target_indications[dti_index] ==
DecodeTargetIndication::kSwitch);
}
}
}
return true;
}
@ -1428,8 +1455,6 @@ void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
ref_buf_list.push_back(ref_buf_.at(0));
}
size_t max_ref_temporal_layer_id = 0;
std::vector<size_t> ref_pid_list;
vp9_info->num_ref_pics = 0;
@ -1461,9 +1486,6 @@ void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff);
++vp9_info->num_ref_pics;
max_ref_temporal_layer_id =
std::max(max_ref_temporal_layer_id, ref_buf.temporal_layer_id);
} else {
RTC_DCHECK(inter_layer_predicted);
// RTP spec only allows to use previous spatial layer for inter-layer
@ -1471,10 +1493,6 @@ void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id);
}
}
vp9_info->temporal_up_switch =
(max_ref_temporal_layer_id <
static_cast<size_t>(layer_id.temporal_layer_id));
}
void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
@ -1636,8 +1654,7 @@ void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
codec_specific_ = {};
absl::optional<int> spatial_index;
if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
input_image_->timestamp())) {
if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt)) {
// Drop the frame.
encoded_image_.set_size(0);
return;

View File

@ -67,8 +67,7 @@ class LibvpxVp9Encoder : public VP9Encoder {
bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
absl::optional<int>* spatial_idx,
const vpx_codec_cx_pkt& pkt,
uint32_t timestamp);
const vpx_codec_cx_pkt& pkt);
void FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
const size_t pic_num,
const bool inter_layer_predicted,