Update how VP9 temporal up switch is populated
This CL updates both the static GOF pattern, giving it the correct flags for temporal_up_switch, and the flexible mode logic, which now bases the flag on dependency descriptors instead of reference buffers. Bug: webrtc:13576 Change-Id: I578f744bec51d1f3531da5f4a89d12f05a16a6c0 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/247187 Reviewed-by: Danil Chapovalov <danilchap@webrtc.org> Commit-Queue: Erik Språng <sprang@webrtc.org> Cr-Commit-Position: refs/heads/main@{#35741}
This commit is contained in:
committed by
WebRTC LUCI CQ
parent
1ca57b9015
commit
16cbed4782
@ -46,14 +46,14 @@ struct GofInfoVP9 {
|
||||
case kTemporalStructureMode1:
|
||||
num_frames_in_gof = 1;
|
||||
temporal_idx[0] = 0;
|
||||
temporal_up_switch[0] = false;
|
||||
temporal_up_switch[0] = true;
|
||||
num_ref_pics[0] = 1;
|
||||
pid_diff[0][0] = 1;
|
||||
break;
|
||||
case kTemporalStructureMode2:
|
||||
num_frames_in_gof = 2;
|
||||
temporal_idx[0] = 0;
|
||||
temporal_up_switch[0] = false;
|
||||
temporal_up_switch[0] = true;
|
||||
num_ref_pics[0] = 1;
|
||||
pid_diff[0][0] = 2;
|
||||
|
||||
@ -65,7 +65,7 @@ struct GofInfoVP9 {
|
||||
case kTemporalStructureMode3:
|
||||
num_frames_in_gof = 4;
|
||||
temporal_idx[0] = 0;
|
||||
temporal_up_switch[0] = false;
|
||||
temporal_up_switch[0] = true;
|
||||
num_ref_pics[0] = 1;
|
||||
pid_diff[0][0] = 4;
|
||||
|
||||
@ -87,7 +87,7 @@ struct GofInfoVP9 {
|
||||
case kTemporalStructureMode4:
|
||||
num_frames_in_gof = 8;
|
||||
temporal_idx[0] = 0;
|
||||
temporal_up_switch[0] = false;
|
||||
temporal_up_switch[0] = true;
|
||||
num_ref_pics[0] = 1;
|
||||
pid_diff[0][0] = 4;
|
||||
|
||||
@ -97,12 +97,12 @@ struct GofInfoVP9 {
|
||||
pid_diff[1][0] = 1;
|
||||
|
||||
temporal_idx[2] = 1;
|
||||
temporal_up_switch[2] = true;
|
||||
temporal_up_switch[2] = false;
|
||||
num_ref_pics[2] = 1;
|
||||
pid_diff[2][0] = 2;
|
||||
|
||||
temporal_idx[3] = 2;
|
||||
temporal_up_switch[3] = false;
|
||||
temporal_up_switch[3] = true;
|
||||
num_ref_pics[3] = 2;
|
||||
pid_diff[3][0] = 1;
|
||||
pid_diff[3][1] = 2;
|
||||
@ -113,7 +113,7 @@ struct GofInfoVP9 {
|
||||
pid_diff[4][0] = 4;
|
||||
|
||||
temporal_idx[5] = 2;
|
||||
temporal_up_switch[5] = false;
|
||||
temporal_up_switch[5] = true;
|
||||
num_ref_pics[5] = 2;
|
||||
pid_diff[5][0] = 1;
|
||||
pid_diff[5][1] = 2;
|
||||
@ -125,7 +125,7 @@ struct GofInfoVP9 {
|
||||
pid_diff[6][1] = 4;
|
||||
|
||||
temporal_idx[7] = 2;
|
||||
temporal_up_switch[7] = false;
|
||||
temporal_up_switch[7] = true;
|
||||
num_ref_pics[7] = 2;
|
||||
pid_diff[7][0] = 1;
|
||||
pid_diff[7][1] = 2;
|
||||
@ -195,7 +195,10 @@ struct RTPVideoHeaderVP9 {
|
||||
uint8_t temporal_idx; // Temporal layer index, or kNoTemporalIdx.
|
||||
uint8_t spatial_idx; // Spatial layer index, or kNoSpatialIdx.
|
||||
bool temporal_up_switch; // True if upswitch to higher frame rate is possible
|
||||
// starting from this frame.
|
||||
// meaning subsequent higher temporal layer pictures
|
||||
// will not depend on any picture before the current
|
||||
// picture (in coding order) with temporal layer ID
|
||||
// greater than `temporal_idx` of this frame.
|
||||
bool inter_layer_predicted; // Frame is dependent on directly lower spatial
|
||||
// layer frame.
|
||||
|
||||
|
||||
@ -959,7 +959,7 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
|
||||
const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
|
||||
layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];
|
||||
|
||||
if (VideoCodecMode::kScreensharing == codec_.mode) {
|
||||
if (codec_.mode == VideoCodecMode::kScreensharing) {
|
||||
const uint32_t frame_timestamp_ms =
|
||||
1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
|
||||
|
||||
@ -1212,8 +1212,7 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
|
||||
|
||||
bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
||||
absl::optional<int>* spatial_idx,
|
||||
const vpx_codec_cx_pkt& pkt,
|
||||
uint32_t timestamp) {
|
||||
const vpx_codec_cx_pkt& pkt) {
|
||||
RTC_CHECK(codec_specific != nullptr);
|
||||
codec_specific->codecType = kVideoCodecVP9;
|
||||
CodecSpecificInfoVP9* vp9_info = &(codec_specific->codecSpecific.VP9);
|
||||
@ -1248,9 +1247,6 @@ bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
||||
*spatial_idx = layer_id.spatial_layer_id;
|
||||
}
|
||||
|
||||
// TODO(asapersson): this info has to be obtained from the encoder.
|
||||
vp9_info->temporal_up_switch = false;
|
||||
|
||||
const bool is_key_pic = (pics_since_key_ == 0);
|
||||
const bool is_inter_layer_pred_allowed =
|
||||
(inter_layer_pred_ == InterLayerPredMode::kOn ||
|
||||
@ -1283,6 +1279,20 @@ bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
||||
vp9_info);
|
||||
if (vp9_info->flexible_mode) {
|
||||
vp9_info->gof_idx = kNoGofIdx;
|
||||
if (!svc_controller_) {
|
||||
if (num_temporal_layers_ == 1) {
|
||||
vp9_info->temporal_up_switch = true;
|
||||
} else {
|
||||
// In flexible mode with > 1 temporal layer but no SVC controller we
|
||||
// can't technically determine if a frame is an upswitch point, use
|
||||
// gof-based data as proxy for now.
|
||||
// TODO(sprang): Remove once SVC controller is the only choice.
|
||||
vp9_info->gof_idx =
|
||||
static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
|
||||
vp9_info->temporal_up_switch =
|
||||
gof_.temporal_up_switch[vp9_info->gof_idx];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vp9_info->gof_idx =
|
||||
static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
|
||||
@ -1353,6 +1363,23 @@ bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
||||
svc_params_.scaling_factor_den[sid]);
|
||||
}
|
||||
}
|
||||
if (is_flexible_mode_) {
|
||||
// Populate data for legacy temporal-upswitch state.
|
||||
// We can switch up to a higher temporal layer only if all temporal layers
|
||||
// higher than this (within the current spatial layer) are switch points.
|
||||
vp9_info->temporal_up_switch = true;
|
||||
for (int i = layer_id.temporal_layer_id + 1; i < num_temporal_layers_;
|
||||
++i) {
|
||||
// Assumes decode targets are always ordered first by spatial then by
|
||||
// temporal id.
|
||||
size_t dti_index =
|
||||
(layer_id.spatial_layer_id * num_temporal_layers_) + i;
|
||||
vp9_info->temporal_up_switch &=
|
||||
(codec_specific->generic_frame_info
|
||||
->decode_target_indications[dti_index] ==
|
||||
DecodeTargetIndication::kSwitch);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -1428,8 +1455,6 @@ void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
|
||||
ref_buf_list.push_back(ref_buf_.at(0));
|
||||
}
|
||||
|
||||
size_t max_ref_temporal_layer_id = 0;
|
||||
|
||||
std::vector<size_t> ref_pid_list;
|
||||
|
||||
vp9_info->num_ref_pics = 0;
|
||||
@ -1461,9 +1486,6 @@ void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
|
||||
|
||||
vp9_info->p_diff[vp9_info->num_ref_pics] = static_cast<uint8_t>(p_diff);
|
||||
++vp9_info->num_ref_pics;
|
||||
|
||||
max_ref_temporal_layer_id =
|
||||
std::max(max_ref_temporal_layer_id, ref_buf.temporal_layer_id);
|
||||
} else {
|
||||
RTC_DCHECK(inter_layer_predicted);
|
||||
// RTP spec only allows to use previous spatial layer for inter-layer
|
||||
@ -1471,10 +1493,6 @@ void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
|
||||
RTC_DCHECK_EQ(ref_buf.spatial_layer_id + 1, layer_id.spatial_layer_id);
|
||||
}
|
||||
}
|
||||
|
||||
vp9_info->temporal_up_switch =
|
||||
(max_ref_temporal_layer_id <
|
||||
static_cast<size_t>(layer_id.temporal_layer_id));
|
||||
}
|
||||
|
||||
void LibvpxVp9Encoder::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
|
||||
@ -1636,8 +1654,7 @@ void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
|
||||
|
||||
codec_specific_ = {};
|
||||
absl::optional<int> spatial_index;
|
||||
if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
|
||||
input_image_->timestamp())) {
|
||||
if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt)) {
|
||||
// Drop the frame.
|
||||
encoded_image_.set_size(0);
|
||||
return;
|
||||
|
||||
@ -67,8 +67,7 @@ class LibvpxVp9Encoder : public VP9Encoder {
|
||||
|
||||
bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
||||
absl::optional<int>* spatial_idx,
|
||||
const vpx_codec_cx_pkt& pkt,
|
||||
uint32_t timestamp);
|
||||
const vpx_codec_cx_pkt& pkt);
|
||||
void FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
|
||||
const size_t pic_num,
|
||||
const bool inter_layer_predicted,
|
||||
|
||||
Reference in New Issue
Block a user