Add control for inter-layer prediction mode.
This makes it possible to control inter-layer prediction when encoding VP9 SVC. There are three options: 1. Disabled. 2. Enabled for all pictures. 3. Enabled for key pictures, disabled for others. Inter-layer prediction is enabled for all pictures by default. Bug: none Change-Id: I49fe43d8744c92bec349d815100ba158519f0664 Reviewed-on: https://webrtc-review.googlesource.com/71500 Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Reviewed-by: Rasmus Brandt <brandtr@webrtc.org> Commit-Queue: Sergey Silkin <ssilkin@webrtc.org> Cr-Commit-Position: refs/heads/master@{#23049}
This commit is contained in:

committed by
Commit Bot

parent
ad7f6e5ccf
commit
6a8f30e5a3
@ -38,6 +38,7 @@ VideoCodecVP9 VideoEncoder::GetDefaultVp9Settings() {
|
|||||||
vp9_settings.automaticResizeOn = true;
|
vp9_settings.automaticResizeOn = true;
|
||||||
vp9_settings.numberOfSpatialLayers = 1;
|
vp9_settings.numberOfSpatialLayers = 1;
|
||||||
vp9_settings.flexibleMode = false;
|
vp9_settings.flexibleMode = false;
|
||||||
|
vp9_settings.interLayerPred = InterLayerPredMode::kOn;
|
||||||
|
|
||||||
return vp9_settings;
|
return vp9_settings;
|
||||||
}
|
}
|
||||||
|
@ -361,6 +361,15 @@ struct VideoCodecVP8 {
|
|||||||
int keyFrameInterval;
|
int keyFrameInterval;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class InterLayerPredMode {
|
||||||
|
kOn, // Allow inter-layer prediction for all frames.
|
||||||
|
// Frame of low spatial layer can be used for
|
||||||
|
// prediction of next spatial layer frame.
|
||||||
|
kOff, // Encoder produces independent spatial layers.
|
||||||
|
kOnKeyPic // Allow inter-layer prediction only for frames
|
||||||
|
// within key picture.
|
||||||
|
};
|
||||||
|
|
||||||
// VP9 specific.
|
// VP9 specific.
|
||||||
struct VideoCodecVP9 {
|
struct VideoCodecVP9 {
|
||||||
bool operator==(const VideoCodecVP9& other) const;
|
bool operator==(const VideoCodecVP9& other) const;
|
||||||
@ -376,6 +385,7 @@ struct VideoCodecVP9 {
|
|||||||
bool automaticResizeOn;
|
bool automaticResizeOn;
|
||||||
unsigned char numberOfSpatialLayers;
|
unsigned char numberOfSpatialLayers;
|
||||||
bool flexibleMode;
|
bool flexibleMode;
|
||||||
|
InterLayerPredMode interLayerPred;
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO(magjed): Move this and other H264 related classes out to their own file.
|
// TODO(magjed): Move this and other H264 related classes out to their own file.
|
||||||
|
@ -318,4 +318,69 @@ TEST_F(TestVp9Impl, EndOfPicture) {
|
|||||||
EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_picture);
|
EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_picture);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(TestVp9Impl, InterLayerPred) {
|
||||||
|
const size_t num_spatial_layers = 2;
|
||||||
|
const size_t num_temporal_layers = 1;
|
||||||
|
codec_settings_.VP9()->numberOfSpatialLayers =
|
||||||
|
static_cast<unsigned char>(num_spatial_layers);
|
||||||
|
codec_settings_.VP9()->numberOfTemporalLayers =
|
||||||
|
static_cast<unsigned char>(num_temporal_layers);
|
||||||
|
codec_settings_.VP9()->frameDroppingOn = false;
|
||||||
|
|
||||||
|
std::vector<SpatialLayer> layers =
|
||||||
|
GetSvcConfig(codec_settings_.width, codec_settings_.height,
|
||||||
|
num_spatial_layers, num_temporal_layers);
|
||||||
|
|
||||||
|
BitrateAllocation bitrate_allocation;
|
||||||
|
for (size_t i = 0; i < layers.size(); ++i) {
|
||||||
|
codec_settings_.spatialLayers[i] = layers[i];
|
||||||
|
bitrate_allocation.SetBitrate(i, 0, layers[i].targetBitrate * 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<InterLayerPredMode> inter_layer_pred_modes = {
|
||||||
|
InterLayerPredMode::kOff, InterLayerPredMode::kOn,
|
||||||
|
InterLayerPredMode::kOnKeyPic};
|
||||||
|
|
||||||
|
for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) {
|
||||||
|
codec_settings_.VP9()->interLayerPred = inter_layer_pred;
|
||||||
|
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||||
|
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
|
||||||
|
0 /* max payload size (unused) */));
|
||||||
|
|
||||||
|
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||||
|
encoder_->SetRateAllocation(bitrate_allocation,
|
||||||
|
codec_settings_.maxFramerate));
|
||||||
|
|
||||||
|
SetWaitForEncodedFramesThreshold(2);
|
||||||
|
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||||
|
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||||
|
|
||||||
|
std::vector<EncodedImage> frames;
|
||||||
|
std::vector<CodecSpecificInfo> codec_specific;
|
||||||
|
ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
|
||||||
|
|
||||||
|
// Key frame.
|
||||||
|
EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
|
||||||
|
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
|
||||||
|
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
|
||||||
|
inter_layer_pred == InterLayerPredMode::kOff);
|
||||||
|
EXPECT_TRUE(
|
||||||
|
codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred);
|
||||||
|
|
||||||
|
SetWaitForEncodedFramesThreshold(2);
|
||||||
|
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
|
||||||
|
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
|
||||||
|
ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
|
||||||
|
|
||||||
|
// Delta frame.
|
||||||
|
EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
|
||||||
|
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
|
||||||
|
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
|
||||||
|
inter_layer_pred == InterLayerPredMode::kOff ||
|
||||||
|
inter_layer_pred == InterLayerPredMode::kOnKeyPic);
|
||||||
|
EXPECT_TRUE(
|
||||||
|
codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
@ -74,9 +74,10 @@ VP9EncoderImpl::VP9EncoderImpl()
|
|||||||
config_(nullptr),
|
config_(nullptr),
|
||||||
raw_(nullptr),
|
raw_(nullptr),
|
||||||
input_image_(nullptr),
|
input_image_(nullptr),
|
||||||
frames_since_kf_(0),
|
pics_since_key_(0),
|
||||||
num_temporal_layers_(0),
|
num_temporal_layers_(0),
|
||||||
num_spatial_layers_(0),
|
num_spatial_layers_(0),
|
||||||
|
inter_layer_pred_(InterLayerPredMode::kOn),
|
||||||
is_flexible_mode_(false),
|
is_flexible_mode_(false),
|
||||||
frames_encoded_(0),
|
frames_encoded_(0),
|
||||||
// Use two spatial when screensharing with flexible mode.
|
// Use two spatial when screensharing with flexible mode.
|
||||||
@ -367,6 +368,8 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
|
|||||||
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inter_layer_pred_ = inst->VP9().interLayerPred;
|
||||||
|
|
||||||
return InitAndSetControlSettings(inst);
|
return InitAndSetControlSettings(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -456,10 +459,28 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
|
|||||||
vpx_codec_control(
|
vpx_codec_control(
|
||||||
encoder_, VP9E_SET_SVC,
|
encoder_, VP9E_SET_SVC,
|
||||||
(num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);
|
(num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0);
|
||||||
|
|
||||||
if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
|
if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) {
|
||||||
vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,
|
vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS,
|
||||||
&svc_params_);
|
&svc_params_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (num_spatial_layers_ > 1) {
|
||||||
|
switch (inter_layer_pred_) {
|
||||||
|
case InterLayerPredMode::kOn:
|
||||||
|
vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0);
|
||||||
|
break;
|
||||||
|
case InterLayerPredMode::kOff:
|
||||||
|
vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1);
|
||||||
|
break;
|
||||||
|
case InterLayerPredMode::kOnKeyPic:
|
||||||
|
vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
RTC_NOTREACHED();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Register callback for getting each spatial layer.
|
// Register callback for getting each spatial layer.
|
||||||
vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
|
vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = {
|
||||||
VP9EncoderImpl::EncoderOutputCodedPacketCallback,
|
VP9EncoderImpl::EncoderOutputCodedPacketCallback,
|
||||||
@ -604,7 +625,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
|||||||
((pkt.data.frame.flags & VPX_FRAME_IS_KEY) && !codec_.VP9()->flexibleMode)
|
((pkt.data.frame.flags & VPX_FRAME_IS_KEY) && !codec_.VP9()->flexibleMode)
|
||||||
? true
|
? true
|
||||||
: false;
|
: false;
|
||||||
vp9_info->non_ref_for_inter_layer_pred = false;
|
|
||||||
|
|
||||||
vpx_svc_layer_id_t layer_id = {0};
|
vpx_svc_layer_id_t layer_id = {0};
|
||||||
vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
|
vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
|
||||||
@ -630,18 +650,30 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
|||||||
// TODO(asapersson): this info has to be obtained from the encoder.
|
// TODO(asapersson): this info has to be obtained from the encoder.
|
||||||
vp9_info->temporal_up_switch = false;
|
vp9_info->temporal_up_switch = false;
|
||||||
|
|
||||||
if (first_frame_in_picture) {
|
if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
|
||||||
// TODO(asapersson): this info has to be obtained from the encoder.
|
pics_since_key_ = 0;
|
||||||
vp9_info->inter_layer_predicted = false;
|
} else if (first_frame_in_picture) {
|
||||||
++frames_since_kf_;
|
++pics_since_key_;
|
||||||
} else {
|
|
||||||
// TODO(asapersson): this info has to be obtained from the encoder.
|
|
||||||
vp9_info->inter_layer_predicted = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) {
|
const bool is_key_pic = (pics_since_key_ == 0);
|
||||||
frames_since_kf_ = 0;
|
const bool is_inter_layer_pred_allowed =
|
||||||
}
|
(inter_layer_pred_ == InterLayerPredMode::kOn ||
|
||||||
|
(inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic));
|
||||||
|
|
||||||
|
// Always set inter_layer_predicted to true on high layer frame if inter-layer
|
||||||
|
// prediction (ILP) is allowed even if encoder didn't actually use it.
|
||||||
|
// Setting inter_layer_predicted to false would allow receiver to decode high
|
||||||
|
// layer frame without decoding low layer frame. If that would happen (e.g.
|
||||||
|
// if low layer frame is lost) then receiver won't be able to decode next high
|
||||||
|
// layer frame which uses ILP.
|
||||||
|
vp9_info->inter_layer_predicted =
|
||||||
|
first_frame_in_picture ? false : is_inter_layer_pred_allowed;
|
||||||
|
|
||||||
|
const bool is_last_layer =
|
||||||
|
(layer_id.spatial_layer_id + 1 == num_spatial_layers_);
|
||||||
|
vp9_info->non_ref_for_inter_layer_pred =
|
||||||
|
is_last_layer ? true : !is_inter_layer_pred_allowed;
|
||||||
|
|
||||||
// Always populate this, so that the packetizer can properly set the marker
|
// Always populate this, so that the packetizer can properly set the marker
|
||||||
// bit.
|
// bit.
|
||||||
@ -656,7 +688,7 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
vp9_info->gof_idx =
|
vp9_info->gof_idx =
|
||||||
static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof);
|
static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof);
|
||||||
vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
|
vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,9 +120,10 @@ class VP9EncoderImpl : public VP9Encoder {
|
|||||||
const VideoFrame* input_image_;
|
const VideoFrame* input_image_;
|
||||||
GofInfoVP9 gof_; // Contains each frame's temporal information for
|
GofInfoVP9 gof_; // Contains each frame's temporal information for
|
||||||
// non-flexible mode.
|
// non-flexible mode.
|
||||||
size_t frames_since_kf_;
|
size_t pics_since_key_;
|
||||||
uint8_t num_temporal_layers_;
|
uint8_t num_temporal_layers_;
|
||||||
uint8_t num_spatial_layers_;
|
uint8_t num_spatial_layers_;
|
||||||
|
InterLayerPredMode inter_layer_pred_;
|
||||||
|
|
||||||
// Used for flexible mode.
|
// Used for flexible mode.
|
||||||
bool is_flexible_mode_;
|
bool is_flexible_mode_;
|
||||||
|
Reference in New Issue
Block a user