Frame rate controller per spatial layer.

This allows VP9 encoder wrapper to control frame rate of each spatial
layer. The wrapper configures encoder to skip encoding spatial layer
when actual frame rate exceeds the target frame rate of that layer.
Target frame rate of high spatial layer is expected to be equal or
higher than that of low spatial layer. For now frame rate controller
is only enabled in screen sharing mode.

Added unit test which configures encoder to produce 3 spatial layers
with frame rates of 10, 20 and 30 fps and verifies that the absolute delta
between the final and target rates doesn't exceed 10%.

Bug: webrtc:9682
Change-Id: I7a7833f63927dd475e7b42d43e4d29061613e64e
Reviewed-on: https://webrtc-review.googlesource.com/96640
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24593}
This commit is contained in:
Sergey Silkin
2018-09-05 21:07:17 +02:00
committed by Commit Bot
parent afa12d0cee
commit ae9e188e67
3 changed files with 137 additions and 21 deletions

View File

@ -148,6 +148,7 @@ TEST_F(TestVp9Impl, EncodedRotationEqualsInputRotation) {
ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
EXPECT_EQ(kVideoRotation_0, encoded_frame.rotation_);
input_frame = NextInputFrame();
input_frame->set_rotation(kVideoRotation_90);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*input_frame, nullptr, nullptr));
@ -256,8 +257,11 @@ TEST_F(TestVp9Impl, EncoderExplicitLayering) {
codec_settings_.spatialLayers[0].width = codec_settings_.width / 2;
codec_settings_.spatialLayers[0].height = codec_settings_.height / 2;
codec_settings_.spatialLayers[0].maxFramerate = codec_settings_.maxFramerate;
codec_settings_.spatialLayers[1].width = codec_settings_.width;
codec_settings_.spatialLayers[1].height = codec_settings_.height;
codec_settings_.spatialLayers[1].maxFramerate = codec_settings_.maxFramerate;
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
0 /* max payload size (unused) */));
@ -586,6 +590,11 @@ TEST_F(TestVp9ImplFrameDropping, PreEncodeFrameDropping) {
const float expected_framerate_fps = 5.0f;
const float max_abs_framerate_error_fps = expected_framerate_fps * 0.1f;
codec_settings_.maxFramerate = static_cast<uint32_t>(expected_framerate_fps);
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
0 /* max payload size (unused) */));
VideoFrame* input_frame = NextInputFrame();
for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
@ -601,6 +610,73 @@ TEST_F(TestVp9ImplFrameDropping, PreEncodeFrameDropping) {
max_abs_framerate_error_fps);
}
TEST_F(TestVp9ImplFrameDropping, DifferentFrameratePerSpatialLayer) {
// Assign a different frame rate to each spatial layer and check that the
// resulting frame rate is close to the assigned one.
const uint8_t num_spatial_layers = 3;
const float input_framerate_fps = 30.0;
const size_t video_duration_secs = 3;
const size_t num_input_frames = video_duration_secs * input_framerate_fps;
codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
codec_settings_.VP9()->frameDroppingOn = false;
VideoBitrateAllocation bitrate_allocation;
for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
// Frame rate increases from low to high layer.
const uint32_t framerate_fps = 10 * (sl_idx + 1);
codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width;
codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height;
codec_settings_.spatialLayers[sl_idx].maxFramerate = framerate_fps;
codec_settings_.spatialLayers[sl_idx].minBitrate =
codec_settings_.startBitrate;
codec_settings_.spatialLayers[sl_idx].maxBitrate =
codec_settings_.startBitrate;
codec_settings_.spatialLayers[sl_idx].targetBitrate =
codec_settings_.startBitrate;
bitrate_allocation.SetBitrate(
sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
}
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
0 /* max payload size (unused) */));
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->SetRateAllocation(bitrate_allocation,
codec_settings_.maxFramerate));
VideoFrame* input_frame = NextInputFrame();
for (size_t frame_num = 0; frame_num < num_input_frames; ++frame_num) {
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->Encode(*input_frame, nullptr, nullptr));
const size_t timestamp = input_frame->timestamp() +
kVideoPayloadTypeFrequency / input_framerate_fps;
input_frame->set_timestamp(static_cast<uint32_t>(timestamp));
}
std::vector<EncodedImage> encoded_frames;
std::vector<CodecSpecificInfo> codec_infos;
ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_infos));
std::vector<size_t> num_encoded_frames(num_spatial_layers, 0);
for (EncodedImage& encoded_frame : encoded_frames) {
++num_encoded_frames[encoded_frame.SpatialIndex().value_or(0)];
}
for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
const float layer_target_framerate_fps =
codec_settings_.spatialLayers[sl_idx].maxFramerate;
const float layer_output_framerate_fps =
static_cast<float>(num_encoded_frames[sl_idx]) / video_duration_secs;
const float max_framerate_error_fps = layer_target_framerate_fps * 0.1f;
EXPECT_NEAR(layer_output_framerate_fps, layer_target_framerate_fps,
max_framerate_error_fps);
}
}
class TestVp9ImplProfile2 : public TestVp9Impl {
protected:
void SetUp() override {

View File

@ -36,8 +36,6 @@
namespace webrtc {
namespace {
const float kMaxScreenSharingFramerateFps = 5.0f;
// Only positive speeds, range for real-time coding currently is: 5 - 8.
// Lower means slower/better quality, higher means fastest/lower quality.
int GetCpuSpeed(int width, int height) {
@ -157,7 +155,6 @@ VP9EncoderImpl::VP9EncoderImpl(const cricket::VideoCodec& codec)
num_spatial_layers_(0),
is_svc_(false),
inter_layer_pred_(InterLayerPredMode::kOn),
framerate_controller_(kMaxScreenSharingFramerateFps),
is_flexible_mode_(false) {
memset(&codec_, 0, sizeof(codec_));
memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
@ -224,6 +221,14 @@ bool VP9EncoderImpl::SetSvcRates(
force_key_frame_ = true;
}
}
if (!was_layer_enabled) {
// Reset frame rate controller if layer is resumed after pause.
framerate_controller_[sl_idx].Reset();
}
framerate_controller_[sl_idx].SetTargetRate(
codec_.spatialLayers[sl_idx].maxFramerate);
}
} else {
float rate_ratio[VPX_MAX_LAYERS] = {0};
@ -263,6 +268,8 @@ bool VP9EncoderImpl::SetSvcRates(
<< num_temporal_layers_;
return false;
}
framerate_controller_[i].SetTargetRate(codec_.maxFramerate);
}
}
@ -353,15 +360,13 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
RTC_DCHECK_GT(num_spatial_layers_, 0);
num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
if (num_temporal_layers_ == 0)
if (num_temporal_layers_ == 0) {
num_temporal_layers_ = 1;
// Init framerate controller.
if (codec_.mode == VideoCodecMode::kScreensharing) {
framerate_controller_.Reset();
framerate_controller_.SetTargetRate(kMaxScreenSharingFramerateFps);
}
framerate_controller_ = std::vector<FramerateController>(
num_spatial_layers_, FramerateController(codec_.maxFramerate));
is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1);
// Allocate memory for encoded image
@ -537,6 +542,15 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
svc_params_.scaling_factor_num[i] = 1;
svc_params_.scaling_factor_den[i] = scale_factor;
RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0);
RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate);
if (i > 0) {
// Frame rate of high spatial layer is supposed to be equal or higher
// than frame rate of low spatial layer.
RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate,
codec_.spatialLayers[i - 1].maxFramerate);
}
}
} else {
int scaling_factor_num = 256;
@ -669,10 +683,30 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
}
if (VideoCodecMode::kScreensharing == codec_.mode && !force_key_frame_) {
if (framerate_controller_.DropFrame(1000 * input_image.timestamp() /
kVideoPayloadTypeFrequency)) {
// Skip encoding spatial layer frames if their target frame rate is lower
// than actual input frame rate.
vpx_svc_layer_id_t layer_id = {0};
const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];
const uint32_t frame_timestamp_ms =
1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
++layer_id.spatial_layer_id;
} else {
break;
}
}
RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_);
if (layer_id.spatial_layer_id >= num_active_spatial_layers_) {
// Drop entire picture.
return WEBRTC_VIDEO_CODEC_OK;
}
vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
}
RTC_DCHECK_EQ(input_image.width(), raw_->d_w);
@ -731,11 +765,17 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
flags = VPX_EFLAG_FORCE_KF;
}
RTC_CHECK_GT(codec_.maxFramerate, 0);
uint32_t target_framerate_fps = codec_.mode == VideoCodecMode::kScreensharing
? kMaxScreenSharingFramerateFps
: codec_.maxFramerate;
uint32_t duration = 90000 / target_framerate_fps;
// TODO(ssilkin): Frame duration should be specified per spatial layer
// since their frame rate can be different. For now calculate frame duration
// based on the target frame rate of the highest spatial layer, whose frame
// rate is supposed to be equal to or higher than that of the lower layers.
// Also, timestamp should represent actual time passed since previous frame
// (not 'expected' time). Then rate controller can drain buffer more
// accurately.
RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
uint32_t duration = static_cast<uint32_t>(
90000 /
framerate_controller_[num_active_spatial_layers_ - 1].GetTargetRate());
const vpx_codec_err_t rv = vpx_codec_encode(encoder_, raw_, timestamp_,
duration, flags, VPX_DL_REALTIME);
if (rv != VPX_CODEC_OK) {
@ -1067,10 +1107,11 @@ void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_picture) {
&frag_info);
encoded_image_._length = 0;
if (end_of_picture && codec_.mode == VideoCodecMode::kScreensharing) {
const uint32_t timestamp_ms =
if (codec_.mode == VideoCodecMode::kScreensharing) {
const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0);
const uint32_t frame_timestamp_ms =
1000 * encoded_image_.Timestamp() / kVideoPayloadTypeFrequency;
framerate_controller_.AddFrame(timestamp_ms);
framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms);
}
}
}

View File

@ -117,8 +117,7 @@ class VP9EncoderImpl : public VP9Encoder {
bool is_svc_;
InterLayerPredMode inter_layer_pred_;
// Framerate controller.
FramerateController framerate_controller_;
std::vector<FramerateController> framerate_controller_;
// Used for flexible mode.
bool is_flexible_mode_;