diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index a5d07ccc57..d8062390dc 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -751,7 +751,7 @@ TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
   codec_settings_.maxFramerate = 30;
   ConfigureSvc(num_spatial_layers);
   codec_settings_.spatialLayers[0].maxFramerate = 5.0;
-  // use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to
+  // use 30 for the SL 1 instead of 10, so even if SL 0 frame is dropped due to
   // framerate capping we would still get back at least a middle layer. It
   // simplifies the test.
   codec_settings_.spatialLayers[1].maxFramerate = 30.0;
@@ -815,6 +815,96 @@ TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
   EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
 }
 
+// In screenshare mode with inter-layer prediction on, rate control may drop
+// only the base spatial layer; no layer may be dropped on the frame where a
+// new spatial layer is enabled, and dropping resumes afterwards.
+TEST_F(TestVp9Impl, ScreenshareFrameDropping) {
+  const int num_spatial_layers = 3;
+  const size_t num_frames_to_detect_drops = 2;
+
+  codec_settings_.maxFramerate = 30;
+  ConfigureSvc(num_spatial_layers);
+  // Use 30 for all spatial layers because it simplifies the test.
+  codec_settings_.spatialLayers[0].maxFramerate = 30.0;
+  codec_settings_.spatialLayers[1].maxFramerate = 30.0;
+  codec_settings_.spatialLayers[2].maxFramerate = 30.0;
+  codec_settings_.VP9()->frameDroppingOn = true;
+  codec_settings_.mode = VideoCodecMode::kScreensharing;
+  codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+  codec_settings_.VP9()->flexibleMode = true;
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
+  // Enable all but the last layer.
+  VideoBitrateAllocation bitrate_allocation;
+  // Very low bitrate for the lowest spatial layer to ensure rate-control drops.
+  bitrate_allocation.SetBitrate(0, 0, 1000);
+  bitrate_allocation.SetBitrate(
+      1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000);
+  // Disable highest layer.
+  bitrate_allocation.SetBitrate(2, 0, 0);
+
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  bool frame_dropped = false;
+  // Encode enough frames to force drop due to rate-control.
+  for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops;
+       ++frame_num) {
+    SetWaitForEncodedFramesThreshold(1);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frames;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+    EXPECT_LE(encoded_frames.size(), 2u);
+    EXPECT_GE(encoded_frames.size(), 1u);
+    if (encoded_frames.size() == 1) {
+      frame_dropped = true;
+      // The dropped frame is on SL0.
+      EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1);
+    }
+  }
+  EXPECT_TRUE(frame_dropped);
+
+  // Enable the last layer.
+  bitrate_allocation.SetBitrate(
+      2, 0, codec_settings_.spatialLayers[2].targetBitrate * 1000);
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+  SetWaitForEncodedFramesThreshold(1);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), nullptr));
+  std::vector<EncodedImage> encoded_frames;
+  std::vector<CodecSpecificInfo> codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+  // No drop allowed.
+  EXPECT_EQ(encoded_frames.size(), 3u);
+
+  // Verify that frame dropping is re-enabled.
+  frame_dropped = false;
+  // Encode enough frames to force drop due to rate-control.
+  for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops;
+       ++frame_num) {
+    SetWaitForEncodedFramesThreshold(1);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frames;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+    EXPECT_LE(encoded_frames.size(), 3u);
+    EXPECT_GE(encoded_frames.size(), 2u);
+    if (encoded_frames.size() == 2) {
+      frame_dropped = true;
+      // The dropped frame is on SL0.
+      EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1);
+      EXPECT_EQ(encoded_frames[1].SpatialIndex(), 2);
+    }
+  }
+  EXPECT_TRUE(frame_dropped);
+}
+
 TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) {
   const size_t num_spatial_layers = 3;
   // Chosen by hand, the 2nd frame is dropped with configured per-layer max
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 9fedf794a9..8648ba5883 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -711,18 +711,35 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
         RTC_NOTREACHED();
     }
 
-    // Configure encoder to drop entire superframe whenever it needs to drop
-    // a layer. This mode is prefered over per-layer dropping which causes
-    // quality flickering and is not compatible with RTP non-flexible mode.
-    vpx_svc_frame_drop_t svc_drop_frame;
-    memset(&svc_drop_frame, 0, sizeof(svc_drop_frame));
-    svc_drop_frame.framedrop_mode =
-        full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
-    svc_drop_frame.max_consec_drop = std::numeric_limits<int>::max();
-    for (size_t i = 0; i < num_spatial_layers_; ++i) {
-      svc_drop_frame.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+    memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_));
+    dropping_only_base_layer_ = inter_layer_pred_ == InterLayerPredMode::kOn &&
+                                codec_.mode == VideoCodecMode::kScreensharing &&
+                                num_spatial_layers_ > 1;
+    if (dropping_only_base_layer_) {
+      // Screenshare dropping mode: only the base spatial layer can be dropped
+      // and dropping it doesn't affect other spatial layers. This mode is
+      // preferable because the base layer has low bitrate targets and is more
+      // likely to drop frames. It shouldn't reduce framerate on other
+      // layers.
+      svc_drop_frame_.framedrop_mode = LAYER_DROP;
+      svc_drop_frame_.max_consec_drop = 5;
+      svc_drop_frame_.framedrop_thresh[0] = config_->rc_dropframe_thresh;
+      for (size_t i = 1; i < num_spatial_layers_; ++i) {
+        svc_drop_frame_.framedrop_thresh[i] = 0;
+      }
+    } else {
+      // Configure encoder to drop entire superframe whenever it needs to drop
+      // a layer. This mode is preferred over per-layer dropping which causes
+      // quality flickering and is not compatible with RTP non-flexible mode.
+      svc_drop_frame_.framedrop_mode =
+          full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
+      svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max();
+      for (size_t i = 0; i < num_spatial_layers_; ++i) {
+        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+      }
     }
-    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);
+    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
+                      &svc_drop_frame_);
   }
 
   // Register callback for getting each spatial layer.
@@ -888,9 +905,22 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
       if (less_layers_requested || more_layers_requested) {
         ss_info_needed_ = true;
       }
+      if (more_layers_requested && !force_key_frame_) {
+        // Prohibit drop of all layers for the next frame, so a newly enabled
+        // layer would have a valid spatial reference.
+        for (size_t i = 0; i < num_spatial_layers_; ++i) {
+          svc_drop_frame_.framedrop_thresh[i] = 0;
+        }
+      }
     }
   }
 
+  if (num_spatial_layers_ > 1) {
+    // Update frame dropping settings as they may change on a per-frame basis.
+    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
+                      &svc_drop_frame_);
+  }
+
   if (vpx_codec_enc_config_set(encoder_, config_)) {
     return WEBRTC_VIDEO_CODEC_ERROR;
   }
@@ -1442,6 +1472,16 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
 
 void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_picture) {
   if (encoded_image_.size() > 0) {
+    if (num_spatial_layers_ > 1) {
+      // Restore frame dropping settings, as dropping may be temporarily
+      // forbidden due to dynamically enabled layers.
+      svc_drop_frame_.framedrop_thresh[0] = config_->rc_dropframe_thresh;
+      for (size_t i = 1; i < num_spatial_layers_; ++i) {
+        svc_drop_frame_.framedrop_thresh[i] =
+            dropping_only_base_layer_ ? 0 : config_->rc_dropframe_thresh;
+      }
+    }
+
     codec_specific_.codecSpecific.VP9.end_of_picture = end_of_picture;
 
     // No data partitioning in VP9, so 1 partition only.
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 73bca263ef..fb195a7c00 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -124,6 +124,10 @@ class VP9EncoderImpl : public VP9Encoder {
   const bool trusted_rate_controller_;
   const bool dynamic_rate_settings_;
   const bool full_superframe_drop_;
+  // True if only the base spatial layer may be dropped by rate control.
+  bool dropping_only_base_layer_;
+  // Settings for VP9E_SET_SVC_FRAME_DROP_LAYER; thresholds may vary per frame.
+  vpx_svc_frame_drop_t svc_drop_frame_;
   bool first_frame_in_picture_;
   VideoBitrateAllocation current_bitrate_allocation_;
   absl::optional<RateControlParameters> requested_rate_settings_;