VP9 screenshare: drop base layer separately

Because of its low bitrate target, the base layer drops frames much more frequently than the other layers. This reduces the overall framerate, especially when the input framerate is low (5 fps). This CL allows per-layer drops and disables dropping on the higher spatial layers for screenshare, solving the issue. Additional care has to be taken when new spatial layers are enabled dynamically, so as not to create references that are incompatible with RTP. Bug: webrtc:10257 Change-Id: Ie056484c99a3f35ff4405ef71337dc2d034db8bb Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/138262 Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org> Reviewed-by: Sergey Silkin <ssilkin@webrtc.org> Cr-Commit-Position: refs/heads/master@{#28063}
2019-05-24 16:50:00 +02:00
parent d9b4f3330f
commit 039a7146ab
3 changed files with 141 additions and 12 deletions
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@ -751,7 +751,7 @@ TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
  codec_settings_.maxFramerate = 30;
  ConfigureSvc(num_spatial_layers);
  codec_settings_.spatialLayers[0].maxFramerate = 5.0;
-  // use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to
+  // use 30 for the SL 1 instead of 10, so even if SL 0 frame is dropped due to
  // framerate capping we would still get back at least a middle layer. It
  // simplifies the test.
  codec_settings_.spatialLayers[1].maxFramerate = 30.0;
@ -815,6 +815,93 @@ TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
  EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
 }
 TEST_F(TestVp9Impl, ScreenshareFrameDropping) {
  // Scenario exercised by this test (3 spatial layers, screenshare mode,
  // inter-layer prediction on, flexible mode, frame dropping enabled):
  //   1. With only SL0 and SL1 active and SL0 starved of bitrate, at least one
  //      picture must arrive with SL0 missing while SL1 is still delivered.
  //   2. On the first picture after SL2 is enabled, all three layers must be
  //      delivered (no drop allowed).
  //   3. Afterwards dropping must kick in again: at least one picture must
  //      arrive with only SL1 and SL2.
  const int kNumSpatialLayers = 3;
  const int kFramesToDetectDrops = 2;

  codec_settings_.maxFramerate = 30;
  ConfigureSvc(kNumSpatialLayers);
  // Every spatial layer runs at 30 fps; this keeps the test simple.
  for (int sl_idx = 0; sl_idx < kNumSpatialLayers; ++sl_idx) {
    codec_settings_.spatialLayers[sl_idx].maxFramerate = 30.0;
  }
  codec_settings_.mode = VideoCodecMode::kScreensharing;
  codec_settings_.VP9()->frameDroppingOn = true;
  codec_settings_.VP9()->flexibleMode = true;
  codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;

  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
                                 0 /* max payload size (unused) */));

  // Phase 1: SL0 and SL1 enabled, SL2 disabled.
  VideoBitrateAllocation allocation;
  // SL0 gets a very low bitrate to ensure rate-control drops on it.
  allocation.SetBitrate(0, 0, 1000);
  allocation.SetBitrate(1, 0,
                        codec_settings_.spatialLayers[1].targetBitrate * 1000);
  // The top layer stays off for now.
  allocation.SetBitrate(2, 0, 0);
  const VideoEncoder::RateControlParameters two_layer_rates(
      allocation, codec_settings_.maxFramerate);
  encoder_->SetRates(two_layer_rates);

  bool saw_drop = false;
  // Feed enough frames for the rate controller to drop one.
  for (int frame_idx = 0; frame_idx < kFramesToDetectDrops; ++frame_idx) {
    SetWaitForEncodedFramesThreshold(1);
    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
              encoder_->Encode(*NextInputFrame(), nullptr));
    std::vector<EncodedImage> layer_frames;
    std::vector<CodecSpecificInfo> layer_infos;
    ASSERT_TRUE(WaitForEncodedFrames(&layer_frames, &layer_infos));
    EXPECT_LE(layer_frames.size(), 2u);
    EXPECT_GE(layer_frames.size(), 1u);
    if (layer_frames.size() == 1) {
      saw_drop = true;
      // SL0 was the dropped layer, so the surviving frame belongs to SL1.
      EXPECT_EQ(layer_frames[0].SpatialIndex(), 1);
    }
  }
  EXPECT_TRUE(saw_drop);

  // Phase 2: switch the top layer on.
  allocation.SetBitrate(2, 0,
                        codec_settings_.spatialLayers[2].targetBitrate * 1000);
  const VideoEncoder::RateControlParameters three_layer_rates(
      allocation, codec_settings_.maxFramerate);
  encoder_->SetRates(three_layer_rates);
  SetWaitForEncodedFramesThreshold(1);
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
            encoder_->Encode(*NextInputFrame(), nullptr));
  std::vector<EncodedImage> first_full_picture;
  std::vector<CodecSpecificInfo> first_full_picture_info;
  ASSERT_TRUE(
      WaitForEncodedFrames(&first_full_picture, &first_full_picture_info));
  // No layer may be dropped on this picture.
  EXPECT_EQ(first_full_picture.size(), 3u);

  // Phase 3: check that frame dropping has been switched back on.
  saw_drop = false;
  // Again feed enough frames for the rate controller to drop one.
  for (int frame_idx = 0; frame_idx < kFramesToDetectDrops; ++frame_idx) {
    SetWaitForEncodedFramesThreshold(1);
    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
              encoder_->Encode(*NextInputFrame(), nullptr));
    std::vector<EncodedImage> layer_frames;
    std::vector<CodecSpecificInfo> layer_infos;
    ASSERT_TRUE(WaitForEncodedFrames(&layer_frames, &layer_infos));
    EXPECT_LE(layer_frames.size(), 3u);
    EXPECT_GE(layer_frames.size(), 2u);
    if (layer_frames.size() == 2) {
      saw_drop = true;
      // SL0 was the dropped layer; SL1 and SL2 are delivered in that order.
      EXPECT_EQ(layer_frames[0].SpatialIndex(), 1);
      EXPECT_EQ(layer_frames[1].SpatialIndex(), 2);
    }
  }
  EXPECT_TRUE(saw_drop);
 }
 TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) {
  const size_t num_spatial_layers = 3;
  // Chosen by hand, the 2nd frame is dropped with configured per-layer max
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@ -711,18 +711,35 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
        RTC_NOTREACHED();
    }
-    // Configure encoder to drop entire superframe whenever it needs to drop
+    memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_));
-    // a layer. This mode is prefered over per-layer dropping which causes
+    dropping_only_base_layer_ = inter_layer_pred_ == InterLayerPredMode::kOn &&
-    // quality flickering and is not compatible with RTP non-flexible mode.
+                                codec_.mode == VideoCodecMode::kScreensharing &&
-    vpx_svc_frame_drop_t svc_drop_frame;
+                                num_spatial_layers_ > 1;
-    memset(&svc_drop_frame, 0, sizeof(svc_drop_frame));
+    if (dropping_only_base_layer_) {
-    svc_drop_frame.framedrop_mode =
+      // Screenshare dropping mode: only the base spatial layer
-        full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
+      // can be dropped and it doesn't affect other spatial layers.
-    svc_drop_frame.max_consec_drop = std::numeric_limits<int>::max();
+      // This mode is preferable because base layer has low bitrate targets
-    for (size_t i = 0; i < num_spatial_layers_; ++i) {
+      // and more likely to drop frames. It shouldn't reduce framerate on other
-      svc_drop_frame.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+      // layers.
      svc_drop_frame_.framedrop_mode = LAYER_DROP;
      svc_drop_frame_.max_consec_drop = 5;
      svc_drop_frame_.framedrop_thresh[0] = config_->rc_dropframe_thresh;
      for (size_t i = 1; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = 0;
      }
    } else {
      // Configure encoder to drop entire superframe whenever it needs to drop
      // a layer. This mode is preferred over per-layer dropping which causes
      // quality flickering and is not compatible with RTP non-flexible mode.
      svc_drop_frame_.framedrop_mode =
          full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
      svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max();
      for (size_t i = 0; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
      }
    }
-    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);
+    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                      &svc_drop_frame_);
  }
  // Register callback for getting each spatial layer.
@ -888,9 +905,22 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
      if (less_layers_requested || more_layers_requested) {
        ss_info_needed_ = true;
      }
      if (more_layers_requested && !force_key_frame_) {
        // Prohibit drop of all layers for the next frame, so newly enabled
        // layer would have a valid spatial reference.
        for (size_t i = 0; i < num_spatial_layers_; ++i) {
          svc_drop_frame_.framedrop_thresh[i] = 0;
        }
      }
    }
  }
  if (num_spatial_layers_ > 1) {
    // Update frame dropping settings as they may change on per-frame basis.
    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
                      &svc_drop_frame_);
  }
  if (vpx_codec_enc_config_set(encoder_, config_)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
@ -1442,6 +1472,16 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
 void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_picture) {
  if (encoded_image_.size() > 0) {
    if (num_spatial_layers_ > 1) {
      // Restore frame dropping settings, as dropping may be temporary forbidden
      // due to dynamically enabled layers.
      svc_drop_frame_.framedrop_thresh[0] = config_->rc_dropframe_thresh;
      for (size_t i = 1; i < num_spatial_layers_; ++i) {
        svc_drop_frame_.framedrop_thresh[i] =
            dropping_only_base_layer_ ? 0 : config_->rc_dropframe_thresh;
      }
    }
    codec_specific_.codecSpecific.VP9.end_of_picture = end_of_picture;
    // No data partitioning in VP9, so 1 partition only.
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@ -124,6 +124,8 @@ class VP9EncoderImpl : public VP9Encoder {
  const bool trusted_rate_controller_;
  const bool dynamic_rate_settings_;
  const bool full_superframe_drop_;
  bool dropping_only_base_layer_;
  vpx_svc_frame_drop_t svc_drop_frame_;
  bool first_frame_in_picture_;
  VideoBitrateAllocation current_bitrate_allocation_;
  absl::optional<RateControlParameters> requested_rate_settings_;