From 002b6f4f2383df28585932c9f83b4e1aed2025f6 Mon Sep 17 00:00:00 2001
From: Ilya Nikolaevskiy <ilnik@webrtc.org>
Date: Mon, 30 Sep 2019 10:32:13 +0200
Subject: [PATCH] Fixes for support of disabling lower spatial layers in VP9

1) Always allocate at least one spatial layer in svc rate allocator

2) Ensure the tests reflect the known, pre-existing failing scenario
(k-SVC video without external reference control).

3) Update the log representation of the bitrate allocation, as the old
format looked very confusing when lower layers were disabled.

Was:
[
[],
[], [x, y, z]]
New:
[
[]
[]
[x,y,z]]

Bug: webrtc:10977
Change-Id: I248d9b44c8848710aa5a194a5c1b96df6a2734ac
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/154744
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29345}
---
 api/video/video_bitrate_allocation.cc         |   2 +-
 .../codecs/vp9/svc_rate_allocator.cc          |   7 +-
 .../codecs/vp9/test/vp9_impl_unittest.cc      | 164 ++++++++++++++++++
 3 files changed, 170 insertions(+), 3 deletions(-)

diff --git a/api/video/video_bitrate_allocation.cc b/api/video/video_bitrate_allocation.cc
index 1b3569047e..32e72467f5 100644
--- a/api/video/video_bitrate_allocation.cc
+++ b/api/video/video_bitrate_allocation.cc
@@ -151,7 +151,7 @@ std::string VideoBitrateAllocation::ToString() const {
       break;
 
     const uint32_t layer_sum = GetSpatialLayerSum(si);
-    if (layer_sum == sum_) {
+    if (layer_sum == sum_ && si == 0) {
       ssb << " [";
     } else {
       if (si > 0)
diff --git a/modules/video_coding/codecs/vp9/svc_rate_allocator.cc b/modules/video_coding/codecs/vp9/svc_rate_allocator.cc
index a02e69aa47..86b677d6c8 100644
--- a/modules/video_coding/codecs/vp9/svc_rate_allocator.cc
+++ b/modules/video_coding/codecs/vp9/svc_rate_allocator.cc
@@ -316,12 +316,15 @@ VideoBitrateAllocation SvcRateAllocator::GetAllocationScreenSharing(
     DataRate total_bitrate,
     size_t first_active_layer,
     size_t num_spatial_layers) const {
+  VideoBitrateAllocation bitrate_allocation;
+
   if (num_spatial_layers == 0 ||
       total_bitrate <
           DataRate::kbps(codec_.spatialLayers[first_active_layer].minBitrate)) {
-    return VideoBitrateAllocation();
+    // Always enable at least one layer.
+    bitrate_allocation.SetBitrate(first_active_layer, 0, total_bitrate.bps());
+    return bitrate_allocation;
   }
-  VideoBitrateAllocation bitrate_allocation;
 
   DataRate allocated_rate = DataRate::Zero();
   DataRate top_layer_rate = DataRate::Zero();
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index e54ac348ba..4463f18208 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -353,6 +353,162 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
 }
 
 TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrame) {
+  // Configure encoder to produce N spatial layers. Encode frames for all
+  // layers. Then disable all but the last layer. Then reenable all back again.
+  test::ScopedFieldTrials override_field_trials(
+      "WebRTC-Vp9ExternalRefCtrl/Enabled/");
+  const size_t num_spatial_layers = 3;
+  const size_t num_temporal_layers = 3;
+  // Must not be a multiple of the temporal period to exercise all code paths.
+  const size_t num_frames_to_encode = 5;
+
+  ConfigureSvc(num_spatial_layers, num_temporal_layers);
+  codec_settings_.VP9()->frameDroppingOn = false;
+  codec_settings_.VP9()->flexibleMode = false;
+  codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic;
+  codec_settings_.mode = VideoCodecMode::kRealtimeVideo;
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, kSettings));
+
+  VideoBitrateAllocation bitrate_allocation;
+  for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+    for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+      // Allocate high bit rate to avoid frame dropping due to rate control.
+      bitrate_allocation.SetBitrate(
+          sl_idx, tl_idx,
+          codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 * 2);
+    }
+  }
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+    SetWaitForEncodedFramesThreshold(num_spatial_layers);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frame;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+    EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+              frame_num == 0);
+  }
+
+  // Disable all but top layer.
+  for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) {
+    for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+      bitrate_allocation.SetBitrate(sl_idx, tl_idx, 0);
+    }
+  }
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  bool seen_ss_data = false;
+  for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+    SetWaitForEncodedFramesThreshold(1);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frame;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+    // SS available immediately after switching on base temporal layer.
+    if (seen_ss_data) {
+      EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+                false);
+    } else {
+      EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+                codec_specific_info[0].codecSpecific.VP9.temporal_idx == 0);
+      seen_ss_data |=
+          codec_specific_info[0].codecSpecific.VP9.ss_data_available;
+    }
+    // No key-frames generated for disabling layers.
+    EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+    EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+  }
+  EXPECT_TRUE(seen_ss_data);
+
+  // Force key-frame.
+  std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey};
+  SetWaitForEncodedFramesThreshold(1);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), &frame_types));
+  std::vector<EncodedImage> encoded_frame;
+  std::vector<CodecSpecificInfo> codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+  // Key-frame should be produced.
+  EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameKey);
+  EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+
+  // Encode some more frames.
+  for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+    SetWaitForEncodedFramesThreshold(1);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frame;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+    EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+    EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
+  }
+
+  // Enable the second layer back.
+  // Allocate high bit rate to avoid frame dropping due to rate control.
+  for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+    bitrate_allocation.SetBitrate(
+        1, tl_idx, codec_settings_.spatialLayers[0].targetBitrate * 1000 * 2);
+  }
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+    SetWaitForEncodedFramesThreshold(2);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frame;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+    ASSERT_EQ(encoded_frame.size(), 2u);
+    // SS available immediately after switching on.
+    EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+              frame_num == 0);
+    // Keyframe should be generated when enabling lower layers.
+    const VideoFrameType expected_type = frame_num == 0
+                                             ? VideoFrameType::kVideoFrameKey
+                                             : VideoFrameType::kVideoFrameDelta;
+    EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+    EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 1);
+    EXPECT_EQ(encoded_frame[1].SpatialIndex().value_or(-1), 2);
+  }
+
+  // Enable the first layer back.
+  // Allocate high bit rate to avoid frame dropping due to rate control.
+  for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) {
+    bitrate_allocation.SetBitrate(
+        0, tl_idx, codec_settings_.spatialLayers[1].targetBitrate * 1000 * 2);
+  }
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
+    SetWaitForEncodedFramesThreshold(num_spatial_layers);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frame;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+    ASSERT_EQ(encoded_frame.size(), 3u);
+    // SS available immediately after switching on.
+    EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
+              frame_num == 0);
+    // Keyframe should be generated when enabling lower layers.
+    const VideoFrameType expected_type = frame_num == 0
+                                             ? VideoFrameType::kVideoFrameKey
+                                             : VideoFrameType::kVideoFrameDelta;
+    EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+  }
+}
+
+TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrameForScreenshare) {
   // Configure encoder to produce N spatial layers. Encode frames for all
   // layers. Then disable all but the last layer. Then reenable all back again.
   const size_t num_spatial_layers = 3;
@@ -360,6 +516,9 @@ TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrame) {
 
   ConfigureSvc(num_spatial_layers);
   codec_settings_.VP9()->frameDroppingOn = false;
+  codec_settings_.mode = VideoCodecMode::kScreensharing;
+  codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+  codec_settings_.VP9()->flexibleMode = true;
 
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->InitEncode(&codec_settings_, kSettings));
@@ -404,6 +563,7 @@ TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrame) {
               frame_num == 0);
     // No key-frames generated for disabling layers.
     EXPECT_EQ(encoded_frame[0]._frameType, VideoFrameType::kVideoFrameDelta);
+    EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 2);
   }
 
   // Force key-frame.
@@ -431,6 +591,7 @@ TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrame) {
     std::vector<EncodedImage> encoded_frame;
     std::vector<CodecSpecificInfo> codec_specific_info;
     ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+    ASSERT_EQ(encoded_frame.size(), 2u);
     // SS available immediatly after switching on.
     EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
               frame_num == 0);
@@ -439,6 +600,8 @@ TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrame) {
                                              ? VideoFrameType::kVideoFrameKey
                                              : VideoFrameType::kVideoFrameDelta;
     EXPECT_EQ(encoded_frame[0]._frameType, expected_type);
+    EXPECT_EQ(encoded_frame[0].SpatialIndex().value_or(-1), 1);
+    EXPECT_EQ(encoded_frame[1].SpatialIndex().value_or(-1), 2);
   }
 
   // Enable the first layer back.
@@ -455,6 +618,7 @@ TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrame) {
     std::vector<EncodedImage> encoded_frame;
     std::vector<CodecSpecificInfo> codec_specific_info;
     ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info));
+    ASSERT_EQ(encoded_frame.size(), 3u);
     // SS available immediatly after switching on.
     EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available,
               frame_num == 0);