Support layer skipping in full svc structures with 3 temporal layers

Bug: webrtc:11999 Change-Id: I09d9e9e83f43dc9e552f0dd72ba3e7e588fbab48 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/187346 Commit-Queue: Danil Chapovalov <danilchap@webrtc.org> Reviewed-by: Philip Eliasson <philipel@webrtc.org> Cr-Commit-Position: refs/heads/master@{#32373}
2020-10-09 18:58:42 +02:00
parent ff7913204c
commit 294729f33c
11 changed files with 353 additions and 373 deletions
--- a/modules/video_coding/codecs/av1/BUILD.gn
+++ b/modules/video_coding/codecs/av1/BUILD.gn
@ -129,14 +129,21 @@ rtc_library("libaom_av1_encoder") {
 if (rtc_include_tests) {
  rtc_library("scalability_structure_tests") {
    testonly = true
-    sources = [ "scalability_structure_unittest.cc" ]
+    sources = [
+      "scalability_structure_l3t3_unittest.cc",
+      "scalability_structure_test_helpers.cc",
+      "scalability_structure_test_helpers.h",
+      "scalability_structure_unittest.cc",
+    ]
    deps = [
      ":scalability_structures",
      ":scalable_video_controller",
      "../..:chain_diff_calculator",
      "../..:frame_dependencies_calculator",
      "../../../../api/transport/rtp:dependency_descriptor",
+      "../../../../api/video:video_bitrate_allocation",
      "../../../../api/video:video_frame_type",
+      "../../../../common_video/generic_frame_descriptor",
      "../../../../test:test_support",
    ]
    absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
--- a/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc
+++ b/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc
@ -12,15 +12,20 @@
 #include <utility>
 #include <vector>

+#include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "api/transport/rtp/dependency_descriptor.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"

 namespace webrtc {
+namespace {
+// Ids passed to LayerFrameConfig::Id() by NextFrameConfig(): kKey is used for
+// T0 frames generated while no previous pattern exists (start or restart),
+// kDelta for all other frames.
+enum : int { kKey, kDelta };
+}  // namespace

 constexpr int ScalabilityStructureFullSvc::kMaxNumSpatialLayers;
 constexpr int ScalabilityStructureFullSvc::kMaxNumTemporalLayers;
+constexpr absl::string_view ScalabilityStructureFullSvc::kFramePatternNames[];

 ScalabilityStructureFullSvc::ScalabilityStructureFullSvc(
    int num_spatial_layers,
@ -50,7 +55,9 @@ ScalabilityStructureFullSvc::StreamConfig() const {
 }

 bool ScalabilityStructureFullSvc::TemporalLayerIsActive(int tid) const {
-  RTC_DCHECK_LT(tid, num_temporal_layers_);
+  if (tid >= num_temporal_layers_) {
+    return false;
+  }
  for (int sid = 0; sid < num_spatial_layers_; ++sid) {
    if (DecodeTargetIsActive(sid, tid)) {
      return true;
@ -87,43 +94,71 @@ DecodeTargetIndication ScalabilityStructureFullSvc::Dti(
  return DecodeTargetIndication::kRequired;
 }

+// Returns the pattern of the temporal unit that follows `last_pattern_`.
+// With all temporal layers active the patterns cycle as
+// T0 -> T2A -> T1 -> T2B -> T0. A pattern whose temporal layer is currently
+// inactive is skipped, falling back to T0 when no higher layer can follow.
+// Never returns kNone.
+ScalabilityStructureFullSvc::FramePattern
+ScalabilityStructureFullSvc::NextPattern() const {
+  switch (last_pattern_) {
+    case kNone:
+    case kDeltaT2B:
+      // Start of the structure, or end of the cycle: begin with a T0 frame.
+      return kDeltaT0;
+    case kDeltaT2A:
+      if (TemporalLayerIsActive(1)) {
+        return kDeltaT1;
+      }
+      return kDeltaT0;
+    case kDeltaT1:
+      if (TemporalLayerIsActive(2)) {
+        return kDeltaT2B;
+      }
+      return kDeltaT0;
+    case kDeltaT0:
+      // Prefer the highest active temporal layer right after a T0 frame.
+      if (TemporalLayerIsActive(2)) {
+        return kDeltaT2A;
+      }
+      if (TemporalLayerIsActive(1)) {
+        return kDeltaT1;
+      }
+      return kDeltaT0;
+  }
+  // Every enumerator returns above. Avoid falling off the end of a non-void
+  // function if `last_pattern_` ever holds an unexpected value.
+  RTC_NOTREACHED();
+  return kDeltaT0;
+}
+
 std::vector<ScalableVideoController::LayerFrameConfig>
 ScalabilityStructureFullSvc::NextFrameConfig(bool restart) {
  std::vector<LayerFrameConfig> configs;
+  if (active_decode_targets_.none()) {
+    last_pattern_ = kNone;
+    return configs;
+  }
  configs.reserve(num_spatial_layers_);

-  if (next_pattern_ == kKey || restart) {
-    can_depend_on_t0_frame_for_spatial_id_.reset();
-    next_pattern_ = kKey;
-  }
-  // T1 could have been disabled after previous call to NextFrameConfig,
-  // thus need to check it here rather than when setting next_pattern_ below.
-  if (next_pattern_ == kDeltaT1 && !TemporalLayerIsActive(/*tid=*/1)) {
-    next_pattern_ = kDeltaT0;
+  if (last_pattern_ == kNone || restart) {
+    can_reference_t0_frame_for_spatial_id_.reset();
+    last_pattern_ = kNone;
  }
+  FramePattern current_pattern = NextPattern();

  absl::optional<int> spatial_dependency_buffer_id;
-  switch (next_pattern_) {
-    case kKey:
+  switch (current_pattern) {
    case kDeltaT0:
+      // Disallow temporal references across T0 on higher temporal layers.
+      can_reference_t1_frame_for_spatial_id_.reset();
      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
        if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
          // Next frame from the spatial layer `sid` shouldn't depend on
          // potentially old previous frame from the spatial layer `sid`.
-          can_depend_on_t0_frame_for_spatial_id_.reset(sid);
+          can_reference_t0_frame_for_spatial_id_.reset(sid);
          continue;
        }
        configs.emplace_back();
        ScalableVideoController::LayerFrameConfig& config = configs.back();
-        config.Id(next_pattern_).S(sid).T(0);
+        config.Id(last_pattern_ == kNone ? kKey : kDelta).S(sid).T(0);

        if (spatial_dependency_buffer_id) {
          config.Reference(*spatial_dependency_buffer_id);
-        } else if (next_pattern_ == kKey) {
+        } else if (last_pattern_ == kNone) {
          config.Keyframe();
        }

-        if (can_depend_on_t0_frame_for_spatial_id_[sid]) {
+        if (can_reference_t0_frame_for_spatial_id_[sid]) {
          config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0));
        } else {
          // TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame
@ -131,37 +166,81 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) {
          config.Update(BufferIndex(sid, /*tid=*/0));
        }

-        can_depend_on_t0_frame_for_spatial_id_.set(sid);
+        can_reference_t0_frame_for_spatial_id_.set(sid);
        spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
      }
-
-      next_pattern_ = num_temporal_layers_ == 2 ? kDeltaT1 : kDeltaT0;
      break;
    case kDeltaT1:
      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
        if (!DecodeTargetIsActive(sid, /*tid=*/1) ||
-            !can_depend_on_t0_frame_for_spatial_id_[sid]) {
+            !can_reference_t0_frame_for_spatial_id_[sid]) {
          continue;
        }
        configs.emplace_back();
        ScalableVideoController::LayerFrameConfig& config = configs.back();
-        config.Id(next_pattern_).S(sid).T(1);
+        config.Id(kDelta).S(sid).T(1);
        // Temporal reference.
-        RTC_DCHECK(DecodeTargetIsActive(sid, /*tid=*/0));
        config.Reference(BufferIndex(sid, /*tid=*/0));
        // Spatial reference unless this is the lowest active spatial layer.
        if (spatial_dependency_buffer_id) {
          config.Reference(*spatial_dependency_buffer_id);
        }
        // No frame reference top layer frame, so no need save it into a buffer.
-        if (sid < num_spatial_layers_ - 1) {
+        if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) {
          config.Update(BufferIndex(sid, /*tid=*/1));
+          can_reference_t1_frame_for_spatial_id_.set(sid);
        }
        spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1);
      }
-      next_pattern_ = kDeltaT0;
+      break;
+    case kDeltaT2A:
+    case kDeltaT2B:
+      for (int sid = 0; sid < num_spatial_layers_; ++sid) {
+        if (!DecodeTargetIsActive(sid, /*tid=*/2) ||
+            !can_reference_t0_frame_for_spatial_id_[sid]) {
+          continue;
+        }
+        configs.emplace_back();
+        ScalableVideoController::LayerFrameConfig& config = configs.back();
+        config.Id(kDelta).S(sid).T(2);
+        // Temporal reference.
+        if (current_pattern == kDeltaT2B &&
+            can_reference_t1_frame_for_spatial_id_[sid]) {
+          config.Reference(BufferIndex(sid, /*tid=*/1));
+        } else {
+          config.Reference(BufferIndex(sid, /*tid=*/0));
+        }
+        // Spatial reference unless this is the lowest active spatial layer.
+        if (spatial_dependency_buffer_id) {
+          config.Reference(*spatial_dependency_buffer_id);
+        }
+        // Top layer frame is never referenced, no need to save it in a buffer.
+        if (sid < num_spatial_layers_ - 1) {
+          config.Update(BufferIndex(sid, /*tid=*/2));
+        }
+        spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/2);
+      }
+      break;
+    case kNone:
+      RTC_NOTREACHED();
      break;
  }
+
+  if (configs.empty() && !restart) {
+    RTC_LOG(LS_WARNING) << "Failed to generate configuration for L"
+                        << num_spatial_layers_ << "T" << num_temporal_layers_
+                        << " with active decode targets "
+                        << active_decode_targets_.to_string('-').substr(
+                               active_decode_targets_.size() -
+                               num_spatial_layers_ * num_temporal_layers_)
+                        << " and transition from "
+                        << kFramePatternNames[last_pattern_] << " to "
+                        << kFramePatternNames[current_pattern]
+                        << ". Resetting.";
+    return NextFrameConfig(/*restart=*/true);
+  }
+
+  last_pattern_ = current_pattern;
  return configs;
 }

--- a/modules/video_coding/codecs/av1/scalability_structure_full_svc.h
+++ b/modules/video_coding/codecs/av1/scalability_structure_full_svc.h
@ -34,13 +34,16 @@ class ScalabilityStructureFullSvc : public ScalableVideoController {

 private:
  enum FramePattern {
-    kKey,
+    kNone,
+    kDeltaT2A,
    kDeltaT1,
+    kDeltaT2B,
    kDeltaT0,
  };
+  static constexpr absl::string_view kFramePatternNames[] = {
+      "None", "DeltaT2A", "DeltaT1", "DeltaT2B", "DeltaT0"};
  static constexpr int kMaxNumSpatialLayers = 3;
-  // TODO(bugs.webrtc.org/11999): Support up to 3 temporal layers.
-  static constexpr int kMaxNumTemporalLayers = 2;
+  static constexpr int kMaxNumTemporalLayers = 3;

  // Index of the buffer to store last frame for layer (`sid`, `tid`)
  int BufferIndex(int sid, int tid) const {
@ -52,6 +55,7 @@ class ScalabilityStructureFullSvc : public ScalableVideoController {
  void SetDecodeTargetIsActive(int sid, int tid, bool value) {
    active_decode_targets_.set(sid * num_temporal_layers_ + tid, value);
  }
+  FramePattern NextPattern() const;
  bool TemporalLayerIsActive(int tid) const;
  static DecodeTargetIndication Dti(int sid,
                                    int tid,
@ -60,8 +64,9 @@ class ScalabilityStructureFullSvc : public ScalableVideoController {
  const int num_spatial_layers_;
  const int num_temporal_layers_;

-  FramePattern next_pattern_ = kKey;
-  std::bitset<kMaxNumSpatialLayers> can_depend_on_t0_frame_for_spatial_id_ = 0;
+  FramePattern last_pattern_ = kNone;
+  std::bitset<kMaxNumSpatialLayers> can_reference_t0_frame_for_spatial_id_ = 0;
+  std::bitset<kMaxNumSpatialLayers> can_reference_t1_frame_for_spatial_id_ = 0;
  std::bitset<32> active_decode_targets_;
 };

--- a/modules/video_coding/codecs/av1/scalability_structure_l1t3.cc
+++ b/modules/video_coding/codecs/av1/scalability_structure_l1t3.cc
@ -9,39 +9,14 @@
 */
 #include "modules/video_coding/codecs/av1/scalability_structure_l1t3.h"

-#include <utility>
 #include <vector>

-#include "absl/base/macros.h"
-#include "absl/types/optional.h"
 #include "api/transport/rtp/dependency_descriptor.h"
-#include "rtc_base/logging.h"

 namespace webrtc {
-namespace {
-
-constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
-constexpr auto kDiscardable = DecodeTargetIndication::kDiscardable;
-constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
-
-constexpr DecodeTargetIndication kDtis[3][3] = {
-    {kSwitch, kSwitch, kSwitch},               // T0
-    {kNotPresent, kDiscardable, kSwitch},      // T1
-    {kNotPresent, kNotPresent, kDiscardable},  // T2
-};
-
-}  // namespace

 ScalabilityStructureL1T3::~ScalabilityStructureL1T3() = default;

-ScalableVideoController::StreamLayersConfig
-ScalabilityStructureL1T3::StreamConfig() const {
-  StreamLayersConfig result;
-  result.num_spatial_layers = 1;
-  result.num_temporal_layers = 3;
-  return result;
-}
-
 FrameDependencyStructure ScalabilityStructureL1T3::DependencyStructure() const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 3;
@ -56,54 +31,4 @@ FrameDependencyStructure ScalabilityStructureL1T3::DependencyStructure() const {
  return structure;
 }

-std::vector<ScalableVideoController::LayerFrameConfig>
-ScalabilityStructureL1T3::NextFrameConfig(bool restart) {
-  if (restart) {
-    next_pattern_ = kKeyFrame;
-  }
-  std::vector<LayerFrameConfig> config(1);
-
-  switch (next_pattern_) {
-    case kKeyFrame:
-      config[0].T(0).Keyframe().Update(0);
-      next_pattern_ = kDeltaFrameT2A;
-      break;
-    case kDeltaFrameT2A:
-      config[0].T(2).Reference(0);
-      next_pattern_ = kDeltaFrameT1;
-      break;
-    case kDeltaFrameT1:
-      config[0].T(1).Reference(0).Update(1);
-      next_pattern_ = kDeltaFrameT2B;
-      break;
-    case kDeltaFrameT2B:
-      config[0].T(2).Reference(1);
-      next_pattern_ = kDeltaFrameT0;
-      break;
-    case kDeltaFrameT0:
-      config[0].T(0).ReferenceAndUpdate(0);
-      next_pattern_ = kDeltaFrameT2A;
-      break;
-  }
-  return config;
-}
-
-absl::optional<GenericFrameInfo> ScalabilityStructureL1T3::OnEncodeDone(
-    LayerFrameConfig config) {
-  absl::optional<GenericFrameInfo> frame_info;
-  if (config.TemporalId() < 0 ||
-      config.TemporalId() >= int{ABSL_ARRAYSIZE(kDtis)}) {
-    RTC_LOG(LS_ERROR) << "Unexpected temporal id " << config.TemporalId();
-    return frame_info;
-  }
-  frame_info.emplace();
-  frame_info->temporal_id = config.TemporalId();
-  frame_info->encoder_buffers = config.Buffers();
-  frame_info->decode_target_indications.assign(
-      std::begin(kDtis[config.TemporalId()]),
-      std::end(kDtis[config.TemporalId()]));
-  frame_info->part_of_chain = {config.TemporalId() == 0};
-  return frame_info;
-}
-
 }  // namespace webrtc
--- a/modules/video_coding/codecs/av1/scalability_structure_l1t3.h
+++ b/modules/video_coding/codecs/av1/scalability_structure_l1t3.h
@ -10,12 +10,8 @@
 #ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L1T3_H_
 #define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L1T3_H_

-#include <vector>
-
-#include "absl/types/optional.h"
 #include "api/transport/rtp/dependency_descriptor.h"
-#include "common_video/generic_frame_descriptor/generic_frame_info.h"
-#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+#include "modules/video_coding/codecs/av1/scalability_structure_full_svc.h"

 namespace webrtc {

@ -25,27 +21,12 @@ namespace webrtc {
 //         |_/     |_/
 // T0     0-------0------
 // Time-> 0 1 2 3 4 5 6 7
-class ScalabilityStructureL1T3 : public ScalableVideoController {
+class ScalabilityStructureL1T3 : public ScalabilityStructureFullSvc {
 public:
+  ScalabilityStructureL1T3() : ScalabilityStructureFullSvc(1, 3) {}
  ~ScalabilityStructureL1T3() override;

-  StreamLayersConfig StreamConfig() const override;
  FrameDependencyStructure DependencyStructure() const override;
-
-  std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
-  absl::optional<GenericFrameInfo> OnEncodeDone(
-      LayerFrameConfig config) override;
-
- private:
-  enum FramePattern {
-    kKeyFrame,
-    kDeltaFrameT2A,
-    kDeltaFrameT1,
-    kDeltaFrameT2B,
-    kDeltaFrameT0,
-  };
-
-  FramePattern next_pattern_ = kKeyFrame;
 };

 }  // namespace webrtc
--- a/modules/video_coding/codecs/av1/scalability_structure_l3t3.cc
+++ b/modules/video_coding/codecs/av1/scalability_structure_l3t3.cc
@ -9,89 +9,14 @@
 */
 #include "modules/video_coding/codecs/av1/scalability_structure_l3t3.h"

-#include <utility>
 #include <vector>

-#include "absl/base/macros.h"
-#include "absl/types/optional.h"
 #include "api/transport/rtp/dependency_descriptor.h"
-#include "rtc_base/logging.h"

 namespace webrtc {
-namespace {
-
-constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
-constexpr auto kDiscardable = DecodeTargetIndication::kDiscardable;
-constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
-constexpr auto kRequired = DecodeTargetIndication::kRequired;
-
-constexpr DecodeTargetIndication kDtis[12][9] = {
-    // Key, S0
-    {kSwitch, kSwitch, kSwitch,   // S0
-     kSwitch, kSwitch, kSwitch,   // S1
-     kSwitch, kSwitch, kSwitch},  // S2
-    // Key, S1
-    {kNotPresent, kNotPresent, kNotPresent,  // S0
-     kSwitch, kSwitch, kSwitch,              // S1
-     kSwitch, kSwitch, kSwitch},             // S2
-    // Key, S2
-    {kNotPresent, kNotPresent, kNotPresent,  // S0
-     kNotPresent, kNotPresent, kNotPresent,  // S1
-     kSwitch, kSwitch, kSwitch},             // S2
-    // Delta, S0T2
-    {kNotPresent, kNotPresent, kDiscardable,  // S0
-     kNotPresent, kNotPresent, kRequired,     // S1
-     kNotPresent, kNotPresent, kRequired},    // S2
-    // Delta, S1T2
-    {kNotPresent, kNotPresent, kNotPresent,   // S0
-     kNotPresent, kNotPresent, kDiscardable,  // S1
-     kNotPresent, kNotPresent, kRequired},    // S2
-    // Delta, S2T2
-    {kNotPresent, kNotPresent, kNotPresent,    // S0
-     kNotPresent, kNotPresent, kNotPresent,    // S1
-     kNotPresent, kNotPresent, kDiscardable},  // S2
-    // Delta, S0T1
-    {kNotPresent, kDiscardable, kSwitch,  // S0
-     kNotPresent, kRequired, kRequired,   // S1
-     kNotPresent, kRequired, kRequired},  // S2
-    // Delta, S1T1
-    {kNotPresent, kNotPresent, kNotPresent,  // S0
-     kNotPresent, kDiscardable, kSwitch,     // S1
-     kNotPresent, kRequired, kRequired},     // S2
-    // Delta, S2T1
-    {kNotPresent, kNotPresent, kNotPresent,  // S0
-     kNotPresent, kNotPresent, kNotPresent,  // S1
-     kNotPresent, kDiscardable, kSwitch},    // S2
-    // Delta, S0T0
-    {kSwitch, kSwitch, kSwitch,         // S0
-     kRequired, kRequired, kRequired,   // S1
-     kRequired, kRequired, kRequired},  // S2
-    // Delta, S1T0
-    {kNotPresent, kNotPresent, kNotPresent,  // S0
-     kSwitch, kSwitch, kSwitch,              // S1
-     kRequired, kRequired, kRequired},       // S2
-    // Delta, S2T0
-    {kNotPresent, kNotPresent, kNotPresent,  // S0
-     kNotPresent, kNotPresent, kNotPresent,  // S1
-     kSwitch, kSwitch, kSwitch},             // S2
-};
-
-}  // namespace

 ScalabilityStructureL3T3::~ScalabilityStructureL3T3() = default;

-ScalableVideoController::StreamLayersConfig
-ScalabilityStructureL3T3::StreamConfig() const {
-  StreamLayersConfig result;
-  result.num_spatial_layers = 3;
-  result.num_temporal_layers = 3;
-  result.scaling_factor_num[0] = 1;
-  result.scaling_factor_den[0] = 4;
-  result.scaling_factor_num[1] = 1;
-  result.scaling_factor_den[1] = 2;
-  return result;
-}
-
 FrameDependencyStructure ScalabilityStructureL3T3::DependencyStructure() const {
  FrameDependencyStructure structure;
  structure.num_decode_targets = 9;
@ -121,104 +46,4 @@ FrameDependencyStructure ScalabilityStructureL3T3::DependencyStructure() const {
  return structure;
 }

-ScalableVideoController::LayerFrameConfig
-ScalabilityStructureL3T3::KeyFrameConfig() const {
-  return LayerFrameConfig().Id(0).S(0).T(0).Keyframe().Update(0);
-}
-
-std::vector<ScalableVideoController::LayerFrameConfig>
-ScalabilityStructureL3T3::NextFrameConfig(bool restart) {
-  if (restart) {
-    next_pattern_ = kKeyFrame;
-  }
-  std::vector<LayerFrameConfig> config(3);
-
-  // For this structure name each of 8 buffers after the layer of the frame that
-  // buffer keeps.
-  static constexpr int kS0T0 = 0;
-  static constexpr int kS1T0 = 1;
-  static constexpr int kS2T0 = 2;
-  static constexpr int kS0T1 = 3;
-  static constexpr int kS1T1 = 4;
-  static constexpr int kS2T1 = 5;
-  static constexpr int kS0T2 = 6;
-  static constexpr int kS1T2 = 7;
-  switch (next_pattern_) {
-    case kKeyFrame:
-      config[0].Id(0).S(0).T(0).Keyframe().Update(kS0T0);
-      config[1].Id(1).S(1).T(0).Update(kS1T0).Reference(kS0T0);
-      config[2].Id(2).S(2).T(0).Update(kS2T0).Reference(kS1T0);
-      next_pattern_ = kDeltaFrameT2A;
-      break;
-    case kDeltaFrameT2A:
-      config[0].Id(3).S(0).T(2).Reference(kS0T0).Update(kS0T2);
-      config[1].Id(4).S(1).T(2).Reference(kS1T0).Reference(kS0T2).Update(kS1T2);
-      config[2].Id(5).S(2).T(2).Reference(kS2T0).Reference(kS1T2);
-      next_pattern_ = kDeltaFrameT1;
-      break;
-    case kDeltaFrameT1:
-      config[0].Id(6).S(0).T(1).Reference(kS0T0).Update(kS0T1);
-      config[1].Id(7).S(1).T(1).Reference(kS1T0).Reference(kS0T1).Update(kS1T1);
-      config[2].Id(8).S(2).T(1).Reference(kS2T0).Reference(kS1T1).Update(kS2T1);
-      next_pattern_ = kDeltaFrameT2B;
-      break;
-    case kDeltaFrameT2B:
-      config[0].Id(3).S(0).T(2).Reference(kS0T1).Update(kS0T2);
-      config[1].Id(4).S(1).T(2).Reference(kS1T1).Reference(kS0T2).Update(kS1T2);
-      config[2].Id(5).S(2).T(2).Reference(kS2T1).Reference(kS1T2);
-      next_pattern_ = kDeltaFrameT0;
-      break;
-    case kDeltaFrameT0:
-      config[0].Id(9).S(0).T(0).ReferenceAndUpdate(kS0T0);
-      config[1].Id(10).S(1).T(0).ReferenceAndUpdate(kS1T0).Reference(kS0T0);
-      config[2].Id(11).S(2).T(0).ReferenceAndUpdate(kS2T0).Reference(kS1T0);
-      next_pattern_ = kDeltaFrameT2A;
-      break;
-  }
-  return config;
-}
-
-absl::optional<GenericFrameInfo> ScalabilityStructureL3T3::OnEncodeDone(
-    LayerFrameConfig config) {
-  if (config.IsKeyframe() && config.Id() != 0) {
-    // Encoder generated a key frame without asking to.
-    if (config.SpatialId() > 0) {
-      RTC_LOG(LS_WARNING) << "Unexpected spatial id " << config.SpatialId()
-                          << " for key frame.";
-    }
-    config = LayerFrameConfig()
-                 .Keyframe()
-                 .Id(0)
-                 .S(0)
-                 .T(0)
-                 .Update(0)
-                 .Update(1)
-                 .Update(2)
-                 .Update(3)
-                 .Update(4)
-                 .Update(5)
-                 .Update(6)
-                 .Update(7);
-  }
-
-  absl::optional<GenericFrameInfo> frame_info;
-  if (config.Id() < 0 || config.Id() >= int{ABSL_ARRAYSIZE(kDtis)}) {
-    RTC_LOG(LS_ERROR) << "Unexpected config id " << config.Id();
-    return frame_info;
-  }
-  frame_info.emplace();
-  frame_info->spatial_id = config.SpatialId();
-  frame_info->temporal_id = config.TemporalId();
-  frame_info->encoder_buffers = config.Buffers();
-  frame_info->decode_target_indications.assign(std::begin(kDtis[config.Id()]),
-                                               std::end(kDtis[config.Id()]));
-  if (config.TemporalId() == 0) {
-    frame_info->part_of_chain = {config.SpatialId() == 0,
-                                 config.SpatialId() <= 1, true};
-  } else {
-    frame_info->part_of_chain = {false, false, false};
-  }
-  return frame_info;
-}
-
 }  // namespace webrtc
--- a/modules/video_coding/codecs/av1/scalability_structure_l3t3.h
+++ b/modules/video_coding/codecs/av1/scalability_structure_l3t3.h
@ -10,38 +10,18 @@
 #ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L3T3_H_
 #define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L3T3_H_

-#include <vector>
-
-#include "absl/types/optional.h"
 #include "api/transport/rtp/dependency_descriptor.h"
-#include "common_video/generic_frame_descriptor/generic_frame_info.h"
-#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+#include "modules/video_coding/codecs/av1/scalability_structure_full_svc.h"

 namespace webrtc {

 // https://aomediacodec.github.io/av1-rtp-spec/#a63-l3t3-full-svc
-class ScalabilityStructureL3T3 : public ScalableVideoController {
+class ScalabilityStructureL3T3 : public ScalabilityStructureFullSvc {
 public:
+  ScalabilityStructureL3T3() : ScalabilityStructureFullSvc(3, 3) {}
  ~ScalabilityStructureL3T3() override;

-  StreamLayersConfig StreamConfig() const override;
  FrameDependencyStructure DependencyStructure() const override;
-
-  std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
-  absl::optional<GenericFrameInfo> OnEncodeDone(
-      LayerFrameConfig config) override;
-
- private:
-  enum FramePattern {
-    kKeyFrame,
-    kDeltaFrameT2A,
-    kDeltaFrameT1,
-    kDeltaFrameT2B,
-    kDeltaFrameT0,
-  };
-  LayerFrameConfig KeyFrameConfig() const;
-
-  FramePattern next_pattern_ = kKeyFrame;
 };

 }  // namespace webrtc
--- a/modules/video_coding/codecs/av1/scalability_structure_l3t3_unittest.cc
+++ b/modules/video_coding/codecs/av1/scalability_structure_l3t3_unittest.cc
@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/video_coding/codecs/av1/scalability_structure_l3t3.h"
+
+#include "modules/video_coding/codecs/av1/scalability_structure_test_helpers.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+// Disables spatial layer S1 for a single temporal unit (the T1 one) and checks
+// that the structure keeps producing frames for both layers once S1 is
+// enabled again.
+TEST(ScalabilityStructureL3T3Test, SkipS1T1FrameKeepsStructureValid) {
+  ScalabilityStructureL3T3 structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // S0 and S1 enabled with 3 temporal layers each: first temporal unit is T0.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3));
+  auto frames = wrapper.GenerateFrames(/*num_temporal_units=*/1);
+  EXPECT_THAT(frames, SizeIs(2));
+  EXPECT_EQ(frames[0].temporal_id, 0);
+
+  // Second temporal unit is T2, still with both spatial layers.
+  frames = wrapper.GenerateFrames(/*num_temporal_units=*/1);
+  EXPECT_THAT(frames, SizeIs(2));
+  EXPECT_EQ(frames[0].temporal_id, 2);
+
+  // With S1 disabled the T1 temporal unit contains the S0 frame only.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/0));
+  frames = wrapper.GenerateFrames(/*num_temporal_units=*/1);
+  EXPECT_THAT(frames, SizeIs(1));
+  EXPECT_EQ(frames[0].temporal_id, 1);
+
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3));
+  // Rely on the checks inside GenerateFrames to verify that frame references
+  // are valid, i.e. that no frame references a buffer that was never updated.
+  frames = wrapper.GenerateFrames(/*num_temporal_units=*/1);
+  EXPECT_THAT(frames, SizeIs(2));
+  EXPECT_EQ(frames[0].temporal_id, 2);
+}
+
+// Switches the only active spatial layer from S0 to S1 right after the first
+// temporal unit and checks that the next frame is a T0 frame without any
+// frame dependencies.
+TEST(ScalabilityStructureL3T3Test, SwitchSpatialLayerBeforeT1Frame) {
+  ScalabilityStructureL3T3 structure;
+  ScalabilityStructureWrapper wrapper(structure);
+
+  // Only S0 is active, with 2 temporal layers.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/0));
+  EXPECT_THAT(wrapper.GenerateFrames(1), SizeIs(1));
+  // Only S1 is active from now on.
+  structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/0, /*s1=*/2));
+  auto frames = wrapper.GenerateFrames(1);
+  ASSERT_THAT(frames, SizeIs(1));
+  EXPECT_THAT(frames[0].frame_diffs, IsEmpty());
+  EXPECT_EQ(frames[0].temporal_id, 0);
+}
+
+}  // namespace
+}  // namespace webrtc
--- a/modules/video_coding/codecs/av1/scalability_structure_test_helpers.cc
+++ b/modules/video_coding/codecs/av1/scalability_structure_test_helpers.cc
@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/video_coding/codecs/av1/scalability_structure_test_helpers.h"
+
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "api/video/video_bitrate_allocation.h"
+#include "api/video/video_frame_type.h"
+#include "modules/video_coding/chain_diff_calculator.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+#include "modules/video_coding/frame_dependencies_calculator.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+// Returns an allocation where spatial layer `i` has a non-zero bitrate for its
+// first `s<i>` temporal layers. All other layers are left untouched.
+VideoBitrateAllocation EnableTemporalLayers(int s0, int s1, int s2) {
+  constexpr int kLayerBitrate = 1'000'000;
+  const int temporal_layers_per_spatial_layer[] = {s0, s1, s2};
+
+  VideoBitrateAllocation allocation;
+  int sid = 0;
+  for (int num_temporal_layers : temporal_layers_per_spatial_layer) {
+    for (int tid = 0; tid < num_temporal_layers; ++tid) {
+      allocation.SetBitrate(sid, tid, kLayerBitrate);
+    }
+    ++sid;
+  }
+  return allocation;
+}
+
+// Asks the wrapped controller for `num_temporal_units` temporal units and
+// returns the frame info of every generated layer frame, with `chain_diffs`
+// and `frame_diffs` filled in. `restart` is passed to the controller for the
+// first temporal unit only.
+// Reports a gtest failure when the controller returns no frame info, when a
+// frame uses a buffer id outside of [0, 8), or when a frame references a
+// buffer that no earlier frame updated.
+std::vector<GenericFrameInfo> ScalabilityStructureWrapper::GenerateFrames(
+    int num_temporal_units,
+    bool restart) {
+  // `restart` is cleared after the first temporal unit, so remember the
+  // request: it also invalidates everything known about buffer content.
+  const bool restart_requested = restart;
+
+  std::vector<GenericFrameInfo> frames;
+  for (int i = 0; i < num_temporal_units; ++i) {
+    for (auto& layer_frame : structure_controller_.NextFrameConfig(restart)) {
+      int64_t frame_id = ++frame_id_;
+      bool is_keyframe = layer_frame.IsKeyframe();
+
+      absl::optional<GenericFrameInfo> frame_info =
+          structure_controller_.OnEncodeDone(std::move(layer_frame));
+      if (!frame_info.has_value()) {
+        // Do not dereference an empty optional, report and skip the frame.
+        ADD_FAILURE() << "OnEncodeDone returned no frame info.";
+        continue;
+      }
+      if (is_keyframe) {
+        chain_diff_calculator_.Reset(frame_info->part_of_chain);
+      }
+      frame_info->chain_diffs =
+          chain_diff_calculator_.From(frame_id, frame_info->part_of_chain);
+      for (int64_t base_frame_id : frame_deps_calculator_.FromBuffersUsage(
+               is_keyframe ? VideoFrameType::kVideoFrameKey
+                           : VideoFrameType::kVideoFrameDelta,
+               frame_id, frame_info->encoder_buffers)) {
+        EXPECT_LT(base_frame_id, frame_id);
+        EXPECT_GE(base_frame_id, 0);
+        frame_info->frame_diffs.push_back(frame_id - base_frame_id);
+      }
+
+      frames.push_back(*std::move(frame_info));
+    }
+    // Only the first temporal unit is generated with the restart flag.
+    restart = false;
+  }
+
+  if (restart_requested) {
+    buffer_contains_frame_.reset();
+  }
+  // Validate buffer usage in generation order: a buffer may only be
+  // referenced after some frame has updated it.
+  for (const GenericFrameInfo& frame : frames) {
+    for (const CodecBufferUsage& buffer_usage : frame.encoder_buffers) {
+      if (buffer_usage.id < 0 || buffer_usage.id >= 8) {
+        ADD_FAILURE() << "Invalid buffer id " << buffer_usage.id
+                      << ". Up to 8 buffers are supported.";
+        continue;
+      }
+      if (buffer_usage.referenced && !buffer_contains_frame_[buffer_usage.id]) {
+        ADD_FAILURE() << "buffer " << buffer_usage.id
+                      << " was reference before updated.";
+      }
+      if (buffer_usage.updated) {
+        buffer_contains_frame_.set(buffer_usage.id);
+      }
+    }
+  }
+
+  return frames;
+}
+
+}  // namespace webrtc
--- a/modules/video_coding/codecs/av1/scalability_structure_test_helpers.h
+++ b/modules/video_coding/codecs/av1/scalability_structure_test_helpers.h
@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_TEST_HELPERS_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_TEST_HELPERS_H_
+
+#include <stdint.h>
+
+#include <bitset>
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "api/video/video_bitrate_allocation.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+#include "modules/video_coding/chain_diff_calculator.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+#include "modules/video_coding/frame_dependencies_calculator.h"
+
+namespace webrtc {
+
+// Creates bitrate allocation with non-zero bitrate for given number of temporal
+// layers for each spatial layer.
+VideoBitrateAllocation EnableTemporalLayers(int s0, int s1 = 0, int s2 = 0);
+
+// Test helper that generates frames from a ScalableVideoController and keeps
+// the state needed across calls: the running frame id, the frame dependency
+// and chain diff calculators, and which buffers already contain a frame.
+class ScalabilityStructureWrapper {
+ public:
+  // `structure` is held by reference and must outlive the wrapper.
+  explicit ScalabilityStructureWrapper(ScalableVideoController& structure)
+      : structure_controller_(structure) {}
+
+  // Generates frames for `num_temporal_units` temporal units. `restart` is
+  // passed to the controller when generating the first temporal unit.
+  std::vector<GenericFrameInfo> GenerateFrames(int num_temporal_units,
+                                               bool restart);
+
+  // Same as above, without requesting a restart.
+  std::vector<GenericFrameInfo> GenerateFrames(int num_temporal_units) {
+    return GenerateFrames(num_temporal_units, /*restart=*/false);
+  }
+
+ private:
+  ScalableVideoController& structure_controller_;
+  // Bit `i` is set once a generated frame has updated buffer `i`.
+  std::bitset<8> buffer_contains_frame_ = 0;
+  FrameDependenciesCalculator frame_deps_calculator_;
+  ChainDiffCalculator chain_diff_calculator_;
+  // Id of the last generated frame; incremented for every layer frame.
+  int64_t frame_id_ = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_TEST_HELPERS_H_
--- a/modules/video_coding/codecs/av1/scalability_structure_unittest.cc
+++ b/modules/video_coding/codecs/av1/scalability_structure_unittest.cc
@ -17,11 +17,9 @@

 #include "absl/types/optional.h"
 #include "api/transport/rtp/dependency_descriptor.h"
-#include "api/video/video_frame_type.h"
-#include "modules/video_coding/chain_diff_calculator.h"
 #include "modules/video_coding/codecs/av1/create_scalability_structure.h"
+#include "modules/video_coding/codecs/av1/scalability_structure_test_helpers.h"
 #include "modules/video_coding/codecs/av1/scalable_video_controller.h"
-#include "modules/video_coding/frame_dependencies_calculator.h"
 #include "test/gmock.h"
 #include "test/gtest.h"

@ -50,44 +48,7 @@ struct SvcTestParam {
  int num_temporal_units;
 };

-class ScalabilityStructureTest : public TestWithParam<SvcTestParam> {
- public:
-  std::vector<GenericFrameInfo> GenerateAllFrames(
-      ScalableVideoController& structure_controller) {
-    std::vector<GenericFrameInfo> frames;
-    for (int i = 0; i < GetParam().num_temporal_units; ++i) {
-      for (auto& layer_frame :
-           structure_controller.NextFrameConfig(/*reset=*/false)) {
-        int64_t frame_id = ++frame_id_;
-        bool is_keyframe = layer_frame.IsKeyframe();
-        absl::optional<GenericFrameInfo> frame_info =
-            structure_controller.OnEncodeDone(std::move(layer_frame));
-        EXPECT_TRUE(frame_info.has_value());
-        if (is_keyframe) {
-          chain_diff_calculator_.Reset(frame_info->part_of_chain);
-        }
-        frame_info->chain_diffs =
-            chain_diff_calculator_.From(frame_id, frame_info->part_of_chain);
-        for (int64_t base_frame_id : frame_deps_calculator_.FromBuffersUsage(
-                 is_keyframe ? VideoFrameType::kVideoFrameKey
-                             : VideoFrameType::kVideoFrameDelta,
-                 frame_id, frame_info->encoder_buffers)) {
-          EXPECT_LT(base_frame_id, frame_id);
-          EXPECT_GE(base_frame_id, 0);
-          frame_info->frame_diffs.push_back(frame_id - base_frame_id);
-        }
-
-        frames.push_back(*std::move(frame_info));
-      }
-    }
-    return frames;
-  }
-
- private:
-  FrameDependenciesCalculator frame_deps_calculator_;
-  ChainDiffCalculator chain_diff_calculator_;
-  int64_t frame_id_ = 0;
-};
+// Parameterized fixture with no state of its own; it only supplies the
+// SvcTestParam via GetParam().
+class ScalabilityStructureTest : public TestWithParam<SvcTestParam> {};

 TEST_P(ScalabilityStructureTest,
       NumberOfDecodeTargetsAndChainsAreInRangeAndConsistent) {
@ -156,7 +117,8 @@ TEST_P(ScalabilityStructureTest, FrameInfoMatchesFrameDependencyStructure) {
      CreateScalabilityStructure(GetParam().name);
  FrameDependencyStructure structure = svc_controller->DependencyStructure();
  std::vector<GenericFrameInfo> frame_infos =
-      GenerateAllFrames(*svc_controller);
+      ScalabilityStructureWrapper(*svc_controller)
+          .GenerateFrames(GetParam().num_temporal_units);
  for (size_t frame_id = 0; frame_id < frame_infos.size(); ++frame_id) {
    const auto& frame = frame_infos[frame_id];
    EXPECT_GE(frame.spatial_id, 0) << " for frame " << frame_id;
@ -174,7 +136,8 @@ TEST_P(ScalabilityStructureTest, ThereIsAPerfectTemplateForEachFrame) {
      CreateScalabilityStructure(GetParam().name);
  FrameDependencyStructure structure = svc_controller->DependencyStructure();
  std::vector<GenericFrameInfo> frame_infos =
-      GenerateAllFrames(*svc_controller);
+      ScalabilityStructureWrapper(*svc_controller)
+          .GenerateFrames(GetParam().num_temporal_units);
  for (size_t frame_id = 0; frame_id < frame_infos.size(); ++frame_id) {
    EXPECT_THAT(structure.templates, Contains(frame_infos[frame_id]))
        << " for frame " << frame_id;
@ -185,7 +148,8 @@ TEST_P(ScalabilityStructureTest, FrameDependsOnSameOrLowerLayer) {
  std::unique_ptr<ScalableVideoController> svc_controller =
      CreateScalabilityStructure(GetParam().name);
  std::vector<GenericFrameInfo> frame_infos =
-      GenerateAllFrames(*svc_controller);
+      ScalabilityStructureWrapper(*svc_controller)
+          .GenerateFrames(GetParam().num_temporal_units);
  int64_t num_frames = frame_infos.size();

  for (int64_t frame_id = 0; frame_id < num_frames; ++frame_id) {
@ -205,7 +169,8 @@ TEST_P(ScalabilityStructureTest, NoFrameDependsOnDiscardableOrNotPresent) {
  std::unique_ptr<ScalableVideoController> svc_controller =
      CreateScalabilityStructure(GetParam().name);
  std::vector<GenericFrameInfo> frame_infos =
-      GenerateAllFrames(*svc_controller);
+      ScalabilityStructureWrapper(*svc_controller)
+          .GenerateFrames(GetParam().num_temporal_units);
  int64_t num_frames = frame_infos.size();
  FrameDependencyStructure structure = svc_controller->DependencyStructure();

@ -237,7 +202,8 @@ TEST_P(ScalabilityStructureTest, NoFrameDependsThroughSwitchIndication) {
      CreateScalabilityStructure(GetParam().name);
  FrameDependencyStructure structure = svc_controller->DependencyStructure();
  std::vector<GenericFrameInfo> frame_infos =
-      GenerateAllFrames(*svc_controller);
+      ScalabilityStructureWrapper(*svc_controller)
+          .GenerateFrames(GetParam().num_temporal_units);
  int64_t num_frames = frame_infos.size();
  std::vector<std::set<int64_t>> full_deps(num_frames);

@ -302,7 +268,9 @@ TEST_P(ScalabilityStructureSetRatesTest, ProduceNoFrameForDisabledLayers) {
  }

  svc_controller->OnRatesUpdated(all_bitrates);
-  std::vector<GenericFrameInfo> frames = GenerateAllFrames(*svc_controller);
+  ScalabilityStructureWrapper wrapper(*svc_controller);
+  std::vector<GenericFrameInfo> frames =
+      wrapper.GenerateFrames(GetParam().num_temporal_units);

  for (int sid = 0; sid < structure.num_spatial_layers; ++sid) {
    for (int tid = 0; tid < structure.num_temporal_layers; ++tid) {
@ -317,7 +285,7 @@ TEST_P(ScalabilityStructureSetRatesTest, ProduceNoFrameForDisabledLayers) {
      svc_controller->OnRatesUpdated(bitrates);
      // With layer (sid, tid) disabled, expect no frames are produced for it.
      EXPECT_THAT(
-          GenerateAllFrames(*svc_controller),
+          wrapper.GenerateFrames(GetParam().num_temporal_units),
          Not(Contains(AllOf(Field(&GenericFrameInfo::spatial_id, sid),
                             Field(&GenericFrameInfo::temporal_id, tid)))))
          << "For layer (" << sid << "," << tid << ")";
@ -348,11 +316,13 @@ INSTANTIATE_TEST_SUITE_P(Svc,
                         ScalabilityStructureSetRatesTest,
                         Values(SvcTestParam{"L1T2",
                                             /*num_temporal_units=*/4},
+                                SvcTestParam{"L1T3", /*num_temporal_units=*/8},
                                SvcTestParam{"L2T1",
                                             /*num_temporal_units=*/3},
                                SvcTestParam{"L2T2",
                                             /*num_temporal_units=*/4},
-                                SvcTestParam{"L3T1", /*num_temporal_units=*/3}),
+                                SvcTestParam{"L3T1", /*num_temporal_units=*/3},
+                                SvcTestParam{"L3T3", /*num_temporal_units=*/8}),
                         [](const testing::TestParamInfo<SvcTestParam>& info) {
                           return info.param.name;
                         });