Introduce layering controller interface for av1 encoder

Add TODOs into the AV1 encoder wrapper where it is supposed to be used. Bug: webrtc:11404 Change-Id: If049066b84be72829867d5084827a7d275648a7b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174806 Reviewed-by: Philip Eliasson <philipel@webrtc.org> Commit-Queue: Danil Chapovalov <danilchap@webrtc.org> Cr-Commit-Position: refs/heads/master@{#31278}
2020-05-15 14:21:03 +02:00
parent 46e9629dda
commit b471ac791c
6 changed files with 315 additions and 69 deletions
--- a/modules/video_coding/codecs/av1/BUILD.gn
+++ b/modules/video_coding/codecs/av1/BUILD.gn
@ -36,11 +36,27 @@ rtc_library("libaom_av1_decoder") {
  }
 }

+rtc_source_set("scalable_video_controller") {
+  sources = [
+    "scalable_video_controller.h",
+    "scalable_video_controller_no_layering.cc",
+    "scalable_video_controller_no_layering.h",
+  ]
+  deps = [
+    "../../../../api/transport/rtp:dependency_descriptor",
+    "../../../../common_video/generic_frame_descriptor",
+    "../../../../rtc_base:checks",
+    "//third_party/abseil-cpp/absl/container:inlined_vector",
+    "//third_party/abseil-cpp/absl/types:optional",
+  ]
+}
+
 rtc_library("libaom_av1_encoder") {
  visibility = [ "*" ]
  poisonous = [ "software_video_codecs" ]
  public = [ "libaom_av1_encoder.h" ]
  deps = [
+    ":scalable_video_controller",
    "../../../../api/video_codecs:video_codecs_api",
    "//third_party/abseil-cpp/absl/base:core_headers",
  ]
@ -79,6 +95,8 @@ if (rtc_include_tests) {
        "../..:video_codec_interface",
        "../../../../api:create_frame_generator",
        "../../../../api:frame_generator_api",
+        "../../../../api:mock_video_encoder",
+        "../../../../api/video:video_frame_i420",
        "../../../../api/video_codecs:video_codecs_api",
        "../../../../test:test_support",
        "//third_party/abseil-cpp/absl/types:optional",
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@ -13,6 +13,7 @@
 #include <stdint.h>

 #include <memory>
+#include <utility>
 #include <vector>

 #include "absl/algorithm/container.h"
@ -22,6 +23,8 @@
 #include "api/video/video_frame.h"
 #include "api/video_codecs/video_codec.h"
 #include "api/video_codecs/video_encoder.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h"
 #include "modules/video_coding/include/video_codec_interface.h"
 #include "modules/video_coding/include/video_error_codes.h"
 #include "rtc_base/checks.h"
@ -47,7 +50,8 @@ constexpr float kMinimumFrameRate = 1.0;

 class LibaomAv1Encoder final : public VideoEncoder {
 public:
-  LibaomAv1Encoder();
+  explicit LibaomAv1Encoder(
+      std::unique_ptr<ScalableVideoController> svc_controller);
  ~LibaomAv1Encoder();

  int InitEncode(const VideoCodec* codec_settings,
@ -66,6 +70,7 @@ class LibaomAv1Encoder final : public VideoEncoder {
  EncoderInfo GetEncoderInfo() const override;

 private:
+  const std::unique_ptr<ScalableVideoController> svc_controller_;
  bool inited_;
  bool keyframe_required_;
  VideoCodec encoder_settings_;
@ -100,11 +105,15 @@ int32_t VerifyCodecSettings(const VideoCodec& codec_settings) {
  return WEBRTC_VIDEO_CODEC_OK;
 }

-LibaomAv1Encoder::LibaomAv1Encoder()
-    : inited_(false),
+LibaomAv1Encoder::LibaomAv1Encoder(
+    std::unique_ptr<ScalableVideoController> svc_controller)
+    : svc_controller_(std::move(svc_controller)),
+      inited_(false),
      keyframe_required_(true),
      frame_for_encode_(nullptr),
-      encoded_image_callback_(nullptr) {}
+      encoded_image_callback_(nullptr) {
+  RTC_DCHECK(svc_controller_);
+}

 LibaomAv1Encoder::~LibaomAv1Encoder() {
  Release();
@ -205,6 +214,11 @@ int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings,
    return WEBRTC_VIDEO_CODEC_ERROR;
  }

+  ScalableVideoController::StreamLayersConfig svc_config =
+      svc_controller_->StreamConfig();
+  // TODO(danilchap): Configure SVC.
+  (void)svc_config;
+
  return WEBRTC_VIDEO_CODEC_OK;
 }

@ -239,6 +253,14 @@ int32_t LibaomAv1Encoder::Encode(
      frame_types != nullptr &&
      absl::c_linear_search(*frame_types, VideoFrameType::kVideoFrameKey);

+  std::vector<ScalableVideoController::LayerFrameConfig> layer_frames =
+      svc_controller_->NextFrameConfig(keyframe_required_);
+
+  if (layer_frames.empty()) {
+    RTC_LOG(LS_ERROR) << "SVCController returned no configuration for a frame.";
+    return WEBRTC_VIDEO_CODEC_ERROR;
+  }
+
  // Convert input frame to I420, if needed.
  VideoFrame prepped_input_frame = frame;
  if (prepped_input_frame.video_frame_buffer()->type() !=
@ -263,7 +285,15 @@ int32_t LibaomAv1Encoder::Encode(

  const uint32_t duration =
      kRtpTicksPerSecond / static_cast<float>(encoder_settings_.maxFramerate);
-  aom_enc_frame_flags_t flags = (keyframe_required_) ? AOM_EFLAG_FORCE_KF : 0;
+
+  // TODO(danilchap): Remove this check when layering is implemented.
+  RTC_DCHECK_EQ(layer_frames.size(), 1);
+  for (ScalableVideoController::LayerFrameConfig& layer_frame : layer_frames) {
+    aom_enc_frame_flags_t flags =
+        layer_frame.is_keyframe ? AOM_EFLAG_FORCE_KF : 0;
+
+    // TODO(danilchap): configure buffers and layers based on
+    // `layer_frame.buffers` when layering is enabled.

    // Encode a frame.
    aom_codec_err_t ret = aom_codec_encode(&ctx_, frame_for_encode_,
@ -279,7 +309,8 @@ int32_t LibaomAv1Encoder::Encode(
    encoded_image._completeFrame = true;
    aom_codec_iter_t iter = nullptr;
    int data_pkt_count = 0;
-  while (const aom_codec_cx_pkt_t* pkt = aom_codec_get_cx_data(&ctx_, &iter)) {
+    while (const aom_codec_cx_pkt_t* pkt =
+               aom_codec_get_cx_data(&ctx_, &iter)) {
      if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) {
        if (data_pkt_count > 0) {
          RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than "
@ -287,13 +318,13 @@ int32_t LibaomAv1Encoder::Encode(
          Release();
        }
        // TODO(bugs.webrtc.org/11174): Remove this hack when
-      // webrtc_pc_e2e::SingleProcessEncodedImageDataInjector not used or fixed
-      // not to assume that encoded image transfered as is.
+        // webrtc_pc_e2e::SingleProcessEncodedImageDataInjector not used or
+        // fixed not to assume that encoded image is transferred as is.
        const uint8_t* data = static_cast<const uint8_t*>(pkt->data.frame.buf);
        size_t size = pkt->data.frame.sz;
        if (size > 2 && data[0] == 0b0'0010'010 && data[1] == 0) {
-        // Typically frame starts with a Temporal Delimter OBU of size 0 that is
-        // not need by any component in webrtc and discarded during rtp
+          // Typically frame starts with a Temporal Delimiter OBU of size 0 that
+          // is not needed by any component in webrtc and discarded during rtp
          // packetization. Before discarded it confuses test framework that
          // assumes received encoded frame is exactly same as sent frame.
          data += 2;
@ -301,8 +332,9 @@ int32_t LibaomAv1Encoder::Encode(
        }
        encoded_image.SetEncodedData(EncodedImageBuffer::Create(data, size));

-      bool is_key_frame = ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0);
-      encoded_image._frameType = is_key_frame
+        layer_frame.is_keyframe =
+            ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0);
+        encoded_image._frameType = layer_frame.is_keyframe
                                       ? VideoFrameType::kVideoFrameKey
                                       : VideoFrameType::kVideoFrameDelta;
        encoded_image.SetTimestamp(frame.timestamp());
@ -330,8 +362,17 @@ int32_t LibaomAv1Encoder::Encode(
    // Deliver encoded image data.
    if (encoded_image.size() > 0) {
      CodecSpecificInfo codec_specific_info;
-    encoded_image_callback_->OnEncodedImage(encoded_image, &codec_specific_info,
-                                            nullptr);
+      codec_specific_info.codecType = kVideoCodecAV1;
+      bool is_keyframe = layer_frame.is_keyframe;
+      codec_specific_info.generic_frame_info =
+          svc_controller_->OnEncodeDone(std::move(layer_frame));
+      if (is_keyframe && codec_specific_info.generic_frame_info) {
+        codec_specific_info.template_structure =
+            svc_controller_->DependencyStructure();
+      }
+      encoded_image_callback_->OnEncodedImage(encoded_image,
+                                              &codec_specific_info, nullptr);
+    }
  }

  return WEBRTC_VIDEO_CODEC_OK;
@ -389,7 +430,13 @@ VideoEncoder::EncoderInfo LibaomAv1Encoder::GetEncoderInfo() const {
 const bool kIsLibaomAv1EncoderSupported = true;

 std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder() {
-  return std::make_unique<LibaomAv1Encoder>();
+  return std::make_unique<LibaomAv1Encoder>(
+      std::make_unique<ScalableVideoControllerNoLayering>());
+}
+
+std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(
+    std::unique_ptr<ScalableVideoController> svc_controller) {
+  return std::make_unique<LibaomAv1Encoder>(std::move(svc_controller));
 }

 }  // namespace webrtc
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.h
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.h
@ -14,12 +14,15 @@

 #include "absl/base/attributes.h"
 #include "api/video_codecs/video_encoder.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"

 namespace webrtc {

 ABSL_CONST_INIT extern const bool kIsLibaomAv1EncoderSupported;

 std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder();
+std::unique_ptr<VideoEncoder> CreateLibaomAv1Encoder(
+    std::unique_ptr<ScalableVideoController> controller);

 }  // namespace webrtc

--- a/modules/video_coding/codecs/av1/scalable_video_controller.h
+++ b/modules/video_coding/codecs/av1/scalable_video_controller.h
@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
+
+#include <vector>
+
+#include "absl/container/inlined_vector.h"
+#include "absl/types/optional.h"
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+
+namespace webrtc {
+
+// Controls how video should be encoded to be scalable. Outputs results as
+// buffer usage configuration for encoder and enough details to communicate the
+// scalability structure via dependency descriptor rtp header extension.
+class ScalableVideoController {
+ public:
+  struct StreamLayersConfig {
+    int num_spatial_layers = 1;
+    int num_temporal_layers = 1;
+  };
+  struct LayerFrameConfig {
+    // Id to match configuration returned by NextFrameConfig with
+    // (possibly modified) configuration passed back via OnEncodeDone.
+    // The meaning of the id is an implementation detail of
+    // the ScalableVideoController.
+    int id = 0;
+
+    // Indicates the frame should be encoded as a key frame. In particular,
+    // when `is_keyframe=true`, property `CodecBufferUsage::referenced` should
+    // be ignored and treated as false.
+    bool is_keyframe = false;
+
+    int spatial_id = 0;
+    int temporal_id = 0;
+    // Describes which buffers the encoder is allowed to reference and which
+    // buffers the encoder should update.
+    absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers> buffers;
+  };
+
+  virtual ~ScalableVideoController() = default;
+
+  // Returns video structure description for encoder to configure itself.
+  virtual StreamLayersConfig StreamConfig() const = 0;
+
+  // Returns video structure description in format compatible with
+  // dependency descriptor rtp header extension.
+  virtual FrameDependencyStructure DependencyStructure() const = 0;
+
+  // When `restart` is true, first `LayerFrameConfig` should have `is_keyframe`
+  // set to true.
+  // Returned vector shouldn't be empty.
+  virtual std::vector<LayerFrameConfig> NextFrameConfig(bool restart) = 0;
+
+  // Returns configuration to pass to EncoderCallback.
+  virtual absl::optional<GenericFrameInfo> OnEncodeDone(
+      LayerFrameConfig config) = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_
--- a/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc
+++ b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc
@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h"
+
+#include <utility>
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+ScalableVideoControllerNoLayering::~ScalableVideoControllerNoLayering() =
+    default;
+
+ScalableVideoController::StreamLayersConfig
+ScalableVideoControllerNoLayering::StreamConfig() const {
+  StreamLayersConfig result;
+  result.num_spatial_layers = 1;
+  result.num_temporal_layers = 1;
+  return result;
+}
+
+FrameDependencyStructure
+ScalableVideoControllerNoLayering::DependencyStructure() const {
+  FrameDependencyStructure structure;
+  structure.num_decode_targets = 1;
+  FrameDependencyTemplate a_template;
+  a_template.decode_target_indications = {DecodeTargetIndication::kSwitch};
+  structure.templates.push_back(a_template);
+  return structure;
+}
+
+std::vector<ScalableVideoController::LayerFrameConfig>
+ScalableVideoControllerNoLayering::NextFrameConfig(bool restart) {
+  if (restart) {
+    start_ = true;
+  }
+  std::vector<LayerFrameConfig> result(1);
+  result[0].id = 0;
+  result[0].is_keyframe = start_;
+  result[0].buffers = {{/*id=*/0, /*references=*/!start_, /*updates=*/true}};
+
+  start_ = false;
+  return result;
+}
+
+absl::optional<GenericFrameInfo>
+ScalableVideoControllerNoLayering::OnEncodeDone(LayerFrameConfig config) {
+  RTC_DCHECK_EQ(config.id, 0);
+  absl::optional<GenericFrameInfo> frame_info(absl::in_place);
+  frame_info->encoder_buffers = std::move(config.buffers);
+  if (config.is_keyframe) {
+    for (auto& buffer : frame_info->encoder_buffers) {
+      buffer.referenced = false;
+    }
+  }
+  frame_info->decode_target_indications = {DecodeTargetIndication::kSwitch};
+  return frame_info;
+}
+
+}  // namespace webrtc
--- a/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h
+++ b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h
@ -0,0 +1,38 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_
+#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_
+
+#include <vector>
+
+#include "api/transport/rtp/dependency_descriptor.h"
+#include "common_video/generic_frame_descriptor/generic_frame_info.h"
+#include "modules/video_coding/codecs/av1/scalable_video_controller.h"
+
+namespace webrtc {
+
+class ScalableVideoControllerNoLayering : public ScalableVideoController {
+ public:
+  ~ScalableVideoControllerNoLayering() override;
+
+  StreamLayersConfig StreamConfig() const override;
+  FrameDependencyStructure DependencyStructure() const override;
+
+  std::vector<LayerFrameConfig> NextFrameConfig(bool restart) override;
+  absl::optional<GenericFrameInfo> OnEncodeDone(
+      LayerFrameConfig config) override;
+
+ private:
+  bool start_ = true;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_