SVC: Add end to end tests for VP8 and VP9

The tests check that the various scalability modes are supported and the frames are marked properly by the encoder with their spatial and temporal index. The same information is then checked on the receiving side. A new member is added on EncodedImage to store the temporal index, and is filled by the encoders and retrieved by the ref finder objects on the decoding side. Bug: webrtc:11607 Change-Id: I7522f6a6fc5402244cab0c4c64b544ce09bc5204 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/260189 Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org> Reviewed-by: Rasmus Brandt <brandtr@webrtc.org> Reviewed-by: Artem Titov <titovartem@webrtc.org> Commit-Queue: Florent Castelli <orphis@webrtc.org> Cr-Commit-Position: refs/heads/main@{#37303}
2022-04-25 17:28:00 +02:00
parent d151cc6fa3
commit 90b74389a2
17 changed files with 1666 additions and 5 deletions
--- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
+++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc
@ -49,8 +49,8 @@ namespace {
 // Encoder configuration parameters
 constexpr int kQpMin = 10;
 constexpr int kUsageProfile = AOM_USAGE_REALTIME;
-constexpr int kMinQindex = 145;   // Min qindex threshold for QP scaling.
-constexpr int kMaxQindex = 205;   // Max qindex threshold for QP scaling.
+constexpr int kMinQindex = 145;  // Min qindex threshold for QP scaling.
+constexpr int kMaxQindex = 205;  // Max qindex threshold for QP scaling.
 constexpr int kBitDepth = 8;
 constexpr int kLagInFrames = 0;  // No look ahead.
 constexpr int kRtpTicksPerSecond = 90000;
@ -684,6 +684,7 @@ int32_t LibaomAv1Encoder::Encode(
          encoded_image._encodedWidth = cfg_.g_w * n / d;
          encoded_image._encodedHeight = cfg_.g_h * n / d;
          encoded_image.SetSpatialIndex(layer_frame->SpatialId());
+          encoded_image.SetTemporalIndex(layer_frame->TemporalId());
        } else {
          encoded_image._encodedWidth = cfg_.g_w;
          encoded_image._encodedHeight = cfg_.g_h;
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc
@ -1156,6 +1156,10 @@ int LibvpxVp8Encoder::GetEncodedPartitions(const VideoFrame& input_image,
        encoded_images_[encoder_idx].SetSpatialIndex(stream_idx);
        PopulateCodecSpecific(&codec_specific, *pkt, stream_idx, encoder_idx,
                              input_image.timestamp());
+        if (codec_specific.codecSpecific.VP8.temporalIdx != kNoTemporalIdx) {
+          encoded_images_[encoder_idx].SetTemporalIndex(
+              codec_specific.codecSpecific.VP8.temporalIdx);
+        }
        break;
      }
    }
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@ -1260,6 +1260,7 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,

 bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                                             absl::optional<int>* spatial_idx,
+                                             absl::optional<int>* temporal_idx,
                                             const vpx_codec_cx_pkt& pkt) {
  RTC_CHECK(codec_specific != nullptr);
  codec_specific->codecType = kVideoCodecVP9;
@ -1285,8 +1286,10 @@ bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
  if (num_temporal_layers_ == 1) {
    RTC_CHECK_EQ(layer_id.temporal_layer_id, 0);
    vp9_info->temporal_idx = kNoTemporalIdx;
+    *temporal_idx = absl::nullopt;
  } else {
    vp9_info->temporal_idx = layer_id.temporal_layer_id;
+    *temporal_idx = layer_id.temporal_layer_id;
  }
  if (num_active_spatial_layers_ == 1) {
    RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
@ -1702,12 +1705,15 @@ void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {

  codec_specific_ = {};
  absl::optional<int> spatial_index;
-  if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt)) {
+  absl::optional<int> temporal_index;
+  if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, &temporal_index,
+                             *pkt)) {
    // Drop the frame.
    encoded_image_.set_size(0);
    return;
  }
  encoded_image_.SetSpatialIndex(spatial_index);
+  encoded_image_.SetTemporalIndex(temporal_index);

  const bool is_key_frame =
      ((pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false) &&
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
@ -67,6 +67,7 @@ class LibvpxVp9Encoder : public VP9Encoder {

  bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                             absl::optional<int>* spatial_idx,
+                             absl::optional<int>* temporal_idx,
                             const vpx_codec_cx_pkt& pkt);
  void FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
                            size_t pic_num,