Add spatial index to EncodedImage.

Replaces the VP8 simulcast index and VP9 spatial index formerly part of
CodecSpecificInfo.

Bug: webrtc:9378
Change-Id: I80eafd63fbdee0a25864338196a690628b4bd3d2
Reviewed-on: https://webrtc-review.googlesource.com/83161
Commit-Queue: Niels Moller <nisse@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Sebastian Jansson <srte@webrtc.org>
Reviewed-by: Magnus Jedvert <magjed@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Reviewed-by: Rasmus Brandt <brandtr@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24485}
This commit is contained in:
Niels Möller
2018-08-27 15:33:42 +02:00
committed by Commit Bot
parent 02e9e44c0c
commit da0898dfae
30 changed files with 160 additions and 215 deletions

View File

@ -506,6 +506,7 @@ int32_t H264EncoderImpl::Encode(const VideoFrame& input_frame,
: VideoContentType::UNSPECIFIED;
encoded_images_[i].timing_.flags = VideoSendTiming::kInvalid;
encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType);
encoded_images_[i].SetSpatialIndex(configurations_[i].simulcast_idx);
// Split encoded image up into fragments. This also updates
// |encoded_image_|.
@ -526,8 +527,6 @@ int32_t H264EncoderImpl::Encode(const VideoFrame& input_frame,
codec_specific.codecType = kVideoCodecH264;
codec_specific.codecSpecific.H264.packetization_mode =
packetization_mode_;
codec_specific.codecSpecific.H264.simulcast_idx =
configurations_[i].simulcast_idx;
encoded_image_callback_->OnEncodedImage(encoded_images_[i],
&codec_specific, &frag_header);
}

View File

@ -285,7 +285,6 @@ EncodedImageCallback::Result MultiplexEncoderAdapter::OnEncodedImage(
CodecSpecificInfo codec_info = *codecSpecificInfo;
codec_info.codecType = kVideoCodecMultiplex;
codec_info.codecSpecific.generic.simulcast_idx = 0;
encoded_complete_callback_->OnEncodedImage(combined_image_, &codec_info,
fragmentation);
}

View File

@ -232,7 +232,7 @@ TEST_P(TestMultiplexAdapter, CheckSingleFrameEncodedBitstream) {
CodecSpecificInfo codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType);
EXPECT_EQ(0, codec_specific_info.codecSpecific.generic.simulcast_idx);
EXPECT_FALSE(encoded_frame.SpatialIndex());
const MultiplexImage& unpacked_frame =
MultiplexEncodedImagePacker::Unpack(encoded_frame);
@ -252,7 +252,7 @@ TEST_P(TestMultiplexAdapter, CheckDoubleFramesEncodedBitstream) {
CodecSpecificInfo codec_specific_info;
ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
EXPECT_EQ(kVideoCodecMultiplex, codec_specific_info.codecType);
EXPECT_EQ(0, codec_specific_info.codecSpecific.generic.simulcast_idx);
EXPECT_FALSE(encoded_frame.SpatialIndex());
const MultiplexImage& unpacked_frame =
MultiplexEncodedImagePacker::Unpack(encoded_frame);

View File

@ -56,22 +56,17 @@ size_t GetMaxNaluSizeBytes(const EncodedImage& encoded_frame,
return max_size;
}
void GetLayerIndices(const CodecSpecificInfo& codec_specific,
size_t* spatial_idx,
size_t* temporal_idx) {
size_t GetTemporalLayerIndex(const CodecSpecificInfo& codec_specific) {
size_t temporal_idx = 0;
if (codec_specific.codecType == kVideoCodecVP8) {
*spatial_idx = codec_specific.codecSpecific.VP8.simulcastIdx;
*temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
temporal_idx = codec_specific.codecSpecific.VP8.temporalIdx;
} else if (codec_specific.codecType == kVideoCodecVP9) {
*spatial_idx = codec_specific.codecSpecific.VP9.spatial_idx;
*temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
temporal_idx = codec_specific.codecSpecific.VP9.temporal_idx;
}
if (*spatial_idx == kNoSpatialIdx) {
*spatial_idx = 0;
}
if (*temporal_idx == kNoTemporalIdx) {
*temporal_idx = 0;
if (temporal_idx == kNoTemporalIdx) {
temporal_idx = 0;
}
return temporal_idx;
}
int GetElapsedTimeMicroseconds(int64_t start_ns, int64_t stop_ns) {
@ -347,9 +342,8 @@ void VideoProcessor::FrameEncoded(
}
// Layer metadata.
size_t spatial_idx = 0;
size_t temporal_idx = 0;
GetLayerIndices(codec_specific, &spatial_idx, &temporal_idx);
size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
size_t temporal_idx = GetTemporalLayerIndex(codec_specific);
FrameStatistics* frame_stat =
stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), spatial_idx);

View File

@ -817,7 +817,6 @@ void LibvpxVp8Encoder::PopulateCodecSpecific(
codec_specific->codecType = kVideoCodecVP8;
codec_specific->codec_name = ImplementationName();
CodecSpecificInfoVP8* vp8Info = &(codec_specific->codecSpecific.VP8);
vp8Info->simulcastIdx = stream_idx;
vp8Info->keyIdx = kNoKeyIdx; // TODO(hlundin) populate this
vp8Info->nonReference = (pkt.data.frame.flags & VPX_FRAME_IS_DROPPABLE) != 0;
temporal_layers_[stream_idx]->PopulateCodecSpecific(
@ -876,6 +875,7 @@ int LibvpxVp8Encoder::GetEncodedPartitions(
encoded_images_[encoder_idx]._frameType = kVideoFrameKey;
is_keyframe = true;
}
encoded_images_[encoder_idx].SetSpatialIndex(stream_idx);
PopulateCodecSpecific(&codec_specific, tl_configs[stream_idx], *pkt,
stream_idx, input_image.timestamp());
break;

View File

@ -70,7 +70,7 @@ class TestVp8Impl : public VideoCodecUnitTest {
VerifyQpParser(*encoded_frame);
EXPECT_STREQ("libvpx", codec_specific_info->codec_name);
EXPECT_EQ(kVideoCodecVP8, codec_specific_info->codecType);
EXPECT_EQ(0u, codec_specific_info->codecSpecific.VP8.simulcastIdx);
EXPECT_EQ(0, encoded_frame->SpatialIndex());
}
void EncodeAndExpectFrameWith(const VideoFrame& input_frame,

View File

@ -356,7 +356,7 @@ TEST_F(TestVp9Impl, EndOfPicture) {
encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific));
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, kNoSpatialIdx);
EXPECT_FALSE(frames[0].SpatialIndex());
EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_picture);
}
@ -395,7 +395,7 @@ TEST_F(TestVp9Impl, InterLayerPred) {
// Key frame.
EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
EXPECT_EQ(frames[0].SpatialIndex(), 0);
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
inter_layer_pred == InterLayerPredMode::kOff);
EXPECT_TRUE(
@ -408,7 +408,7 @@ TEST_F(TestVp9Impl, InterLayerPred) {
// Delta frame.
EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted);
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0);
EXPECT_EQ(frames[0].SpatialIndex(), 0);
EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred,
inter_layer_pred == InterLayerPredMode::kOff ||
inter_layer_pred == InterLayerPredMode::kOnKeyPic);

View File

@ -754,6 +754,7 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
}
void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
absl::optional<int>* spatial_idx,
const vpx_codec_cx_pkt& pkt,
uint32_t timestamp,
bool first_frame_in_picture) {
@ -780,9 +781,9 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
}
if (num_active_spatial_layers_ == 1) {
RTC_CHECK_EQ(layer_id.spatial_layer_id, 0);
vp9_info->spatial_idx = kNoSpatialIdx;
*spatial_idx = absl::nullopt;
} else {
vp9_info->spatial_idx = layer_id.spatial_layer_id;
*spatial_idx = layer_id.spatial_layer_id;
}
if (layer_id.spatial_layer_id != 0) {
vp9_info->ss_data_available = false;
@ -1021,8 +1022,10 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
RTC_DCHECK_LE(encoded_image_._length, encoded_image_._size);
memset(&codec_specific_, 0, sizeof(codec_specific_));
PopulateCodecSpecific(&codec_specific_, *pkt, input_image_->timestamp(),
first_frame_in_picture);
absl::optional<int> spatial_index;
PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
input_image_->timestamp(), first_frame_in_picture);
encoded_image_.SetSpatialIndex(spatial_index);
if (is_flexible_mode_) {
UpdateReferenceBuffers(*pkt, pics_since_key_);

View File

@ -61,6 +61,7 @@ class VP9EncoderImpl : public VP9Encoder {
int InitAndSetControlSettings(const VideoCodec* inst);
void PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
absl::optional<int>* spatial_idx,
const vpx_codec_cx_pkt& pkt,
uint32_t timestamp,
bool first_frame_in_picture);

View File

@ -82,7 +82,6 @@ void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header) {
if (_codecSpecificInfo.codecType != kVideoCodecVP9) {
// This is the first packet for this frame.
_codecSpecificInfo.codecSpecific.VP9.temporal_idx = 0;
_codecSpecificInfo.codecSpecific.VP9.spatial_idx = 0;
_codecSpecificInfo.codecSpecific.VP9.gof_idx = 0;
_codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted = false;
_codecSpecificInfo.codecType = kVideoCodecVP9;
@ -106,8 +105,6 @@ void VCMEncodedFrame::CopyCodecSpecific(const RTPVideoHeader* header) {
vp9_header.temporal_up_switch;
}
if (vp9_header.spatial_idx != kNoSpatialIdx) {
_codecSpecificInfo.codecSpecific.VP9.spatial_idx =
vp9_header.spatial_idx;
_codecSpecificInfo.codecSpecific.VP9.inter_layer_predicted =
vp9_header.inter_layer_predicted;
}

View File

@ -391,21 +391,10 @@ EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage(
const RTPFragmentationHeader* fragmentation_header) {
TRACE_EVENT_INSTANT1("webrtc", "VCMEncodedFrameCallback::Encoded",
"timestamp", encoded_image.Timestamp());
size_t simulcast_svc_idx = 0;
if (codec_specific->codecType == kVideoCodecVP9) {
if (codec_specific->codecSpecific.VP9.num_spatial_layers > 1)
simulcast_svc_idx = codec_specific->codecSpecific.VP9.spatial_idx;
} else if (codec_specific->codecType == kVideoCodecVP8) {
simulcast_svc_idx = codec_specific->codecSpecific.VP8.simulcastIdx;
} else if (codec_specific->codecType == kVideoCodecGeneric) {
simulcast_svc_idx = codec_specific->codecSpecific.generic.simulcast_idx;
} else if (codec_specific->codecType == kVideoCodecH264) {
// TODO(ilnik): When h264 simulcast is landed, extract simulcast idx here.
}
const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
EncodedImage image_copy(encoded_image);
FillTimingInfo(simulcast_svc_idx, &image_copy);
FillTimingInfo(spatial_idx, &image_copy);
// Piggyback ALR experiment group id and simulcast id into the content type.
uint8_t experiment_id =
@ -421,7 +410,7 @@ EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage(
// id in content type to +1 of that is actual simulcast index. This is because
// value 0 on the wire is reserved for 'no simulcast stream specified'.
RTC_CHECK(videocontenttypehelpers::SetSimulcastId(
&image_copy.content_type_, static_cast<uint8_t>(simulcast_svc_idx + 1)));
&image_copy.content_type_, static_cast<uint8_t>(spatial_idx + 1)));
Result result = post_encode_callback_->OnEncodedImage(
image_copy, codec_specific, fragmentation_header);

View File

@ -95,8 +95,8 @@ std::vector<std::vector<FrameType>> GetTimingFrames(
image._length = FrameSize(min_frame_size, max_frame_size, s, i);
image.capture_time_ms_ = current_timestamp;
image.SetTimestamp(static_cast<uint32_t>(current_timestamp * 90));
image.SetSpatialIndex(s);
codec_specific.codecType = kVideoCodecGeneric;
codec_specific.codecSpecific.generic.simulcast_idx = s;
callback.OnEncodeStarted(static_cast<uint32_t>(current_timestamp * 90),
current_timestamp, s);
if (dropped) {
@ -189,7 +189,6 @@ TEST(TestVCMEncodedFrameCallback, NoTimingFrameIfNoEncodeStartTime) {
image.capture_time_ms_ = timestamp;
image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
codec_specific.codecType = kVideoCodecGeneric;
codec_specific.codecSpecific.generic.simulcast_idx = 0;
FakeEncodedImageCallback sink;
VCMEncodedFrameCallback callback(&sink, nullptr);
VideoCodec::TimingFrameTriggerThresholds thresholds;
@ -221,7 +220,6 @@ TEST(TestVCMEncodedFrameCallback, AdjustsCaptureTimeForInternalSourceEncoder) {
image.capture_time_ms_ = timestamp;
image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
codec_specific.codecType = kVideoCodecGeneric;
codec_specific.codecSpecific.generic.simulcast_idx = 0;
FakeEncodedImageCallback sink;
VCMEncodedFrameCallback callback(&sink, nullptr);
callback.SetInternalSource(true);
@ -257,7 +255,6 @@ TEST(TestVCMEncodedFrameCallback, NotifiesAboutDroppedFrames) {
const int64_t kTimestampMs3 = 47721860;
const int64_t kTimestampMs4 = 47721870;
codec_specific.codecType = kVideoCodecGeneric;
codec_specific.codecSpecific.generic.simulcast_idx = 0;
FakeEncodedImageCallback sink;
VCMEncodedFrameCallback callback(&sink, nullptr);
// Any non-zero bitrate needed to be set before the first frame.
@ -293,7 +290,6 @@ TEST(TestVCMEncodedFrameCallback, RestoresCaptureTimestamps) {
CodecSpecificInfo codec_specific;
const int64_t kTimestampMs = 123456;
codec_specific.codecType = kVideoCodecGeneric;
codec_specific.codecSpecific.generic.simulcast_idx = 0;
FakeEncodedImageCallback sink;
VCMEncodedFrameCallback callback(&sink, nullptr);
// Any non-zero bitrate needed to be set before the first frame.

View File

@ -28,6 +28,8 @@ class RTPFragmentationHeader; // forward declaration
// with a copy-constructor. See below.
struct CodecSpecificInfoVP8 {
bool nonReference;
// TODO(bugs.webrtc.org/9378): Delete simulcastIdx, replaced by spatial index
// member in EncodedImage. Unused, but assigned in downstream code.
uint8_t simulcastIdx;
uint8_t temporalIdx;
bool layerSync;
@ -43,6 +45,8 @@ struct CodecSpecificInfoVP9 {
bool non_ref_for_inter_layer_pred;
uint8_t temporal_idx;
// TODO(bugs.webrtc.org/9378): Delete spatial_idx, replaced by spatial index
// member in EncodedImage. Unused, but assigned in downstream code.
uint8_t spatial_idx;
bool temporal_up_switch;
bool inter_layer_predicted; // Frame is dependent on directly lower spatial
@ -63,13 +67,14 @@ struct CodecSpecificInfoVP9 {
bool end_of_picture;
};
// TODO(bugs.webrtc.org/9378): Delete this struct. Unused, except that
// simulcast_idx is assigned in downstream code.
struct CodecSpecificInfoGeneric {
uint8_t simulcast_idx;
};
struct CodecSpecificInfoH264 {
H264PacketizationMode packetization_mode;
uint8_t simulcast_idx;
};
union CodecSpecificInfoUnion {

View File

@ -76,15 +76,9 @@ class SimulcastTestFixtureImpl::TestEncodedImageCallback
virtual Result OnEncodedImage(const EncodedImage& encoded_image,
const CodecSpecificInfo* codec_specific_info,
const RTPFragmentationHeader* fragmentation) {
uint16_t simulcast_idx = 0;
bool is_vp8 = (codec_specific_info->codecType == kVideoCodecVP8);
if (is_vp8) {
simulcast_idx = codec_specific_info->codecSpecific.VP8.simulcastIdx;
} else {
simulcast_idx = codec_specific_info->codecSpecific.H264.simulcast_idx;
}
// Only store the base layer.
if (simulcast_idx) {
if (encoded_image.SpatialIndex().value_or(0) == 0) {
if (encoded_image._frameType == kVideoFrameKey) {
delete[] encoded_key_frame_._buffer;
encoded_key_frame_._buffer = new uint8_t[encoded_image._size];
@ -104,9 +98,9 @@ class SimulcastTestFixtureImpl::TestEncodedImageCallback
}
}
if (is_vp8) {
layer_sync_[codec_specific_info->codecSpecific.VP8.simulcastIdx] =
layer_sync_[encoded_image.SpatialIndex().value_or(0)] =
codec_specific_info->codecSpecific.VP8.layerSync;
temporal_layer_[codec_specific_info->codecSpecific.VP8.simulcastIdx] =
temporal_layer_[encoded_image.SpatialIndex().value_or(0)] =
codec_specific_info->codecSpecific.VP8.temporalIdx;
}
return Result(Result::OK, encoded_image.Timestamp());