Reland "Copy video frames metadata between encoded and plain frames in one place"

Reland with fixes: Do not remove extra metadata copies in software decoders as some downstream projects assumes these fields are copied by the encoders. Currently some video frames metadata like rotation or ntp timestamps are copied in every encoder and decoder separately. This CL makes copying to happen at a single place for send or receive side. This will make it easier to add new metadata in the future. Also, added some missing tests. Original Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/133346 Bug: webrtc:10460 Change-Id: I8e49589bf75f406e2b5ddee34882de0faedbd09a Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/134102 Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org> Reviewed-by: Johannes Kron <kron@webrtc.org> Reviewed-by: Erik Språng <sprang@webrtc.org> Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org> Cr-Commit-Position: refs/heads/master@{#27756}
2019-04-25 09:59:51 +02:00
parent bf4a221187
commit c9a2c5e93a
16 changed files with 367 additions and 292 deletions
--- a/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc
+++ b/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc
@ -19,7 +19,6 @@
 #include "api/video_codecs/video_decoder.h"
 #include "api/video_codecs/video_encoder.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
-#include "common_video/test/utilities.h"
 #include "media/base/codec.h"
 #include "media/base/media_constants.h"
 #include "modules/video_coding/codecs/h264/include/h264.h"
@ -49,17 +48,9 @@ class TestH264Impl : public VideoCodecUnitTest {
 #ifdef WEBRTC_USE_H264
 #define MAYBE_EncodeDecode EncodeDecode
 #define MAYBE_DecodedQpEqualsEncodedQp DecodedQpEqualsEncodedQp
-#define MAYBE_EncodedColorSpaceEqualsInputColorSpace \
-  EncodedColorSpaceEqualsInputColorSpace
-#define MAYBE_DecodedColorSpaceEqualsEncodedColorSpace \
-  DecodedColorSpaceEqualsEncodedColorSpace
 #else
 #define MAYBE_EncodeDecode DISABLED_EncodeDecode
 #define MAYBE_DecodedQpEqualsEncodedQp DISABLED_DecodedQpEqualsEncodedQp
-#define MAYBE_EncodedColorSpaceEqualsInputColorSpace \
-  DISABLED_EncodedColorSpaceEqualsInputColorSpace
-#define MAYBE_DecodedColorSpaceEqualsEncodedColorSpace \
-  DISABLED_DecodedColorSpaceEqualsEncodedColorSpace
 #endif

 TEST_F(TestH264Impl, MAYBE_EncodeDecode) {
@ -105,45 +96,4 @@ TEST_F(TestH264Impl, MAYBE_DecodedQpEqualsEncodedQp) {
  EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
 }

-TEST_F(TestH264Impl, MAYBE_EncodedColorSpaceEqualsInputColorSpace) {
-  VideoFrame* input_frame = NextInputFrame();
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_FALSE(encoded_frame.ColorSpace());
-
-  // Video frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
-  VideoFrame input_frame_w_color_space =
-      VideoFrame::Builder()
-          .set_video_frame_buffer(input_frame->video_frame_buffer())
-          .set_color_space(color_space)
-          .build();
-
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            encoder_->Encode(input_frame_w_color_space, nullptr));
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  ASSERT_TRUE(encoded_frame.ColorSpace());
-  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
-}
-
-TEST_F(TestH264Impl, MAYBE_DecodedColorSpaceEqualsEncodedColorSpace) {
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            encoder_->Encode(*NextInputFrame(), nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  // Add color space to encoded frame.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
-  encoded_frame.SetColorSpace(color_space);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
-  std::unique_ptr<VideoFrame> decoded_frame;
-  absl::optional<uint8_t> decoded_qp;
-  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
-  ASSERT_TRUE(decoded_frame);
-  ASSERT_TRUE(decoded_frame->color_space());
-  EXPECT_EQ(color_space, *decoded_frame->color_space());
-}
-
 }  // namespace webrtc
--- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
@ -227,51 +227,10 @@ TEST_F(TestVp8Impl, OnEncodedImageReportsInfo) {
  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);

  EXPECT_EQ(kInitialTimestampRtp, encoded_frame.Timestamp());
-  EXPECT_EQ(kInitialTimestampMs, encoded_frame.capture_time_ms_);
  EXPECT_EQ(kWidth, static_cast<int>(encoded_frame._encodedWidth));
  EXPECT_EQ(kHeight, static_cast<int>(encoded_frame._encodedHeight));
 }

-// We only test the encoder here, since the decoded frame rotation is set based
-// on the CVO RTP header extension in VCMDecodedFrameCallback::Decoded.
-// TODO(brandtr): Consider passing through the rotation flag through the decoder
-// in the same way as done in the encoder.
-TEST_F(TestVp8Impl, EncodedRotationEqualsInputRotation) {
-  VideoFrame* input_frame = NextInputFrame();
-  input_frame->set_rotation(kVideoRotation_0);
-
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
-  EXPECT_EQ(kVideoRotation_0, encoded_frame.rotation_);
-
-  input_frame->set_rotation(kVideoRotation_90);
-  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
-  EXPECT_EQ(kVideoRotation_90, encoded_frame.rotation_);
-}
-
-TEST_F(TestVp8Impl, EncodedColorSpaceEqualsInputColorSpace) {
-  // Video frame without explicit color space information.
-  VideoFrame* input_frame = NextInputFrame();
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
-  EXPECT_FALSE(encoded_frame.ColorSpace());
-
-  // Video frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
-  VideoFrame input_frame_w_color_space =
-      VideoFrame::Builder()
-          .set_video_frame_buffer(input_frame->video_frame_buffer())
-          .set_color_space(color_space)
-          .build();
-
-  EncodeAndWaitForFrame(input_frame_w_color_space, &encoded_frame,
-                        &codec_specific_info);
-  ASSERT_TRUE(encoded_frame.ColorSpace());
-  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
-}
-
 TEST_F(TestVp8Impl, DecodedQpEqualsEncodedQp) {
  VideoFrame* input_frame = NextInputFrame();
  EncodedImage encoded_frame;
@ -290,24 +249,6 @@ TEST_F(TestVp8Impl, DecodedQpEqualsEncodedQp) {
  EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
 }

-TEST_F(TestVp8Impl, DecodedColorSpaceEqualsEncodedColorSpace) {
-  VideoFrame* input_frame = NextInputFrame();
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
-
-  // Encoded frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
-  encoded_frame.SetColorSpace(color_space);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, -1));
-  std::unique_ptr<VideoFrame> decoded_frame;
-  absl::optional<uint8_t> decoded_qp;
-  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
-  ASSERT_TRUE(decoded_frame);
-  ASSERT_TRUE(decoded_frame->color_space());
-  EXPECT_EQ(color_space, *decoded_frame->color_space());
-}
-
 TEST_F(TestVp8Impl, ChecksSimulcastSettings) {
  codec_settings_.numberOfSimulcastStreams = 2;
  // Resolutions are not in ascending order, temporal layers do not match.
@ -402,7 +343,6 @@ TEST_F(TestVp8Impl, MAYBE_AlignedStrideEncodeDecode) {
  // Compute PSNR on all planes (faster than SSIM).
  EXPECT_GT(I420PSNR(input_frame, decoded_frame.get()), 36);
  EXPECT_EQ(kInitialTimestampRtp, decoded_frame->timestamp());
-  EXPECT_EQ(kTestNtpTimeMs, decoded_frame->ntp_time_ms());
 }

 #if defined(WEBRTC_ANDROID)
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@ -11,7 +11,6 @@
 #include "api/video/color_space.h"
 #include "api/video/i420_buffer.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
-#include "common_video/test/utilities.h"
 #include "media/base/vp9_profile.h"
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
 #include "modules/video_coding/codecs/test/video_codec_unittest.h"
@ -146,50 +145,7 @@ TEST_F(TestVp9Impl, EncodeDecode) {
            color_space.chroma_siting_vertical());
 }

-// We only test the encoder here, since the decoded frame rotation is set based
-// on the CVO RTP header extension in VCMDecodedFrameCallback::Decoded.
-// TODO(brandtr): Consider passing through the rotation flag through the decoder
-// in the same way as done in the encoder.
-TEST_F(TestVp9Impl, EncodedRotationEqualsInputRotation) {
-  VideoFrame* input_frame = NextInputFrame();
-  input_frame->set_rotation(kVideoRotation_0);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_EQ(kVideoRotation_0, encoded_frame.rotation_);
-
-  input_frame = NextInputFrame();
-  input_frame->set_rotation(kVideoRotation_90);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_EQ(kVideoRotation_90, encoded_frame.rotation_);
-}
-
-TEST_F(TestVp9Impl, EncodedColorSpaceEqualsInputColorSpace) {
-  // Video frame without explicit color space information.
-  VideoFrame* input_frame = NextInputFrame();
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
-  EncodedImage encoded_frame;
-  CodecSpecificInfo codec_specific_info;
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  EXPECT_FALSE(encoded_frame.ColorSpace());
-
-  // Video frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/true);
-  VideoFrame input_frame_w_hdr =
-      VideoFrame::Builder()
-          .set_video_frame_buffer(input_frame->video_frame_buffer())
-          .set_color_space(color_space)
-          .build();
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
-            encoder_->Encode(input_frame_w_hdr, nullptr));
-  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
-  ASSERT_TRUE(encoded_frame.ColorSpace());
-  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
-}
-
-TEST_F(TestVp9Impl, DecodedColorSpaceEqualsEncodedColorSpace) {
+TEST_F(TestVp9Impl, DecodedColorSpaceFromBitstream) {
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
            encoder_->Encode(*NextInputFrame(), nullptr));
  EncodedImage encoded_frame;
@ -206,15 +162,6 @@ TEST_F(TestVp9Impl, DecodedColorSpaceEqualsEncodedColorSpace) {
  ASSERT_TRUE(decoded_frame->color_space());
  // No HDR metadata present.
  EXPECT_FALSE(decoded_frame->color_space()->hdr_metadata());
-
-  // Encoded frame with explicit color space information.
-  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/true);
-  encoded_frame.SetColorSpace(color_space);
-  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
-  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
-  ASSERT_TRUE(decoded_frame);
-  ASSERT_TRUE(decoded_frame->color_space());
-  EXPECT_EQ(color_space, *decoded_frame->color_space());
 }

 TEST_F(TestVp9Impl, DecodedQpEqualsEncodedQp) {
--- a/modules/video_coding/encoded_frame.h
+++ b/modules/video_coding/encoded_frame.h
@ -52,8 +52,10 @@ class VCMEncodedFrame : protected EncodedImage {
    return static_cast<const webrtc::EncodedImage&>(*this);
  }

+  using EncodedImage::ColorSpace;
  using EncodedImage::data;
  using EncodedImage::set_size;
+  using EncodedImage::SetColorSpace;
  using EncodedImage::SetSpatialIndex;
  using EncodedImage::SetTimestamp;
  using EncodedImage::size;
--- a/modules/video_coding/generic_decoder.cc
+++ b/modules/video_coding/generic_decoder.cc
@ -74,6 +74,12 @@ void VCMDecodedFrameCallback::Decoded(VideoFrame& decodedImage,
    frameInfo = _timestampMap.Pop(decodedImage.timestamp());
  }

+  decodedImage.set_ntp_time_ms(frameInfo->ntp_time_ms);
+  if (frameInfo->color_space) {
+    decodedImage.set_color_space(*frameInfo->color_space);
+  }
+  decodedImage.set_rotation(frameInfo->rotation);
+
  if (frameInfo == NULL) {
    RTC_LOG(LS_WARNING) << "Too many frames backed up in the decoder, dropping "
                           "this one.";
@ -140,7 +146,6 @@ void VCMDecodedFrameCallback::Decoded(VideoFrame& decodedImage,

  decodedImage.set_timestamp_us(frameInfo->renderTimeMs *
                                rtc::kNumMicrosecsPerMillisec);
-  decodedImage.set_rotation(frameInfo->rotation);
  _receiveCallback->FrameToRender(decodedImage, qp, frameInfo->content_type);
 }

@ -199,6 +204,9 @@ int32_t VCMGenericDecoder::Decode(const VCMEncodedFrame& frame, int64_t nowMs) {
  _frameInfos[_nextFrameInfoIdx].renderTimeMs = frame.RenderTimeMs();
  _frameInfos[_nextFrameInfoIdx].rotation = frame.rotation();
  _frameInfos[_nextFrameInfoIdx].timing = frame.video_timing();
+  _frameInfos[_nextFrameInfoIdx].ntp_time_ms =
+      frame.EncodedImage().ntp_time_ms_;
+  _frameInfos[_nextFrameInfoIdx].color_space = frame.ColorSpace();
  // Set correctly only for key frames. Thus, use latest key frame
  // content type. If the corresponding key frame was lost, decode will fail
  // and content type will be ignored.
--- a/modules/video_coding/generic_decoder.h
+++ b/modules/video_coding/generic_decoder.h
@ -34,6 +34,8 @@ struct VCMFrameInformation {
  VideoRotation rotation;
  VideoContentType content_type;
  EncodedImage::Timing timing;
+  int64_t ntp_time_ms;
+  const ColorSpace* color_space;
 };

 class VCMDecodedFrameCallback : public DecodedImageCallback {