From 4b1afbe60ac270200860264034b75c8dca8334da Mon Sep 17 00:00:00 2001
From: Artem Titarenko <artit@google.com>
Date: Thu, 25 Apr 2019 11:39:19 +0000
Subject: [PATCH] Revert "Reland "Copy video frames metadata between encoded
 and plain frames in one place""
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit c9a2c5e93aa51606916e6728454bcff26bb75f79.

Reason for revert: Breaks downstream test

Original change's description:
> Reland "Copy video frames metadata between encoded and plain frames in one place"
>
> Reland with fixes: Do not remove extra metadata copies in software decoders as some downstream projects assumes these fields are copied by the encoders.
>
> Currently some video frames metadata like rotation or ntp timestamps are
> copied in every encoder and decoder separately. This CL makes copying to
> happen at a single place for send or receive side. This will make it
> easier to add new metadata in the future.
>
> Also, added some missing tests.
>
> Original Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/133346
>
> Bug: webrtc:10460
> Change-Id: I8e49589bf75f406e2b5ddee34882de0faedbd09a
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/134102
> Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
> Reviewed-by: Johannes Kron <kron@webrtc.org>
> Reviewed-by: Erik Språng <sprang@webrtc.org>
> Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#27756}

TBR=ilnik@webrtc.org,sprang@webrtc.org,kron@webrtc.org,artit@webrtc.org

Change-Id: I34cc563ec6383735c2a76a6f45a72a7726b74421
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: webrtc:10460
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/134204
Reviewed-by: Artem Titarenko <artit@google.com>
Commit-Queue: Artem Titarenko <artit@google.com>
Cr-Commit-Position: refs/heads/master@{#27765}
---
 media/base/fake_video_renderer.cc             |   7 -
 media/base/fake_video_renderer.h              |  20 ---
 .../codecs/h264/test/h264_impl_unittest.cc    |  50 ++++++
 .../codecs/vp8/test/vp8_impl_unittest.cc      |  60 +++++++
 .../codecs/vp9/test/vp9_impl_unittest.cc      |  55 +++++-
 modules/video_coding/encoded_frame.h          |   2 -
 modules/video_coding/generic_decoder.cc       |  10 +-
 modules/video_coding/generic_decoder.h        |   2 -
 test/fake_decoder.cc                          |  13 +-
 video/BUILD.gn                                |   6 +-
 ...tadata_writer.cc => frame_encode_timer.cc} | 113 ++++++------
 ...metadata_writer.h => frame_encode_timer.h} |  40 +++--
 ...test.cc => frame_encode_timer_unittest.cc} | 165 +++---------------
 video/video_receive_stream_unittest.cc        |  96 ----------
 video/video_stream_encoder.cc                 |  16 +-
 video/video_stream_encoder.h                  |   4 +-
 16 files changed, 292 insertions(+), 367 deletions(-)
 rename video/{frame_encode_metadata_writer.cc => frame_encode_timer.cc} (72%)
 rename video/{frame_encode_metadata_writer.h => frame_encode_timer.h} (63%)
 rename video/{frame_encode_metadata_writer_unittest.cc => frame_encode_timer_unittest.cc} (64%)

diff --git a/media/base/fake_video_renderer.cc b/media/base/fake_video_renderer.cc
index 4253ba4cc0..64de624bde 100644
--- a/media/base/fake_video_renderer.cc
+++ b/media/base/fake_video_renderer.cc
@@ -28,13 +28,6 @@ void FakeVideoRenderer::OnFrame(const webrtc::VideoFrame& frame) {
   height_ = frame.height();
   rotation_ = frame.rotation();
   timestamp_us_ = frame.timestamp_us();
-  ntp_timestamp_ms_ = frame.ntp_time_ms();
-  color_space_ = frame.color_space();
-  frame_rendered_event_.Set();
-}
-
-bool FakeVideoRenderer::WaitForRenderedFrame(int64_t timeout_ms) {
-  return frame_rendered_event_.Wait(timeout_ms);
 }
 
 }  // namespace cricket
diff --git a/media/base/fake_video_renderer.h b/media/base/fake_video_renderer.h
index e04bb3eea5..171c0e3ac8 100644
--- a/media/base/fake_video_renderer.h
+++ b/media/base/fake_video_renderer.h
@@ -19,7 +19,6 @@
 #include "api/video/video_rotation.h"
 #include "api/video/video_sink_interface.h"
 #include "rtc_base/critical_section.h"
-#include "rtc_base/event.h"
 
 namespace cricket {
 
@@ -31,7 +30,6 @@ class FakeVideoRenderer : public rtc::VideoSinkInterface<webrtc::VideoFrame> {
   void OnFrame(const webrtc::VideoFrame& frame) override;
 
   int errors() const { return errors_; }
-
   int width() const {
     rtc::CritScope cs(&crit_);
     return width_;
@@ -40,7 +38,6 @@ class FakeVideoRenderer : public rtc::VideoSinkInterface<webrtc::VideoFrame> {
     rtc::CritScope cs(&crit_);
     return height_;
   }
-
   webrtc::VideoRotation rotation() const {
     rtc::CritScope cs(&crit_);
     return rotation_;
@@ -50,29 +47,15 @@ class FakeVideoRenderer : public rtc::VideoSinkInterface<webrtc::VideoFrame> {
     rtc::CritScope cs(&crit_);
     return timestamp_us_;
   }
-
   int num_rendered_frames() const {
     rtc::CritScope cs(&crit_);
     return num_rendered_frames_;
   }
-
   bool black_frame() const {
     rtc::CritScope cs(&crit_);
     return black_frame_;
   }
 
-  int64_t ntp_time_ms() const {
-    rtc::CritScope cs(&crit_);
-    return ntp_timestamp_ms_;
-  }
-
-  absl::optional<webrtc::ColorSpace> color_space() const {
-    rtc::CritScope cs(&crit_);
-    return color_space_;
-  }
-
-  bool WaitForRenderedFrame(int64_t timeout_ms);
-
  private:
   static bool CheckFrameColorYuv(uint8_t y_min,
                                  uint8_t y_max,
@@ -133,11 +116,8 @@ class FakeVideoRenderer : public rtc::VideoSinkInterface<webrtc::VideoFrame> {
   webrtc::VideoRotation rotation_ = webrtc::kVideoRotation_0;
   int64_t timestamp_us_ = 0;
   int num_rendered_frames_ = 0;
-  int64_t ntp_timestamp_ms_ = 0;
   bool black_frame_ = false;
   rtc::CriticalSection crit_;
-  rtc::Event frame_rendered_event_;
-  absl::optional<webrtc::ColorSpace> color_space_;
 };
 
 }  // namespace cricket
diff --git a/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc b/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc
index 4af0ebb922..3654ed528a 100644
--- a/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc
+++ b/modules/video_coding/codecs/h264/test/h264_impl_unittest.cc
@@ -19,6 +19,7 @@
 #include "api/video_codecs/video_decoder.h"
 #include "api/video_codecs/video_encoder.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
+#include "common_video/test/utilities.h"
 #include "media/base/codec.h"
 #include "media/base/media_constants.h"
 #include "modules/video_coding/codecs/h264/include/h264.h"
@@ -48,9 +49,17 @@ class TestH264Impl : public VideoCodecUnitTest {
 #ifdef WEBRTC_USE_H264
 #define MAYBE_EncodeDecode EncodeDecode
 #define MAYBE_DecodedQpEqualsEncodedQp DecodedQpEqualsEncodedQp
+#define MAYBE_EncodedColorSpaceEqualsInputColorSpace \
+  EncodedColorSpaceEqualsInputColorSpace
+#define MAYBE_DecodedColorSpaceEqualsEncodedColorSpace \
+  DecodedColorSpaceEqualsEncodedColorSpace
 #else
 #define MAYBE_EncodeDecode DISABLED_EncodeDecode
 #define MAYBE_DecodedQpEqualsEncodedQp DISABLED_DecodedQpEqualsEncodedQp
+#define MAYBE_EncodedColorSpaceEqualsInputColorSpace \
+  DISABLED_EncodedColorSpaceEqualsInputColorSpace
+#define MAYBE_DecodedColorSpaceEqualsEncodedColorSpace \
+  DISABLED_DecodedColorSpaceEqualsEncodedColorSpace
 #endif
 
 TEST_F(TestH264Impl, MAYBE_EncodeDecode) {
@@ -96,4 +105,45 @@ TEST_F(TestH264Impl, MAYBE_DecodedQpEqualsEncodedQp) {
   EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
 }
 
+TEST_F(TestH264Impl, MAYBE_EncodedColorSpaceEqualsInputColorSpace) {
+  VideoFrame* input_frame = NextInputFrame();
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  EXPECT_FALSE(encoded_frame.ColorSpace());
+
+  // Video frame with explicit color space information.
+  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
+  VideoFrame input_frame_w_color_space =
+      VideoFrame::Builder()
+          .set_video_frame_buffer(input_frame->video_frame_buffer())
+          .set_color_space(color_space)
+          .build();
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(input_frame_w_color_space, nullptr));
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  ASSERT_TRUE(encoded_frame.ColorSpace());
+  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
+}
+
+TEST_F(TestH264Impl, MAYBE_DecodedColorSpaceEqualsEncodedColorSpace) {
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), nullptr));
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  // Add color space to encoded frame.
+  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
+  encoded_frame.SetColorSpace(color_space);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
+  std::unique_ptr<VideoFrame> decoded_frame;
+  absl::optional<uint8_t> decoded_qp;
+  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
+  ASSERT_TRUE(decoded_frame);
+  ASSERT_TRUE(decoded_frame->color_space());
+  EXPECT_EQ(color_space, *decoded_frame->color_space());
+}
+
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
index eb6b89fe8e..a2597ef51a 100644
--- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc
@@ -227,10 +227,51 @@ TEST_F(TestVp8Impl, OnEncodedImageReportsInfo) {
   EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
 
   EXPECT_EQ(kInitialTimestampRtp, encoded_frame.Timestamp());
+  EXPECT_EQ(kInitialTimestampMs, encoded_frame.capture_time_ms_);
   EXPECT_EQ(kWidth, static_cast<int>(encoded_frame._encodedWidth));
   EXPECT_EQ(kHeight, static_cast<int>(encoded_frame._encodedHeight));
 }
 
+// We only test the encoder here, since the decoded frame rotation is set based
+// on the CVO RTP header extension in VCMDecodedFrameCallback::Decoded.
+// TODO(brandtr): Consider passing through the rotation flag through the decoder
+// in the same way as done in the encoder.
+TEST_F(TestVp8Impl, EncodedRotationEqualsInputRotation) {
+  VideoFrame* input_frame = NextInputFrame();
+  input_frame->set_rotation(kVideoRotation_0);
+
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
+  EXPECT_EQ(kVideoRotation_0, encoded_frame.rotation_);
+
+  input_frame->set_rotation(kVideoRotation_90);
+  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
+  EXPECT_EQ(kVideoRotation_90, encoded_frame.rotation_);
+}
+
+TEST_F(TestVp8Impl, EncodedColorSpaceEqualsInputColorSpace) {
+  // Video frame without explicit color space information.
+  VideoFrame* input_frame = NextInputFrame();
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
+  EXPECT_FALSE(encoded_frame.ColorSpace());
+
+  // Video frame with explicit color space information.
+  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
+  VideoFrame input_frame_w_color_space =
+      VideoFrame::Builder()
+          .set_video_frame_buffer(input_frame->video_frame_buffer())
+          .set_color_space(color_space)
+          .build();
+
+  EncodeAndWaitForFrame(input_frame_w_color_space, &encoded_frame,
+                        &codec_specific_info);
+  ASSERT_TRUE(encoded_frame.ColorSpace());
+  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
+}
+
 TEST_F(TestVp8Impl, DecodedQpEqualsEncodedQp) {
   VideoFrame* input_frame = NextInputFrame();
   EncodedImage encoded_frame;
@@ -249,6 +290,24 @@ TEST_F(TestVp8Impl, DecodedQpEqualsEncodedQp) {
   EXPECT_EQ(encoded_frame.qp_, *decoded_qp);
 }
 
+TEST_F(TestVp8Impl, DecodedColorSpaceEqualsEncodedColorSpace) {
+  VideoFrame* input_frame = NextInputFrame();
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  EncodeAndWaitForFrame(*input_frame, &encoded_frame, &codec_specific_info);
+
+  // Encoded frame with explicit color space information.
+  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/false);
+  encoded_frame.SetColorSpace(color_space);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, -1));
+  std::unique_ptr<VideoFrame> decoded_frame;
+  absl::optional<uint8_t> decoded_qp;
+  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
+  ASSERT_TRUE(decoded_frame);
+  ASSERT_TRUE(decoded_frame->color_space());
+  EXPECT_EQ(color_space, *decoded_frame->color_space());
+}
+
 TEST_F(TestVp8Impl, ChecksSimulcastSettings) {
   codec_settings_.numberOfSimulcastStreams = 2;
   // Resolutions are not in ascending order, temporal layers do not match.
@@ -343,6 +402,7 @@ TEST_F(TestVp8Impl, MAYBE_AlignedStrideEncodeDecode) {
   // Compute PSNR on all planes (faster than SSIM).
   EXPECT_GT(I420PSNR(input_frame, decoded_frame.get()), 36);
   EXPECT_EQ(kInitialTimestampRtp, decoded_frame->timestamp());
+  EXPECT_EQ(kTestNtpTimeMs, decoded_frame->ntp_time_ms());
 }
 
 #if defined(WEBRTC_ANDROID)
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 309dac12da..95ba2660a1 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -11,6 +11,7 @@
 #include "api/video/color_space.h"
 #include "api/video/i420_buffer.h"
 #include "common_video/libyuv/include/webrtc_libyuv.h"
+#include "common_video/test/utilities.h"
 #include "media/base/vp9_profile.h"
 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
 #include "modules/video_coding/codecs/test/video_codec_unittest.h"
@@ -145,7 +146,50 @@ TEST_F(TestVp9Impl, EncodeDecode) {
             color_space.chroma_siting_vertical());
 }
 
-TEST_F(TestVp9Impl, DecodedColorSpaceFromBitstream) {
+// We only test the encoder here, since the decoded frame rotation is set based
+// on the CVO RTP header extension in VCMDecodedFrameCallback::Decoded.
+// TODO(brandtr): Consider passing through the rotation flag through the decoder
+// in the same way as done in the encoder.
+TEST_F(TestVp9Impl, EncodedRotationEqualsInputRotation) {
+  VideoFrame* input_frame = NextInputFrame();
+  input_frame->set_rotation(kVideoRotation_0);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  EXPECT_EQ(kVideoRotation_0, encoded_frame.rotation_);
+
+  input_frame = NextInputFrame();
+  input_frame->set_rotation(kVideoRotation_90);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  EXPECT_EQ(kVideoRotation_90, encoded_frame.rotation_);
+}
+
+TEST_F(TestVp9Impl, EncodedColorSpaceEqualsInputColorSpace) {
+  // Video frame without explicit color space information.
+  VideoFrame* input_frame = NextInputFrame();
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Encode(*input_frame, nullptr));
+  EncodedImage encoded_frame;
+  CodecSpecificInfo codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  EXPECT_FALSE(encoded_frame.ColorSpace());
+
+  // Video frame with explicit color space information.
+  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/true);
+  VideoFrame input_frame_w_hdr =
+      VideoFrame::Builder()
+          .set_video_frame_buffer(input_frame->video_frame_buffer())
+          .set_color_space(color_space)
+          .build();
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(input_frame_w_hdr, nullptr));
+  ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
+  ASSERT_TRUE(encoded_frame.ColorSpace());
+  EXPECT_EQ(*encoded_frame.ColorSpace(), color_space);
+}
+
+TEST_F(TestVp9Impl, DecodedColorSpaceEqualsEncodedColorSpace) {
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*NextInputFrame(), nullptr));
   EncodedImage encoded_frame;
@@ -162,6 +206,15 @@ TEST_F(TestVp9Impl, DecodedColorSpaceFromBitstream) {
   ASSERT_TRUE(decoded_frame->color_space());
   // No HDR metadata present.
   EXPECT_FALSE(decoded_frame->color_space()->hdr_metadata());
+
+  // Encoded frame with explicit color space information.
+  ColorSpace color_space = CreateTestColorSpace(/*with_hdr_metadata=*/true);
+  encoded_frame.SetColorSpace(color_space);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Decode(encoded_frame, false, 0));
+  ASSERT_TRUE(WaitForDecodedFrame(&decoded_frame, &decoded_qp));
+  ASSERT_TRUE(decoded_frame);
+  ASSERT_TRUE(decoded_frame->color_space());
+  EXPECT_EQ(color_space, *decoded_frame->color_space());
 }
 
 TEST_F(TestVp9Impl, DecodedQpEqualsEncodedQp) {
diff --git a/modules/video_coding/encoded_frame.h b/modules/video_coding/encoded_frame.h
index a6bb55bfae..94da40f058 100644
--- a/modules/video_coding/encoded_frame.h
+++ b/modules/video_coding/encoded_frame.h
@@ -52,10 +52,8 @@ class VCMEncodedFrame : protected EncodedImage {
     return static_cast<const webrtc::EncodedImage&>(*this);
   }
 
-  using EncodedImage::ColorSpace;
   using EncodedImage::data;
   using EncodedImage::set_size;
-  using EncodedImage::SetColorSpace;
   using EncodedImage::SetSpatialIndex;
   using EncodedImage::SetTimestamp;
   using EncodedImage::size;
diff --git a/modules/video_coding/generic_decoder.cc b/modules/video_coding/generic_decoder.cc
index 281250b5d1..cf986d6329 100644
--- a/modules/video_coding/generic_decoder.cc
+++ b/modules/video_coding/generic_decoder.cc
@@ -74,12 +74,6 @@ void VCMDecodedFrameCallback::Decoded(VideoFrame& decodedImage,
     frameInfo = _timestampMap.Pop(decodedImage.timestamp());
   }
 
-  decodedImage.set_ntp_time_ms(frameInfo->ntp_time_ms);
-  if (frameInfo->color_space) {
-    decodedImage.set_color_space(*frameInfo->color_space);
-  }
-  decodedImage.set_rotation(frameInfo->rotation);
-
   if (frameInfo == NULL) {
     RTC_LOG(LS_WARNING) << "Too many frames backed up in the decoder, dropping "
                            "this one.";
@@ -146,6 +140,7 @@ void VCMDecodedFrameCallback::Decoded(VideoFrame& decodedImage,
 
   decodedImage.set_timestamp_us(frameInfo->renderTimeMs *
                                 rtc::kNumMicrosecsPerMillisec);
+  decodedImage.set_rotation(frameInfo->rotation);
   _receiveCallback->FrameToRender(decodedImage, qp, frameInfo->content_type);
 }
 
@@ -204,9 +199,6 @@ int32_t VCMGenericDecoder::Decode(const VCMEncodedFrame& frame, int64_t nowMs) {
   _frameInfos[_nextFrameInfoIdx].renderTimeMs = frame.RenderTimeMs();
   _frameInfos[_nextFrameInfoIdx].rotation = frame.rotation();
   _frameInfos[_nextFrameInfoIdx].timing = frame.video_timing();
-  _frameInfos[_nextFrameInfoIdx].ntp_time_ms =
-      frame.EncodedImage().ntp_time_ms_;
-  _frameInfos[_nextFrameInfoIdx].color_space = frame.ColorSpace();
   // Set correctly only for key frames. Thus, use latest key frame
   // content type. If the corresponding key frame was lost, decode will fail
   // and content type will be ignored.
diff --git a/modules/video_coding/generic_decoder.h b/modules/video_coding/generic_decoder.h
index 7f82eaddf0..36428fb9f3 100644
--- a/modules/video_coding/generic_decoder.h
+++ b/modules/video_coding/generic_decoder.h
@@ -34,8 +34,6 @@ struct VCMFrameInformation {
   VideoRotation rotation;
   VideoContentType content_type;
   EncodedImage::Timing timing;
-  int64_t ntp_time_ms;
-  const ColorSpace* color_space;
 };
 
 class VCMDecodedFrameCallback : public DecodedImageCallback {
diff --git a/test/fake_decoder.cc b/test/fake_decoder.cc
index 37c073fc0b..10ac8512cc 100644
--- a/test/fake_decoder.cc
+++ b/test/fake_decoder.cc
@@ -45,13 +45,12 @@ int32_t FakeDecoder::Decode(const EncodedImage& input,
     height_ = input._encodedHeight;
   }
 
-  rtc::scoped_refptr<I420Buffer> buffer = I420Buffer::Create(width_, height_);
-  I420Buffer::SetBlack(buffer);
-  VideoFrame frame = VideoFrame::Builder()
-                         .set_video_frame_buffer(buffer)
-                         .set_rotation(webrtc::kVideoRotation_0)
-                         .set_timestamp_ms(render_time_ms)
-                         .build();
+  VideoFrame frame =
+      VideoFrame::Builder()
+          .set_video_frame_buffer(I420Buffer::Create(width_, height_))
+          .set_rotation(webrtc::kVideoRotation_0)
+          .set_timestamp_ms(render_time_ms)
+          .build();
   frame.set_timestamp(input.Timestamp());
   frame.set_ntp_time_ms(input.ntp_time_ms_);
 
diff --git a/video/BUILD.gn b/video/BUILD.gn
index 8d776d5dde..5639a5817d 100644
--- a/video/BUILD.gn
+++ b/video/BUILD.gn
@@ -171,8 +171,8 @@ rtc_source_set("video_stream_encoder_impl") {
     "encoder_bitrate_adjuster.h",
     "encoder_overshoot_detector.cc",
     "encoder_overshoot_detector.h",
-    "frame_encode_metadata_writer.cc",
-    "frame_encode_metadata_writer.h",
+    "frame_encode_timer.cc",
+    "frame_encode_timer.h",
     "overuse_frame_detector.cc",
     "overuse_frame_detector.h",
     "video_stream_encoder.cc",
@@ -491,7 +491,7 @@ if (rtc_include_tests) {
       "end_to_end_tests/ssrc_tests.cc",
       "end_to_end_tests/stats_tests.cc",
       "end_to_end_tests/transport_feedback_tests.cc",
-      "frame_encode_metadata_writer_unittest.cc",
+      "frame_encode_timer_unittest.cc",
       "overuse_frame_detector_unittest.cc",
       "picture_id_tests.cc",
       "quality_scaling_tests.cc",
diff --git a/video/frame_encode_metadata_writer.cc b/video/frame_encode_timer.cc
similarity index 72%
rename from video/frame_encode_metadata_writer.cc
rename to video/frame_encode_timer.cc
index 4b5fabb6f5..42002f78ab 100644
--- a/video/frame_encode_metadata_writer.cc
+++ b/video/frame_encode_timer.cc
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "video/frame_encode_metadata_writer.h"
+#include "video/frame_encode_timer.h"
 
 #include <algorithm>
 
@@ -23,31 +23,29 @@ const int kMessagesThrottlingThreshold = 2;
 const int kThrottleRatio = 100000;
 }  // namespace
 
-FrameEncodeMetadataWriter::TimingFramesLayerInfo::TimingFramesLayerInfo() =
-    default;
-FrameEncodeMetadataWriter::TimingFramesLayerInfo::~TimingFramesLayerInfo() =
-    default;
+FrameEncodeTimer::TimingFramesLayerInfo::TimingFramesLayerInfo() = default;
+FrameEncodeTimer::TimingFramesLayerInfo::~TimingFramesLayerInfo() = default;
 
-FrameEncodeMetadataWriter::FrameEncodeMetadataWriter(
-    EncodedImageCallback* frame_drop_callback)
+FrameEncodeTimer::FrameEncodeTimer(EncodedImageCallback* frame_drop_callback)
     : frame_drop_callback_(frame_drop_callback),
       internal_source_(false),
       framerate_fps_(0),
       last_timing_frame_time_ms_(-1),
+      incorrect_capture_time_logged_messages_(0),
       reordered_frames_logged_messages_(0),
       stalled_encoder_logged_messages_(0) {
   codec_settings_.timing_frame_thresholds = {-1, 0};
 }
-FrameEncodeMetadataWriter::~FrameEncodeMetadataWriter() {}
+FrameEncodeTimer::~FrameEncodeTimer() {}
 
-void FrameEncodeMetadataWriter::OnEncoderInit(const VideoCodec& codec,
-                                              bool internal_source) {
+void FrameEncodeTimer::OnEncoderInit(const VideoCodec& codec,
+                                     bool internal_source) {
   rtc::CritScope cs(&lock_);
   codec_settings_ = codec;
   internal_source_ = internal_source;
 }
 
-void FrameEncodeMetadataWriter::OnSetRates(
+void FrameEncodeTimer::OnSetRates(
     const VideoBitrateAllocation& bitrate_allocation,
     uint32_t framerate_fps) {
   rtc::CritScope cs(&lock_);
@@ -62,7 +60,8 @@ void FrameEncodeMetadataWriter::OnSetRates(
   }
 }
 
-void FrameEncodeMetadataWriter::OnEncodeStarted(const VideoFrame& frame) {
+void FrameEncodeTimer::OnEncodeStarted(uint32_t rtp_timestamp,
+                                       int64_t capture_time_ms) {
   rtc::CritScope cs(&lock_);
   if (internal_source_) {
     return;
@@ -70,24 +69,19 @@ void FrameEncodeMetadataWriter::OnEncodeStarted(const VideoFrame& frame) {
 
   const size_t num_spatial_layers = NumSpatialLayers();
   timing_frames_info_.resize(num_spatial_layers);
-  FrameMetadata metadata;
-  metadata.rtp_timestamp = frame.timestamp();
-  metadata.encode_start_time_ms = rtc::TimeMillis();
-  metadata.ntp_time_ms = frame.ntp_time_ms();
-  metadata.timestamp_us = frame.timestamp_us();
-  metadata.rotation = frame.rotation();
-  metadata.color_space = frame.color_space();
   for (size_t si = 0; si < num_spatial_layers; ++si) {
-    RTC_DCHECK(timing_frames_info_[si].frames.empty() ||
-               rtc::TimeDiff(
-                   frame.render_time_ms(),
-                   timing_frames_info_[si].frames.back().timestamp_us / 1000) >=
-                   0);
+    RTC_DCHECK(
+        timing_frames_info_[si].encode_start_list.empty() ||
+        rtc::TimeDiff(
+            capture_time_ms,
+            timing_frames_info_[si].encode_start_list.back().capture_time_ms) >=
+            0);
     // If stream is disabled due to low bandwidth OnEncodeStarted still will be
     // called and have to be ignored.
     if (timing_frames_info_[si].target_bitrate_bytes_per_sec == 0)
       return;
-    if (timing_frames_info_[si].frames.size() == kMaxEncodeStartTimeListSize) {
+    if (timing_frames_info_[si].encode_start_list.size() ==
+        kMaxEncodeStartTimeListSize) {
       ++stalled_encoder_logged_messages_;
       if (stalled_encoder_logged_messages_ <= kMessagesThrottlingThreshold ||
           stalled_encoder_logged_messages_ % kThrottleRatio == 0) {
@@ -101,26 +95,25 @@ void FrameEncodeMetadataWriter::OnEncodeStarted(const VideoFrame& frame) {
       }
       frame_drop_callback_->OnDroppedFrame(
           EncodedImageCallback::DropReason::kDroppedByEncoder);
-      timing_frames_info_[si].frames.pop_front();
+      timing_frames_info_[si].encode_start_list.pop_front();
     }
-    timing_frames_info_[si].frames.emplace_back(metadata);
+    timing_frames_info_[si].encode_start_list.emplace_back(
+        rtp_timestamp, capture_time_ms, rtc::TimeMillis());
   }
 }
 
-void FrameEncodeMetadataWriter::FillTimingInfo(size_t simulcast_svc_idx,
-                                               EncodedImage* encoded_image) {
+void FrameEncodeTimer::FillTimingInfo(size_t simulcast_svc_idx,
+                                      EncodedImage* encoded_image,
+                                      int64_t encode_done_ms) {
   rtc::CritScope cs(&lock_);
   absl::optional<size_t> outlier_frame_size;
   absl::optional<int64_t> encode_start_ms;
   uint8_t timing_flags = VideoSendTiming::kNotTriggered;
 
-  int64_t encode_done_ms = rtc::TimeMillis();
-
   // Encoders with internal sources do not call OnEncodeStarted
   // |timing_frames_info_| may be not filled here.
   if (!internal_source_) {
-    encode_start_ms =
-        ExtractEncodeStartTimeAndFillMetadata(simulcast_svc_idx, encoded_image);
+    encode_start_ms = ExtractEncodeStartTime(simulcast_svc_idx, encoded_image);
   }
 
   if (timing_frames_info_.size() > simulcast_svc_idx) {
@@ -183,7 +176,7 @@ void FrameEncodeMetadataWriter::FillTimingInfo(size_t simulcast_svc_idx,
   }
 }
 
-void FrameEncodeMetadataWriter::Reset() {
+void FrameEncodeTimer::Reset() {
   rtc::CritScope cs(&lock_);
   timing_frames_info_.clear();
   last_timing_frame_time_ms_ = -1;
@@ -191,40 +184,48 @@ void FrameEncodeMetadataWriter::Reset() {
   stalled_encoder_logged_messages_ = 0;
 }
 
-absl::optional<int64_t>
-FrameEncodeMetadataWriter::ExtractEncodeStartTimeAndFillMetadata(
+absl::optional<int64_t> FrameEncodeTimer::ExtractEncodeStartTime(
     size_t simulcast_svc_idx,
     EncodedImage* encoded_image) {
   absl::optional<int64_t> result;
   size_t num_simulcast_svc_streams = timing_frames_info_.size();
   if (simulcast_svc_idx < num_simulcast_svc_streams) {
-    auto metadata_list = &timing_frames_info_[simulcast_svc_idx].frames;
+    auto encode_start_list =
+        &timing_frames_info_[simulcast_svc_idx].encode_start_list;
     // Skip frames for which there was OnEncodeStarted but no OnEncodedImage
     // call. These are dropped by encoder internally.
     // Because some hardware encoders don't preserve capture timestamp we
     // use RTP timestamps here.
-    while (!metadata_list->empty() &&
+    while (!encode_start_list->empty() &&
            IsNewerTimestamp(encoded_image->Timestamp(),
-                            metadata_list->front().rtp_timestamp)) {
+                            encode_start_list->front().rtp_timestamp)) {
       frame_drop_callback_->OnDroppedFrame(
           EncodedImageCallback::DropReason::kDroppedByEncoder);
-      metadata_list->pop_front();
+      encode_start_list->pop_front();
     }
-    if (!metadata_list->empty() &&
-        metadata_list->front().rtp_timestamp == encoded_image->Timestamp()) {
-      result.emplace(metadata_list->front().encode_start_time_ms);
-
-      encoded_image->capture_time_ms_ =
-          metadata_list->front().timestamp_us / 1000;
-      encoded_image->ntp_time_ms_ = metadata_list->front().ntp_time_ms;
-      encoded_image->rotation_ = metadata_list->front().rotation;
-      encoded_image->SetColorSpace(metadata_list->front().color_space);
-      encoded_image->content_type_ =
-          (codec_settings_.mode == VideoCodecMode::kScreensharing)
-              ? VideoContentType::SCREENSHARE
-              : VideoContentType::UNSPECIFIED;
-
-      metadata_list->pop_front();
+    if (!encode_start_list->empty() &&
+        encode_start_list->front().rtp_timestamp ==
+            encoded_image->Timestamp()) {
+      result.emplace(encode_start_list->front().encode_start_time_ms);
+      if (encoded_image->capture_time_ms_ !=
+          encode_start_list->front().capture_time_ms) {
+        // Force correct capture timestamp.
+        encoded_image->capture_time_ms_ =
+            encode_start_list->front().capture_time_ms;
+        ++incorrect_capture_time_logged_messages_;
+        if (incorrect_capture_time_logged_messages_ <=
+                kMessagesThrottlingThreshold ||
+            incorrect_capture_time_logged_messages_ % kThrottleRatio == 0) {
+          RTC_LOG(LS_WARNING)
+              << "Encoder is not preserving capture timestamps.";
+          if (incorrect_capture_time_logged_messages_ ==
+              kMessagesThrottlingThreshold) {
+            RTC_LOG(LS_WARNING) << "Too many log messages. Further incorrect "
+                                   "timestamps warnings will be throttled.";
+          }
+        }
+      }
+      encode_start_list->pop_front();
     } else {
       ++reordered_frames_logged_messages_;
       if (reordered_frames_logged_messages_ <= kMessagesThrottlingThreshold ||
@@ -242,7 +243,7 @@ FrameEncodeMetadataWriter::ExtractEncodeStartTimeAndFillMetadata(
   return result;
 }
 
-size_t FrameEncodeMetadataWriter::NumSpatialLayers() const {
+size_t FrameEncodeTimer::NumSpatialLayers() const {
   size_t num_spatial_layers = codec_settings_.numberOfSimulcastStreams;
   if (codec_settings_.codecType == kVideoCodecVP9) {
     num_spatial_layers = std::max(
diff --git a/video/frame_encode_metadata_writer.h b/video/frame_encode_timer.h
similarity index 63%
rename from video/frame_encode_metadata_writer.h
rename to video/frame_encode_timer.h
index c1ffcd9012..f92a33b71e 100644
--- a/video/frame_encode_metadata_writer.h
+++ b/video/frame_encode_timer.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef VIDEO_FRAME_ENCODE_METADATA_WRITER_H_
-#define VIDEO_FRAME_ENCODE_METADATA_WRITER_H_
+#ifndef VIDEO_FRAME_ENCODE_TIMER_H_
+#define VIDEO_FRAME_ENCODE_TIMER_H_
 
 #include <list>
 #include <vector>
@@ -22,18 +22,20 @@
 
 namespace webrtc {
 
-class FrameEncodeMetadataWriter {
+class FrameEncodeTimer {
  public:
-  explicit FrameEncodeMetadataWriter(EncodedImageCallback* frame_drop_callback);
-  ~FrameEncodeMetadataWriter();
+  explicit FrameEncodeTimer(EncodedImageCallback* frame_drop_callback);
+  ~FrameEncodeTimer();
 
   void OnEncoderInit(const VideoCodec& codec, bool internal_source);
   void OnSetRates(const VideoBitrateAllocation& bitrate_allocation,
                   uint32_t framerate_fps);
 
-  void OnEncodeStarted(const VideoFrame& frame);
+  void OnEncodeStarted(uint32_t rtp_timestamp, int64_t capture_time_ms);
 
-  void FillTimingInfo(size_t simulcast_svc_idx, EncodedImage* encoded_image);
+  void FillTimingInfo(size_t simulcast_svc_idx,
+                      EncodedImage* encoded_image,
+                      int64_t encode_done_ms);
   void Reset();
 
  private:
@@ -41,23 +43,26 @@ class FrameEncodeMetadataWriter {
 
   // For non-internal-source encoders, returns encode started time and fixes
   // capture timestamp for the frame, if corrupted by the encoder.
-  absl::optional<int64_t> ExtractEncodeStartTimeAndFillMetadata(
-      size_t simulcast_svc_idx,
-      EncodedImage* encoded_image) RTC_EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  absl::optional<int64_t> ExtractEncodeStartTime(size_t simulcast_svc_idx,
+                                                 EncodedImage* encoded_image)
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  struct FrameMetadata {
+  struct EncodeStartTimeRecord {
+    EncodeStartTimeRecord(uint32_t timestamp,
+                          int64_t capture_time,
+                          int64_t encode_start_time)
+        : rtp_timestamp(timestamp),
+          capture_time_ms(capture_time),
+          encode_start_time_ms(encode_start_time) {}
     uint32_t rtp_timestamp;
+    int64_t capture_time_ms;
     int64_t encode_start_time_ms;
-    int64_t ntp_time_ms = 0;
-    int64_t timestamp_us = 0;
-    VideoRotation rotation = kVideoRotation_0;
-    absl::optional<ColorSpace> color_space;
   };
   struct TimingFramesLayerInfo {
     TimingFramesLayerInfo();
     ~TimingFramesLayerInfo();
     size_t target_bitrate_bytes_per_sec = 0;
-    std::list<FrameMetadata> frames;
+    std::list<EncodeStartTimeRecord> encode_start_list;
   };
 
   rtc::CriticalSection lock_;
@@ -69,10 +74,11 @@ class FrameEncodeMetadataWriter {
   // Separate instance for each simulcast stream or spatial layer.
   std::vector<TimingFramesLayerInfo> timing_frames_info_ RTC_GUARDED_BY(&lock_);
   int64_t last_timing_frame_time_ms_ RTC_GUARDED_BY(&lock_);
+  size_t incorrect_capture_time_logged_messages_ RTC_GUARDED_BY(&lock_);
   size_t reordered_frames_logged_messages_ RTC_GUARDED_BY(&lock_);
   size_t stalled_encoder_logged_messages_ RTC_GUARDED_BY(&lock_);
 };
 
 }  // namespace webrtc
 
-#endif  // VIDEO_FRAME_ENCODE_METADATA_WRITER_H_
+#endif  // VIDEO_FRAME_ENCODE_TIMER_H_
diff --git a/video/frame_encode_metadata_writer_unittest.cc b/video/frame_encode_timer_unittest.cc
similarity index 64%
rename from video/frame_encode_metadata_writer_unittest.cc
rename to video/frame_encode_timer_unittest.cc
index dcb870b014..12eb13678a 100644
--- a/video/frame_encode_metadata_writer_unittest.cc
+++ b/video/frame_encode_timer_unittest.cc
@@ -8,25 +8,18 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "video/frame_encode_metadata_writer.h"
-
 #include <cstddef>
 #include <vector>
 
-#include "api/video/i420_buffer.h"
-#include "api/video/video_frame.h"
 #include "api/video/video_timing.h"
-#include "common_video/test/utilities.h"
 #include "modules/video_coding/include/video_coding_defines.h"
 #include "rtc_base/time_utils.h"
 #include "test/gtest.h"
+#include "video/frame_encode_timer.h"
 
 namespace webrtc {
 namespace test {
 namespace {
-
-const rtc::scoped_refptr<I420Buffer> kFrameBuffer = I420Buffer::Create(4, 4);
-
 inline size_t FrameSize(const size_t& min_frame_size,
                         const size_t& max_frame_size,
                         const int& s,
@@ -72,7 +65,7 @@ std::vector<std::vector<FrameType>> GetTimingFrames(
     const int num_streams,
     const int num_frames) {
   FakeEncodedImageCallback sink;
-  FrameEncodeMetadataWriter encode_timer(&sink);
+  FrameEncodeTimer encode_timer(&sink);
   VideoCodec codec_settings;
   codec_settings.numberOfSimulcastStreams = num_streams;
   codec_settings.timing_frame_thresholds = {delay_ms,
@@ -90,12 +83,8 @@ std::vector<std::vector<FrameType>> GetTimingFrames(
   int64_t current_timestamp = 0;
   for (int i = 0; i < num_frames; ++i) {
     current_timestamp += 1;
-    VideoFrame frame = VideoFrame::Builder()
-                           .set_timestamp_rtp(current_timestamp * 90)
-                           .set_timestamp_ms(current_timestamp)
-                           .set_video_frame_buffer(kFrameBuffer)
-                           .build();
-    encode_timer.OnEncodeStarted(frame);
+    encode_timer.OnEncodeStarted(static_cast<uint32_t>(current_timestamp * 90),
+                                 current_timestamp);
     for (int si = 0; si < num_streams; ++si) {
       // every (5+s)-th frame is dropped on s-th stream by design.
       bool dropped = i % (5 + si) == 0;
@@ -112,7 +101,7 @@ std::vector<std::vector<FrameType>> GetTimingFrames(
         continue;
       }
 
-      encode_timer.FillTimingInfo(si, &image);
+      encode_timer.FillTimingInfo(si, &image, current_timestamp);
 
       if (IsTimingFrame(image)) {
         result[si].push_back(FrameType::kTiming);
@@ -201,7 +190,7 @@ TEST(FrameEncodeTimerTest, NoTimingFrameIfNoEncodeStartTime) {
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
 
   FakeEncodedImageCallback sink;
-  FrameEncodeMetadataWriter encode_timer(&sink);
+  FrameEncodeTimer encode_timer(&sink);
   VideoCodec codec_settings;
   // Make all frames timing frames.
   codec_settings.timing_frame_thresholds.delay_ms = 1;
@@ -211,20 +200,16 @@ TEST(FrameEncodeTimerTest, NoTimingFrameIfNoEncodeStartTime) {
   encode_timer.OnSetRates(bitrate_allocation, 30);
 
   // Verify a single frame works with encode start time set.
-  VideoFrame frame = VideoFrame::Builder()
-                         .set_timestamp_ms(timestamp)
-                         .set_timestamp_rtp(timestamp * 90)
-                         .set_video_frame_buffer(kFrameBuffer)
-                         .build();
-  encode_timer.OnEncodeStarted(frame);
-  encode_timer.FillTimingInfo(0, &image);
+  encode_timer.OnEncodeStarted(static_cast<uint32_t>(timestamp * 90),
+                               timestamp);
+  encode_timer.FillTimingInfo(0, &image, timestamp);
   EXPECT_TRUE(IsTimingFrame(image));
 
   // New frame, now skip OnEncodeStarted. Should not result in timing frame.
   image.capture_time_ms_ = ++timestamp;
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
   image.timing_ = EncodedImage::Timing();
-  encode_timer.FillTimingInfo(0, &image);
+  encode_timer.FillTimingInfo(0, &image, timestamp);
   EXPECT_FALSE(IsTimingFrame(image));
 }
 
@@ -241,7 +226,7 @@ TEST(FrameEncodeTimerTest, AdjustsCaptureTimeForInternalSourceEncoder) {
   image.SetTimestamp(static_cast<uint32_t>(timestamp * 90));
 
   FakeEncodedImageCallback sink;
-  FrameEncodeMetadataWriter encode_timer(&sink);
+  FrameEncodeTimer encode_timer(&sink);
 
   VideoCodec codec_settings;
   // Make all frames timing frames.
@@ -253,7 +238,7 @@ TEST(FrameEncodeTimerTest, AdjustsCaptureTimeForInternalSourceEncoder) {
   encode_timer.OnSetRates(bitrate_allocation, 30);
 
   // Verify a single frame without encode timestamps isn't a timing frame.
-  encode_timer.FillTimingInfo(0, &image);
+  encode_timer.FillTimingInfo(0, &image, timestamp);
   EXPECT_FALSE(IsTimingFrame(image));
 
   // New frame, but this time with encode timestamps set in timing_.
@@ -263,14 +248,14 @@ TEST(FrameEncodeTimerTest, AdjustsCaptureTimeForInternalSourceEncoder) {
   image.timing_ = EncodedImage::Timing();
   image.timing_.encode_start_ms = timestamp + kEncodeStartDelayMs;
   image.timing_.encode_finish_ms = timestamp + kEncodeFinishDelayMs;
-
-  encode_timer.FillTimingInfo(0, &image);
+  const int64_t kEncodeDoneTimestamp = 1234567;
+  encode_timer.FillTimingInfo(0, &image, kEncodeDoneTimestamp);
   EXPECT_TRUE(IsTimingFrame(image));
 
   // Frame is captured kEncodeFinishDelayMs before it's encoded, so restored
   // capture timestamp should be kEncodeFinishDelayMs in the past.
-  EXPECT_NEAR(image.capture_time_ms_, rtc::TimeMillis() - kEncodeFinishDelayMs,
-              1);
+  EXPECT_EQ(image.capture_time_ms_,
+            kEncodeDoneTimestamp - kEncodeFinishDelayMs);
 }
 
 TEST(FrameEncodeTimerTest, NotifiesAboutDroppedFrames) {
@@ -280,7 +265,7 @@ TEST(FrameEncodeTimerTest, NotifiesAboutDroppedFrames) {
   const int64_t kTimestampMs4 = 47721870;
 
   FakeEncodedImageCallback sink;
-  FrameEncodeMetadataWriter encode_timer(&sink);
+  FrameEncodeTimer encode_timer(&sink);
   encode_timer.OnEncoderInit(VideoCodec(), false);
   // Any non-zero bitrate needed to be set before the first frame.
   VideoBitrateAllocation bitrate_allocation;
@@ -288,27 +273,17 @@ TEST(FrameEncodeTimerTest, NotifiesAboutDroppedFrames) {
   encode_timer.OnSetRates(bitrate_allocation, 30);
 
   EncodedImage image;
-  VideoFrame frame = VideoFrame::Builder()
-                         .set_timestamp_rtp(kTimestampMs1 * 90)
-                         .set_timestamp_ms(kTimestampMs1)
-                         .set_video_frame_buffer(kFrameBuffer)
-                         .build();
-
   image.capture_time_ms_ = kTimestampMs1;
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
-  frame.set_timestamp(image.capture_time_ms_ * 90);
-  frame.set_timestamp_us(image.capture_time_ms_ * 1000);
-  encode_timer.OnEncodeStarted(frame);
+  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
 
   EXPECT_EQ(0u, sink.GetNumFramesDropped());
-  encode_timer.FillTimingInfo(0, &image);
+  encode_timer.FillTimingInfo(0, &image, kTimestampMs1);
 
   image.capture_time_ms_ = kTimestampMs2;
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
   image.timing_ = EncodedImage::Timing();
-  frame.set_timestamp(image.capture_time_ms_ * 90);
-  frame.set_timestamp_us(image.capture_time_ms_ * 1000);
-  encode_timer.OnEncodeStarted(frame);
+  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
   // No OnEncodedImageCall for timestamp2. Yet, at this moment it's not known
   // that frame with timestamp2 was dropped.
   EXPECT_EQ(0u, sink.GetNumFramesDropped());
@@ -316,19 +291,15 @@ TEST(FrameEncodeTimerTest, NotifiesAboutDroppedFrames) {
   image.capture_time_ms_ = kTimestampMs3;
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
   image.timing_ = EncodedImage::Timing();
-  frame.set_timestamp(image.capture_time_ms_ * 90);
-  frame.set_timestamp_us(image.capture_time_ms_ * 1000);
-  encode_timer.OnEncodeStarted(frame);
-  encode_timer.FillTimingInfo(0, &image);
+  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
+  encode_timer.FillTimingInfo(0, &image, kTimestampMs3);
   EXPECT_EQ(1u, sink.GetNumFramesDropped());
 
   image.capture_time_ms_ = kTimestampMs4;
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
   image.timing_ = EncodedImage::Timing();
-  frame.set_timestamp(image.capture_time_ms_ * 90);
-  frame.set_timestamp_us(image.capture_time_ms_ * 1000);
-  encode_timer.OnEncodeStarted(frame);
-  encode_timer.FillTimingInfo(0, &image);
+  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
+  encode_timer.FillTimingInfo(0, &image, kTimestampMs4);
   EXPECT_EQ(1u, sink.GetNumFramesDropped());
 }
 
@@ -337,7 +308,7 @@ TEST(FrameEncodeTimerTest, RestoresCaptureTimestamps) {
   const int64_t kTimestampMs = 123456;
   FakeEncodedImageCallback sink;
 
-  FrameEncodeMetadataWriter encode_timer(&sink);
+  FrameEncodeTimer encode_timer(&sink);
   encode_timer.OnEncoderInit(VideoCodec(), false);
   // Any non-zero bitrate needed to be set before the first frame.
   VideoBitrateAllocation bitrate_allocation;
@@ -346,93 +317,11 @@ TEST(FrameEncodeTimerTest, RestoresCaptureTimestamps) {
 
   image.capture_time_ms_ = kTimestampMs;  // Correct timestamp.
   image.SetTimestamp(static_cast<uint32_t>(image.capture_time_ms_ * 90));
-  VideoFrame frame = VideoFrame::Builder()
-                         .set_timestamp_ms(image.capture_time_ms_)
-                         .set_timestamp_rtp(image.capture_time_ms_ * 90)
-                         .set_video_frame_buffer(kFrameBuffer)
-                         .build();
-  encode_timer.OnEncodeStarted(frame);
+  encode_timer.OnEncodeStarted(image.Timestamp(), image.capture_time_ms_);
   image.capture_time_ms_ = 0;  // Incorrect timestamp.
-  encode_timer.FillTimingInfo(0, &image);
+  encode_timer.FillTimingInfo(0, &image, kTimestampMs);
   EXPECT_EQ(kTimestampMs, image.capture_time_ms_);
 }
 
-TEST(FrameEncodeTimerTest, CopiesRotation) {
-  EncodedImage image;
-  const int64_t kTimestampMs = 123456;
-  FakeEncodedImageCallback sink;
-
-  FrameEncodeMetadataWriter encode_timer(&sink);
-  encode_timer.OnEncoderInit(VideoCodec(), false);
-  // Any non-zero bitrate needed to be set before the first frame.
-  VideoBitrateAllocation bitrate_allocation;
-  bitrate_allocation.SetBitrate(0, 0, 500000);
-  encode_timer.OnSetRates(bitrate_allocation, 30);
-
-  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
-  VideoFrame frame = VideoFrame::Builder()
-                         .set_timestamp_ms(kTimestampMs)
-                         .set_timestamp_rtp(kTimestampMs * 90)
-                         .set_rotation(kVideoRotation_180)
-                         .set_video_frame_buffer(kFrameBuffer)
-                         .build();
-  encode_timer.OnEncodeStarted(frame);
-  encode_timer.FillTimingInfo(0, &image);
-  EXPECT_EQ(kVideoRotation_180, image.rotation_);
-}
-
-TEST(FrameEncodeTimerTest, SetsContentType) {
-  EncodedImage image;
-  const int64_t kTimestampMs = 123456;
-  FakeEncodedImageCallback sink;
-
-  FrameEncodeMetadataWriter encode_timer(&sink);
-  VideoCodec codec;
-  codec.mode = VideoCodecMode::kScreensharing;
-  encode_timer.OnEncoderInit(codec, false);
-  // Any non-zero bitrate needed to be set before the first frame.
-  VideoBitrateAllocation bitrate_allocation;
-  bitrate_allocation.SetBitrate(0, 0, 500000);
-  encode_timer.OnSetRates(bitrate_allocation, 30);
-
-  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
-  VideoFrame frame = VideoFrame::Builder()
-                         .set_timestamp_ms(kTimestampMs)
-                         .set_timestamp_rtp(kTimestampMs * 90)
-                         .set_rotation(kVideoRotation_180)
-                         .set_video_frame_buffer(kFrameBuffer)
-                         .build();
-  encode_timer.OnEncodeStarted(frame);
-  encode_timer.FillTimingInfo(0, &image);
-  EXPECT_EQ(VideoContentType::SCREENSHARE, image.content_type_);
-}
-
-TEST(FrameEncodeTimerTest, CopiesColorSpace) {
-  EncodedImage image;
-  const int64_t kTimestampMs = 123456;
-  FakeEncodedImageCallback sink;
-
-  FrameEncodeMetadataWriter encode_timer(&sink);
-  encode_timer.OnEncoderInit(VideoCodec(), false);
-  // Any non-zero bitrate needed to be set before the first frame.
-  VideoBitrateAllocation bitrate_allocation;
-  bitrate_allocation.SetBitrate(0, 0, 500000);
-  encode_timer.OnSetRates(bitrate_allocation, 30);
-
-  webrtc::ColorSpace color_space =
-      CreateTestColorSpace(/*with_hdr_metadata=*/true);
-  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
-  VideoFrame frame = VideoFrame::Builder()
-                         .set_timestamp_ms(kTimestampMs)
-                         .set_timestamp_rtp(kTimestampMs * 90)
-                         .set_color_space(color_space)
-                         .set_video_frame_buffer(kFrameBuffer)
-                         .build();
-  encode_timer.OnEncodeStarted(frame);
-  encode_timer.FillTimingInfo(0, &image);
-  ASSERT_NE(image.ColorSpace(), nullptr);
-  EXPECT_EQ(color_space, *image.ColorSpace());
-}
-
 }  // namespace test
 }  // namespace webrtc
diff --git a/video/video_receive_stream_unittest.cc b/video/video_receive_stream_unittest.cc
index 69bd2616a4..2d29ef71b3 100644
--- a/video/video_receive_stream_unittest.cc
+++ b/video/video_receive_stream_unittest.cc
@@ -14,12 +14,9 @@
 #include "test/gmock.h"
 #include "test/gtest.h"
 
-#include "absl/memory/memory.h"
 #include "api/task_queue/default_task_queue_factory.h"
-#include "api/test/video/function_video_decoder_factory.h"
 #include "api/video_codecs/video_decoder.h"
 #include "call/rtp_stream_receiver_controller.h"
-#include "common_video/test/utilities.h"
 #include "media/base/fake_video_renderer.h"
 #include "modules/pacing/packet_router.h"
 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
@@ -27,7 +24,6 @@
 #include "rtc_base/critical_section.h"
 #include "rtc_base/event.h"
 #include "system_wrappers/include/clock.h"
-#include "test/fake_decoder.h"
 #include "test/field_trial.h"
 #include "test/video_decoder_proxy_factory.h"
 #include "video/call_stats.h"
@@ -66,12 +62,6 @@ class MockVideoDecoder : public VideoDecoder {
 
 class FrameObjectFake : public video_coding::EncodedFrame {
  public:
-  void SetPayloadType(uint8_t payload_type) { _payloadType = payload_type; }
-
-  void SetRotation(const VideoRotation& rotation) { rotation_ = rotation; }
-
-  void SetNtpTime(int64_t ntp_time_ms) { ntp_time_ms_ = ntp_time_ms; }
-
   int64_t ReceivedTime() const override { return 0; }
 
   int64_t RenderTime() const override { return _renderTimeMs; }
@@ -222,90 +212,4 @@ TEST_F(VideoReceiveStreamTest, PlayoutDelayPreservesDefaultMinValue) {
   EXPECT_EQ(default_min_playout_latency, timing_->min_playout_delay());
 }
 
-class VideoReceiveStreamTestWithFakeDecoder : public ::testing::Test {
- public:
-  VideoReceiveStreamTestWithFakeDecoder()
-      : fake_decoder_factory_(
-            []() { return absl::make_unique<test::FakeDecoder>(); }),
-        process_thread_(ProcessThread::Create("TestThread")),
-        task_queue_factory_(CreateDefaultTaskQueueFactory()),
-        config_(&mock_transport_),
-        call_stats_(Clock::GetRealTimeClock(), process_thread_.get()) {}
-
-  void SetUp() {
-    constexpr int kDefaultNumCpuCores = 2;
-    config_.rtp.remote_ssrc = 1111;
-    config_.rtp.local_ssrc = 2222;
-    config_.renderer = &fake_renderer_;
-    VideoReceiveStream::Decoder fake_decoder;
-    fake_decoder.payload_type = 99;
-    fake_decoder.video_format = SdpVideoFormat("VP8");
-    fake_decoder.decoder_factory = &fake_decoder_factory_;
-    config_.decoders.push_back(fake_decoder);
-    Clock* clock = Clock::GetRealTimeClock();
-    timing_ = new VCMTiming(clock);
-
-    video_receive_stream_.reset(new webrtc::internal::VideoReceiveStream(
-        task_queue_factory_.get(), &rtp_stream_receiver_controller_,
-        kDefaultNumCpuCores, &packet_router_, config_.Copy(),
-        process_thread_.get(), &call_stats_, clock, timing_));
-  }
-
- protected:
-  test::FunctionVideoDecoderFactory fake_decoder_factory_;
-  std::unique_ptr<ProcessThread> process_thread_;
-  const std::unique_ptr<TaskQueueFactory> task_queue_factory_;
-  VideoReceiveStream::Config config_;
-  CallStats call_stats_;
-  cricket::FakeVideoRenderer fake_renderer_;
-  MockTransport mock_transport_;
-  PacketRouter packet_router_;
-  RtpStreamReceiverController rtp_stream_receiver_controller_;
-  std::unique_ptr<webrtc::internal::VideoReceiveStream> video_receive_stream_;
-  VCMTiming* timing_;
-};
-
-TEST_F(VideoReceiveStreamTestWithFakeDecoder, PassesNtpTime) {
-  const int64_t kNtpTimestamp = 12345;
-  std::unique_ptr<FrameObjectFake> test_frame(new FrameObjectFake());
-  test_frame->SetPayloadType(99);
-  test_frame->id.picture_id = 0;
-  test_frame->SetNtpTime(kNtpTimestamp);
-
-  video_receive_stream_->Start();
-  video_receive_stream_->OnCompleteFrame(std::move(test_frame));
-  EXPECT_TRUE(fake_renderer_.WaitForRenderedFrame(kDefaultTimeOutMs));
-  EXPECT_EQ(kNtpTimestamp, fake_renderer_.ntp_time_ms());
-}
-
-TEST_F(VideoReceiveStreamTestWithFakeDecoder, PassesRotation) {
-  const webrtc::VideoRotation kRotation = webrtc::kVideoRotation_180;
-  std::unique_ptr<FrameObjectFake> test_frame(new FrameObjectFake());
-  test_frame->SetPayloadType(99);
-  test_frame->id.picture_id = 0;
-  test_frame->SetRotation(kRotation);
-
-  video_receive_stream_->Start();
-  video_receive_stream_->OnCompleteFrame(std::move(test_frame));
-  EXPECT_TRUE(fake_renderer_.WaitForRenderedFrame(kDefaultTimeOutMs));
-
-  EXPECT_EQ(kRotation, fake_renderer_.rotation());
-}
-
-TEST_F(VideoReceiveStreamTestWithFakeDecoder, PassesColorSpace) {
-  std::unique_ptr<FrameObjectFake> test_frame(new FrameObjectFake());
-  test_frame->SetPayloadType(99);
-  test_frame->id.picture_id = 0;
-  webrtc::ColorSpace color_space =
-      CreateTestColorSpace(/*with_hdr_metadata=*/true);
-  test_frame->SetColorSpace(color_space);
-
-  video_receive_stream_->Start();
-  video_receive_stream_->OnCompleteFrame(std::move(test_frame));
-  EXPECT_TRUE(fake_renderer_.WaitForRenderedFrame(kDefaultTimeOutMs));
-
-  ASSERT_TRUE(fake_renderer_.color_space().has_value());
-  EXPECT_EQ(color_space, *fake_renderer_.color_space());
-}
-
 }  // namespace webrtc
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index c38365d8ce..dbe2d00234 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -513,7 +513,7 @@ VideoStreamEncoder::VideoStreamEncoder(
       input_framerate_(kFrameRateAvergingWindowSizeMs, 1000),
       pending_frame_drops_(0),
       next_frame_types_(1, VideoFrameType::kVideoFrameDelta),
-      frame_encode_metadata_writer_(this),
+      frame_encoder_timer_(this),
       experiment_groups_(GetExperimentGroups()),
       encoder_queue_(task_queue_factory->CreateTaskQueue(
           "EncoderQueue",
@@ -766,11 +766,10 @@ void VideoStreamEncoder::ReconfigureEncoder() {
     } else {
       encoder_initialized_ = true;
       encoder_->RegisterEncodeCompleteCallback(this);
-      frame_encode_metadata_writer_.OnEncoderInit(send_codec_,
-                                                  HasInternalSource());
+      frame_encoder_timer_.OnEncoderInit(send_codec_, HasInternalSource());
     }
 
-    frame_encode_metadata_writer_.Reset();
+    frame_encoder_timer_.Reset();
     last_encode_info_ms_ = absl::nullopt;
   }
 
@@ -1096,7 +1095,7 @@ void VideoStreamEncoder::SetEncoderRates(
 
   if (settings_changes) {
     encoder_->SetRates(rate_settings);
-    frame_encode_metadata_writer_.OnSetRates(
+    frame_encoder_timer_.OnSetRates(
         rate_settings.bitrate,
         static_cast<uint32_t>(rate_settings.framerate_fps + 0.5));
   }
@@ -1355,7 +1354,8 @@ void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame,
   TRACE_EVENT1("webrtc", "VCMGenericEncoder::Encode", "timestamp",
                out_frame.timestamp());
 
-  frame_encode_metadata_writer_.OnEncodeStarted(out_frame);
+  frame_encoder_timer_.OnEncodeStarted(out_frame.timestamp(),
+                                       out_frame.render_time_ms());
 
   const int32_t encode_status = encoder_->Encode(out_frame, &next_frame_types_);
 
@@ -1425,7 +1425,9 @@ EncodedImageCallback::Result VideoStreamEncoder::OnEncodedImage(
   const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
   EncodedImage image_copy(encoded_image);
 
-  frame_encode_metadata_writer_.FillTimingInfo(spatial_idx, &image_copy);
+  frame_encoder_timer_.FillTimingInfo(
+      spatial_idx, &image_copy,
+      rtc::TimeMicros() / rtc::kNumMicrosecsPerMillisec);
 
   // Piggyback ALR experiment group id and simulcast id into the content type.
   const uint8_t experiment_id =
diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h
index 7a0c1ef3a6..45ef430e15 100644
--- a/video/video_stream_encoder.h
+++ b/video/video_stream_encoder.h
@@ -35,7 +35,7 @@
 #include "rtc_base/synchronization/sequence_checker.h"
 #include "rtc_base/task_queue.h"
 #include "video/encoder_bitrate_adjuster.h"
-#include "video/frame_encode_metadata_writer.h"
+#include "video/frame_encode_timer.h"
 #include "video/overuse_frame_detector.h"
 
 namespace webrtc {
@@ -341,7 +341,7 @@ class VideoStreamEncoder : public VideoStreamEncoderInterface,
   // turn this into a simple bool |pending_keyframe_request_|.
   std::vector<VideoFrameType> next_frame_types_ RTC_GUARDED_BY(&encoder_queue_);
 
-  FrameEncodeMetadataWriter frame_encode_metadata_writer_;
+  FrameEncodeTimer frame_encoder_timer_;
 
   // Experiment groups parsed from field trials for realtime video ([0]) and
   // screenshare ([1]). 0 means no group specified. Positive values are