Add plumbing of RtpPacketInfos to each AudioFrame as input for SourceTracker.

This change adds the plumbing of RtpPacketInfo from ChannelReceive::OnRtpPacket() to ChannelReceive::GetAudioFrameWithInfo() for audio. It is a step towards replacing the non-spec-compliant ContributingSources that updates itself at packet-receive time, with the spec-compliant SourceTracker that will update itself at frame-delivery-to-track time. Bug: webrtc:10668 Change-Id: I03385d6865bbc7bfbef7634f88de820a934f787a Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/139890 Reviewed-by: Stefan Holmer <stefan@webrtc.org> Reviewed-by: Minyue Li <minyue@webrtc.org> Commit-Queue: Chen Xing <chxg@google.com> Cr-Commit-Position: refs/heads/master@{#28434}
2019-07-01 17:16:32 +02:00
parent 62eb89d221
commit 3e8ef940fe
23 changed files with 195 additions and 26 deletions
--- a/api/audio/BUILD.gn
+++ b/api/audio/BUILD.gn
@ -16,6 +16,7 @@ rtc_source_set("audio_frame_api") {
  ]

  deps = [
+    "..:rtp_packet_info",
    "../../rtc_base:checks",
    "../../rtc_base:rtc_base_approved",
  ]
--- a/api/audio/audio_frame.cc
+++ b/api/audio/audio_frame.cc
@ -39,6 +39,7 @@ void AudioFrame::ResetWithoutMuting() {
  speech_type_ = kUndefined;
  vad_activity_ = kVadUnknown;
  profile_timestamp_ms_ = 0;
+  packet_infos_ = RtpPacketInfos();
 }

 void AudioFrame::UpdateFrame(uint32_t timestamp,
@ -72,6 +73,7 @@ void AudioFrame::CopyFrom(const AudioFrame& src) {
  timestamp_ = src.timestamp_;
  elapsed_time_ms_ = src.elapsed_time_ms_;
  ntp_time_ms_ = src.ntp_time_ms_;
+  packet_infos_ = src.packet_infos_;
  muted_ = src.muted();
  samples_per_channel_ = src.samples_per_channel_;
  sample_rate_hz_ = src.sample_rate_hz_;
--- a/api/audio/audio_frame.h
+++ b/api/audio/audio_frame.h
@ -14,6 +14,7 @@
 #include <stddef.h>
 #include <stdint.h>

+#include "api/rtp_packet_infos.h"
 #include "rtc_base/constructor_magic.h"

 namespace webrtc {
@ -115,6 +116,22 @@ class AudioFrame {
  // class/struct needs an explicit out-of-line destructor" build error.
  int64_t profile_timestamp_ms_ = 0;

+  // Information about packets used to assemble this audio frame. This is needed
+  // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's
+  // MediaStreamTrack, in order to implement getContributingSources(). See:
+  // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
+  //
+  // TODO(bugs.webrtc.org/10757):
+  //   Note that this information might not be fully accurate since we currently
+  //   don't have a proper way to track it across the audio sync buffer. The
+  //   sync buffer is the small sample-holding buffer located after the audio
+  //   decoder and before where samples are assembled into output frames.
+  //
+  // |RtpPacketInfos| may also be empty if the audio samples did not come from
+  // RTP packets. E.g. if the audio were locally generated by packet loss
+  // concealment, comfort noise generation, etc.
+  RtpPacketInfos packet_infos_;
+
 private:
  // A permanently zeroed out buffer to represent muted frames. This is a
  // header-only class, so the only way to avoid creating a separate empty
--- a/audio/remix_resample.cc
+++ b/audio/remix_resample.cc
@ -27,6 +27,7 @@ void RemixAndResample(const AudioFrame& src_frame,
  dst_frame->timestamp_ = src_frame.timestamp_;
  dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_;
  dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_;
+  dst_frame->packet_infos_ = src_frame.packet_infos_;
 }

 void RemixAndResample(const int16_t* src_data,
--- a/modules/audio_coding/BUILD.gn
+++ b/modules/audio_coding/BUILD.gn
@ -1017,6 +1017,7 @@ rtc_static_library("neteq") {
    "..:module_api_public",
    "../../api:array_view",
    "../../api:rtp_headers",
+    "../../api:rtp_packet_info",
    "../../api:scoped_refptr",
    "../../api/audio:audio_frame_api",
    "../../api/audio_codecs:audio_codecs_api",
@ -1029,6 +1030,7 @@ rtc_static_library("neteq") {
    "../../rtc_base:safe_minmax",
    "../../rtc_base:sanitizer",
    "../../rtc_base/system:fallthrough",
+    "../../system_wrappers",
    "../../system_wrappers:field_trial",
    "../../system_wrappers:metrics",
    "//third_party/abseil-cpp/absl/memory",
@ -1066,6 +1068,7 @@ rtc_source_set("neteq_tools_minimal") {
    "../../api/audio_codecs:audio_codecs_api",
    "../../rtc_base:checks",
    "../../rtc_base:rtc_base_approved",
+    "../../system_wrappers",
    "../rtp_rtcp",
    "../rtp_rtcp:rtp_rtcp_format",
    "//third_party/abseil-cpp/absl/types:optional",
@ -1591,6 +1594,7 @@ if (rtc_include_tests) {
      "../../api/audio_codecs:builtin_audio_decoder_factory",
      "../../rtc_base:checks",
      "../../rtc_base:rtc_base_approved",
+      "../../system_wrappers",
      "../../test:fileutils",
      "../../test:test_support",
      "//testing/gtest",
--- a/modules/audio_coding/acm2/acm_receiver.cc
+++ b/modules/audio_coding/acm2/acm_receiver.cc
@ -34,7 +34,9 @@ namespace acm2 {

 AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
    : last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
-      neteq_(NetEq::Create(config.neteq_config, config.decoder_factory)),
+      neteq_(NetEq::Create(config.neteq_config,
+                           config.clock,
+                           config.decoder_factory)),
      clock_(config.clock),
      resampled_last_output_frame_(true) {
  RTC_DCHECK(clock_);
--- a/modules/audio_coding/neteq/include/neteq.h
+++ b/modules/audio_coding/neteq/include/neteq.h
@ -31,6 +31,7 @@ namespace webrtc {
 // Forward declarations.
 class AudioFrame;
 class AudioDecoderFactory;
+class Clock;

 struct NetEqNetworkStatistics {
  uint16_t current_buffer_size_ms;    // Current jitter buffer size in ms.
@ -149,6 +150,7 @@ class NetEq {
  // method.
  static NetEq* Create(
      const NetEq::Config& config,
+      Clock* clock,
      const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);

  virtual ~NetEq() {}
--- a/modules/audio_coding/neteq/neteq.cc
+++ b/modules/audio_coding/neteq/neteq.cc
@ -39,9 +39,10 @@ std::string NetEq::Config::ToString() const {
 // Return the new object.
 NetEq* NetEq::Create(
    const NetEq::Config& config,
+    Clock* clock,
    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
  return new NetEqImpl(config,
-                       NetEqImpl::Dependencies(config, decoder_factory));
+                       NetEqImpl::Dependencies(config, clock, decoder_factory));
 }

 }  // namespace webrtc
--- a/modules/audio_coding/neteq/neteq_impl.cc
+++ b/modules/audio_coding/neteq/neteq_impl.cc
@ -15,6 +15,7 @@
 #include <cstdint>
 #include <cstring>
 #include <list>
+#include <map>
 #include <utility>
 #include <vector>

@ -52,13 +53,16 @@
 #include "rtc_base/sanitizer.h"
 #include "rtc_base/strings/audio_format_to_string.h"
 #include "rtc_base/trace_event.h"
+#include "system_wrappers/include/clock.h"

 namespace webrtc {

 NetEqImpl::Dependencies::Dependencies(
    const NetEq::Config& config,
+    Clock* clock,
    const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory)
-    : tick_timer(new TickTimer),
+    : clock(clock),
+      tick_timer(new TickTimer),
      stats(new StatisticsCalculator),
      buffer_level_filter(new BufferLevelFilter),
      decoder_database(
@ -86,7 +90,8 @@ NetEqImpl::Dependencies::~Dependencies() = default;
 NetEqImpl::NetEqImpl(const NetEq::Config& config,
                     Dependencies&& deps,
                     bool create_components)
-    : tick_timer_(std::move(deps.tick_timer)),
+    : clock_(deps.clock),
+      tick_timer_(std::move(deps.tick_timer)),
      buffer_level_filter_(std::move(deps.buffer_level_filter)),
      decoder_database_(std::move(deps.decoder_database)),
      delay_manager_(std::move(deps.delay_manager)),
@ -468,17 +473,20 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
    RTC_LOG_F(LS_ERROR) << "payload is empty";
    return kInvalidPointer;
  }
+
+  int64_t receive_time_ms = clock_->TimeInMilliseconds();
  stats_->ReceivedPacket();

  PacketList packet_list;
  // Insert packet in a packet list.
-  packet_list.push_back([&rtp_header, &payload] {
+  packet_list.push_back([&rtp_header, &payload, &receive_time_ms] {
    // Convert to Packet.
    Packet packet;
    packet.payload_type = rtp_header.payloadType;
    packet.sequence_number = rtp_header.sequenceNumber;
    packet.timestamp = rtp_header.timestamp;
    packet.payload.SetData(payload.data(), payload.size());
+    packet.packet_info = RtpPacketInfo(rtp_header, receive_time_ms);
    // Waiting time will be set upon inserting the packet in the buffer.
    RTC_DCHECK(!packet.waiting_time);
    return packet;
@ -611,6 +619,7 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
      const auto sequence_number = packet.sequence_number;
      const auto payload_type = packet.payload_type;
      const Packet::Priority original_priority = packet.priority;
+      const auto& packet_info = packet.packet_info;
      auto packet_from_result = [&](AudioDecoder::ParseResult& result) {
        Packet new_packet;
        new_packet.sequence_number = sequence_number;
@ -618,6 +627,7 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
        new_packet.timestamp = result.timestamp;
        new_packet.priority.codec_level = result.priority;
        new_packet.priority.red_level = original_priority.red_level;
+        new_packet.packet_info = packet_info;
        new_packet.frame = std::move(result.frame);
        return new_packet;
      };
@ -879,7 +889,16 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
    comfort_noise_->Reset();
  }

-  // Copy from |algorithm_buffer| to |sync_buffer_|.
+  // We treat it as if all packets referenced by |last_decoded_packet_infos_|
+  // were mashed together when creating the samples in |algorithm_buffer_|.
+  RtpPacketInfos packet_infos(std::move(last_decoded_packet_infos_));
+  last_decoded_packet_infos_.clear();
+
+  // Copy samples from |algorithm_buffer_| to |sync_buffer_|.
+  //
+  // TODO(bugs.webrtc.org/10757):
+  //   We would in the future also like to pass |packet_infos| so that we can do
+  //   sample-perfect tracking of that information across |sync_buffer_|.
  sync_buffer_->PushBack(*algorithm_buffer_);

  // Extract data from |sync_buffer_| to |output|.
@ -897,6 +916,13 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
  sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel,
                                        audio_frame);
  audio_frame->sample_rate_hz_ = fs_hz_;
+  // TODO(bugs.webrtc.org/10757):
+  //   We don't have the ability to properly track individual packets once their
+  //   audio samples have entered |sync_buffer_|. So for now, treat it as if
+  //   |packet_infos| from packets decoded by the current |GetAudioInternal()|
+  //   call were all consumed assembling the current audio frame and the current
+  //   audio frame only.
+  audio_frame->packet_infos_ = std::move(packet_infos);
  if (sync_buffer_->FutureLength() < expand_->overlap_length()) {
    // The sync buffer should always contain |overlap_length| samples, but now
    // too many samples have been extracted. Reinstall the |overlap_length|
@ -1392,6 +1418,7 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list,
                          int* decoded_length,
                          AudioDecoder::SpeechType* speech_type) {
  RTC_DCHECK(last_decoded_timestamps_.empty());
+  RTC_DCHECK(last_decoded_packet_infos_.empty());

  // Do decoding.
  while (!packet_list->empty() && !decoder_database_->IsComfortNoise(
@ -1409,6 +1436,8 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list,
        rtc::ArrayView<int16_t>(&decoded_buffer_[*decoded_length],
                                decoded_buffer_length_ - *decoded_length));
    last_decoded_timestamps_.push_back(packet_list->front().timestamp);
+    last_decoded_packet_infos_.push_back(
+        std::move(packet_list->front().packet_info));
    packet_list->pop_front();
    if (opt_result) {
      const auto& result = *opt_result;
@ -1424,6 +1453,7 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list,
      // TODO(ossu): What to put here?
      RTC_LOG(LS_WARNING) << "Decode error";
      *decoded_length = -1;
+      last_decoded_packet_infos_.clear();
      packet_list->clear();
      break;
    }
--- a/modules/audio_coding/neteq/neteq_impl.h
+++ b/modules/audio_coding/neteq/neteq_impl.h
@ -11,11 +11,15 @@
 #ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
 #define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_

+#include <map>
 #include <memory>
 #include <string>
+#include <utility>
+#include <vector>

 #include "absl/types/optional.h"
 #include "api/audio/audio_frame.h"
+#include "api/rtp_packet_info.h"
 #include "modules/audio_coding/neteq/audio_multi_vector.h"
 #include "modules/audio_coding/neteq/defines.h"  // Modes, Operations
 #include "modules/audio_coding/neteq/expand_uma_logger.h"
@ -34,6 +38,7 @@ namespace webrtc {
 class Accelerate;
 class BackgroundNoise;
 class BufferLevelFilter;
+class Clock;
 class ComfortNoise;
 class DecisionLogic;
 class DecoderDatabase;
@ -93,11 +98,13 @@ class NetEqImpl : public webrtc::NetEq {
    // before sending the struct to the NetEqImpl constructor. However, there
    // are dependencies between some of the classes inside the struct, so
    // swapping out one may make it necessary to re-create another one.
-    explicit Dependencies(
+    Dependencies(
        const NetEq::Config& config,
+        Clock* clock,
        const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
    ~Dependencies();

+    Clock* const clock;
    std::unique_ptr<TickTimer> tick_timer;
    std::unique_ptr<StatisticsCalculator> stats;
    std::unique_ptr<BufferLevelFilter> buffer_level_filter;
@ -338,6 +345,8 @@ class NetEqImpl : public webrtc::NetEq {
  // Creates DecisionLogic object with the mode given by |playout_mode_|.
  virtual void CreateDecisionLogic() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);

+  Clock* const clock_;
+
  rtc::CriticalSection crit_sect_;
  const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(crit_sect_);
  const std::unique_ptr<BufferLevelFilter> buffer_level_filter_
@ -403,6 +412,8 @@ class NetEqImpl : public webrtc::NetEq {
  std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
      RTC_GUARDED_BY(crit_sect_);
  std::vector<uint32_t> last_decoded_timestamps_ RTC_GUARDED_BY(crit_sect_);
+  std::vector<RtpPacketInfo> last_decoded_packet_infos_
+      RTC_GUARDED_BY(crit_sect_);
  ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(crit_sect_);
  ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(crit_sect_);
  bool no_time_stretching_ RTC_GUARDED_BY(crit_sect_);  // Only used for test.
--- a/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_impl_unittest.cc
@ -9,6 +9,8 @@
 */

 #include <memory>
+#include <utility>
+#include <vector>

 #include "absl/memory/memory.h"
 #include "api/audio_codecs/builtin_audio_decoder_factory.h"
@ -30,6 +32,7 @@
 #include "modules/audio_coding/neteq/sync_buffer.h"
 #include "modules/audio_coding/neteq/timestamp_scaler.h"
 #include "rtc_base/numerics/safe_conversions.h"
+#include "system_wrappers/include/clock.h"
 #include "test/audio_decoder_proxy_factory.h"
 #include "test/function_audio_decoder_factory.h"
 #include "test/gmock.h"
@ -40,14 +43,17 @@
 using ::testing::_;
 using ::testing::AtLeast;
 using ::testing::DoAll;
+using ::testing::ElementsAre;
 using ::testing::InSequence;
 using ::testing::Invoke;
+using ::testing::IsEmpty;
 using ::testing::IsNull;
 using ::testing::Pointee;
 using ::testing::Return;
 using ::testing::ReturnNull;
 using ::testing::SetArgPointee;
 using ::testing::SetArrayArgument;
+using ::testing::SizeIs;
 using ::testing::WithArg;

 namespace webrtc {
@ -62,12 +68,12 @@ int DeletePacketsAndReturnOk(PacketList* packet_list) {

 class NetEqImplTest : public ::testing::Test {
 protected:
-  NetEqImplTest() { config_.sample_rate_hz = 8000; }
+  NetEqImplTest() : clock_(0) { config_.sample_rate_hz = 8000; }

  void CreateInstance(
      const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
    ASSERT_TRUE(decoder_factory);
-    NetEqImpl::Dependencies deps(config_, decoder_factory);
+    NetEqImpl::Dependencies deps(config_, &clock_, decoder_factory);

    // Get a local pointer to NetEq's TickTimer object.
    tick_timer_ = deps.tick_timer.get();
@ -217,6 +223,10 @@ class NetEqImplTest : public ::testing::Test {
    EXPECT_EQ(1u, output.num_channels_);
    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);

+    // DTMF packets are immediately consumed by |InsertPacket()| and won't be
+    // returned by |GetAudio()|.
+    EXPECT_THAT(output.packet_infos_, IsEmpty());
+
    // Verify first 64 samples of actual output.
    const std::vector<int16_t> kOutput({
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1578, -2816, -3460, -3403, -2709, -1594,
@ -231,6 +241,7 @@ class NetEqImplTest : public ::testing::Test {

  std::unique_ptr<NetEqImpl> neteq_;
  NetEq::Config config_;
+  SimulatedClock clock_;
  TickTimer* tick_timer_ = nullptr;
  MockBufferLevelFilter* mock_buffer_level_filter_ = nullptr;
  BufferLevelFilter* buffer_level_filter_ = nullptr;
@ -263,7 +274,9 @@ class NetEqImplTest : public ::testing::Test {
 // TODO(hlundin): Move to separate file?
 TEST(NetEq, CreateAndDestroy) {
  NetEq::Config config;
-  NetEq* neteq = NetEq::Create(config, CreateBuiltinAudioDecoderFactory());
+  SimulatedClock clock(0);
+  NetEq* neteq =
+      NetEq::Create(config, &clock, CreateBuiltinAudioDecoderFactory());
  delete neteq;
 }

@ -458,6 +471,10 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
  rtp_header.sequenceNumber = 0x1234;
  rtp_header.timestamp = 0x12345678;
  rtp_header.ssrc = 0x87654321;
+  rtp_header.numCSRCs = 3;
+  rtp_header.arrOfCSRCs[0] = 43;
+  rtp_header.arrOfCSRCs[1] = 65;
+  rtp_header.arrOfCSRCs[2] = 17;

  // This is a dummy decoder that produces as many output samples as the input
  // has bytes. The output is an increasing series, starting at 1 for the first
@ -501,6 +518,8 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
                                          SdpAudioFormat("L16", 8000, 1)));

  // Insert one packet.
+  clock_.AdvanceTimeMilliseconds(123456);
+  int64_t expected_receive_time_ms = clock_.TimeInMilliseconds();
  EXPECT_EQ(NetEq::kOK,
            neteq_->InsertPacket(rtp_header, payload, kReceiveTime));

@ -514,6 +533,17 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
  EXPECT_EQ(1u, output.num_channels_);
  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);

+  // Verify |output.packet_infos_|.
+  ASSERT_THAT(output.packet_infos_, SizeIs(1));
+  {
+    const auto& packet_info = output.packet_infos_[0];
+    EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc);
+    EXPECT_THAT(packet_info.csrcs(), ElementsAre(43, 65, 17));
+    EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp);
+    EXPECT_FALSE(packet_info.audio_level().has_value());
+    EXPECT_EQ(packet_info.receive_time_ms(), expected_receive_time_ms);
+  }
+
  // Start with a simple check that the fake decoder is behaving as expected.
  EXPECT_EQ(kPayloadLengthSamples,
            static_cast<size_t>(decoder_.next_value() - 1));
@ -561,6 +591,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
  rtp_header.sequenceNumber = 0x1234;
  rtp_header.timestamp = 0x12345678;
  rtp_header.ssrc = 0x87654321;
+  rtp_header.extension.hasAudioLevel = true;
+  rtp_header.extension.audioLevel = 42;

  EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return());
  EXPECT_CALL(mock_decoder, SampleRateHz())
@ -583,6 +615,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
                                          SdpAudioFormat("L16", 8000, 1)));

  // Insert one packet.
+  clock_.AdvanceTimeMilliseconds(123456);
+  int64_t expected_receive_time_ms = clock_.TimeInMilliseconds();
  EXPECT_EQ(NetEq::kOK,
            neteq_->InsertPacket(rtp_header, payload, kReceiveTime));

@ -595,16 +629,32 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
  EXPECT_EQ(1u, output.num_channels_);
  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);

+  // Verify |output.packet_infos_|.
+  ASSERT_THAT(output.packet_infos_, SizeIs(1));
+  {
+    const auto& packet_info = output.packet_infos_[0];
+    EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc);
+    EXPECT_THAT(packet_info.csrcs(), IsEmpty());
+    EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp);
+    EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel);
+    EXPECT_EQ(packet_info.receive_time_ms(), expected_receive_time_ms);
+  }
+
  // Insert two more packets. The first one is out of order, and is already too
  // old; the second one is the expected next packet.
  rtp_header.sequenceNumber -= 1;
  rtp_header.timestamp -= kPayloadLengthSamples;
+  rtp_header.extension.audioLevel = 1;
  payload[0] = 1;
+  clock_.AdvanceTimeMilliseconds(1000);
  EXPECT_EQ(NetEq::kOK,
            neteq_->InsertPacket(rtp_header, payload, kReceiveTime));
  rtp_header.sequenceNumber += 2;
  rtp_header.timestamp += 2 * kPayloadLengthSamples;
+  rtp_header.extension.audioLevel = 2;
  payload[0] = 2;
+  clock_.AdvanceTimeMilliseconds(2000);
+  expected_receive_time_ms = clock_.TimeInMilliseconds();
  EXPECT_EQ(NetEq::kOK,
            neteq_->InsertPacket(rtp_header, payload, kReceiveTime));

@ -627,6 +677,17 @@ TEST_F(NetEqImplTest, ReorderedPacket) {
  // out-of-order packet should have been discarded.
  EXPECT_TRUE(packet_buffer_->Empty());

+  // Verify |output.packet_infos_|. Expect to only see the second packet.
+  ASSERT_THAT(output.packet_infos_, SizeIs(1));
+  {
+    const auto& packet_info = output.packet_infos_[0];
+    EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc);
+    EXPECT_THAT(packet_info.csrcs(), IsEmpty());
+    EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp);
+    EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel);
+    EXPECT_EQ(packet_info.receive_time_ms(), expected_receive_time_ms);
+  }
+
  EXPECT_CALL(mock_decoder, Die());
 }

@ -663,6 +724,7 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
  EXPECT_EQ(1u, output.num_channels_);
  EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, IsEmpty());

  // Register the payload type.
  EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType,
@ -685,6 +747,7 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) {
    EXPECT_EQ(1u, output.num_channels_);
    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
        << "NetEq did not decode the packets as expected.";
+    EXPECT_THAT(output.packet_infos_, SizeIs(1));
  }
 }

@ -722,6 +785,7 @@ TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) {
    EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
    EXPECT_EQ(1u, output.num_channels_);
    EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_);
+    EXPECT_THAT(output.packet_infos_, IsEmpty());
  }

  // Insert 10 packets.
@ -741,6 +805,7 @@ TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) {
    EXPECT_EQ(1u, output.num_channels_);
    EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_)
        << "NetEq did not decode the packets as expected.";
+    EXPECT_THAT(output.packet_infos_, SizeIs(1));
  }

  auto lifetime_stats = neteq_->GetLifetimeStatistics();
@ -975,12 +1040,14 @@ TEST_F(NetEqImplTest, UnsupportedDecoder) {
  const size_t kExpectedOutputSize = 10 * (kSampleRateHz / 1000) * kChannels;
  EXPECT_EQ(kExpectedOutputSize, output.samples_per_channel_ * kChannels);
  EXPECT_EQ(kChannels, output.num_channels_);
+  EXPECT_THAT(output.packet_infos_, IsEmpty());

  // Second call to GetAudio will decode the packet that is ok. No errors are
  // expected.
  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
  EXPECT_EQ(kExpectedOutputSize, output.samples_per_channel_ * kChannels);
  EXPECT_EQ(kChannels, output.num_channels_);
+  EXPECT_THAT(output.packet_infos_, SizeIs(1));

  // Die isn't called through NiceMock (since it's called by the
  // MockAudioDecoder constructor), so it needs to be mocked explicitly.
@ -1082,6 +1149,7 @@ TEST_F(NetEqImplTest, DecodedPayloadTooShort) {
  ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_);
  EXPECT_EQ(1u, output.num_channels_);
  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, SizeIs(1));

  EXPECT_CALL(mock_decoder, Die());
 }
@ -1178,6 +1246,7 @@ TEST_F(NetEqImplTest, DecodingError) {
  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
  EXPECT_EQ(1u, output.num_channels_);
  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, SizeIs(2));  // 5 ms packets vs 10 ms output

  // Pull audio again. Decoder fails.
  EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&output, &muted));
@ -1191,12 +1260,14 @@ TEST_F(NetEqImplTest, DecodingError) {
  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
  EXPECT_EQ(1u, output.num_channels_);
  EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, IsEmpty());

  // Pull audio again, should behave normal.
  EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted));
  EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_);
  EXPECT_EQ(1u, output.num_channels_);
  EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_);
+  EXPECT_THAT(output.packet_infos_, SizeIs(2));  // 5 ms packets vs 10 ms output

  EXPECT_CALL(mock_decoder, Die());
 }
@ -1625,4 +1696,4 @@ TEST_F(NetEqImplTest120ms, Accelerate) {
  EXPECT_EQ(kAccelerate, neteq_->last_operation_for_test());
 }

-}// namespace webrtc
+}  // namespace webrtc
--- a/modules/audio_coding/neteq/neteq_network_stats_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_network_stats_unittest.cc
@ -17,6 +17,7 @@
 #include "modules/audio_coding/neteq/include/neteq.h"
 #include "modules/audio_coding/neteq/tools/rtp_generator.h"
 #include "rtc_base/ref_counted_object.h"
+#include "system_wrappers/include/clock.h"
 #include "test/audio_decoder_proxy_factory.h"
 #include "test/gmock.h"

@ -163,7 +164,8 @@ class NetEqNetworkStatsTest {
        packet_loss_interval_(0xffffffff) {
    NetEq::Config config;
    config.sample_rate_hz = format.clockrate_hz;
-    neteq_ = absl::WrapUnique(NetEq::Create(config, decoder_factory_));
+    neteq_ = absl::WrapUnique(
+        NetEq::Create(config, Clock::GetRealTimeClock(), decoder_factory_));
    neteq_->RegisterPayloadType(kPayloadType, format);
  }

--- a/modules/audio_coding/neteq/neteq_stereo_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_stereo_unittest.cc
@ -22,6 +22,7 @@
 #include "modules/audio_coding/neteq/tools/input_audio_file.h"
 #include "modules/audio_coding/neteq/tools/rtp_generator.h"
 #include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/clock.h"
 #include "test/gtest.h"
 #include "test/testsupport/file_utils.h"

@ -57,6 +58,7 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
        frame_size_samples_(
            static_cast<size_t>(frame_size_ms_ * samples_per_ms_)),
        output_size_samples_(10 * samples_per_ms_),
+        clock_(0),
        rtp_generator_mono_(samples_per_ms_),
        rtp_generator_(samples_per_ms_),
        payload_size_bytes_(0),
@ -67,8 +69,8 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
    config.sample_rate_hz = sample_rate_hz_;
    rtc::scoped_refptr<AudioDecoderFactory> factory =
        CreateBuiltinAudioDecoderFactory();
-    neteq_mono_ = NetEq::Create(config, factory);
-    neteq_ = NetEq::Create(config, factory);
+    neteq_mono_ = NetEq::Create(config, &clock_, factory);
+    neteq_ = NetEq::Create(config, &clock_, factory);
    input_ = new int16_t[frame_size_samples_];
    encoded_ = new uint8_t[2 * frame_size_samples_];
    input_multi_channel_ = new int16_t[frame_size_samples_ * num_channels_];
@ -196,6 +198,7 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
      ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_));

      time_now += kTimeStepMs;
+      clock_.AdvanceTimeMilliseconds(kTimeStepMs);
    }
  }

@ -205,6 +208,7 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
  const int frame_size_ms_;
  const size_t frame_size_samples_;
  const size_t output_size_samples_;
+  SimulatedClock clock_;
  NetEq* neteq_mono_;
  NetEq* neteq_;
  test::RtpGenerator rtp_generator_mono_;
--- a/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/modules/audio_coding/neteq/neteq_unittest.cc
@ -36,6 +36,7 @@
 #include "rtc_base/string_encode.h"
 #include "rtc_base/strings/string_builder.h"
 #include "rtc_base/system/arch.h"
+#include "system_wrappers/include/clock.h"
 #include "test/field_trial.h"
 #include "test/gtest.h"
 #include "test/testsupport/file_utils.h"
@ -288,11 +289,11 @@ class NetEqDecodingTest : public ::testing::Test {

  void DuplicateCng();

+  SimulatedClock clock_;
  NetEq* neteq_;
  NetEq::Config config_;
  std::unique_ptr<test::RtpFileSource> rtp_source_;
  std::unique_ptr<test::Packet> packet_;
-  unsigned int sim_clock_;
  AudioFrame out_frame_;
  int output_sample_rate_;
  int algorithmic_delay_ms_;
@ -306,16 +307,16 @@ const size_t NetEqDecodingTest::kBlockSize32kHz;
 const int NetEqDecodingTest::kInitSampleRateHz;

 NetEqDecodingTest::NetEqDecodingTest()
-    : neteq_(NULL),
+    : clock_(0),
+      neteq_(NULL),
      config_(),
-      sim_clock_(0),
      output_sample_rate_(kInitSampleRateHz),
      algorithmic_delay_ms_(0) {
  config_.sample_rate_hz = kInitSampleRateHz;
 }

 void NetEqDecodingTest::SetUp() {
-  neteq_ = NetEq::Create(config_, CreateBuiltinAudioDecoderFactory());
+  neteq_ = NetEq::Create(config_, &clock_, CreateBuiltinAudioDecoderFactory());
  NetEqNetworkStatistics stat;
  ASSERT_EQ(0, neteq_->NetworkStatistics(&stat));
  algorithmic_delay_ms_ = stat.current_buffer_size_ms;
@ -333,7 +334,7 @@ void NetEqDecodingTest::OpenInputFile(const std::string& rtp_file) {

 void NetEqDecodingTest::Process() {
  // Check if time to receive.
-  while (packet_ && sim_clock_ >= packet_->time_ms()) {
+  while (packet_ && clock_.TimeInMilliseconds() >= packet_->time_ms()) {
    if (packet_->payload_length_bytes() > 0) {
 #ifndef WEBRTC_CODEC_ISAC
      // Ignore payload type 104 (iSAC-swb) if ISAC is not supported.
@ -363,7 +364,7 @@ void NetEqDecodingTest::Process() {
  EXPECT_EQ(output_sample_rate_, neteq_->last_output_sample_rate_hz());

  // Increase time.
-  sim_clock_ += kTimeStepMs;
+  clock_.AdvanceTimeMilliseconds(kTimeStepMs);
 }

 void NetEqDecodingTest::DecodeAndCompare(
@ -394,7 +395,7 @@ void NetEqDecodingTest::DecodeAndCompare(
        output.AddResult(out_frame_.data(), out_frame_.samples_per_channel_));

    // Query the network statistics API once per second
-    if (sim_clock_ % 1000 == 0) {
+    if (clock_.TimeInMilliseconds() % 1000 == 0) {
      // Process NetworkStatistics.
      NetEqNetworkStatistics current_network_stats;
      ASSERT_EQ(0, neteq_->NetworkStatistics(&current_network_stats));
@ -1435,7 +1436,8 @@ class NetEqDecodingTestTwoInstances : public NetEqDecodingTest {
  }

  void CreateSecondInstance() {
-    neteq2_.reset(NetEq::Create(config2_, CreateBuiltinAudioDecoderFactory()));
+    neteq2_.reset(
+        NetEq::Create(config2_, &clock_, CreateBuiltinAudioDecoderFactory()));
    ASSERT_TRUE(neteq2_);
    LoadDecoders(neteq2_.get());
  }
--- a/modules/audio_coding/neteq/packet.cc
+++ b/modules/audio_coding/neteq/packet.cc
@ -28,6 +28,7 @@ Packet Packet::Clone() const {
  clone.payload_type = payload_type;
  clone.payload.SetData(payload.data(), payload.size());
  clone.priority = priority;
+  clone.packet_info = packet_info;

  return clone;
 }
--- a/modules/audio_coding/neteq/packet.h
+++ b/modules/audio_coding/neteq/packet.h
@ -16,6 +16,7 @@
 #include <memory>

 #include "api/audio_codecs/audio_decoder.h"
+#include "api/rtp_packet_info.h"
 #include "modules/audio_coding/neteq/tick_timer.h"
 #include "rtc_base/buffer.h"
 #include "rtc_base/checks.h"
@ -72,6 +73,7 @@ struct Packet {
  // Datagram excluding RTP header and header extension.
  rtc::Buffer payload;
  Priority priority;
+  RtpPacketInfo packet_info;
  std::unique_ptr<TickTimer::Stopwatch> waiting_time;
  std::unique_ptr<AudioDecoder::EncodedAudioFrame> frame;

--- a/modules/audio_coding/neteq/red_payload_splitter.cc
+++ b/modules/audio_coding/neteq/red_payload_splitter.cc
@ -117,6 +117,12 @@ bool RedPayloadSplitter::SplitRed(PacketList* packet_list) {
        new_packet.priority.red_level =
            rtc::dchecked_cast<int>((new_headers.size() - 1) - i);
        new_packet.payload.SetData(payload_ptr, payload_length);
+        new_packet.packet_info = RtpPacketInfo(
+            /*ssrc=*/red_packet.packet_info.ssrc(),
+            /*csrcs=*/std::vector<uint32_t>(),
+            /*rtp_timestamp=*/new_packet.timestamp,
+            /*audio_level=*/absl::nullopt,
+            /*receive_time_ms=*/red_packet.packet_info.receive_time_ms());
        new_packets.push_front(std::move(new_packet));
        payload_ptr += payload_length;
      }
--- a/modules/audio_coding/neteq/tools/neteq_performance_test.cc
+++ b/modules/audio_coding/neteq/tools/neteq_performance_test.cc
@ -39,7 +39,9 @@ int64_t NetEqPerformanceTest::Run(int runtime_ms,
  // Initialize NetEq instance.
  NetEq::Config config;
  config.sample_rate_hz = kSampRateHz;
-  NetEq* neteq = NetEq::Create(config, CreateBuiltinAudioDecoderFactory());
+  webrtc::Clock* clock = webrtc::Clock::GetRealTimeClock();
+  NetEq* neteq =
+      NetEq::Create(config, clock, CreateBuiltinAudioDecoderFactory());
  // Register decoder in |neteq|.
  if (!neteq->RegisterPayloadType(kPayloadType,
                                  SdpAudioFormat("l16", kSampRateHz, 1)))
@ -72,7 +74,6 @@ int64_t NetEqPerformanceTest::Run(int runtime_ms,
  RTC_CHECK_EQ(sizeof(input_payload), payload_len);

  // Main loop.
-  webrtc::Clock* clock = webrtc::Clock::GetRealTimeClock();
  int64_t start_time_ms = clock->TimeInMilliseconds();
  AudioFrame out_frame;
  while (time_now_ms < runtime_ms) {
--- a/modules/audio_coding/neteq/tools/neteq_quality_test.cc
+++ b/modules/audio_coding/neteq/tools/neteq_quality_test.cc
@ -16,6 +16,7 @@
 #include "modules/audio_coding/neteq/tools/output_wav_file.h"
 #include "modules/audio_coding/neteq/tools/resample_input_audio_file.h"
 #include "rtc_base/checks.h"
+#include "system_wrappers/include/clock.h"
 #include "test/testsupport/file_utils.h"

 namespace webrtc {
@ -213,7 +214,8 @@ NetEqQualityTest::NetEqQualityTest(

  NetEq::Config config;
  config.sample_rate_hz = out_sampling_khz_ * 1000;
-  neteq_.reset(NetEq::Create(config, decoder_factory));
+  neteq_.reset(
+      NetEq::Create(config, Clock::GetRealTimeClock(), decoder_factory));
  max_payload_bytes_ = in_size_samples_ * channels_ * sizeof(int16_t);
  in_data_.reset(new int16_t[in_size_samples_ * channels_]);
 }
--- a/modules/audio_coding/neteq/tools/neteq_quality_test.h
+++ b/modules/audio_coding/neteq/tools/neteq_quality_test.h
@ -20,6 +20,7 @@
 #include "modules/audio_coding/neteq/tools/input_audio_file.h"
 #include "modules/audio_coding/neteq/tools/rtp_generator.h"
 #include "rtc_base/flags.h"
+#include "system_wrappers/include/clock.h"
 #include "test/gtest.h"

 namespace webrtc {
--- a/modules/audio_coding/neteq/tools/neteq_test.cc
+++ b/modules/audio_coding/neteq/tools/neteq_test.cc
@ -14,6 +14,7 @@
 #include <iostream>

 #include "modules/rtp_rtcp/source/byte_io.h"
+#include "system_wrappers/include/clock.h"

 namespace webrtc {
 namespace test {
@ -57,7 +58,8 @@ NetEqTest::NetEqTest(const NetEq::Config& config,
                     std::unique_ptr<NetEqInput> input,
                     std::unique_ptr<AudioSink> output,
                     Callbacks callbacks)
-    : neteq_(NetEq::Create(config, decoder_factory)),
+    : clock_(0),
+      neteq_(NetEq::Create(config, &clock_, decoder_factory)),
      input_(std::move(input)),
      output_(std::move(output)),
      callbacks_(callbacks),
@ -92,6 +94,7 @@ NetEqTest::SimulationStepResult NetEqTest::RunToNextGetAudio() {
  while (!input_->ended()) {
    // Advance time to next event.
    RTC_DCHECK(input_->NextEventTime());
+    clock_.AdvanceTimeMilliseconds(*input_->NextEventTime() - time_now_ms);
    time_now_ms = *input_->NextEventTime();
    // Check if it is time to insert packet.
    if (input_->NextPacketTime() && time_now_ms >= *input_->NextPacketTime()) {
--- a/modules/audio_coding/neteq/tools/neteq_test.h
+++ b/modules/audio_coding/neteq/tools/neteq_test.h
@ -23,6 +23,7 @@
 #include "modules/audio_coding/neteq/include/neteq.h"
 #include "modules/audio_coding/neteq/tools/audio_sink.h"
 #include "modules/audio_coding/neteq/tools/neteq_input.h"
+#include "system_wrappers/include/clock.h"

 namespace webrtc {
 namespace test {
@ -106,6 +107,7 @@ class NetEqTest : public NetEqSimulator {

 private:
  void RegisterDecoders(const DecoderMap& codecs);
+  SimulatedClock clock_;
  absl::optional<Action> next_action_;
  absl::optional<int> last_packet_time_ms_;
  std::unique_ptr<NetEq> neteq_;
--- a/modules/audio_mixer/frame_combiner.cc
+++ b/modules/audio_mixer/frame_combiner.cc
@ -57,6 +57,7 @@ void SetAudioFrameFields(const std::vector<AudioFrame*>& mix_list,
    audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_;
    audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_;
    audio_frame_for_mixing->ntp_time_ms_ = mix_list[0]->ntp_time_ms_;
+    audio_frame_for_mixing->packet_infos_ = mix_list[0]->packet_infos_;
  }
 }