diff --git a/api/audio/BUILD.gn b/api/audio/BUILD.gn index b5a6111e30..46396d6730 100644 --- a/api/audio/BUILD.gn +++ b/api/audio/BUILD.gn @@ -16,6 +16,7 @@ rtc_source_set("audio_frame_api") { ] deps = [ + "..:rtp_packet_info", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", ] diff --git a/api/audio/audio_frame.cc b/api/audio/audio_frame.cc index 1e706b96fd..4c07aafb6e 100644 --- a/api/audio/audio_frame.cc +++ b/api/audio/audio_frame.cc @@ -39,6 +39,7 @@ void AudioFrame::ResetWithoutMuting() { speech_type_ = kUndefined; vad_activity_ = kVadUnknown; profile_timestamp_ms_ = 0; + packet_infos_ = RtpPacketInfos(); } void AudioFrame::UpdateFrame(uint32_t timestamp, @@ -72,6 +73,7 @@ void AudioFrame::CopyFrom(const AudioFrame& src) { timestamp_ = src.timestamp_; elapsed_time_ms_ = src.elapsed_time_ms_; ntp_time_ms_ = src.ntp_time_ms_; + packet_infos_ = src.packet_infos_; muted_ = src.muted(); samples_per_channel_ = src.samples_per_channel_; sample_rate_hz_ = src.sample_rate_hz_; diff --git a/api/audio/audio_frame.h b/api/audio/audio_frame.h index 8f1dc62a17..70eb701d6b 100644 --- a/api/audio/audio_frame.h +++ b/api/audio/audio_frame.h @@ -14,6 +14,7 @@ #include #include +#include "api/rtp_packet_infos.h" #include "rtc_base/constructor_magic.h" namespace webrtc { @@ -115,6 +116,22 @@ class AudioFrame { // class/struct needs an explicit out-of-line destructor" build error. int64_t profile_timestamp_ms_ = 0; + // Information about packets used to assemble this audio frame. This is needed + // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's + // MediaStreamTrack, in order to implement getContributingSources(). See: + // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources + // + // TODO(bugs.webrtc.org/10757): + // Note that this information might not be fully accurate since we currently + // don't have a proper way to track it across the audio sync buffer. 
The + // sync buffer is the small sample-holding buffer located after the audio + // decoder and before where samples are assembled into output frames. + // + // |RtpPacketInfos| may also be empty if the audio samples did not come from + // RTP packets. E.g. if the audio were locally generated by packet loss + // concealment, comfort noise generation, etc. + RtpPacketInfos packet_infos_; + private: // A permamently zeroed out buffer to represent muted frames. This is a // header-only class, so the only way to avoid creating a separate empty diff --git a/audio/remix_resample.cc b/audio/remix_resample.cc index e77c386742..3694d34e40 100644 --- a/audio/remix_resample.cc +++ b/audio/remix_resample.cc @@ -27,6 +27,7 @@ void RemixAndResample(const AudioFrame& src_frame, dst_frame->timestamp_ = src_frame.timestamp_; dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_; dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_; + dst_frame->packet_infos_ = src_frame.packet_infos_; } void RemixAndResample(const int16_t* src_data, diff --git a/modules/audio_coding/BUILD.gn b/modules/audio_coding/BUILD.gn index 8aad2030fd..9afc0e4eed 100644 --- a/modules/audio_coding/BUILD.gn +++ b/modules/audio_coding/BUILD.gn @@ -1017,6 +1017,7 @@ rtc_static_library("neteq") { "..:module_api_public", "../../api:array_view", "../../api:rtp_headers", + "../../api:rtp_packet_info", "../../api:scoped_refptr", "../../api/audio:audio_frame_api", "../../api/audio_codecs:audio_codecs_api", @@ -1029,6 +1030,7 @@ rtc_static_library("neteq") { "../../rtc_base:safe_minmax", "../../rtc_base:sanitizer", "../../rtc_base/system:fallthrough", + "../../system_wrappers", "../../system_wrappers:field_trial", "../../system_wrappers:metrics", "//third_party/abseil-cpp/absl/memory", @@ -1066,6 +1068,7 @@ rtc_source_set("neteq_tools_minimal") { "../../api/audio_codecs:audio_codecs_api", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", + "../../system_wrappers", "../rtp_rtcp", "../rtp_rtcp:rtp_rtcp_format", 
"//third_party/abseil-cpp/absl/types:optional", @@ -1591,6 +1594,7 @@ if (rtc_include_tests) { "../../api/audio_codecs:builtin_audio_decoder_factory", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", + "../../system_wrappers", "../../test:fileutils", "../../test:test_support", "//testing/gtest", diff --git a/modules/audio_coding/acm2/acm_receiver.cc b/modules/audio_coding/acm2/acm_receiver.cc index 3bce0c4063..5ac71dd0b4 100644 --- a/modules/audio_coding/acm2/acm_receiver.cc +++ b/modules/audio_coding/acm2/acm_receiver.cc @@ -34,7 +34,9 @@ namespace acm2 { AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config) : last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]), - neteq_(NetEq::Create(config.neteq_config, config.decoder_factory)), + neteq_(NetEq::Create(config.neteq_config, + config.clock, + config.decoder_factory)), clock_(config.clock), resampled_last_output_frame_(true) { RTC_DCHECK(clock_); diff --git a/modules/audio_coding/neteq/include/neteq.h b/modules/audio_coding/neteq/include/neteq.h index d91850fd77..ef144e69f0 100644 --- a/modules/audio_coding/neteq/include/neteq.h +++ b/modules/audio_coding/neteq/include/neteq.h @@ -31,6 +31,7 @@ namespace webrtc { // Forward declarations. class AudioFrame; class AudioDecoderFactory; +class Clock; struct NetEqNetworkStatistics { uint16_t current_buffer_size_ms; // Current jitter buffer size in ms. @@ -149,6 +150,7 @@ class NetEq { // method. static NetEq* Create( const NetEq::Config& config, + Clock* clock, const rtc::scoped_refptr& decoder_factory); virtual ~NetEq() {} diff --git a/modules/audio_coding/neteq/neteq.cc b/modules/audio_coding/neteq/neteq.cc index a84c94280a..0a36cb2215 100644 --- a/modules/audio_coding/neteq/neteq.cc +++ b/modules/audio_coding/neteq/neteq.cc @@ -39,9 +39,10 @@ std::string NetEq::Config::ToString() const { // Return the new object. 
NetEq* NetEq::Create( const NetEq::Config& config, + Clock* clock, const rtc::scoped_refptr& decoder_factory) { return new NetEqImpl(config, - NetEqImpl::Dependencies(config, decoder_factory)); + NetEqImpl::Dependencies(config, clock, decoder_factory)); } } // namespace webrtc diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc index 6f36fb132d..62184b0266 100644 --- a/modules/audio_coding/neteq/neteq_impl.cc +++ b/modules/audio_coding/neteq/neteq_impl.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -52,13 +53,16 @@ #include "rtc_base/sanitizer.h" #include "rtc_base/strings/audio_format_to_string.h" #include "rtc_base/trace_event.h" +#include "system_wrappers/include/clock.h" namespace webrtc { NetEqImpl::Dependencies::Dependencies( const NetEq::Config& config, + Clock* clock, const rtc::scoped_refptr& decoder_factory) - : tick_timer(new TickTimer), + : clock(clock), + tick_timer(new TickTimer), stats(new StatisticsCalculator), buffer_level_filter(new BufferLevelFilter), decoder_database( @@ -86,7 +90,8 @@ NetEqImpl::Dependencies::~Dependencies() = default; NetEqImpl::NetEqImpl(const NetEq::Config& config, Dependencies&& deps, bool create_components) - : tick_timer_(std::move(deps.tick_timer)), + : clock_(deps.clock), + tick_timer_(std::move(deps.tick_timer)), buffer_level_filter_(std::move(deps.buffer_level_filter)), decoder_database_(std::move(deps.decoder_database)), delay_manager_(std::move(deps.delay_manager)), @@ -468,17 +473,20 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header, RTC_LOG_F(LS_ERROR) << "payload is empty"; return kInvalidPointer; } + + int64_t receive_time_ms = clock_->TimeInMilliseconds(); stats_->ReceivedPacket(); PacketList packet_list; // Insert packet in a packet list. - packet_list.push_back([&rtp_header, &payload] { + packet_list.push_back([&rtp_header, &payload, &receive_time_ms] { // Convert to Packet. 
Packet packet; packet.payload_type = rtp_header.payloadType; packet.sequence_number = rtp_header.sequenceNumber; packet.timestamp = rtp_header.timestamp; packet.payload.SetData(payload.data(), payload.size()); + packet.packet_info = RtpPacketInfo(rtp_header, receive_time_ms); // Waiting time will be set upon inserting the packet in the buffer. RTC_DCHECK(!packet.waiting_time); return packet; @@ -611,6 +619,7 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header, const auto sequence_number = packet.sequence_number; const auto payload_type = packet.payload_type; const Packet::Priority original_priority = packet.priority; + const auto& packet_info = packet.packet_info; auto packet_from_result = [&](AudioDecoder::ParseResult& result) { Packet new_packet; new_packet.sequence_number = sequence_number; @@ -618,6 +627,7 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header, new_packet.timestamp = result.timestamp; new_packet.priority.codec_level = result.priority; new_packet.priority.red_level = original_priority.red_level; + new_packet.packet_info = packet_info; new_packet.frame = std::move(result.frame); return new_packet; }; @@ -879,7 +889,16 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, comfort_noise_->Reset(); } - // Copy from |algorithm_buffer| to |sync_buffer_|. + // We treat it as if all packets referenced to by |last_decoded_packet_infos_| + // were mashed together when creating the samples in |algorithm_buffer_|. + RtpPacketInfos packet_infos(std::move(last_decoded_packet_infos_)); + last_decoded_packet_infos_.clear(); + + // Copy samples from |algorithm_buffer_| to |sync_buffer_|. + // + // TODO(bugs.webrtc.org/10757): + // We would in the future also like to pass |packet_infos| so that we can do + // sample-perfect tracking of that information across |sync_buffer_|. sync_buffer_->PushBack(*algorithm_buffer_); // Extract data from |sync_buffer_| to |output|. 
@@ -897,6 +916,13 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel, audio_frame); audio_frame->sample_rate_hz_ = fs_hz_; + // TODO(bugs.webrtc.org/10757): + // We don't have the ability to properly track individual packets once their + // audio samples have entered |sync_buffer_|. So for now, treat it as if + // |packet_infos| from packets decoded by the current |GetAudioInternal()| + // call were all consumed assembling the current audio frame and the current + // audio frame only. + audio_frame->packet_infos_ = std::move(packet_infos); if (sync_buffer_->FutureLength() < expand_->overlap_length()) { // The sync buffer should always contain |overlap_length| samples, but now // too many samples have been extracted. Reinstall the |overlap_length| @@ -1392,6 +1418,7 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, int* decoded_length, AudioDecoder::SpeechType* speech_type) { RTC_DCHECK(last_decoded_timestamps_.empty()); + RTC_DCHECK(last_decoded_packet_infos_.empty()); // Do decoding. while (!packet_list->empty() && !decoder_database_->IsComfortNoise( @@ -1409,6 +1436,8 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, rtc::ArrayView(&decoded_buffer_[*decoded_length], decoded_buffer_length_ - *decoded_length)); last_decoded_timestamps_.push_back(packet_list->front().timestamp); + last_decoded_packet_infos_.push_back( + std::move(packet_list->front().packet_info)); packet_list->pop_front(); if (opt_result) { const auto& result = *opt_result; @@ -1424,6 +1453,7 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, // TODO(ossu): What to put here? 
RTC_LOG(LS_WARNING) << "Decode error"; *decoded_length = -1; + last_decoded_packet_infos_.clear(); packet_list->clear(); break; } diff --git a/modules/audio_coding/neteq/neteq_impl.h b/modules/audio_coding/neteq/neteq_impl.h index 34a5c71258..d529c9e9d7 100644 --- a/modules/audio_coding/neteq/neteq_impl.h +++ b/modules/audio_coding/neteq/neteq_impl.h @@ -11,11 +11,15 @@ #ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ #define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ +#include #include #include +#include +#include #include "absl/types/optional.h" #include "api/audio/audio_frame.h" +#include "api/rtp_packet_info.h" #include "modules/audio_coding/neteq/audio_multi_vector.h" #include "modules/audio_coding/neteq/defines.h" // Modes, Operations #include "modules/audio_coding/neteq/expand_uma_logger.h" @@ -34,6 +38,7 @@ namespace webrtc { class Accelerate; class BackgroundNoise; class BufferLevelFilter; +class Clock; class ComfortNoise; class DecisionLogic; class DecoderDatabase; @@ -93,11 +98,13 @@ class NetEqImpl : public webrtc::NetEq { // before sending the struct to the NetEqImpl constructor. However, there // are dependencies between some of the classes inside the struct, so // swapping out one may make it necessary to re-create another one. - explicit Dependencies( + Dependencies( const NetEq::Config& config, + Clock* clock, const rtc::scoped_refptr& decoder_factory); ~Dependencies(); + Clock* const clock; std::unique_ptr tick_timer; std::unique_ptr stats; std::unique_ptr buffer_level_filter; @@ -338,6 +345,8 @@ class NetEqImpl : public webrtc::NetEq { // Creates DecisionLogic object with the mode given by |playout_mode_|. 
virtual void CreateDecisionLogic() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); + Clock* const clock_; + rtc::CriticalSection crit_sect_; const std::unique_ptr tick_timer_ RTC_GUARDED_BY(crit_sect_); const std::unique_ptr buffer_level_filter_ @@ -403,6 +412,8 @@ class NetEqImpl : public webrtc::NetEq { std::unique_ptr generated_noise_stopwatch_ RTC_GUARDED_BY(crit_sect_); std::vector last_decoded_timestamps_ RTC_GUARDED_BY(crit_sect_); + std::vector last_decoded_packet_infos_ + RTC_GUARDED_BY(crit_sect_); ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(crit_sect_); ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(crit_sect_); bool no_time_stretching_ RTC_GUARDED_BY(crit_sect_); // Only used for test. diff --git a/modules/audio_coding/neteq/neteq_impl_unittest.cc b/modules/audio_coding/neteq/neteq_impl_unittest.cc index 2b0d5832ae..0c7c09059c 100644 --- a/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -9,6 +9,8 @@ */ #include +#include +#include #include "absl/memory/memory.h" #include "api/audio_codecs/builtin_audio_decoder_factory.h" @@ -30,6 +32,7 @@ #include "modules/audio_coding/neteq/sync_buffer.h" #include "modules/audio_coding/neteq/timestamp_scaler.h" #include "rtc_base/numerics/safe_conversions.h" +#include "system_wrappers/include/clock.h" #include "test/audio_decoder_proxy_factory.h" #include "test/function_audio_decoder_factory.h" #include "test/gmock.h" @@ -40,14 +43,17 @@ using ::testing::_; using ::testing::AtLeast; using ::testing::DoAll; +using ::testing::ElementsAre; using ::testing::InSequence; using ::testing::Invoke; +using ::testing::IsEmpty; using ::testing::IsNull; using ::testing::Pointee; using ::testing::Return; using ::testing::ReturnNull; using ::testing::SetArgPointee; using ::testing::SetArrayArgument; +using ::testing::SizeIs; using ::testing::WithArg; namespace webrtc { @@ -62,12 +68,12 @@ int DeletePacketsAndReturnOk(PacketList* packet_list) { class NetEqImplTest 
: public ::testing::Test { protected: - NetEqImplTest() { config_.sample_rate_hz = 8000; } + NetEqImplTest() : clock_(0) { config_.sample_rate_hz = 8000; } void CreateInstance( const rtc::scoped_refptr& decoder_factory) { ASSERT_TRUE(decoder_factory); - NetEqImpl::Dependencies deps(config_, decoder_factory); + NetEqImpl::Dependencies deps(config_, &clock_, decoder_factory); // Get a local pointer to NetEq's TickTimer object. tick_timer_ = deps.tick_timer.get(); @@ -217,6 +223,10 @@ class NetEqImplTest : public ::testing::Test { EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + // DTMF packets are immediately consumed by |InsertPacket()| and won't be + // returned by |GetAudio()|. + EXPECT_THAT(output.packet_infos_, IsEmpty()); + // Verify first 64 samples of actual output. const std::vector kOutput({ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1578, -2816, -3460, -3403, -2709, -1594, @@ -231,6 +241,7 @@ class NetEqImplTest : public ::testing::Test { std::unique_ptr neteq_; NetEq::Config config_; + SimulatedClock clock_; TickTimer* tick_timer_ = nullptr; MockBufferLevelFilter* mock_buffer_level_filter_ = nullptr; BufferLevelFilter* buffer_level_filter_ = nullptr; @@ -263,7 +274,9 @@ class NetEqImplTest : public ::testing::Test { // TODO(hlundin): Move to separate file? TEST(NetEq, CreateAndDestroy) { NetEq::Config config; - NetEq* neteq = NetEq::Create(config, CreateBuiltinAudioDecoderFactory()); + SimulatedClock clock(0); + NetEq* neteq = + NetEq::Create(config, &clock, CreateBuiltinAudioDecoderFactory()); delete neteq; } @@ -458,6 +471,10 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) { rtp_header.sequenceNumber = 0x1234; rtp_header.timestamp = 0x12345678; rtp_header.ssrc = 0x87654321; + rtp_header.numCSRCs = 3; + rtp_header.arrOfCSRCs[0] = 43; + rtp_header.arrOfCSRCs[1] = 65; + rtp_header.arrOfCSRCs[2] = 17; // This is a dummy decoder that produces as many output samples as the input // has bytes. 
The output is an increasing series, starting at 1 for the first @@ -501,6 +518,8 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) { SdpAudioFormat("L16", 8000, 1))); // Insert one packet. + clock_.AdvanceTimeMilliseconds(123456); + int64_t expected_receive_time_ms = clock_.TimeInMilliseconds(); EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload, kReceiveTime)); @@ -514,6 +533,17 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) { EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + // Verify |output.packet_infos_|. + ASSERT_THAT(output.packet_infos_, SizeIs(1)); + { + const auto& packet_info = output.packet_infos_[0]; + EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); + EXPECT_THAT(packet_info.csrcs(), ElementsAre(43, 65, 17)); + EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); + EXPECT_FALSE(packet_info.audio_level().has_value()); + EXPECT_EQ(packet_info.receive_time_ms(), expected_receive_time_ms); + } + // Start with a simple check that the fake decoder is behaving as expected. EXPECT_EQ(kPayloadLengthSamples, static_cast(decoder_.next_value() - 1)); @@ -561,6 +591,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) { rtp_header.sequenceNumber = 0x1234; rtp_header.timestamp = 0x12345678; rtp_header.ssrc = 0x87654321; + rtp_header.extension.hasAudioLevel = true; + rtp_header.extension.audioLevel = 42; EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); EXPECT_CALL(mock_decoder, SampleRateHz()) @@ -583,6 +615,8 @@ TEST_F(NetEqImplTest, ReorderedPacket) { SdpAudioFormat("L16", 8000, 1))); // Insert one packet. + clock_.AdvanceTimeMilliseconds(123456); + int64_t expected_receive_time_ms = clock_.TimeInMilliseconds(); EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload, kReceiveTime)); @@ -595,16 +629,32 @@ TEST_F(NetEqImplTest, ReorderedPacket) { EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + // Verify |output.packet_infos_|. 
+ ASSERT_THAT(output.packet_infos_, SizeIs(1)); + { + const auto& packet_info = output.packet_infos_[0]; + EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); + EXPECT_THAT(packet_info.csrcs(), IsEmpty()); + EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); + EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel); + EXPECT_EQ(packet_info.receive_time_ms(), expected_receive_time_ms); + } + // Insert two more packets. The first one is out of order, and is already too // old, the second one is the expected next packet. rtp_header.sequenceNumber -= 1; rtp_header.timestamp -= kPayloadLengthSamples; + rtp_header.extension.audioLevel = 1; payload[0] = 1; + clock_.AdvanceTimeMilliseconds(1000); EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload, kReceiveTime)); rtp_header.sequenceNumber += 2; rtp_header.timestamp += 2 * kPayloadLengthSamples; + rtp_header.extension.audioLevel = 2; payload[0] = 2; + clock_.AdvanceTimeMilliseconds(2000); + expected_receive_time_ms = clock_.TimeInMilliseconds(); EXPECT_EQ(NetEq::kOK, neteq_->InsertPacket(rtp_header, payload, kReceiveTime)); @@ -627,6 +677,17 @@ TEST_F(NetEqImplTest, ReorderedPacket) { // out-of-order packet should have been discarded. EXPECT_TRUE(packet_buffer_->Empty()); + // Verify |output.packet_infos_|. Expect to only see the second packet. 
+ ASSERT_THAT(output.packet_infos_, SizeIs(1)); + { + const auto& packet_info = output.packet_infos_[0]; + EXPECT_EQ(packet_info.ssrc(), rtp_header.ssrc); + EXPECT_THAT(packet_info.csrcs(), IsEmpty()); + EXPECT_EQ(packet_info.rtp_timestamp(), rtp_header.timestamp); + EXPECT_EQ(packet_info.audio_level(), rtp_header.extension.audioLevel); + EXPECT_EQ(packet_info.receive_time_ms(), expected_receive_time_ms); + } + EXPECT_CALL(mock_decoder, Die()); } @@ -663,6 +724,7 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) { EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kPLC, output.speech_type_); + EXPECT_THAT(output.packet_infos_, IsEmpty()); // Register the payload type. EXPECT_TRUE(neteq_->RegisterPayloadType(kPayloadType, @@ -685,6 +747,7 @@ TEST_F(NetEqImplTest, FirstPacketUnknown) { EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_) << "NetEq did not decode the packets as expected."; + EXPECT_THAT(output.packet_infos_, SizeIs(1)); } } @@ -722,6 +785,7 @@ TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) { EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); EXPECT_EQ(1u, output.num_channels_); EXPECT_NE(AudioFrame::kNormalSpeech, output.speech_type_); + EXPECT_THAT(output.packet_infos_, IsEmpty()); } // Insert 10 packets. 
@@ -741,6 +805,7 @@ TEST_F(NetEqImplTest, NoAudioInterruptionLoggedBeforeFirstDecode) { EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_) << "NetEq did not decode the packets as expected."; + EXPECT_THAT(output.packet_infos_, SizeIs(1)); } auto lifetime_stats = neteq_->GetLifetimeStatistics(); @@ -975,12 +1040,14 @@ TEST_F(NetEqImplTest, UnsupportedDecoder) { const size_t kExpectedOutputSize = 10 * (kSampleRateHz / 1000) * kChannels; EXPECT_EQ(kExpectedOutputSize, output.samples_per_channel_ * kChannels); EXPECT_EQ(kChannels, output.num_channels_); + EXPECT_THAT(output.packet_infos_, IsEmpty()); // Second call to GetAudio will decode the packet that is ok. No errors are // expected. EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); EXPECT_EQ(kExpectedOutputSize, output.samples_per_channel_ * kChannels); EXPECT_EQ(kChannels, output.num_channels_); + EXPECT_THAT(output.packet_infos_, SizeIs(1)); // Die isn't called through NiceMock (since it's called by the // MockAudioDecoder constructor), so it needs to be mocked explicitly. @@ -1082,6 +1149,7 @@ TEST_F(NetEqImplTest, DecodedPayloadTooShort) { ASSERT_EQ(kMaxOutputSize, output.samples_per_channel_); EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + EXPECT_THAT(output.packet_infos_, SizeIs(1)); EXPECT_CALL(mock_decoder, Die()); } @@ -1178,6 +1246,7 @@ TEST_F(NetEqImplTest, DecodingError) { EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + EXPECT_THAT(output.packet_infos_, SizeIs(2)); // 5 ms packets vs 10 ms output // Pull audio again. Decoder fails. 
EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&output, &muted)); @@ -1191,12 +1260,14 @@ TEST_F(NetEqImplTest, DecodingError) { EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kPLC, output.speech_type_); + EXPECT_THAT(output.packet_infos_, IsEmpty()); // Pull audio again, should behave normal. EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(&output, &muted)); EXPECT_EQ(kMaxOutputSize, output.samples_per_channel_); EXPECT_EQ(1u, output.num_channels_); EXPECT_EQ(AudioFrame::kNormalSpeech, output.speech_type_); + EXPECT_THAT(output.packet_infos_, SizeIs(2)); // 5 ms packets vs 10 ms output EXPECT_CALL(mock_decoder, Die()); } @@ -1625,4 +1696,4 @@ TEST_F(NetEqImplTest120ms, Accelerate) { EXPECT_EQ(kAccelerate, neteq_->last_operation_for_test()); } -}// namespace webrtc +} // namespace webrtc diff --git a/modules/audio_coding/neteq/neteq_network_stats_unittest.cc b/modules/audio_coding/neteq/neteq_network_stats_unittest.cc index c0900104b8..e05a790517 100644 --- a/modules/audio_coding/neteq/neteq_network_stats_unittest.cc +++ b/modules/audio_coding/neteq/neteq_network_stats_unittest.cc @@ -17,6 +17,7 @@ #include "modules/audio_coding/neteq/include/neteq.h" #include "modules/audio_coding/neteq/tools/rtp_generator.h" #include "rtc_base/ref_counted_object.h" +#include "system_wrappers/include/clock.h" #include "test/audio_decoder_proxy_factory.h" #include "test/gmock.h" @@ -163,7 +164,8 @@ class NetEqNetworkStatsTest { packet_loss_interval_(0xffffffff) { NetEq::Config config; config.sample_rate_hz = format.clockrate_hz; - neteq_ = absl::WrapUnique(NetEq::Create(config, decoder_factory_)); + neteq_ = absl::WrapUnique( + NetEq::Create(config, Clock::GetRealTimeClock(), decoder_factory_)); neteq_->RegisterPayloadType(kPayloadType, format); } diff --git a/modules/audio_coding/neteq/neteq_stereo_unittest.cc b/modules/audio_coding/neteq/neteq_stereo_unittest.cc index d25e8d6824..2d62f8bdd7 100644 --- 
a/modules/audio_coding/neteq/neteq_stereo_unittest.cc +++ b/modules/audio_coding/neteq/neteq_stereo_unittest.cc @@ -22,6 +22,7 @@ #include "modules/audio_coding/neteq/tools/input_audio_file.h" #include "modules/audio_coding/neteq/tools/rtp_generator.h" #include "rtc_base/strings/string_builder.h" +#include "system_wrappers/include/clock.h" #include "test/gtest.h" #include "test/testsupport/file_utils.h" @@ -57,6 +58,7 @@ class NetEqStereoTest : public ::testing::TestWithParam { frame_size_samples_( static_cast(frame_size_ms_ * samples_per_ms_)), output_size_samples_(10 * samples_per_ms_), + clock_(0), rtp_generator_mono_(samples_per_ms_), rtp_generator_(samples_per_ms_), payload_size_bytes_(0), @@ -67,8 +69,8 @@ class NetEqStereoTest : public ::testing::TestWithParam { config.sample_rate_hz = sample_rate_hz_; rtc::scoped_refptr factory = CreateBuiltinAudioDecoderFactory(); - neteq_mono_ = NetEq::Create(config, factory); - neteq_ = NetEq::Create(config, factory); + neteq_mono_ = NetEq::Create(config, &clock_, factory); + neteq_ = NetEq::Create(config, &clock_, factory); input_ = new int16_t[frame_size_samples_]; encoded_ = new uint8_t[2 * frame_size_samples_]; input_multi_channel_ = new int16_t[frame_size_samples_ * num_channels_]; @@ -196,6 +198,7 @@ class NetEqStereoTest : public ::testing::TestWithParam { ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_)); time_now += kTimeStepMs; + clock_.AdvanceTimeMilliseconds(kTimeStepMs); } } @@ -205,6 +208,7 @@ class NetEqStereoTest : public ::testing::TestWithParam { const int frame_size_ms_; const size_t frame_size_samples_; const size_t output_size_samples_; + SimulatedClock clock_; NetEq* neteq_mono_; NetEq* neteq_; test::RtpGenerator rtp_generator_mono_; diff --git a/modules/audio_coding/neteq/neteq_unittest.cc b/modules/audio_coding/neteq/neteq_unittest.cc index 9f7d04d4c1..54291a98e4 100644 --- a/modules/audio_coding/neteq/neteq_unittest.cc +++ b/modules/audio_coding/neteq/neteq_unittest.cc @@ -36,6 +36,7 @@ 
#include "rtc_base/string_encode.h" #include "rtc_base/strings/string_builder.h" #include "rtc_base/system/arch.h" +#include "system_wrappers/include/clock.h" #include "test/field_trial.h" #include "test/gtest.h" #include "test/testsupport/file_utils.h" @@ -288,11 +289,11 @@ class NetEqDecodingTest : public ::testing::Test { void DuplicateCng(); + SimulatedClock clock_; NetEq* neteq_; NetEq::Config config_; std::unique_ptr rtp_source_; std::unique_ptr packet_; - unsigned int sim_clock_; AudioFrame out_frame_; int output_sample_rate_; int algorithmic_delay_ms_; @@ -306,16 +307,16 @@ const size_t NetEqDecodingTest::kBlockSize32kHz; const int NetEqDecodingTest::kInitSampleRateHz; NetEqDecodingTest::NetEqDecodingTest() - : neteq_(NULL), + : clock_(0), + neteq_(NULL), config_(), - sim_clock_(0), output_sample_rate_(kInitSampleRateHz), algorithmic_delay_ms_(0) { config_.sample_rate_hz = kInitSampleRateHz; } void NetEqDecodingTest::SetUp() { - neteq_ = NetEq::Create(config_, CreateBuiltinAudioDecoderFactory()); + neteq_ = NetEq::Create(config_, &clock_, CreateBuiltinAudioDecoderFactory()); NetEqNetworkStatistics stat; ASSERT_EQ(0, neteq_->NetworkStatistics(&stat)); algorithmic_delay_ms_ = stat.current_buffer_size_ms; @@ -333,7 +334,7 @@ void NetEqDecodingTest::OpenInputFile(const std::string& rtp_file) { void NetEqDecodingTest::Process() { // Check if time to receive. - while (packet_ && sim_clock_ >= packet_->time_ms()) { + while (packet_ && clock_.TimeInMilliseconds() >= packet_->time_ms()) { if (packet_->payload_length_bytes() > 0) { #ifndef WEBRTC_CODEC_ISAC // Ignore payload type 104 (iSAC-swb) if ISAC is not supported. @@ -363,7 +364,7 @@ void NetEqDecodingTest::Process() { EXPECT_EQ(output_sample_rate_, neteq_->last_output_sample_rate_hz()); // Increase time. 
- sim_clock_ += kTimeStepMs; + clock_.AdvanceTimeMilliseconds(kTimeStepMs); } void NetEqDecodingTest::DecodeAndCompare( @@ -394,7 +395,7 @@ void NetEqDecodingTest::DecodeAndCompare( output.AddResult(out_frame_.data(), out_frame_.samples_per_channel_)); // Query the network statistics API once per second - if (sim_clock_ % 1000 == 0) { + if (clock_.TimeInMilliseconds() % 1000 == 0) { // Process NetworkStatistics. NetEqNetworkStatistics current_network_stats; ASSERT_EQ(0, neteq_->NetworkStatistics(&current_network_stats)); @@ -1435,7 +1436,8 @@ class NetEqDecodingTestTwoInstances : public NetEqDecodingTest { } void CreateSecondInstance() { - neteq2_.reset(NetEq::Create(config2_, CreateBuiltinAudioDecoderFactory())); + neteq2_.reset( + NetEq::Create(config2_, &clock_, CreateBuiltinAudioDecoderFactory())); ASSERT_TRUE(neteq2_); LoadDecoders(neteq2_.get()); } diff --git a/modules/audio_coding/neteq/packet.cc b/modules/audio_coding/neteq/packet.cc index 3cec310902..333f161229 100644 --- a/modules/audio_coding/neteq/packet.cc +++ b/modules/audio_coding/neteq/packet.cc @@ -28,6 +28,7 @@ Packet Packet::Clone() const { clone.payload_type = payload_type; clone.payload.SetData(payload.data(), payload.size()); clone.priority = priority; + clone.packet_info = packet_info; return clone; } diff --git a/modules/audio_coding/neteq/packet.h b/modules/audio_coding/neteq/packet.h index 358d8fa1ab..4f50e4d9de 100644 --- a/modules/audio_coding/neteq/packet.h +++ b/modules/audio_coding/neteq/packet.h @@ -16,6 +16,7 @@ #include #include "api/audio_codecs/audio_decoder.h" +#include "api/rtp_packet_info.h" #include "modules/audio_coding/neteq/tick_timer.h" #include "rtc_base/buffer.h" #include "rtc_base/checks.h" @@ -72,6 +73,7 @@ struct Packet { // Datagram excluding RTP header and header extension. 
rtc::Buffer payload; Priority priority; + RtpPacketInfo packet_info; std::unique_ptr waiting_time; std::unique_ptr frame; diff --git a/modules/audio_coding/neteq/red_payload_splitter.cc b/modules/audio_coding/neteq/red_payload_splitter.cc index 2dfe8386ff..2a9befaa02 100644 --- a/modules/audio_coding/neteq/red_payload_splitter.cc +++ b/modules/audio_coding/neteq/red_payload_splitter.cc @@ -117,6 +117,12 @@ bool RedPayloadSplitter::SplitRed(PacketList* packet_list) { new_packet.priority.red_level = rtc::dchecked_cast((new_headers.size() - 1) - i); new_packet.payload.SetData(payload_ptr, payload_length); + new_packet.packet_info = RtpPacketInfo( + /*ssrc=*/red_packet.packet_info.ssrc(), + /*csrcs=*/std::vector(), + /*rtp_timestamp=*/new_packet.timestamp, + /*audio_level=*/absl::nullopt, + /*receive_time_ms=*/red_packet.packet_info.receive_time_ms()); new_packets.push_front(std::move(new_packet)); payload_ptr += payload_length; } diff --git a/modules/audio_coding/neteq/tools/neteq_performance_test.cc b/modules/audio_coding/neteq/tools/neteq_performance_test.cc index 61f52bb3e5..604083b224 100644 --- a/modules/audio_coding/neteq/tools/neteq_performance_test.cc +++ b/modules/audio_coding/neteq/tools/neteq_performance_test.cc @@ -39,7 +39,9 @@ int64_t NetEqPerformanceTest::Run(int runtime_ms, // Initialize NetEq instance. NetEq::Config config; config.sample_rate_hz = kSampRateHz; - NetEq* neteq = NetEq::Create(config, CreateBuiltinAudioDecoderFactory()); + webrtc::Clock* clock = webrtc::Clock::GetRealTimeClock(); + NetEq* neteq = + NetEq::Create(config, clock, CreateBuiltinAudioDecoderFactory()); // Register decoder in |neteq|. if (!neteq->RegisterPayloadType(kPayloadType, SdpAudioFormat("l16", kSampRateHz, 1))) @@ -72,7 +74,6 @@ int64_t NetEqPerformanceTest::Run(int runtime_ms, RTC_CHECK_EQ(sizeof(input_payload), payload_len); // Main loop. 
- webrtc::Clock* clock = webrtc::Clock::GetRealTimeClock(); int64_t start_time_ms = clock->TimeInMilliseconds(); AudioFrame out_frame; while (time_now_ms < runtime_ms) { diff --git a/modules/audio_coding/neteq/tools/neteq_quality_test.cc b/modules/audio_coding/neteq/tools/neteq_quality_test.cc index 3bcd5da9bc..ad6aaa5b22 100644 --- a/modules/audio_coding/neteq/tools/neteq_quality_test.cc +++ b/modules/audio_coding/neteq/tools/neteq_quality_test.cc @@ -16,6 +16,7 @@ #include "modules/audio_coding/neteq/tools/output_wav_file.h" #include "modules/audio_coding/neteq/tools/resample_input_audio_file.h" #include "rtc_base/checks.h" +#include "system_wrappers/include/clock.h" #include "test/testsupport/file_utils.h" namespace webrtc { @@ -213,7 +214,8 @@ NetEqQualityTest::NetEqQualityTest( NetEq::Config config; config.sample_rate_hz = out_sampling_khz_ * 1000; - neteq_.reset(NetEq::Create(config, decoder_factory)); + neteq_.reset( + NetEq::Create(config, Clock::GetRealTimeClock(), decoder_factory)); max_payload_bytes_ = in_size_samples_ * channels_ * sizeof(int16_t); in_data_.reset(new int16_t[in_size_samples_ * channels_]); } diff --git a/modules/audio_coding/neteq/tools/neteq_quality_test.h b/modules/audio_coding/neteq/tools/neteq_quality_test.h index e9c6dab0f4..8035414e1e 100644 --- a/modules/audio_coding/neteq/tools/neteq_quality_test.h +++ b/modules/audio_coding/neteq/tools/neteq_quality_test.h @@ -20,6 +20,7 @@ #include "modules/audio_coding/neteq/tools/input_audio_file.h" #include "modules/audio_coding/neteq/tools/rtp_generator.h" #include "rtc_base/flags.h" +#include "system_wrappers/include/clock.h" #include "test/gtest.h" namespace webrtc { diff --git a/modules/audio_coding/neteq/tools/neteq_test.cc b/modules/audio_coding/neteq/tools/neteq_test.cc index be1dd418fc..7e22823d7e 100644 --- a/modules/audio_coding/neteq/tools/neteq_test.cc +++ b/modules/audio_coding/neteq/tools/neteq_test.cc @@ -14,6 +14,7 @@ #include #include "modules/rtp_rtcp/source/byte_io.h" 
+#include "system_wrappers/include/clock.h" namespace webrtc { namespace test { @@ -57,7 +58,8 @@ NetEqTest::NetEqTest(const NetEq::Config& config, std::unique_ptr input, std::unique_ptr output, Callbacks callbacks) - : neteq_(NetEq::Create(config, decoder_factory)), + : clock_(0), + neteq_(NetEq::Create(config, &clock_, decoder_factory)), input_(std::move(input)), output_(std::move(output)), callbacks_(callbacks), @@ -92,6 +94,7 @@ NetEqTest::SimulationStepResult NetEqTest::RunToNextGetAudio() { while (!input_->ended()) { // Advance time to next event. RTC_DCHECK(input_->NextEventTime()); + clock_.AdvanceTimeMilliseconds(*input_->NextEventTime() - time_now_ms); time_now_ms = *input_->NextEventTime(); // Check if it is time to insert packet. if (input_->NextPacketTime() && time_now_ms >= *input_->NextPacketTime()) { diff --git a/modules/audio_coding/neteq/tools/neteq_test.h b/modules/audio_coding/neteq/tools/neteq_test.h index 5261dd7db0..3cf105c693 100644 --- a/modules/audio_coding/neteq/tools/neteq_test.h +++ b/modules/audio_coding/neteq/tools/neteq_test.h @@ -23,6 +23,7 @@ #include "modules/audio_coding/neteq/include/neteq.h" #include "modules/audio_coding/neteq/tools/audio_sink.h" #include "modules/audio_coding/neteq/tools/neteq_input.h" +#include "system_wrappers/include/clock.h" namespace webrtc { namespace test { @@ -106,6 +107,7 @@ class NetEqTest : public NetEqSimulator { private: void RegisterDecoders(const DecoderMap& codecs); + SimulatedClock clock_; absl::optional next_action_; absl::optional last_packet_time_ms_; std::unique_ptr neteq_; diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc index 4aa86f7961..f7ce95234d 100644 --- a/modules/audio_mixer/frame_combiner.cc +++ b/modules/audio_mixer/frame_combiner.cc @@ -57,6 +57,7 @@ void SetAudioFrameFields(const std::vector& mix_list, audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_; audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_; 
audio_frame_for_mixing->ntp_time_ms_ = mix_list[0]->ntp_time_ms_; + audio_frame_for_mixing->packet_infos_ = mix_list[0]->packet_infos_; } }