Handle padded audio packets correctly

RTP packets can be padded with extra data at the end of the payload. The usable
payload length of the packet should then be reduced with the padding length,
since the padding must be discarded. This was not the case; instead, the entire
payload, including padding data, was forwarded to the audio channel and in the
end to the decoder.

A special case of padding is packets which are empty except for the padding.
That is, they carry no usable payload. These packets are sometimes used for
probing the network and were discarded in
RTPReceiverAudio::ParseAudioCodecSpecific. The result is that NetEq never sees
those empty packets, just the holes in the sequence number series; this can
throw off the target buffer calculations.

With this change, the empty (after removing the padding) packets are let through,
all the way down to NetEq, to a new method called NetEq::InsertEmptyPacket. This
method notifies the DelayManager that an empty packet was received.

BUG=webrtc:7610, webrtc:7625

Review-Url: https://codereview.webrtc.org/2870043003
Cr-Commit-Position: refs/heads/master@{#18083}
This commit is contained in:
henrik.lundin
2017-05-10 07:38:01 -07:00
committed by Commit bot
parent 423a288a12
commit b8c55b15a3
11 changed files with 113 additions and 8 deletions

View File

@ -77,6 +77,11 @@ int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header,
uint32_t receive_timestamp = 0;
const RTPHeader* header = &rtp_header.header; // Just a shorthand.
if (incoming_payload.empty()) {
neteq_->InsertEmptyPacket(rtp_header.header);
return 0;
}
{
rtc::CritScope lock(&crit_sect_);

View File

@ -1079,6 +1079,7 @@ rtc::Optional<SdpAudioFormat> AudioCodingModuleImpl::ReceiveFormat() const {
int AudioCodingModuleImpl::IncomingPacket(const uint8_t* incoming_payload,
const size_t payload_length,
const WebRtcRTPHeader& rtp_header) {
RTC_DCHECK_EQ(payload_length == 0, incoming_payload == nullptr);
return receiver_.InsertPacket(
rtp_header,
rtc::ArrayView<const uint8_t>(incoming_payload, payload_length));

View File

@ -374,6 +374,10 @@ void DelayManager::LastDecodedWasCngOrDtmf(bool it_was) {
}
}
void DelayManager::RegisterEmptyPacket() {
++last_seq_no_;
}
bool DelayManager::SetMinimumDelay(int delay_ms) {
// Minimum delay shouldn't be more than maximum delay, if any maximum is set.
// Also, if possible check |delay| to less than 75% of

View File

@ -95,6 +95,11 @@ class DelayManager {
// speech.
virtual void LastDecodedWasCngOrDtmf(bool it_was);
// Notify the delay manager that empty packets have been received. These are
// packets that are part of the sequence number series, so that an empty
// packet will shift the sequence numbers for the following packets.
virtual void RegisterEmptyPacket();
// Accessors and mutators.
// Assuming |delay| is in valid range.
virtual bool SetMinimumDelay(int delay_ms);

View File

@ -266,6 +266,58 @@ TEST_F(DelayManagerTest, MinAndRequiredDelay) {
EXPECT_EQ(kMinDelayPackets << 8, dm_->TargetLevel());
}
// Tests that skipped sequence numbers (simulating empty packets) are handled
// correctly.
TEST_F(DelayManagerTest, EmptyPacketsReported) {
SetPacketAudioLength(kFrameSizeMs);
// First packet arrival.
InsertNextPacket();
// Advance time by one frame size.
IncreaseTime(kFrameSizeMs);
// Advance the sequence number by 5, simulating that 5 empty packets were
// received, but never inserted.
seq_no_ += 10;
for (int j = 0; j < 10; ++j) {
dm_->RegisterEmptyPacket();
}
// Second packet arrival.
// Expect detector update method to be called once with inter-arrival time
// equal to 1 packet, and (base) target level equal to 1 as well.
// Return false to indicate no peaks found.
EXPECT_CALL(detector_, Update(1, 1)).WillOnce(Return(false));
InsertNextPacket();
EXPECT_EQ(1 << 8, dm_->TargetLevel()); // In Q8.
}
// Same as above, but do not call RegisterEmptyPacket. Observe the target level
// increase dramatically.
TEST_F(DelayManagerTest, EmptyPacketsNotReported) {
SetPacketAudioLength(kFrameSizeMs);
// First packet arrival.
InsertNextPacket();
// Advance time by one frame size.
IncreaseTime(kFrameSizeMs);
// Advance the sequence number by 5, simulating that 5 empty packets were
// received, but never inserted.
seq_no_ += 10;
// Second packet arrival.
// Expect detector update method to be called once with inter-arrival time
// equal to 1 packet, and (base) target level equal to 1 as well.
// Return false to indicate no peaks found.
EXPECT_CALL(detector_, Update(10, 10)).WillOnce(Return(false));
InsertNextPacket();
// Note 10 times higher target value.
EXPECT_EQ(10 * 1 << 8, dm_->TargetLevel()); // In Q8.
}
TEST_F(DelayManagerTest, Failures) {
// Wrong sample rate.
EXPECT_EQ(-1, dm_->Update(0, 0, -1));

View File

@ -145,6 +145,12 @@ class NetEq {
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp) = 0;
// Lets NetEq know that a packet arrived with an empty payload. This typically
// happens when empty packets are used for probing the network channel, and
// these packets use RTP sequence numbers from the same series as the actual
// audio packets.
virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;
// Instructs NetEq to deliver 10 ms of audio data. The data is written to
// |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
// |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and

View File

@ -45,6 +45,7 @@ class MockDelayManager : public DelayManager {
void(int* lower_limit, int* higher_limit));
MOCK_CONST_METHOD0(TargetLevel,
int());
MOCK_METHOD0(RegisterEmptyPacket, void());
MOCK_METHOD1(set_extra_delay_ms,
void(int16_t delay));
MOCK_CONST_METHOD0(base_target_level,

View File

@ -146,6 +146,14 @@ int NetEqImpl::InsertPacket(const RTPHeader& rtp_header,
return kOK;
}
void NetEqImpl::InsertEmptyPacket(const RTPHeader& /*rtp_header*/) {
// TODO(henrik.lundin) Handle NACK as well. This will make use of the
// rtp_header parameter.
// https://bugs.chromium.org/p/webrtc/issues/detail?id=7611
rtc::CritScope lock(&crit_sect_);
delay_manager_->RegisterEmptyPacket();
}
namespace {
void SetAudioFrameActivityAndType(bool vad_enabled,
NetEqImpl::OutputType type,

View File

@ -109,6 +109,8 @@ class NetEqImpl : public webrtc::NetEq {
rtc::ArrayView<const uint8_t> payload,
uint32_t receive_timestamp) override;
void InsertEmptyPacket(const RTPHeader& rtp_header) override;
int GetAudio(AudioFrame* audio_frame, bool* muted) override;
void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;

View File

@ -1284,6 +1284,21 @@ TEST_F(NetEqImplTest, TargetDelayMs) {
EXPECT_EQ(17 * 30, neteq_->TargetDelayMs());
}
TEST_F(NetEqImplTest, InsertEmptyPacket) {
UseNoMocks();
use_mock_delay_manager_ = true;
CreateInstance();
RTPHeader rtp_header;
rtp_header.payloadType = 17;
rtp_header.sequenceNumber = 0x1234;
rtp_header.timestamp = 0x12345678;
rtp_header.ssrc = 0x87654321;
EXPECT_CALL(*mock_delay_manager_, RegisterEmptyPacket());
neteq_->InsertEmptyPacket(rtp_header);
}
class Decoder120ms : public AudioDecoder {
public:
Decoder120ms(int sample_rate_hz, SpeechType speech_type)

View File

@ -212,9 +212,13 @@ int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
size_t payload_length,
const AudioPayload& audio_specific,
bool is_red) {
if (payload_length == 0) {
return 0;
RTC_DCHECK_GE(payload_length, rtp_header->header.paddingLength);
const size_t payload_data_length =
payload_length - rtp_header->header.paddingLength;
if (payload_data_length == 0) {
rtp_header->type.Audio.isCNG = false;
rtp_header->frameType = kEmptyFrame;
return data_callback_->OnReceivedPayloadData(nullptr, 0, rtp_header);
}
bool telephone_event_packet =
@ -229,16 +233,17 @@ int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
// | event |E|R| volume | duration |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
if (payload_length % 4 != 0) {
if (payload_data_length % 4 != 0) {
return -1;
}
size_t number_of_events = payload_length / 4;
size_t number_of_events = payload_data_length / 4;
// sanity
if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
}
for (size_t n = 0; n < number_of_events; ++n) {
RTC_DCHECK_GE(payload_data_length, (4 * n) + 2);
bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
std::set<uint8_t>::iterator event =
@ -291,17 +296,18 @@ int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
}
}
// TODO(holmer): Break this out to have RED parsing handled generically.
RTC_DCHECK_GT(payload_data_length, 0);
if (is_red && !(payload_data[0] & 0x80)) {
// we recive only one frame packed in a RED packet remove the RED wrapper
rtp_header->header.payloadType = payload_data[0];
// only one frame in the RED strip the one byte to help NetEq
return data_callback_->OnReceivedPayloadData(
payload_data + 1, payload_length - 1, rtp_header);
payload_data + 1, payload_data_length - 1, rtp_header);
}
rtp_header->type.Audio.channel = audio_specific.channels;
return data_callback_->OnReceivedPayloadData(
payload_data, payload_length, rtp_header);
return data_callback_->OnReceivedPayloadData(payload_data,
payload_data_length, rtp_header);
}
} // namespace webrtc