platform-external-webrtc/webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.cc
phoglund@webrtc.org 244251a9cd Moved almost all payload-related stuff to the payload registry.
The big benefit is that we no longer have a circular dependency between the media receiver and the payload registry. The payload registry is taking a more central role and now knows how to behave differently for audio and video.

BUG=
TESTED=rtp_rtcp_unittests, vie_auto_test, voe_auto_test

Review URL: https://webrtc-codereview.appspot.com/1078004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3465 4adac7df-926f-26a2-2b94-8c16560cd09d
2013-02-04 13:23:07 +00:00


/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rtp_receiver_audio.h"
#include <cassert> //assert
#include <cstring> // memcpy()
#include <math.h> // pow()
#include "critical_section_wrapper.h"
#include "trace.h"
namespace webrtc {
RTPReceiverAudio::RTPReceiverAudio(const WebRtc_Word32 id,
RtpData* data_callback,
RtpAudioFeedback* incomingMessagesCallback)
: RTPReceiverStrategy(data_callback),
_id(id),
_criticalSectionRtpReceiverAudio(
CriticalSectionWrapper::CreateCriticalSection()),
_lastReceivedFrequency(8000),
_telephoneEvent(false),
_telephoneEventForwardToDecoder(false),
_telephoneEventDetectEndOfTone(false),
_telephoneEventPayloadType(-1),
_cngNBPayloadType(-1),
_cngWBPayloadType(-1),
_cngSWBPayloadType(-1),
_cngFBPayloadType(-1),
_cngPayloadType(-1),
_G722PayloadType(-1),
_lastReceivedG722(false),
_cbAudioFeedback(incomingMessagesCallback)
{
last_payload_.Audio.channels = 1;
}
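// Note: RFC 3551 registers the RTP clock rate for G.722 as 8000 Hz even
// though the codec samples at 16 kHz (a historical erratum kept for
// compatibility). That is why a stream whose last received codec was G.722
// reports a frequency of 8000 here and in CNGPayloadType() below.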
WebRtc_UWord32
RTPReceiverAudio::AudioFrequency() const
{
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
if(_lastReceivedG722)
{
return 8000;
}
return _lastReceivedFrequency;
}
// Out-of-band telephone-event (DTMF) detection
WebRtc_Word32
RTPReceiverAudio::SetTelephoneEventStatus(const bool enable,
const bool forwardToDecoder,
const bool detectEndOfTone)
{
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
_telephoneEvent= enable;
_telephoneEventDetectEndOfTone = detectEndOfTone;
_telephoneEventForwardToDecoder = forwardToDecoder;
return 0;
}
// Is out-of-band telephone-event (DTMF) detection turned on or off?
bool
RTPReceiverAudio::TelephoneEvent() const
{
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
return _telephoneEvent;
}
// Is forwarding of out-of-band telephone events turned on or off?
bool
RTPReceiverAudio::TelephoneEventForwardToDecoder() const
{
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
return _telephoneEventForwardToDecoder;
}
bool
RTPReceiverAudio::TelephoneEventPayloadType(const WebRtc_Word8 payloadType) const
{
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
return (_telephoneEventPayloadType == payloadType)?true:false;
}
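// Returns true if |payloadType| is one of the registered CN payload types,
// reporting the corresponding frequency in |frequency| and, via
// |cngPayloadTypeHasChanged|, whether the active CN payload type changed
// (the caller uses this to decide whether to reset receive statistics).
// For non-CN payload types it records whether the codec is G.722, which
// caps the reported wideband CN frequency at 8000 Hz.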
bool
RTPReceiverAudio::CNGPayloadType(const WebRtc_Word8 payloadType,
                                 WebRtc_UWord32* frequency,
                                 bool* cngPayloadTypeHasChanged)
{
    CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
    *cngPayloadTypeHasChanged = false;

    // We can have four CNG payload types, on 8000Hz, 16000Hz, 32000Hz and
    // 48000Hz.
    if (_cngNBPayloadType == payloadType)
    {
        *frequency = 8000;
        if ((_cngPayloadType != -1) && (_cngPayloadType != _cngNBPayloadType))
            *cngPayloadTypeHasChanged = true;
        _cngPayloadType = _cngNBPayloadType;
        return true;
    } else if (_cngWBPayloadType == payloadType)
    {
        // If the last received codec is G.722 we must report 8000 Hz, since
        // G.722's RTP clock rate is 8000 Hz (RFC 3551).
        if (_lastReceivedG722)
        {
            *frequency = 8000;
        } else
        {
            *frequency = 16000;
        }
        if ((_cngPayloadType != -1) && (_cngPayloadType != _cngWBPayloadType))
            *cngPayloadTypeHasChanged = true;
        _cngPayloadType = _cngWBPayloadType;
        return true;
    } else if (_cngSWBPayloadType == payloadType)
    {
        *frequency = 32000;
        if ((_cngPayloadType != -1) && (_cngPayloadType != _cngSWBPayloadType))
            *cngPayloadTypeHasChanged = true;
        _cngPayloadType = _cngSWBPayloadType;
        return true;
    } else if (_cngFBPayloadType == payloadType)
    {
        *frequency = 48000;
        if ((_cngPayloadType != -1) && (_cngPayloadType != _cngFBPayloadType))
            *cngPayloadTypeHasChanged = true;
        _cngPayloadType = _cngFBPayloadType;
        return true;
    } else
    {
        // Not CNG; remember whether this payload is G.722, since that affects
        // the frequency we report for wideband CNG above.
        _lastReceivedG722 = (_G722PayloadType == payloadType);
    }
    return false;
}
bool RTPReceiverAudio::ShouldReportCsrcChanges(
WebRtc_UWord8 payload_type) const {
// Don't report CSRC changes for DTMF packets; for everything else it's fine.
return !TelephoneEventPayloadType(payload_type);
}
/*
   Sample-based and frame-based codecs, based on RFC 3551.
   NOTE! There is one error in the RFC, stating that G.722 uses
   8 bits/sample. The correct rate is 4 bits/sample.

   name of                               sampling             default
   encoding   sample/frame  bits/sample  rate       ms/frame  ms/packet

   Sample-based audio codecs:
   DVI4       sample        4            var.                 20
   G722       sample        4            16,000               20
   G726-40    sample        5            8,000                20
   G726-32    sample        4            8,000                20
   G726-24    sample        3            8,000                20
   G726-16    sample        2            8,000                20
   L8         sample        8            var.                 20
   L16        sample        16           var.                 20
   PCMA       sample        8            var.                 20
   PCMU       sample        8            var.                 20

   Frame-based audio codecs:
   G723       frame         N/A          8,000      30        30
   G728       frame         N/A          8,000      2.5       20
   G729       frame         N/A          8,000      10        20
   G729D      frame         N/A          8,000      10        20
   G729E      frame         N/A          8,000      10        20
   GSM        frame         N/A          8,000      20        20
   GSM-EFR    frame         N/A          8,000      20        20
   LPC        frame         N/A          8,000      20        20
   MPA        frame         N/A          var.       var.
   G7221      frame         N/A
*/
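// As a worked example of the table above: a default 20 ms G.722 packet
// carries 16,000 samples/s * 0.020 s = 320 samples at 4 bits/sample, i.e.
// 320 * 4 / 8 = 160 bytes of payload (equivalently 64 kbit/s * 20 ms).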
WebRtc_Word32 RTPReceiverAudio::OnNewPayloadTypeCreated(
const char payloadName[RTP_PAYLOAD_NAME_SIZE],
const WebRtc_Word8 payloadType,
const WebRtc_UWord32 frequency) {
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
if (ModuleRTPUtility::StringCompare(payloadName, "telephone-event", 15)) {
_telephoneEventPayloadType = payloadType;
}
if (ModuleRTPUtility::StringCompare(payloadName, "cn", 2)) {
// We can have four CNG payload types, on 8000Hz, 16000Hz, 32000Hz and 48000Hz.
if(frequency == 8000){
_cngNBPayloadType = payloadType;
} else if(frequency == 16000) {
_cngWBPayloadType = payloadType;
} else if(frequency == 32000) {
_cngSWBPayloadType = payloadType;
} else if(frequency == 48000) {
_cngFBPayloadType = payloadType;
} else {
assert(false);
return -1;
}
}
return 0;
}
void RTPReceiverAudio::SendTelephoneEvents(
WebRtc_UWord8 numberOfNewEvents,
WebRtc_UWord8 newEvents[MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS],
WebRtc_UWord8 numberOfRemovedEvents,
WebRtc_UWord8 removedEvents[MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS]) {
// Copy these variables since we can't hold the critsect when we call the
// callback. _cbAudioFeedback and _id are immutable though.
bool telephoneEvent;
bool telephoneEventDetectEndOfTone;
{
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
telephoneEvent = _telephoneEvent;
telephoneEventDetectEndOfTone = _telephoneEventDetectEndOfTone;
}
if (telephoneEvent) {
for (int n = 0; n < numberOfNewEvents; ++n) {
_cbAudioFeedback->OnReceivedTelephoneEvent(
_id, newEvents[n], false);
}
if (telephoneEventDetectEndOfTone) {
for (int n = 0; n < numberOfRemovedEvents; ++n) {
_cbAudioFeedback->OnReceivedTelephoneEvent(
_id, removedEvents[n], true);
}
}
}
}
WebRtc_Word32 RTPReceiverAudio::ParseRtpPacket(
WebRtcRTPHeader* rtpHeader,
const ModuleRTPUtility::PayloadUnion& specificPayload,
const bool isRed,
const WebRtc_UWord8* packet,
const WebRtc_UWord16 packetLength,
const WebRtc_Word64 timestampMs,
const bool isFirstPacket) {
const WebRtc_UWord8* payloadData =
ModuleRTPUtility::GetPayloadData(rtpHeader, packet);
const WebRtc_UWord16 payloadDataLength =
ModuleRTPUtility::GetPayloadDataLength(rtpHeader, packetLength);
return ParseAudioCodecSpecific(rtpHeader, payloadData, payloadDataLength,
specificPayload.Audio, isRed);
}
WebRtc_Word32 RTPReceiverAudio::GetFrequencyHz() const {
return AudioFrequency();
}
RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
    WebRtc_UWord16 lastPayloadLength) const {
  // A CNG payload is only 9 bytes, so a very short last payload most likely
  // means the sender has gone silent rather than dead. In that case we return
  // kRtpNoRtp; the receiver can then cross-check against NetEq's speech type
  // kOutputPLCtoCNG before declaring the stream dead.
  if (lastPayloadLength < 10) {  // Our CNG is 9 bytes.
    return kRtpNoRtp;
  } else {
    return kRtpDead;
  }
}
void RTPReceiverAudio::CheckPayloadChanged(
const WebRtc_Word8 payloadType,
ModuleRTPUtility::PayloadUnion* specificPayload,
bool* shouldResetStatistics,
bool* shouldDiscardChanges) {
*shouldDiscardChanges = false;
*shouldResetStatistics = false;
if (TelephoneEventPayloadType(payloadType)) {
// Don't do callbacks for DTMF packets.
*shouldDiscardChanges = true;
return;
}
// The frequency field is updated for CNG payload types by CNGPayloadType().
bool cngPayloadTypeHasChanged = false;
bool isCngPayloadType = CNGPayloadType(
payloadType, &specificPayload->Audio.frequency,
&cngPayloadTypeHasChanged);
*shouldResetStatistics = cngPayloadTypeHasChanged;
if (isCngPayloadType) {
// Don't do callbacks for CNG packets.
*shouldDiscardChanges = true;
return;
}
}
WebRtc_Word32 RTPReceiverAudio::InvokeOnInitializeDecoder(
RtpFeedback* callback,
const WebRtc_Word32 id,
const WebRtc_Word8 payloadType,
const char payloadName[RTP_PAYLOAD_NAME_SIZE],
const ModuleRTPUtility::PayloadUnion& specificPayload) const {
if (-1 == callback->OnInitializeDecoder(
id, payloadType, payloadName, specificPayload.Audio.frequency,
specificPayload.Audio.channels, specificPayload.Audio.rate)) {
WEBRTC_TRACE(kTraceError, kTraceRtpRtcp, id,
"Failed to create video decoder for payload type:%d",
payloadType);
return -1;
}
return 0;
}
// We are not allowed to hold any critical sections when calling the
// OnReceivedPayloadData callback.
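//
// ParseAudioCodecSpecific does, in order:
//   1. If this is a telephone-event payload (RFC 4733), track which tones
//      are active and report new/ended tones via the audio feedback callback.
//   2. Classify the packet as CNG or speech and set the frame type.
//   3. Drop telephone events that should not be forwarded to the decoder.
//   4. If this is a single-block RED packet, strip the one-byte RED header.
//   5. Hand the remaining payload to the registered data callback.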
WebRtc_Word32
RTPReceiverAudio::ParseAudioCodecSpecific(WebRtcRTPHeader* rtpHeader,
const WebRtc_UWord8* payloadData,
const WebRtc_UWord16 payloadLength,
const ModuleRTPUtility::AudioPayload& audioSpecific,
const bool isRED)
{
WebRtc_UWord8 newEvents[MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS];
WebRtc_UWord8 removedEvents[MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS];
WebRtc_UWord8 numberOfNewEvents = 0;
WebRtc_UWord8 numberOfRemovedEvents = 0;
if(payloadLength == 0)
{
return 0;
}
bool telephoneEventPacket = TelephoneEventPayloadType(rtpHeader->header.payloadType);
if(telephoneEventPacket)
{
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
// RFC 4733 2.3
/*
    0                   1                   2                   3
    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
   |     event     |E|R| volume    |          duration             |
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
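// Each 4-byte event block therefore decodes as: byte 0 = event code (0-15
// are the DTMF tones), byte 1 bit 7 = E (end-of-event) flag, byte 1
// bits 0-5 = volume, bytes 2-3 = duration in timestamp units. The loop
// below only needs the event code (payloadData[4*n]) and the E bit
// (payloadData[4*n+1] & 0x80).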
if(payloadLength % 4 != 0)
{
return -1;
}
WebRtc_UWord8 numberOfEvents = payloadLength / 4;
// sanity
if(numberOfEvents >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS)
{
numberOfEvents = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
}
for (int n = 0; n < numberOfEvents; n++)
{
bool end = (payloadData[(4*n)+1] & 0x80)? true:false;
std::set<WebRtc_UWord8>::iterator event =
_telephoneEventReported.find(payloadData[4*n]);
if(event != _telephoneEventReported.end())
{
// we have already seen this event
if(end)
{
removedEvents[numberOfRemovedEvents]= payloadData[4*n];
numberOfRemovedEvents++;
_telephoneEventReported.erase(payloadData[4*n]);
}
}else
{
if(end)
{
// Don't add it; this is the end of a tone we never saw start.
}else
{
newEvents[numberOfNewEvents] = payloadData[4*n];
numberOfNewEvents++;
_telephoneEventReported.insert(payloadData[4*n]);
}
}
}
// RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
// should not be a problem since we don't care about the duration
// RFC 4733 See 2.5.1.5. & 2.5.2.4. Multiple Events in a Packet
}
// This needs to be called without locks held.
SendTelephoneEvents(numberOfNewEvents, newEvents, numberOfRemovedEvents,
removedEvents);
{
CriticalSectionScoped lock(_criticalSectionRtpReceiverAudio.get());
if(! telephoneEventPacket )
{
_lastReceivedFrequency = audioSpecific.frequency;
}
// Check whether this is a CNG packet; the receiver might want to know.
WebRtc_UWord32 ignored;
bool alsoIgnored;
if(CNGPayloadType(rtpHeader->header.payloadType, &ignored, &alsoIgnored))
{
rtpHeader->type.Audio.isCNG=true;
rtpHeader->frameType = kAudioFrameCN;
}else
{
rtpHeader->frameType = kAudioFrameSpeech;
rtpHeader->type.Audio.isCNG=false;
}
// Check if it's a DTMF event, i.e. something we can play out.
if(telephoneEventPacket)
{
if(!_telephoneEventForwardToDecoder)
{
// don't forward event to decoder
return 0;
}
std::set<WebRtc_UWord8>::iterator first =
_telephoneEventReported.begin();
if(first != _telephoneEventReported.end() && *first > 15)
{
// Only events 0-15 are DTMF tones (0-9, *, #, A-D); don't forward other events.
return 0;
}
}
}
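// RFC 2198 RED: the first payload byte is a block header whose top bit (F)
// indicates whether more block headers follow, and whose low 7 bits carry
// the original payload type. F == 0 means this is the only (primary) block,
// so the RED wrapper is just that single byte.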
if(isRED && !(payloadData[0] & 0x80))
{
// Only one frame is packed into this RED packet; remove the RED wrapper.
rtpHeader->header.payloadType = payloadData[0];
// Strip the single RED header byte so that NetEq gets the plain payload.
return data_callback_->OnReceivedPayloadData(payloadData+1,
payloadLength-1,
rtpHeader);
}
rtpHeader->type.Audio.channel = audioSpecific.channels;
return data_callback_->OnReceivedPayloadData(
payloadData, payloadLength, rtpHeader);
}
} // namespace webrtc