/* * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "webrtc/modules/rtp_rtcp/source/rtp_sender_audio.h" #include #include "webrtc/base/logging.h" #include "webrtc/base/timeutils.h" #include "webrtc/base/trace_event.h" #include "webrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.h" #include "webrtc/modules/rtp_rtcp/source/byte_io.h" namespace webrtc { static const int kDtmfFrequencyHz = 8000; RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtp_sender) : clock_(clock), rtp_sender_(rtp_sender), packet_size_samples_(160), dtmf_event_is_on_(false), dtmf_event_first_packet_sent_(false), dtmf_payload_type_(-1), dtmf_timestamp_(0), dtmf_key_(0), dtmf_length_samples_(0), dtmf_level_(0), dtmf_time_last_sent_(0), dtmf_timestamp_last_sent_(0), red_payload_type_(-1), inband_vad_active_(false), cngnb_payload_type_(-1), cngwb_payload_type_(-1), cngswb_payload_type_(-1), cngfb_payload_type_(-1), last_payload_type_(-1), audio_level_dbov_(0) {} RTPSenderAudio::~RTPSenderAudio() {} int RTPSenderAudio::AudioFrequency() const { return kDtmfFrequencyHz; } // set audio packet size, used to determine when it's time to send a DTMF packet // in silence (CNG) int32_t RTPSenderAudio::SetAudioPacketSize(uint16_t packet_size_samples) { rtc::CritScope cs(&send_audio_critsect_); packet_size_samples_ = packet_size_samples; return 0; } int32_t RTPSenderAudio::RegisterAudioPayload( const char payloadName[RTP_PAYLOAD_NAME_SIZE], const int8_t payload_type, const uint32_t frequency, const size_t channels, const uint32_t rate, RtpUtility::Payload** payload) { if (RtpUtility::StringCompare(payloadName, "cn", 2)) { rtc::CritScope cs(&send_audio_critsect_); // we can have multiple CNG payload types switch (frequency) { case 8000: cngnb_payload_type_ = payload_type; break; case 16000: cngwb_payload_type_ = payload_type; break; case 32000: cngswb_payload_type_ = payload_type; break; case 48000: cngfb_payload_type_ = payload_type; break; default: return -1; } } else if (RtpUtility::StringCompare(payloadName, "telephone-event", 15)) { rtc::CritScope cs(&send_audio_critsect_); // Don't add it to the list // we dont want to allow send with a DTMF payloadtype dtmf_payload_type_ = payload_type; return 0; // The default timestamp rate is 8000 Hz, but other rates may be defined. } *payload = new RtpUtility::Payload; (*payload)->typeSpecific.Audio.frequency = frequency; (*payload)->typeSpecific.Audio.channels = channels; (*payload)->typeSpecific.Audio.rate = rate; (*payload)->audio = true; (*payload)->name[RTP_PAYLOAD_NAME_SIZE - 1] = '\0'; strncpy((*payload)->name, payloadName, RTP_PAYLOAD_NAME_SIZE - 1); return 0; } bool RTPSenderAudio::MarkerBit(FrameType frame_type, int8_t payload_type) { rtc::CritScope cs(&send_audio_critsect_); // for audio true for first packet in a speech burst bool marker_bit = false; if (last_payload_type_ != payload_type) { if (payload_type != -1 && (cngnb_payload_type_ == payload_type || cngwb_payload_type_ == payload_type || cngswb_payload_type_ == payload_type || cngfb_payload_type_ == payload_type)) { // Only set a marker bit when we change payload type to a non CNG return false; } // payload_type differ if (last_payload_type_ == -1) { if (frame_type != kAudioFrameCN) { // first packet and NOT CNG return true; } else { // first packet and CNG inband_vad_active_ = true; return false; } } // not first packet AND // not CNG AND // payload_type changed // set a marker bit when we change payload type marker_bit = true; } // For G.723 G.729, AMR etc we can have inband VAD if (frame_type == kAudioFrameCN) { inband_vad_active_ = true; } else if (inband_vad_active_) { inband_vad_active_ = false; marker_bit = true; } return marker_bit; } bool RTPSenderAudio::SendAudio(FrameType frame_type, int8_t payload_type, uint32_t capture_timestamp, const uint8_t* payload_data, size_t data_size, const RTPFragmentationHeader* fragmentation) { // TODO(pwestin) Breakup function in smaller functions. size_t payload_size = data_size; size_t max_payload_length = rtp_sender_->MaxPayloadLength(); uint16_t dtmf_length_ms = 0; uint8_t key = 0; int red_payload_type; uint8_t audio_level_dbov; int8_t dtmf_payload_type; uint16_t packet_size_samples; { rtc::CritScope cs(&send_audio_critsect_); red_payload_type = red_payload_type_; audio_level_dbov = audio_level_dbov_; dtmf_payload_type = dtmf_payload_type_; packet_size_samples = packet_size_samples_; } // Check if we have pending DTMFs to send if (!dtmf_event_is_on_ && PendingDTMF()) { int64_t delaySinceLastDTMF = clock_->TimeInMilliseconds() - dtmf_time_last_sent_; if (delaySinceLastDTMF > 100) { // New tone to play dtmf_timestamp_ = capture_timestamp; if (NextDTMF(&key, &dtmf_length_ms, &dtmf_level_) >= 0) { dtmf_event_first_packet_sent_ = false; dtmf_key_ = key; dtmf_length_samples_ = (kDtmfFrequencyHz / 1000) * dtmf_length_ms; dtmf_event_is_on_ = true; } } } // A source MAY send events and coded audio packets for the same time // but we don't support it if (dtmf_event_is_on_) { if (frame_type == kEmptyFrame) { // kEmptyFrame is used to drive the DTMF when in CN mode // it can be triggered more frequently than we want to send the // DTMF packets. if (packet_size_samples > (capture_timestamp - dtmf_timestamp_last_sent_)) { // not time to send yet return true; } } dtmf_timestamp_last_sent_ = capture_timestamp; uint32_t dtmf_duration_samples = capture_timestamp - dtmf_timestamp_; bool ended = false; bool send = true; if (dtmf_length_samples_ > dtmf_duration_samples) { if (dtmf_duration_samples <= 0) { // Skip send packet at start, since we shouldn't use duration 0 send = false; } } else { ended = true; dtmf_event_is_on_ = false; dtmf_time_last_sent_ = clock_->TimeInMilliseconds(); } if (send) { if (dtmf_duration_samples > 0xffff) { // RFC 4733 2.5.2.3 Long-Duration Events SendTelephoneEventPacket(ended, dtmf_payload_type, dtmf_timestamp_, static_cast(0xffff), false); // set new timestap for this segment dtmf_timestamp_ = capture_timestamp; dtmf_duration_samples -= 0xffff; dtmf_length_samples_ -= 0xffff; return SendTelephoneEventPacket( ended, dtmf_payload_type, dtmf_timestamp_, static_cast(dtmf_duration_samples), false); } else { if (!SendTelephoneEventPacket(ended, dtmf_payload_type, dtmf_timestamp_, dtmf_duration_samples, !dtmf_event_first_packet_sent_)) { return false; } dtmf_event_first_packet_sent_ = true; return true; } } return true; } if (payload_size == 0 || payload_data == NULL) { if (frame_type == kEmptyFrame) { // we don't send empty audio RTP packets // no error since we use it to drive DTMF when we use VAD return true; } return false; } uint8_t data_buffer[IP_PACKET_SIZE]; bool marker_bit = MarkerBit(frame_type, payload_type); int32_t rtpHeaderLength = 0; uint16_t timestampOffset = 0; if (red_payload_type >= 0 && fragmentation && !marker_bit && fragmentation->fragmentationVectorSize > 1) { // have we configured RED? use its payload type // we need to get the current timestamp to calc the diff uint32_t old_timestamp = rtp_sender_->Timestamp(); rtpHeaderLength = rtp_sender_->BuildRtpHeader(data_buffer, red_payload_type, marker_bit, capture_timestamp, clock_->TimeInMilliseconds()); timestampOffset = uint16_t(rtp_sender_->Timestamp() - old_timestamp); } else { rtpHeaderLength = rtp_sender_->BuildRtpHeader(data_buffer, payload_type, marker_bit, capture_timestamp, clock_->TimeInMilliseconds()); } if (rtpHeaderLength <= 0) { return false; } if (max_payload_length < (rtpHeaderLength + payload_size)) { // Too large payload buffer. return false; } if (red_payload_type >= 0 && // Have we configured RED? fragmentation && fragmentation->fragmentationVectorSize > 1 && !marker_bit) { if (timestampOffset <= 0x3fff) { if (fragmentation->fragmentationVectorSize != 2) { // we only support 2 codecs when using RED return false; } // only 0x80 if we have multiple blocks data_buffer[rtpHeaderLength++] = 0x80 + fragmentation->fragmentationPlType[1]; size_t blockLength = fragmentation->fragmentationLength[1]; // sanity blockLength if (blockLength > 0x3ff) { // block length 10 bits 1023 bytes return false; } uint32_t REDheader = (timestampOffset << 10) + blockLength; ByteWriter::WriteBigEndian(data_buffer + rtpHeaderLength, REDheader); rtpHeaderLength += 3; data_buffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0]; // copy the RED data memcpy(data_buffer + rtpHeaderLength, payload_data + fragmentation->fragmentationOffset[1], fragmentation->fragmentationLength[1]); // copy the normal data memcpy( data_buffer + rtpHeaderLength + fragmentation->fragmentationLength[1], payload_data + fragmentation->fragmentationOffset[0], fragmentation->fragmentationLength[0]); payload_size = fragmentation->fragmentationLength[0] + fragmentation->fragmentationLength[1]; } else { // silence for too long send only new data data_buffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0]; memcpy(data_buffer + rtpHeaderLength, payload_data + fragmentation->fragmentationOffset[0], fragmentation->fragmentationLength[0]); payload_size = fragmentation->fragmentationLength[0]; } } else { if (fragmentation && fragmentation->fragmentationVectorSize > 0) { // use the fragment info if we have one data_buffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0]; memcpy(data_buffer + rtpHeaderLength, payload_data + fragmentation->fragmentationOffset[0], fragmentation->fragmentationLength[0]); payload_size = fragmentation->fragmentationLength[0]; } else { memcpy(data_buffer + rtpHeaderLength, payload_data, payload_size); } } { rtc::CritScope cs(&send_audio_critsect_); last_payload_type_ = payload_type; } // Update audio level extension, if included. size_t packetSize = payload_size + rtpHeaderLength; RtpUtility::RtpHeaderParser rtp_parser(data_buffer, packetSize); RTPHeader rtp_header; rtp_parser.Parse(&rtp_header); rtp_sender_->UpdateAudioLevel(data_buffer, packetSize, rtp_header, (frame_type == kAudioFrameSpeech), audio_level_dbov); TRACE_EVENT_ASYNC_END2("webrtc", "Audio", capture_timestamp, "timestamp", rtp_sender_->Timestamp(), "seqnum", rtp_sender_->SequenceNumber()); bool send_result = rtp_sender_->SendToNetwork( data_buffer, payload_size, rtpHeaderLength, rtc::TimeMillis(), kAllowRetransmission, RtpPacketSender::kHighPriority); if (first_packet_sent_()) { LOG(LS_INFO) << "First audio RTP packet sent to pacer"; } return send_result; } // Audio level magnitude and voice activity flag are set for each RTP packet int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dbov) { if (level_dbov > 127) { return -1; } rtc::CritScope cs(&send_audio_critsect_); audio_level_dbov_ = level_dbov; return 0; } // Set payload type for Redundant Audio Data RFC 2198 int32_t RTPSenderAudio::SetRED(int8_t payload_type) { if (payload_type < -1) { return -1; } rtc::CritScope cs(&send_audio_critsect_); red_payload_type_ = payload_type; return 0; } // Get payload type for Redundant Audio Data RFC 2198 int32_t RTPSenderAudio::RED(int8_t* payload_type) const { rtc::CritScope cs(&send_audio_critsect_); if (red_payload_type_ == -1) { // not configured return -1; } *payload_type = red_payload_type_; return 0; } // Send a TelephoneEvent tone using RFC 2833 (4733) int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key, uint16_t time_ms, uint8_t level) { { rtc::CritScope lock(&send_audio_critsect_); if (dtmf_payload_type_ < 0) { // TelephoneEvent payloadtype not configured return -1; } } return AddDTMF(key, time_ms, level); } bool RTPSenderAudio::SendTelephoneEventPacket(bool ended, int8_t dtmf_payload_type, uint32_t dtmf_timestamp, uint16_t duration, bool marker_bit) { uint8_t dtmfbuffer[IP_PACKET_SIZE]; uint8_t send_count = 1; bool result = true; if (ended) { // resend last packet in an event 3 times send_count = 3; } do { // Send DTMF data int32_t header_length = rtp_sender_->BuildRtpHeader( dtmfbuffer, dtmf_payload_type, marker_bit, dtmf_timestamp, clock_->TimeInMilliseconds()); if (header_length <= 0) return false; // reset CSRC and X bit dtmfbuffer[0] &= 0xe0; // Create DTMF data /* From RFC 2833: 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | event |E|R| volume | duration | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ // R bit always cleared uint8_t R = 0x00; uint8_t volume = dtmf_level_; // First packet un-ended uint8_t E = ended ? 0x80 : 0x00; // First byte is Event number, equals key number dtmfbuffer[12] = dtmf_key_; dtmfbuffer[13] = E | R | volume; ByteWriter::WriteBigEndian(dtmfbuffer + 14, duration); TRACE_EVENT_INSTANT2( TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::SendTelephoneEvent", "timestamp", dtmf_timestamp, "seqnum", rtp_sender_->SequenceNumber()); result = rtp_sender_->SendToNetwork(dtmfbuffer, 4, 12, rtc::TimeMillis(), kAllowRetransmission, RtpPacketSender::kHighPriority); send_count--; } while (send_count > 0 && result); return result; } } // namespace webrtc