Adding DTX to WebRTC Opus wrapper (relanding).

This is relanding of r7846, which failed since the unit test depended on whether Opus is in fixed-point or float-point.

See the review of r7846 here:
https://webrtc-codereview.appspot.com/13219004/

Patch set 1 is the same as r7846. Further fixes are found in patch set 2 and later.

BUG=
R=henrik.lundin@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/32299004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7878 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
minyue@webrtc.org
2014-12-11 16:09:35 +00:00
parent 5f162c8509
commit 0ca768b131
5 changed files with 379 additions and 108 deletions

View File

@ -43,6 +43,7 @@ int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) {
state->encoder = opus_encoder_create(48000, channels, application,
&error);
state->in_dtx_mode = 0;
if (error == OPUS_OK && state->encoder != NULL) {
*inst = state;
return 0;
@ -80,9 +81,21 @@ int16_t WebRtcOpus_Encode(OpusEncInst* inst,
encoded,
length_encoded_buffer);
if (res > 0) {
if (res == 1) {
// Indicates DTX since the packet has nothing but a header. In principle,
// there is no need to send this packet. However, we do transmit the first
// occurrence to let the decoder know that the encoder enters DTX mode.
if (inst->in_dtx_mode) {
return 0;
} else {
inst->in_dtx_mode = 1;
return 1;
}
} else if (res > 1) {
inst->in_dtx_mode = 0;
return res;
}
return -1;
}
@ -140,6 +153,22 @@ int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
}
}
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
if (inst) {
return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
} else {
return -1;
}
}
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
if (inst) {
return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
} else {
return -1;
}
}
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
if (inst) {
return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
@ -165,6 +194,7 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) {
/* Creation of memory all ok. */
state->channels = channels;
state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
state->in_dtx_mode = 0;
*inst = state;
return 0;
}
@ -195,53 +225,61 @@ int WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) {
int error = opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
if (error == OPUS_OK) {
inst->in_dtx_mode = 0;
return 0;
}
return -1;
}
/* For decoder to determine if it is to output speech or comfort noise. */
static int16_t DetermineAudioType(OpusDecInst* inst, int16_t encoded_bytes) {
// Audio type becomes comfort noise if |encoded_byte| is 1 and keeps
// to be so if the following |encoded_byte| are 0 or 1.
if (encoded_bytes == 0 && inst->in_dtx_mode) {
return 2; // Comfort noise.
} else if (encoded_bytes == 1) {
inst->in_dtx_mode = 1;
return 2; // Comfort noise.
} else {
inst->in_dtx_mode = 0;
return 0; // Speech.
}
}
/* |frame_size| is set to maximum Opus frame size in the normal case, and
* is set to the number of samples needed for PLC in case of losses.
* It is up to the caller to make sure the value is correct. */
static int DecodeNative(OpusDecoder* inst, const uint8_t* encoded,
static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded,
int16_t encoded_bytes, int frame_size,
int16_t* decoded, int16_t* audio_type) {
int res = opus_decode(
inst, encoded, encoded_bytes, (opus_int16*)decoded, frame_size, 0);
int16_t* decoded, int16_t* audio_type, int decode_fec) {
int res = opus_decode(inst->decoder, encoded, encoded_bytes,
(opus_int16*)decoded, frame_size, decode_fec);
/* TODO(tlegrand): set to DTX for zero-length packets? */
*audio_type = 0;
if (res <= 0)
return -1;
if (res > 0) {
return res;
}
return -1;
}
*audio_type = DetermineAudioType(inst, encoded_bytes);
static int DecodeFec(OpusDecoder* inst, const uint8_t* encoded,
int16_t encoded_bytes, int frame_size,
int16_t* decoded, int16_t* audio_type) {
int res = opus_decode(
inst, encoded, encoded_bytes, (opus_int16*)decoded, frame_size, 1);
/* TODO(tlegrand): set to DTX for zero-length packets? */
*audio_type = 0;
if (res > 0) {
return res;
}
return -1;
return res;
}
int16_t WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded,
int16_t encoded_bytes, int16_t* decoded,
int16_t* audio_type) {
int decoded_samples = DecodeNative(inst->decoder,
encoded,
encoded_bytes,
kWebRtcOpusMaxFrameSizePerChannel,
decoded,
audio_type);
int decoded_samples;
if (encoded_bytes == 0) {
*audio_type = DetermineAudioType(inst, encoded_bytes);
decoded_samples = WebRtcOpus_DecodePlc(inst, decoded, 1);
} else {
decoded_samples = DecodeNative(inst,
encoded,
encoded_bytes,
kWebRtcOpusMaxFrameSizePerChannel,
decoded,
audio_type,
0);
}
if (decoded_samples < 0) {
return -1;
}
@ -264,8 +302,8 @@ int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
plc_samples = number_of_lost_frames * inst->prev_decoded_samples;
plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ?
plc_samples : kWebRtcOpusMaxFrameSizePerChannel;
decoded_samples = DecodeNative(inst->decoder, NULL, 0, plc_samples,
decoded, &audio_type);
decoded_samples = DecodeNative(inst, NULL, 0, plc_samples,
decoded, &audio_type, 0);
if (decoded_samples < 0) {
return -1;
}
@ -285,8 +323,8 @@ int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
decoded_samples = DecodeFec(inst->decoder, encoded, encoded_bytes,
fec_samples, decoded, audio_type);
decoded_samples = DecodeNative(inst, encoded, encoded_bytes,
fec_samples, decoded, audio_type, 1);
if (decoded_samples < 0) {
return -1;
}