
This CL adds support for sending and receiving stereo using the Opus codec.

BUG=issue1013
Review URL: https://webrtc-codereview.appspot.com/930008

git-svn-id: http://webrtc.googlecode.com/svn/trunk@3050 4adac7df-926f-26a2-2b94-8c16560cd09d
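For context, a minimal sketch of the sender-side call sequence this CL enables. The helper name, the 960-sample (20 ms) frame, the 32000 bps bitrate and the error handling are illustrative assumptions, not part of the CL; only the WebRtcOpus_* calls come from the file below. The receive-side (master/slave decode) counterpart is sketched after the file.

#include <stdint.h>
#include "modules/audio_coding/codecs/opus/interface/opus_interface.h"

/* Hypothetical helper: encode one 20 ms stereo frame sampled at 48 kHz. */
int SendOneStereoFrame(int16_t* pcm_48khz_interleaved, uint8_t* payload,
                       int16_t payload_capacity) {
  OpusEncInst* enc = NULL;
  int16_t bytes;

  /* channels = 2 selects the new stereo mode. */
  if (WebRtcOpus_EncoderCreate(&enc, 2) != 0) {
    return -1;
  }
  WebRtcOpus_SetBitRate(enc, 32000);  /* Illustrative bitrate. */

  /* 20 ms at 48 kHz = 960 samples per channel, interleaved L/R input. */
  bytes = WebRtcOpus_Encode(enc, pcm_48khz_interleaved, 960,
                            payload_capacity, payload);

  WebRtcOpus_EncoderFree(enc);
  return bytes;  /* > 0: encoded payload size in bytes, -1: error. */
}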
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_coding/codecs/opus/interface/opus_interface.h"

#include <stdlib.h>
#include <string.h>

#include "opus.h"

#include "common_audio/signal_processing/resample_by_2_internal.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"

enum {
  /* We always produce 20 ms frames. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 20,

  /* The format allows up to 120 ms frames. Since we don't control the other
   * side, we must allow for packets that large. NetEq is currently limited
   * to 60 ms on the receive side. */
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

  /* Maximum sample count: 48 samples per ms * maximum frame size in ms
   * * 2 (stereo). */
  kWebRtcOpusMaxFrameSize = 48 * kWebRtcOpusMaxDecodeFrameSizeMs * 2,
};

struct WebRtcOpusEncInst {
  OpusEncoder* encoder;
};

int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) {
  OpusEncInst* state;
  state = (OpusEncInst*) calloc(1, sizeof(OpusEncInst));
  if (state) {
    int error;
    /* Always create a 48000 Hz Opus encoder. */
    state->encoder = opus_encoder_create(48000, channels, OPUS_APPLICATION_VOIP,
                                         &error);
    if (error == OPUS_OK && state->encoder != NULL) {
      *inst = state;
      return 0;
    }
    free(state);
  }
  return -1;
}

int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  opus_encoder_destroy(inst->encoder);
  /* Free the wrapper state as well, matching WebRtcOpus_DecoderFree(). */
  free(inst);
  return 0;
}

int16_t WebRtcOpus_Encode(OpusEncInst* inst, int16_t* audio_in, int16_t samples,
                          int16_t length_encoded_buffer, uint8_t* encoded) {
  opus_int16* audio = (opus_int16*) audio_in;
  unsigned char* coded = encoded;
  int res;

  /* |samples| is the number of samples per channel; we only encode 20 ms
   * frames, i.e. at most 960 samples at 48 kHz. */
  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    return -1;
  }

  res = opus_encode(inst->encoder, audio, samples, coded,
                    length_encoded_buffer);

  /* opus_encode() returns the number of bytes written, or a negative
   * error code. */
  if (res > 0) {
    return res;
  }
  return -1;
}

int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
  return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
}

struct WebRtcOpusDecInst {
  /* 48 kHz -> 32 kHz resampler state, one set per channel. */
  int16_t state_48_32_left[8];
  int16_t state_48_32_right[8];
  /* Separate decoder instances for the left (master) and right (slave)
   * channels, so that NetEQ can fetch the two channels in separate calls. */
  OpusDecoder* decoder_left;
  OpusDecoder* decoder_right;
  int channels;
};

int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) {
  OpusDecInst* state;
  state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
  if (state) {
    int error_l;
    int error_r;
    /* Always create 48000 Hz Opus decoders. */
    state->decoder_left = opus_decoder_create(48000, channels, &error_l);
    state->decoder_right = opus_decoder_create(48000, channels, &error_r);
    if (error_l == OPUS_OK && error_r == OPUS_OK &&
        state->decoder_left != NULL && state->decoder_right != NULL) {
      state->channels = channels;
      *inst = state;
      return 0;
    }
    free(state);
    state = NULL;
  }
  return -1;
}

int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
  opus_decoder_destroy(inst->decoder_left);
  opus_decoder_destroy(inst->decoder_right);
  free(inst);
  return 0;
}

/* Resets the master (left channel) decoder and its resampler state. */
int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) {
  int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE);
  if (error == OPUS_OK) {
    memset(inst->state_48_32_left, 0, sizeof(inst->state_48_32_left));
    return 0;
  }
  return -1;
}

/* Resets the slave (right channel) decoder and its resampler state. */
int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) {
  int error = opus_decoder_ctl(inst->decoder_right, OPUS_RESET_STATE);
  if (error == OPUS_OK) {
    memset(inst->state_48_32_right, 0, sizeof(inst->state_48_32_right));
    return 0;
  }
  return -1;
}

static int DecodeNative(OpusDecoder* inst, int16_t* encoded,
                        int16_t encoded_bytes, int16_t* decoded,
                        int16_t* audio_type) {
  unsigned char* coded = (unsigned char*) encoded;
  opus_int16* audio = (opus_int16*) decoded;

  int res = opus_decode(inst, coded, encoded_bytes, audio,
                        kWebRtcOpusMaxFrameSize, 0);
  /* TODO(tlegrand): set to DTX for zero-length packets? */
  *audio_type = 0;

  if (res > 0) {
    return res;
  }
  return -1;
}

int16_t WebRtcOpus_Decode(OpusDecInst* inst, int16_t* encoded,
                          int16_t encoded_bytes, int16_t* decoded,
                          int16_t* audio_type) {
  /* |buffer16| is large enough for 120 ms (the largest Opus packet size) of
   * interleaved stereo audio at 48 kHz; |buffer32| adds room for the
   * 7-sample resampler overlap. */
  int16_t buffer16[kWebRtcOpusMaxFrameSize];
  int32_t buffer32[kWebRtcOpusMaxFrameSize + 7];
  int decoded_samples;
  int blocks;
  int16_t output_samples;
  int i;

  /* In the mono case, this is just a regular call to the decoder.
   * In the stereo case, a call to WebRtcOpus_Decode() gives the left channel
   * as output, and a call to WebRtcOpus_DecodeSlave() gives the right
   * channel. This is to make stereo work with the current setup of NetEQ,
   * which requires two calls to the decoder to produce stereo. */

  /* Decode to a temporary buffer. */
  decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes,
                                 buffer16, audio_type);
  if (decoded_samples < 0) {
    return -1;
  }
  if (inst->channels == 2) {
    /* In the stereo case, |decoded_samples| is the number of sample pairs,
     * so |buffer16| holds |decoded_samples| * 2 interleaved samples. */
    for (i = 0; i < decoded_samples; i++) {
      /* Take every second sample, starting at the first sample. This gives
       * the left channel. */
      buffer16[i] = buffer16[i * 2];
    }
  }
  /* Resample from 48 kHz to 32 kHz. */
  for (i = 0; i < 7; i++) {
    buffer32[i] = inst->state_48_32_left[i];
    inst->state_48_32_left[i] = buffer16[decoded_samples - 7 + i];
  }
  for (i = 0; i < decoded_samples; i++) {
    buffer32[7 + i] = buffer16[i];
  }
  /* Resampling 3 samples to 2. The function divides the input into |blocks|
   * groups of 3 samples and outputs |blocks| groups of 2 samples. */
  blocks = decoded_samples / 3;
  WebRtcSpl_Resample48khzTo32khz(buffer32, buffer32, blocks);
  output_samples = (int16_t) (blocks * 2);
  WebRtcSpl_VectorBitShiftW32ToW16(decoded, output_samples, buffer32, 15);

  return output_samples;
}

int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, int16_t* encoded,
                               int16_t encoded_bytes, int16_t* decoded,
                               int16_t* audio_type) {
  /* |buffer16| is large enough for 120 ms (the largest Opus packet size) of
   * interleaved stereo audio at 48 kHz; |buffer32| adds room for the
   * 7-sample resampler overlap. */
  int16_t buffer16[kWebRtcOpusMaxFrameSize];
  int32_t buffer32[kWebRtcOpusMaxFrameSize + 7];
  int decoded_samples;
  int blocks;
  int16_t output_samples;
  int i;

  /* Decode to a temporary buffer. */
  decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes,
                                 buffer16, audio_type);
  if (decoded_samples < 0) {
    return -1;
  }
  if (inst->channels == 2) {
    /* In the stereo case, |decoded_samples| is the number of sample pairs,
     * so |buffer16| holds |decoded_samples| * 2 interleaved samples. */
    for (i = 0; i < decoded_samples; i++) {
      /* Take every second sample, starting at the second sample. This gives
       * the right channel. */
      buffer16[i] = buffer16[i * 2 + 1];
    }
  } else {
    /* Decode slave should never be called for mono packets. */
    return -1;
  }
  /* Resample from 48 kHz to 32 kHz. */
  for (i = 0; i < 7; i++) {
    buffer32[i] = inst->state_48_32_right[i];
    inst->state_48_32_right[i] = buffer16[decoded_samples - 7 + i];
  }
  for (i = 0; i < decoded_samples; i++) {
    buffer32[7 + i] = buffer16[i];
  }
  /* Resampling 3 samples to 2. The function divides the input into |blocks|
   * groups of 3 samples and outputs |blocks| groups of 2 samples. */
  blocks = decoded_samples / 3;
  WebRtcSpl_Resample48khzTo32khz(buffer32, buffer32, blocks);
  output_samples = (int16_t) (blocks * 2);
  WebRtcSpl_VectorBitShiftW32ToW16(decoded, output_samples, buffer32, 15);

  return output_samples;
}

int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
                             int16_t number_of_lost_frames) {
  /* TODO(tlegrand): We can pass NULL to opus_decode to activate packet
   * loss concealment, but I don't know how many samples
   * number_of_lost_frames corresponds to. */
  return -1;
}
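
For context, a minimal sketch of the receive-side call sequence described in the master/slave comments above: NetEQ decodes each stereo packet twice, once per channel. The helper name, the caller-provided output buffers and the error handling are illustrative assumptions; the decoder is assumed to have been created with WebRtcOpus_DecoderCreate(&dec, 2) and reset via WebRtcOpus_DecoderInit()/WebRtcOpus_DecoderInitSlave().

#include <stdint.h>
#include "modules/audio_coding/codecs/opus/interface/opus_interface.h"

/* Hypothetical helper: decode one stereo packet into separate left/right
 * buffers, output at 32 kHz. */
int DecodeStereoPacket(OpusDecInst* dec, int16_t* payload,
                       int16_t payload_bytes,
                       int16_t* left_32khz, int16_t* right_32khz) {
  int16_t audio_type;
  int16_t left_samples;
  int16_t right_samples;

  /* Master call: decodes the packet and returns the left channel,
   * resampled from 48 kHz to 32 kHz. */
  left_samples = WebRtcOpus_Decode(dec, payload, payload_bytes,
                                   left_32khz, &audio_type);
  if (left_samples < 0) {
    return -1;
  }

  /* Slave call: decodes the same packet again on the right-channel decoder
   * instance and returns the right channel. */
  right_samples = WebRtcOpus_DecodeSlave(dec, payload, payload_bytes,
                                         right_32khz, &audio_type);
  if (right_samples != left_samples) {
    return -1;
  }
  return left_samples;  /* Samples per channel at 32 kHz. */
}

Keeping two independent decoder instances costs memory, but it lets each channel be pulled with its own resampler state and fits NetEQ's existing two-call stereo path without changing its interface.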