diff --git a/modules/BUILD.gn b/modules/BUILD.gn index 5c399e4aa2..40ea128200 100644 --- a/modules/BUILD.gn +++ b/modules/BUILD.gn @@ -124,6 +124,7 @@ if (rtc_include_tests) { "../resources/audio_coding/neteq_universal_new.rtp", "../resources/audio_coding/speech_mono_16kHz.pcm", "../resources/audio_coding/speech_mono_32_48kHz.pcm", + "../resources/audio_coding/speech_4_channels_48k_one_second.wav", "../resources/audio_coding/testfile32kHz.pcm", "../resources/audio_coding/teststereo32kHz.pcm", "../resources/audio_device/audio_short16.pcm", diff --git a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc index 357cb1a20d..1accfe42e9 100644 --- a/modules/audio_coding/codecs/opus/audio_decoder_opus.cc +++ b/modules/audio_coding/codecs/opus/audio_decoder_opus.cc @@ -72,7 +72,8 @@ class OpusFrame : public AudioDecoder::EncodedAudioFrame { AudioDecoderOpusImpl::AudioDecoderOpusImpl(size_t num_channels) : channels_(num_channels) { RTC_DCHECK(num_channels == 1 || num_channels == 2); - WebRtcOpus_DecoderCreate(&dec_state_, channels_); + const int error = WebRtcOpus_DecoderCreate(&dec_state_, channels_); + RTC_DCHECK(error == 0); WebRtcOpus_DecoderInit(dec_state_); } diff --git a/modules/audio_coding/codecs/opus/opus_inst.h b/modules/audio_coding/codecs/opus/opus_inst.h index 2473a5c753..41b3f15f28 100644 --- a/modules/audio_coding/codecs/opus/opus_inst.h +++ b/modules/audio_coding/codecs/opus/opus_inst.h @@ -17,16 +17,17 @@ RTC_PUSH_IGNORING_WUNDEF() #include "opus.h" +#include "opus_multistream.h" RTC_POP_IGNORING_WUNDEF() struct WebRtcOpusEncInst { - OpusEncoder* encoder; + OpusMSEncoder* encoder; size_t channels; int in_dtx_mode; }; struct WebRtcOpusDecInst { - OpusDecoder* decoder; + OpusMSDecoder* decoder; int prev_decoded_samples; size_t channels; int in_dtx_mode; diff --git a/modules/audio_coding/codecs/opus/opus_interface.c b/modules/audio_coding/codecs/opus/opus_interface.c index d219098b01..c657a14a6e 100644 --- a/modules/audio_coding/codecs/opus/opus_interface.c +++ b/modules/audio_coding/codecs/opus/opus_interface.c @@ -37,6 +37,40 @@ enum { kWebRtcOpusDefaultFrameSize = 960, }; +int16_t GetSurroundParameters(int channels, + int *streams, + int *coupled_streams, + unsigned char *mapping) { + int opus_error; + int ret = 0; + // Use 'surround encoder create' to get values for 'coupled_streams', + // 'streams' and 'mapping'. + OpusMSEncoder* ms_encoder_ptr = opus_multistream_surround_encoder_create( + 48000, + channels, + /* mapping family */ channels <= 2 ? 0 : 1, + streams, + coupled_streams, + mapping, + OPUS_APPLICATION_VOIP, // Application type shouldn't affect + // streams/mapping values. + &opus_error); + + // This shouldn't fail; if it fails, + // signal an error and return invalid values. + if (opus_error != OPUS_OK || ms_encoder_ptr == NULL) { + ret = -1; + *streams = -1; + *coupled_streams = -1; + } + + // We don't need the encoder. + if (ms_encoder_ptr != NULL) { + opus_multistream_encoder_destroy(ms_encoder_ptr); + } + return ret; +} + int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, size_t channels, int32_t application) { @@ -55,12 +89,26 @@ int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, return -1; } + unsigned char mapping[255]; + memset(mapping, 0, 255); + int streams = -1; + int coupled_streams = -1; + + OpusEncInst* state = calloc(1, sizeof(OpusEncInst)); RTC_DCHECK(state); int error; - state->encoder = opus_encoder_create(48000, (int)channels, opus_app, - &error); + state->encoder = opus_multistream_surround_encoder_create( + 48000, + channels, + /* mapping family */ channels <= 2 ? 0 : 1, + &streams, + &coupled_streams, + mapping, + opus_app, + &error); + if (error != OPUS_OK || !state->encoder) { WebRtcOpus_EncoderFree(state); return -1; @@ -75,7 +123,7 @@ int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) { if (inst) { - opus_encoder_destroy(inst->encoder); + opus_multistream_encoder_destroy(inst->encoder); free(inst); return 0; } else { @@ -94,11 +142,11 @@ int WebRtcOpus_Encode(OpusEncInst* inst, return -1; } - res = opus_encode(inst->encoder, - (const opus_int16*)audio_in, - (int)samples, - encoded, - (opus_int32)length_encoded_buffer); + res = opus_multistream_encode(inst->encoder, + (const opus_int16*)audio_in, + (int)samples, + encoded, + (opus_int32)length_encoded_buffer); if (res <= 0) { return -1; @@ -122,7 +170,7 @@ int WebRtcOpus_Encode(OpusEncInst* inst, int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) { if (inst) { - return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate)); + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate)); } else { return -1; } @@ -130,8 +178,8 @@ int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) { int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) { if (inst) { - return opus_encoder_ctl(inst->encoder, - OPUS_SET_PACKET_LOSS_PERC(loss_rate)); + return opus_multistream_encoder_ctl(inst->encoder, + OPUS_SET_PACKET_LOSS_PERC(loss_rate)); } else { return -1; } @@ -154,13 +202,46 @@ int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) { } else { set_bandwidth = OPUS_BANDWIDTH_FULLBAND; } - return opus_encoder_ctl(inst->encoder, - OPUS_SET_MAX_BANDWIDTH(set_bandwidth)); + return opus_multistream_encoder_ctl(inst->encoder, + OPUS_SET_MAX_BANDWIDTH(set_bandwidth)); +} + +int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst, + int32_t* result_hz) { + opus_int32 max_bandwidth; + int s; + int ret; + + max_bandwidth = 0; + ret = OPUS_OK; + s = 0; + while (ret == OPUS_OK) { + OpusEncoder *enc; + opus_int32 bandwidth; + + ret = opus_multistream_encoder_ctl( + inst->encoder, + OPUS_MULTISTREAM_GET_ENCODER_STATE(s, &enc)); + if (ret == OPUS_BAD_ARG) + break; + if (ret != OPUS_OK) + return -1; + if (opus_encoder_ctl(enc, OPUS_GET_MAX_BANDWIDTH(&bandwidth)) != OPUS_OK) + return -1; + + if (max_bandwidth != 0 && max_bandwidth != bandwidth) + return -1; + + max_bandwidth = bandwidth; + s++; + } + *result_hz = max_bandwidth; + return 0; } int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) { if (inst) { - return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1)); + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1)); } else { return -1; } @@ -168,7 +249,7 @@ int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) { int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) { if (inst) { - return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0)); + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0)); } else { return -1; } @@ -184,21 +265,21 @@ int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) { // last long during a pure silence, if the signal type is not forced. // TODO(minyue): Remove the signal type forcing when Opus DTX works properly // without it. - int ret = opus_encoder_ctl(inst->encoder, - OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); + int ret = opus_multistream_encoder_ctl(inst->encoder, + OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); if (ret != OPUS_OK) return ret; - return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1)); + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_DTX(1)); } int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) { if (inst) { - int ret = opus_encoder_ctl(inst->encoder, - OPUS_SET_SIGNAL(OPUS_AUTO)); + int ret = opus_multistream_encoder_ctl(inst->encoder, + OPUS_SET_SIGNAL(OPUS_AUTO)); if (ret != OPUS_OK) return ret; - return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0)); + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_DTX(0)); } else { return -1; } @@ -206,7 +287,7 @@ int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) { int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) { if (inst) { - return opus_encoder_ctl(inst->encoder, OPUS_SET_VBR(0)); + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_VBR(0)); } else { return -1; } @@ -214,7 +295,7 @@ int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) { int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) { if (inst) { - return opus_encoder_ctl(inst->encoder, OPUS_SET_VBR(1)); + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_VBR(1)); } else { return -1; } @@ -222,7 +303,8 @@ int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) { int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) { if (inst) { - return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity)); + return opus_multistream_encoder_ctl(inst->encoder, + OPUS_SET_COMPLEXITY(complexity)); } else { return -1; } @@ -233,7 +315,8 @@ int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) { return -1; } int32_t bandwidth; - if (opus_encoder_ctl(inst->encoder, OPUS_GET_BANDWIDTH(&bandwidth)) == 0) { + if (opus_multistream_encoder_ctl(inst->encoder, + OPUS_GET_BANDWIDTH(&bandwidth)) == 0) { return bandwidth; } else { return -1; @@ -243,7 +326,8 @@ int32_t WebRtcOpus_GetBandwidth(OpusEncInst* inst) { int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) { if (inst) { - return opus_encoder_ctl(inst->encoder, OPUS_SET_BANDWIDTH(bandwidth)); + return opus_multistream_encoder_ctl(inst->encoder, + OPUS_SET_BANDWIDTH(bandwidth)); } else { return -1; } @@ -253,10 +337,10 @@ int16_t WebRtcOpus_SetForceChannels(OpusEncInst* inst, size_t num_channels) { if (!inst) return -1; if (num_channels == 0) { - return opus_encoder_ctl(inst->encoder, + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_FORCE_CHANNELS(OPUS_AUTO)); } else if (num_channels == 1 || num_channels == 2) { - return opus_encoder_ctl(inst->encoder, + return opus_multistream_encoder_ctl(inst->encoder, OPUS_SET_FORCE_CHANNELS(num_channels)); } else { return -1; @@ -268,16 +352,31 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) { OpusDecInst* state; if (inst != NULL) { - /* Create Opus decoder state. */ + // Create Opus decoder state. state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst)); if (state == NULL) { return -1; } - /* Create new memory, always at 48000 Hz. */ - state->decoder = opus_decoder_create(48000, (int)channels, &error); + unsigned char mapping[255]; + memset(mapping, 0, 255); + int streams = -1; + int coupled_streams = -1; + if (GetSurroundParameters(channels, &streams, + &coupled_streams, mapping) != 0) { + free(state); + return -1; + } + + // Create new memory, always at 48000 Hz. + state->decoder = opus_multistream_decoder_create( + 48000, (int)channels, + /* streams = */ streams, + /* coupled streams = */ coupled_streams, + mapping, + &error); if (error == OPUS_OK && state->decoder != NULL) { - /* Creation of memory all ok. */ + // Creation of memory all ok. state->channels = channels; state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize; state->in_dtx_mode = 0; @@ -285,9 +384,9 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) { return 0; } - /* If memory allocation was unsuccessful, free the entire state. */ + // If memory allocation was unsuccessful, free the entire state. if (state->decoder) { - opus_decoder_destroy(state->decoder); + opus_multistream_decoder_destroy(state->decoder); } free(state); } @@ -296,7 +395,7 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) { int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) { if (inst) { - opus_decoder_destroy(inst->decoder); + opus_multistream_decoder_destroy(inst->decoder); free(inst); return 0; } else { @@ -309,7 +408,7 @@ size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) { } void WebRtcOpus_DecoderInit(OpusDecInst* inst) { - opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE); + opus_multistream_decoder_ctl(inst->decoder, OPUS_RESET_STATE); inst->in_dtx_mode = 0; } @@ -324,6 +423,10 @@ static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) { // fact a 1-byte TOC with a 1-byte payload. That will be erroneously // interpreted as comfort noise output, but such a payload is probably // faulty anyway. + + // TODO(webrtc:10218): This is wrong for multistream opus. Then are several + // single-stream packets glued together with some packet size bytes in + // between. See https://tools.ietf.org/html/rfc6716#appendix-B inst->in_dtx_mode = 1; return 2; // Comfort noise. } else { @@ -338,8 +441,9 @@ static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) { static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded, size_t encoded_bytes, int frame_size, int16_t* decoded, int16_t* audio_type, int decode_fec) { - int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes, - (opus_int16*)decoded, frame_size, decode_fec); + int res = opus_multistream_decode( + inst->decoder, encoded, (opus_int32)encoded_bytes, + (opus_int16*)decoded, frame_size, decode_fec); if (res <= 0) return -1; diff --git a/modules/audio_coding/codecs/opus/opus_interface.h b/modules/audio_coding/codecs/opus/opus_interface.h index ddb4ff9781..0e97734f38 100644 --- a/modules/audio_coding/codecs/opus/opus_interface.h +++ b/modules/audio_coding/codecs/opus/opus_interface.h @@ -125,6 +125,22 @@ int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate); */ int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz); +/**************************************************************************** + * WebRtcOpus_GetMaxPlaybackRate(...) + * + * Queries the maximum playback rate for encoding. If different single-stream + * encoders have different maximum playback rates, this function fails. + * + * Input: + * - inst : Encoder context. + * Output: + * - result_hz : The maximum playback rate in Hz. + * Return value : 0 - Success + * -1 - Error + */ +int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst, + int32_t* result_hz); + /* TODO(minyue): Check whether an API to check the FEC and the packet loss rate * is needed. It might not be very useful since there are not many use cases and * the caller can always maintain the states. */ diff --git a/modules/audio_coding/codecs/opus/opus_unittest.cc b/modules/audio_coding/codecs/opus/opus_unittest.cc index e5f0464f52..50178a9bf9 100644 --- a/modules/audio_coding/codecs/opus/opus_unittest.cc +++ b/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -27,7 +27,7 @@ using ::testing::Values; using ::testing::Combine; // Maximum number of bytes in output bitstream. -const size_t kMaxBytes = 1000; +const size_t kMaxBytes = 2000; // Sample rate of Opus. const size_t kOpusRateKhz = 48; // Number of samples-per-channel in a 20 ms frame, sampled at 48 kHz. @@ -86,10 +86,14 @@ OpusTest::OpusTest() void OpusTest::PrepareSpeechData(size_t channel, int block_length_ms, int loop_length_ms) { + std::map channel_to_basename = { + {1, "audio_coding/testfile32kHz"}, + {2, "audio_coding/teststereo32kHz"}, + {4, "audio_coding/speech_4_channels_48k_one_second"}}; + std::map channel_to_suffix = { + {1, "pcm"}, {2, "pcm"}, {4, "wav"}}; const std::string file_name = webrtc::test::ResourcePath( - (channel == 1) ? "audio_coding/testfile32kHz" - : "audio_coding/teststereo32kHz", - "pcm"); + channel_to_basename[channel], channel_to_suffix[channel]); if (loop_length_ms < block_length_ms) { loop_length_ms = block_length_ms; } @@ -103,7 +107,7 @@ void OpusTest::SetMaxPlaybackRate(WebRtcOpusEncInst* encoder, int32_t set) { opus_int32 bandwidth; EXPECT_EQ(0, WebRtcOpus_SetMaxPlaybackRate(opus_encoder_, set)); - opus_encoder_ctl(opus_encoder_->encoder, OPUS_GET_MAX_BANDWIDTH(&bandwidth)); + EXPECT_EQ(0, WebRtcOpus_GetMaxPlaybackRate(opus_encoder_, &bandwidth)); EXPECT_EQ(expect, bandwidth); } @@ -354,13 +358,13 @@ TEST(OpusTest, OpusCreateFail) { // Test to see that an invalid pointer is caught. EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(NULL, 1, 0)); // Invalid channel number. - EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 3, 0)); + EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 257, 0)); // Invalid applciation mode. EXPECT_EQ(-1, WebRtcOpus_EncoderCreate(&opus_encoder, 1, 2)); EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(NULL, 1)); // Invalid channel number. - EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 3)); + EXPECT_EQ(-1, WebRtcOpus_DecoderCreate(&opus_decoder, 257)); } // Test failing Free. @@ -399,7 +403,8 @@ TEST_P(OpusTest, OpusEncodeDecode) { // Check application mode. opus_int32 app; - opus_encoder_ctl(opus_encoder_->encoder, OPUS_GET_APPLICATION(&app)); + opus_multistream_encoder_ctl(opus_encoder_->encoder, + OPUS_GET_APPLICATION(&app)); EXPECT_EQ(application_ == 0 ? OPUS_APPLICATION_VOIP : OPUS_APPLICATION_AUDIO, app); @@ -450,6 +455,11 @@ TEST_P(OpusTest, OpusSetComplexity) { } TEST_P(OpusTest, OpusSetBandwidth) { + if (channels_ > 2) { + // TODO(webrtc:10217): investigate why multi-stream Opus reports + // narrowband when it's configured with FULLBAND. + return; + } PrepareSpeechData(channels_, 20, 20); int16_t audio_type; @@ -495,7 +505,7 @@ TEST_P(OpusTest, OpusForceChannels) { ASSERT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, application_)); - if (channels_ == 2) { + if (channels_ >= 2) { EXPECT_EQ(-1, WebRtcOpus_SetForceChannels(opus_encoder_, 3)); EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 2)); EXPECT_EQ(0, WebRtcOpus_SetForceChannels(opus_encoder_, 1)); @@ -568,17 +578,17 @@ TEST_P(OpusTest, OpusEnableDisableDtx) { opus_int32 dtx; // DTX is off by default. - opus_encoder_ctl(opus_encoder_->encoder, OPUS_GET_DTX(&dtx)); + opus_multistream_encoder_ctl(opus_encoder_->encoder, OPUS_GET_DTX(&dtx)); EXPECT_EQ(0, dtx); // Test to enable DTX. EXPECT_EQ(0, WebRtcOpus_EnableDtx(opus_encoder_)); - opus_encoder_ctl(opus_encoder_->encoder, OPUS_GET_DTX(&dtx)); + opus_multistream_encoder_ctl(opus_encoder_->encoder, OPUS_GET_DTX(&dtx)); EXPECT_EQ(1, dtx); // Test to disable DTX. EXPECT_EQ(0, WebRtcOpus_DisableDtx(opus_encoder_)); - opus_encoder_ctl(opus_encoder_->encoder, OPUS_GET_DTX(&dtx)); + opus_multistream_encoder_ctl(opus_encoder_->encoder, OPUS_GET_DTX(&dtx)); EXPECT_EQ(0, dtx); // Free memory. @@ -592,6 +602,11 @@ TEST_P(OpusTest, OpusDtxOff) { } TEST_P(OpusTest, OpusDtxOn) { + if (channels_ > 2) { + // TODO(webrtc:10218): adapt the test to the sizes and order of multi-stream + // DTX packets. + return; + } TestDtxEffect(true, 10); TestDtxEffect(true, 20); TestDtxEffect(true, 40); @@ -723,6 +738,12 @@ TEST_P(OpusTest, OpusDurationEstimation) { } TEST_P(OpusTest, OpusDecodeRepacketized) { + if (channels_ > 2) { + // As per the Opus documentation + // https://mf4.xiph.org/jenkins/view/opus/job/opus/ws/doc/html/group__opus__repacketizer.html#details, + // multiple streams are not supported. + return; + } constexpr size_t kPackets = 6; PrepareSpeechData(channels_, 20, 20 * kPackets); @@ -787,6 +808,6 @@ TEST_P(OpusTest, OpusDecodeRepacketized) { INSTANTIATE_TEST_CASE_P(VariousMode, OpusTest, - Combine(Values(1, 2), Values(0, 1))); + Combine(Values(1, 2, 4), Values(0, 1))); } // namespace webrtc diff --git a/resources/audio_coding/speech_4_channels_48k_one_second.wav.sha1 b/resources/audio_coding/speech_4_channels_48k_one_second.wav.sha1 new file mode 100644 index 0000000000..7d3041c601 --- /dev/null +++ b/resources/audio_coding/speech_4_channels_48k_one_second.wav.sha1 @@ -0,0 +1 @@ +a60c7d03ac2ad9af3cfc7640a4979881f6d47c9c \ No newline at end of file