Avoid flagging Opus DTX frames as speech.

Background: After 20 consecutive DTX frames, Opus encodes the background
noise in a normal frame and then goes back to outputting DTX frames.

Currently all Opus frames are flagged as containing speech.

This CL has two effects on outgoing Opus packets:
1. DTX frames are flagged as non-speech.
2. A non-DTX frame that follows 20 consecutive DTX frames is flagged as
   non-speech.

Bug: webrtc:8088
Change-Id: Ic36cf8c9d0a34f55ed4e57858362ad91e3897dda
Reviewed-on: https://webrtc-review.googlesource.com/23760
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20794}
This commit is contained in:
Gustaf Ullberg
2017-11-20 14:55:41 +01:00
committed by Commit Bot
parent e5b5f4638d
commit 36de62e830
4 changed files with 78 additions and 2 deletions

View File

@ -380,7 +380,8 @@ AudioEncoderOpusImpl::AudioEncoderOpusImpl(
inst_(nullptr),
packet_loss_fraction_smoother_(new PacketLossFractionSmoother()),
audio_network_adaptor_creator_(audio_network_adaptor_creator),
bitrate_smoother_(std::move(bitrate_smoother)) {
bitrate_smoother_(std::move(bitrate_smoother)),
consecutive_dtx_frames_(0) {
RTC_DCHECK(0 <= payload_type && payload_type <= 127);
// Sanity check of the redundant payload type field that we want to get rid
@ -603,14 +604,23 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
});
input_buffer_.clear();
bool dtx_frame = (info.encoded_bytes <= 2);
// Will use new packet size for next encoding.
config_.frame_size_ms = next_frame_length_ms_;
info.encoded_timestamp = first_timestamp_in_buffer_;
info.payload_type = payload_type_;
info.send_even_if_empty = true; // Allows Opus to send empty packets.
info.speech = (info.encoded_bytes > 0);
// After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame
// coding the background noise. Avoid flagging this frame as speech
// (even though there is a probability of the frame being speech).
info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20);
info.encoder_type = CodecType::kOpus;
// Increase or reset DTX counter.
consecutive_dtx_frames_ = (dtx_frame) ? (consecutive_dtx_frames_ + 1) : (0);
return info;
}

View File

@ -161,6 +161,7 @@ class AudioEncoderOpusImpl final : public AudioEncoder {
rtc::Optional<size_t> overhead_bytes_per_packet_;
const std::unique_ptr<SmoothingFilter> bitrate_smoother_;
rtc::Optional<int64_t> bitrate_smoother_last_update_time_;
int consecutive_dtx_frames_;
friend struct AudioEncoderOpus;
RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderOpusImpl);

View File

@ -753,4 +753,66 @@ TEST(AudioEncoderOpusTest, SetMaxPlaybackRateFb) {
EXPECT_EQ(64000, config.bitrate_bps);
}
// Checks that, with DTX enabled, the encoder reports a run of more than 20
// consecutive packets as non-speech. A run longer than 20 is only possible if
// the regular frame that Opus emits after 20 consecutive DTX frames is also
// flagged as non-speech.
TEST(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech) {
  // Create encoder with DTX enabled.
  AudioEncoderOpusConfig config;
  config.dtx_enabled = true;
  constexpr int payload_type = 17;
  const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);

  // Open file containing speech and silence.
  const std::string kInputFileName =
      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
  test::AudioLoop audio_loop;
  // Use the file as if it were sampled at 48 kHz.
  constexpr int kSampleRateHz = 48000;
  EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
  constexpr size_t kMaxLoopLengthSamples =
      kSampleRateHz * 10;  // Max 10 second loop.
  constexpr size_t kInputBlockSizeSamples =
      10 * kSampleRateHz / 1000;  // 10 ms.
  // Fatal on failure: without a working audio loop there is nothing valid to
  // feed to the encoder below.
  ASSERT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
                              kInputBlockSizeSamples));

  // Encode.
  AudioEncoder::EncodedInfo info;
  rtc::Buffer encoded(500);
  int nonspeech_frames = 0;      // Length of the current non-speech run.
  int max_nonspeech_frames = 0;  // Longest non-speech run seen so far.
  uint32_t rtp_timestamp = 0u;
  for (size_t i = 0; i < 500; ++i) {
    encoded.Clear();

    // Every second call to the encoder will generate an Opus packet.
    for (int j = 0; j < 2; j++) {
      info =
          encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
      rtp_timestamp += kInputBlockSizeSamples;
    }

    // Bookkeeping of number of non-speech frames.
    if (!info.speech) {
      ++nonspeech_frames;
    } else {
      if (nonspeech_frames > max_nonspeech_frames)
        max_nonspeech_frames = nonspeech_frames;
      nonspeech_frames = 0;
    }
  }
  // Account for a non-speech run that is still open when encoding stops;
  // otherwise a run ending at the last packet would never be counted.
  if (nonspeech_frames > max_nonspeech_frames)
    max_nonspeech_frames = nonspeech_frames;

  // Maximum number of consecutive non-speech packets should exceed 20.
  EXPECT_GT(max_nonspeech_frames, 20);
}
} // namespace webrtc