Avoid flagging Opus DTX frames as speech.
Background: After 20 consecutive DTX frames, Opus encodes the background noise in a normal frame and then goes back to outputting DTX frames. Currently all Opus frames are flagged as containing speech. This CL has two effects on outgoing Opus packets: 1. DTX frames are flagged as non-speech. 2. A non-DTX frame that follows 20 consecutive DTX frames is flagged as non-speech. Bug: webrtc:8088 Change-Id: Ic36cf8c9d0a34f55ed4e57858362ad91e3897dda Reviewed-on: https://webrtc-review.googlesource.com/23760 Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org> Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org> Cr-Commit-Position: refs/heads/master@{#20794}
This commit is contained in:
committed by
Commit Bot
parent
e5b5f4638d
commit
36de62e830
@ -380,7 +380,8 @@ AudioEncoderOpusImpl::AudioEncoderOpusImpl(
|
||||
inst_(nullptr),
|
||||
packet_loss_fraction_smoother_(new PacketLossFractionSmoother()),
|
||||
audio_network_adaptor_creator_(audio_network_adaptor_creator),
|
||||
bitrate_smoother_(std::move(bitrate_smoother)) {
|
||||
bitrate_smoother_(std::move(bitrate_smoother)),
|
||||
consecutive_dtx_frames_(0) {
|
||||
RTC_DCHECK(0 <= payload_type && payload_type <= 127);
|
||||
|
||||
// Sanity check of the redundant payload type field that we want to get rid
|
||||
@ -603,14 +604,23 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
|
||||
});
|
||||
input_buffer_.clear();
|
||||
|
||||
bool dtx_frame = (info.encoded_bytes <= 2);
|
||||
|
||||
// Will use new packet size for next encoding.
|
||||
config_.frame_size_ms = next_frame_length_ms_;
|
||||
|
||||
info.encoded_timestamp = first_timestamp_in_buffer_;
|
||||
info.payload_type = payload_type_;
|
||||
info.send_even_if_empty = true; // Allows Opus to send empty packets.
|
||||
info.speech = (info.encoded_bytes > 0);
|
||||
// After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame
|
||||
// coding the background noise. Avoid flagging this frame as speech
|
||||
// (even though there is a probability of the frame being speech).
|
||||
info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20);
|
||||
info.encoder_type = CodecType::kOpus;
|
||||
|
||||
// Increase or reset DTX counter.
|
||||
consecutive_dtx_frames_ = (dtx_frame) ? (consecutive_dtx_frames_ + 1) : (0);
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
|
||||
@ -161,6 +161,7 @@ class AudioEncoderOpusImpl final : public AudioEncoder {
|
||||
rtc::Optional<size_t> overhead_bytes_per_packet_;
|
||||
const std::unique_ptr<SmoothingFilter> bitrate_smoother_;
|
||||
rtc::Optional<int64_t> bitrate_smoother_last_update_time_;
|
||||
int consecutive_dtx_frames_;
|
||||
|
||||
friend struct AudioEncoderOpus;
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderOpusImpl);
|
||||
|
||||
@ -753,4 +753,66 @@ TEST(AudioEncoderOpusTest, SetMaxPlaybackRateFb) {
|
||||
EXPECT_EQ(64000, config.bitrate_bps);
|
||||
}
|
||||
|
||||
// Encodes a looped speech-and-silence recording with DTX enabled and checks
// that the encoder reports a run of more than 20 consecutive packets as
// non-speech (EncodedInfo::speech == false).
TEST(AudioEncoderOpusTest, OpusFlagDtxAsNonSpeech) {
  // Create encoder with DTX enabled.
  AudioEncoderOpusConfig config;
  config.dtx_enabled = true;
  constexpr int payload_type = 17;
  const auto encoder = AudioEncoderOpus::MakeAudioEncoder(config, payload_type);

  // Open file containing speech and silence.
  const std::string kInputFileName =
      webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm");
  test::AudioLoop audio_loop;
  // Use the file as if it were sampled at 48 kHz.
  constexpr int kSampleRateHz = 48000;
  EXPECT_EQ(kSampleRateHz, encoder->SampleRateHz());
  constexpr size_t kMaxLoopLengthSamples =
      kSampleRateHz * 10;  // Max 10 second loop.
  constexpr size_t kInputBlockSizeSamples =
      10 * kSampleRateHz / 1000;  // 10 ms.
  // Fatal assertion: without a successfully initialized loop there is no
  // audio to encode, so continuing would only produce meaningless results.
  ASSERT_TRUE(audio_loop.Init(kInputFileName, kMaxLoopLengthSamples,
                              kInputBlockSizeSamples));

  // Encode.
  AudioEncoder::EncodedInfo info;
  rtc::Buffer encoded(500);
  int nonspeech_frames = 0;      // Length of the current non-speech run.
  int max_nonspeech_frames = 0;  // Longest non-speech run seen so far.
  uint32_t rtp_timestamp = 0u;
  for (size_t i = 0; i < 500; ++i) {
    encoded.Clear();

    // Every second call to the encoder will generate an Opus packet.
    for (int j = 0; j < 2; j++) {
      info =
          encoder->Encode(rtp_timestamp, audio_loop.GetNextBlock(), &encoded);
      rtp_timestamp += kInputBlockSizeSamples;
    }

    // Bookkeeping of number of non-speech frames. The maximum is updated on
    // every non-speech packet (not only when a speech packet ends the run) so
    // that a run still in progress at the last iteration is counted as well.
    if (info.speech) {
      nonspeech_frames = 0;
    } else {
      ++nonspeech_frames;
      if (nonspeech_frames > max_nonspeech_frames)
        max_nonspeech_frames = nonspeech_frames;
    }
  }

  // Maximum number of consecutive non-speech packets should exceed 20.
  EXPECT_GT(max_nonspeech_frames, 20);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
Reference in New Issue
Block a user