Revert "opus: take SILK vad result into account for voice detection"
This reverts commit 686a3709acfedcf0a4c798dd1c5902787c4a266b.

Reason for revert: crbug.com/1144220

Original change's description:
> opus: take SILK vad result into account for voice detection
>
> BUG=webrtc:11643
>
> Change-Id: Idc3a9b6bb7bd1a33f905843e5d6067ae19d5172c
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/176508
> Commit-Queue: Minyue Li <minyue@webrtc.org>
> Reviewed-by: Minyue Li <minyue@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#31743}

TBR=devicentepena@webrtc.org,minyue@webrtc.org,fippo@sip-communicator.org

Bug: webrtc:11643
Change-Id: I9c77e4f6e919c4b648a5783edf4188e1f8114602
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/191485
Commit-Queue: Minyue Li <minyue@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32542}
This commit is contained in:
@ -367,7 +367,8 @@ AudioEncoderOpusImpl::AudioEncoderOpusImpl(
|
||||
inst_(nullptr),
|
||||
packet_loss_fraction_smoother_(new PacketLossFractionSmoother()),
|
||||
audio_network_adaptor_creator_(audio_network_adaptor_creator),
|
||||
bitrate_smoother_(std::move(bitrate_smoother)) {
|
||||
bitrate_smoother_(std::move(bitrate_smoother)),
|
||||
consecutive_dtx_frames_(0) {
|
||||
RTC_DCHECK(0 <= payload_type && payload_type <= 127);
|
||||
|
||||
// Sanity check of the redundant payload type field that we want to get rid
|
||||
@ -589,7 +590,6 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
|
||||
Num10msFramesPerPacket() * SamplesPer10msFrame());
|
||||
|
||||
const size_t max_encoded_bytes = SufficientOutputBufferSize();
|
||||
const size_t start_offset_bytes = encoded->size();
|
||||
EncodedInfo info;
|
||||
info.encoded_bytes = encoded->AppendData(
|
||||
max_encoded_bytes, [&](rtc::ArrayView<uint8_t> encoded) {
|
||||
@ -604,6 +604,8 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
|
||||
});
|
||||
input_buffer_.clear();
|
||||
|
||||
bool dtx_frame = (info.encoded_bytes <= 2);
|
||||
|
||||
// Will use new packet size for next encoding.
|
||||
config_.frame_size_ms = next_frame_length_ms_;
|
||||
|
||||
@ -618,18 +620,14 @@ AudioEncoder::EncodedInfo AudioEncoderOpusImpl::EncodeImpl(
|
||||
info.encoded_timestamp = first_timestamp_in_buffer_;
|
||||
info.payload_type = payload_type_;
|
||||
info.send_even_if_empty = true; // Allows Opus to send empty packets.
|
||||
// After 20 DTX frames (MAX_CONSECUTIVE_DTX) Opus will send a frame
|
||||
// coding the background noise. Avoid flagging this frame as speech
|
||||
// (even though there is a probability of the frame being speech).
|
||||
info.speech = !dtx_frame && (consecutive_dtx_frames_ != 20);
|
||||
info.encoder_type = CodecType::kOpus;
|
||||
|
||||
// Extract the VAD result from the encoded packet.
|
||||
int has_voice = WebRtcOpus_PacketHasVoiceActivity(
|
||||
&encoded->data()[start_offset_bytes], info.encoded_bytes);
|
||||
if (has_voice == -1) {
|
||||
// CELT mode packet or there was an error. This had set the speech flag to
|
||||
// true historically.
|
||||
info.speech = true;
|
||||
} else {
|
||||
info.speech = has_voice;
|
||||
}
|
||||
// Increase or reset DTX counter.
|
||||
consecutive_dtx_frames_ = (dtx_frame) ? (consecutive_dtx_frames_ + 1) : (0);
|
||||
|
||||
return info;
|
||||
}
|
||||
|
@ -172,6 +172,7 @@ class AudioEncoderOpusImpl final : public AudioEncoder {
|
||||
absl::optional<size_t> overhead_bytes_per_packet_;
|
||||
const std::unique_ptr<SmoothingFilter> bitrate_smoother_;
|
||||
absl::optional<int64_t> bitrate_smoother_last_update_time_;
|
||||
int consecutive_dtx_frames_;
|
||||
|
||||
friend struct AudioEncoderOpus;
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderOpusImpl);
|
||||
|
@ -767,7 +767,7 @@ int WebRtcOpus_PacketHasVoiceActivity(const uint8_t* payload,
|
||||
|
||||
int silk_frames = WebRtcOpus_NumSilkFrames(payload);
|
||||
if (silk_frames == 0)
|
||||
return 0;
|
||||
return -1;
|
||||
|
||||
const int channels = opus_packet_get_nb_channels(payload);
|
||||
RTC_DCHECK(channels == 1 || channels == 2);
|
||||
|
@ -975,21 +975,4 @@ TEST(OpusVadTest, TwoOpusMonoFramesVadOnSecond) {
|
||||
EXPECT_TRUE(WebRtcOpus_PacketHasVoiceActivity(twoMonoFrames, 3));
|
||||
}
|
||||
|
||||
TEST(OpusVadTest, DtxEmptyPacket) {
|
||||
const uint8_t dtx[] = {0x78};
|
||||
EXPECT_FALSE(WebRtcOpus_PacketHasVoiceActivity(dtx, 1));
|
||||
}
|
||||
|
||||
TEST(OpusVadTest, DtxBackgroundNoisePacket) {
|
||||
// DTX sends a frame coding background noise every 20 packets:
|
||||
// https://tools.ietf.org/html/rfc6716#section-2.1.9
|
||||
// The packet below represents such a frame and was captured using
|
||||
// Wireshark while disabling encryption.
|
||||
const uint8_t dtx[] = {0x78, 0x07, 0xc9, 0x79, 0xc8, 0xc9, 0x57, 0xc0, 0xa2,
|
||||
0x12, 0x23, 0xfa, 0xef, 0x67, 0xf3, 0x2e, 0xe3, 0xd3,
|
||||
0xd5, 0xe9, 0xec, 0xdb, 0x3e, 0xbc, 0x80, 0xb6, 0x6e,
|
||||
0x2a, 0xb7, 0x8c, 0x83, 0xcd, 0x83, 0xcd, 0x00};
|
||||
EXPECT_FALSE(WebRtcOpus_PacketHasVoiceActivity(dtx, 35));
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
Reference in New Issue
Block a user