Introduce injectable NetEqController interface.

This interface is implemented by the DecisionLogic class, which now contains the DelayManager and DelayPeakDetector.

Bug: webrtc:11005
Change-Id: I4fb69fa359e60831cf153e41f101d5b623749380
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155176
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Commit-Queue: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29613}
This commit is contained in:
Ivo Creusen
2019-10-24 15:20:39 +02:00
committed by Commit Bot
parent 16cec3be2c
commit 53a31f7db8
17 changed files with 629 additions and 540 deletions

View File

@ -25,13 +25,10 @@
#include "modules/audio_coding/codecs/cng/webrtc_cng.h"
#include "modules/audio_coding/neteq/accelerate.h"
#include "modules/audio_coding/neteq/background_noise.h"
#include "modules/audio_coding/neteq/buffer_level_filter.h"
#include "modules/audio_coding/neteq/comfort_noise.h"
#include "modules/audio_coding/neteq/decision_logic.h"
#include "modules/audio_coding/neteq/decoder_database.h"
#include "modules/audio_coding/neteq/defines.h"
#include "modules/audio_coding/neteq/delay_manager.h"
#include "modules/audio_coding/neteq/delay_peak_detector.h"
#include "modules/audio_coding/neteq/dtmf_buffer.h"
#include "modules/audio_coding/neteq/dtmf_tone_generator.h"
#include "modules/audio_coding/neteq/expand.h"
@ -57,6 +54,24 @@
#include "system_wrappers/include/clock.h"
namespace webrtc {
namespace {
std::unique_ptr<NetEqController> CreateNetEqController(
int base_min_delay,
int max_packets_in_buffer,
bool enable_rtx_handling,
bool allow_time_stretching,
TickTimer* tick_timer) {
NetEqController::Config config;
config.base_min_delay_ms = base_min_delay;
config.max_packets_in_buffer = max_packets_in_buffer;
config.enable_rtx_handling = enable_rtx_handling;
config.allow_time_stretching = allow_time_stretching;
config.tick_timer = tick_timer;
return std::make_unique<DecisionLogic>(std::move(config));
}
} // namespace
NetEqImpl::Dependencies::Dependencies(
const NetEq::Config& config,
@ -65,21 +80,18 @@ NetEqImpl::Dependencies::Dependencies(
: clock(clock),
tick_timer(new TickTimer),
stats(new StatisticsCalculator),
buffer_level_filter(new BufferLevelFilter),
decoder_database(
new DecoderDatabase(decoder_factory, config.codec_pair_id)),
delay_peak_detector(
new DelayPeakDetector(tick_timer.get(), config.enable_rtx_handling)),
delay_manager(DelayManager::Create(config.max_packets_in_buffer,
config.min_delay_ms,
config.enable_rtx_handling,
delay_peak_detector.get(),
tick_timer.get(),
stats.get())),
dtmf_buffer(new DtmfBuffer(config.sample_rate_hz)),
dtmf_tone_generator(new DtmfToneGenerator),
packet_buffer(
new PacketBuffer(config.max_packets_in_buffer, tick_timer.get())),
neteq_controller(
CreateNetEqController(config.min_delay_ms,
config.max_packets_in_buffer,
config.enable_rtx_handling,
!config.for_test_no_time_stretching,
tick_timer.get())),
red_payload_splitter(new RedPayloadSplitter),
timestamp_scaler(new TimestampScaler(*decoder_database)),
accelerate_factory(new AccelerateFactory),
@ -93,10 +105,7 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
bool create_components)
: clock_(deps.clock),
tick_timer_(std::move(deps.tick_timer)),
buffer_level_filter_(std::move(deps.buffer_level_filter)),
decoder_database_(std::move(deps.decoder_database)),
delay_manager_(std::move(deps.delay_manager)),
delay_peak_detector_(std::move(deps.delay_peak_detector)),
dtmf_buffer_(std::move(deps.dtmf_buffer)),
dtmf_tone_generator_(std::move(deps.dtmf_tone_generator)),
packet_buffer_(std::move(deps.packet_buffer)),
@ -107,6 +116,7 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
accelerate_factory_(std::move(deps.accelerate_factory)),
preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)),
stats_(std::move(deps.stats)),
controller_(std::move(deps.neteq_controller)),
last_mode_(kModeNormal),
decoded_buffer_length_(kMaxFrameSize),
decoded_buffer_(new int16_t[decoded_buffer_length_]),
@ -133,11 +143,12 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config,
<< "Changing to 8000 Hz.";
fs = 8000;
}
delay_manager_->SetMaximumDelay(config.max_delay_ms);
controller_->SetMaximumDelay(config.max_delay_ms);
fs_hz_ = fs;
fs_mult_ = fs / 8000;
last_output_sample_rate_hz_ = fs;
output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
controller_->SetSampleRate(fs_hz_, output_size_samples_);
decoder_frame_length_ = 3 * output_size_samples_;
if (create_components) {
SetSampleRateAndChannels(fs, 1); // Default is 1 channel.
@ -166,7 +177,7 @@ void NetEqImpl::InsertEmptyPacket(const RTPHeader& /*rtp_header*/) {
// rtp_header parameter.
// https://bugs.chromium.org/p/webrtc/issues/detail?id=7611
rtc::CritScope lock(&crit_sect_);
delay_manager_->RegisterEmptyPacket();
controller_->RegisterEmptyPacket();
}
namespace {
@ -279,8 +290,8 @@ void NetEqImpl::RemoveAllPayloadTypes() {
bool NetEqImpl::SetMinimumDelay(int delay_ms) {
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
assert(delay_manager_.get());
return delay_manager_->SetMinimumDelay(delay_ms);
assert(controller_.get());
return controller_->SetMinimumDelay(delay_ms);
}
return false;
}
@ -288,8 +299,8 @@ bool NetEqImpl::SetMinimumDelay(int delay_ms) {
bool NetEqImpl::SetMaximumDelay(int delay_ms) {
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
assert(delay_manager_.get());
return delay_manager_->SetMaximumDelay(delay_ms);
assert(controller_.get());
return controller_->SetMaximumDelay(delay_ms);
}
return false;
}
@ -297,32 +308,28 @@ bool NetEqImpl::SetMaximumDelay(int delay_ms) {
bool NetEqImpl::SetBaseMinimumDelayMs(int delay_ms) {
rtc::CritScope lock(&crit_sect_);
if (delay_ms >= 0 && delay_ms <= 10000) {
return delay_manager_->SetBaseMinimumDelay(delay_ms);
return controller_->SetBaseMinimumDelay(delay_ms);
}
return false;
}
int NetEqImpl::GetBaseMinimumDelayMs() const {
rtc::CritScope lock(&crit_sect_);
return delay_manager_->GetBaseMinimumDelay();
return controller_->GetBaseMinimumDelay();
}
int NetEqImpl::TargetDelayMs() const {
rtc::CritScope lock(&crit_sect_);
RTC_DCHECK(delay_manager_.get());
// The value from TargetLevel() is in number of packets, represented in Q8.
const size_t target_delay_samples =
(delay_manager_->TargetLevel() * decoder_frame_length_) >> 8;
return static_cast<int>(target_delay_samples) /
rtc::CheckedDivExact(fs_hz_, 1000);
RTC_DCHECK(controller_.get());
return controller_->TargetLevelMs();
}
int NetEqImpl::FilteredCurrentDelayMs() const {
rtc::CritScope lock(&crit_sect_);
// Sum up the filtered packet buffer level with the future length of the sync
// buffer.
const int delay_samples = buffer_level_filter_->filtered_current_level() +
sync_buffer_->FutureLength();
const int delay_samples =
controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength();
// The division below will truncate. The return value is in ms.
return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000);
}
@ -333,12 +340,9 @@ int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
const size_t total_samples_in_buffers =
packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) +
sync_buffer_->FutureLength();
assert(delay_manager_.get());
assert(decision_logic_.get());
const int ms_per_packet = rtc::dchecked_cast<int>(
decision_logic_->packet_length_samples() / (fs_hz_ / 1000));
stats_->PopulateDelayManagerStats(ms_per_packet, *delay_manager_.get(),
stats);
assert(controller_.get());
stats->preferred_buffer_size_ms = controller_->TargetLevelMs();
stats->jitter_peaks_found = controller_->PeakFound();
stats_->GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
decoder_frame_length_, stats);
return 0;
@ -712,38 +716,27 @@ int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
}
}
// TODO(hlundin): Move this code to DelayManager class.
const DecoderDatabase::DecoderInfo* dec_info =
decoder_database_->GetDecoderInfo(main_payload_type);
assert(dec_info); // Already checked that the payload type is known.
delay_manager_->LastDecodedWasCngOrDtmf(dec_info->IsComfortNoise() ||
dec_info->IsDtmf());
if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
// Calculate the total speech length carried in each packet.
if (number_of_primary_packets > 0) {
const size_t packet_length_samples =
number_of_primary_packets * decoder_frame_length_;
if (packet_length_samples != decision_logic_->packet_length_samples()) {
decision_logic_->set_packet_length_samples(packet_length_samples);
delay_manager_->SetPacketAudioLength(
rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz_));
}
}
// Update statistics.
if ((enable_rtx_handling_ || (int32_t)(main_timestamp - timestamp_) >= 0) &&
!new_codec_) {
// Only update statistics if incoming packet is not older than last played
// out packet or RTX handling is enabled, and if new codec flag is not
// set.
delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz_);
}
} else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
// This is first "normal" packet after CNG or DTMF.
// Reset packet time counter and measure time until next packet,
// but don't update statistics.
delay_manager_->set_last_pack_cng_or_dtmf(0);
delay_manager_->ResetPacketIatCount();
const bool last_cng_or_dtmf =
dec_info->IsComfortNoise() || dec_info->IsDtmf();
const size_t packet_length_samples =
number_of_primary_packets * decoder_frame_length_;
// Only update statistics if incoming packet is not older than last played
// out packet or RTX handling is enabled, and if new codec flag is not
// set.
const bool should_update_stats =
(enable_rtx_handling_ ||
static_cast<int32_t>(main_timestamp - timestamp_) >= 0) &&
!new_codec_;
auto relative_delay = controller_->PacketArrived(
last_cng_or_dtmf, packet_length_samples, should_update_stats,
main_sequence_number, main_timestamp, fs_hz_);
if (relative_delay) {
stats_->RelativePacketArrivalDelay(relative_delay.value());
}
return 0;
}
@ -1018,10 +1011,10 @@ int NetEqImpl::GetDecision(Operations* operation,
uint64_t generated_noise_samples =
generated_noise_stopwatch_ ? (generated_noise_stopwatch_->ElapsedTicks() -
1) * output_size_samples_ +
decision_logic_->noise_fast_forward()
controller_->noise_fast_forward()
: 0;
if (decision_logic_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
if (controller_->CngRfc3389On() || last_mode_ == kModeRfc3389Cng) {
// Because of timestamp peculiarities, we have to "manually" disallow using
// a CNG packet with the same timestamp as the one that was last played.
// This can happen when using redundancy and will cause the timing to shift.
@ -1050,7 +1043,7 @@ int NetEqImpl::GetDecision(Operations* operation,
last_mode_ == kModePreemptiveExpandSuccess ||
last_mode_ == kModePreemptiveExpandLowEnergy) {
// Subtract (samples_left + output_size_samples_) from sampleMemory.
decision_logic_->AddSampleMemory(
controller_->AddSampleMemory(
-(samples_left + rtc::dchecked_cast<int>(output_size_samples_)));
}
@ -1067,11 +1060,31 @@ int NetEqImpl::GetDecision(Operations* operation,
generated_noise_samples =
generated_noise_stopwatch_
? generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ +
decision_logic_->noise_fast_forward()
controller_->noise_fast_forward()
: 0;
*operation = decision_logic_->GetDecision(
*sync_buffer_, *expand_, decoder_frame_length_, packet, last_mode_,
*play_dtmf, generated_noise_samples, &reset_decoder_);
NetEqController::NetEqStatus status;
status.packet_buffer_info.dtx_or_cng =
packet_buffer_->ContainsDtxOrCngPacket(decoder_database_.get());
status.packet_buffer_info.num_samples =
packet_buffer_->NumSamplesInBuffer(decoder_frame_length_);
status.packet_buffer_info.span_samples = packet_buffer_->GetSpanSamples(
decoder_frame_length_, last_output_sample_rate_hz_, true);
status.packet_buffer_info.span_samples_no_dtx =
packet_buffer_->GetSpanSamples(decoder_frame_length_,
last_output_sample_rate_hz_, false);
status.packet_buffer_info.num_packets = packet_buffer_->NumPacketsInBuffer();
status.target_timestamp = sync_buffer_->end_timestamp();
status.expand_mutefactor = expand_->MuteFactor(0);
status.last_packet_samples = decoder_frame_length_;
status.last_mode = last_mode_;
status.play_dtmf = *play_dtmf;
status.generated_noise_samples = generated_noise_samples;
if (packet) {
status.next_packet = {
packet->timestamp, packet->frame && packet->frame->IsDtxPacket(),
decoder_database_->IsComfortNoise(packet->payload_type)};
}
*operation = controller_->GetDecision(status, &reset_decoder_);
// Disallow time stretching if this packet is DTX, because such a decision may
// be based on earlier buffer level estimate, as we do not update buffer level
@ -1097,7 +1110,7 @@ int NetEqImpl::GetDecision(Operations* operation,
return 0;
}
decision_logic_->ExpandDecision(*operation);
controller_->ExpandDecision(*operation);
// Check conditions for reset.
if (new_codec_ || *operation == kUndefined) {
@ -1125,9 +1138,7 @@ int NetEqImpl::GetDecision(Operations* operation,
sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp);
end_timestamp = timestamp_;
new_codec_ = false;
decision_logic_->SoftReset();
buffer_level_filter_->Reset();
delay_manager_->Reset();
controller_->SoftReset();
stats_->ResetMcu();
}
@ -1153,7 +1164,7 @@ int NetEqImpl::GetDecision(Operations* operation,
generated_noise_stopwatch_
? generated_noise_stopwatch_->ElapsedTicks() *
output_size_samples_ +
decision_logic_->noise_fast_forward()
controller_->noise_fast_forward()
: 0;
if (generated_noise_samples > 0 && last_mode_ != kModeDtmf) {
// Make a jump in timestamp due to the recently played comfort noise.
@ -1169,8 +1180,8 @@ int NetEqImpl::GetDecision(Operations* operation,
// In order to do an accelerate we need at least 30 ms of audio data.
if (samples_left >= static_cast<int>(samples_30_ms)) {
// Already have enough data, so we do not need to extract any more.
decision_logic_->set_sample_memory(samples_left);
decision_logic_->set_prev_time_scale(true);
controller_->set_sample_memory(samples_left);
controller_->set_prev_time_scale(true);
return 0;
} else if (samples_left >= static_cast<int>(samples_10_ms) &&
decoder_frame_length_ >= samples_30_ms) {
@ -1201,8 +1212,8 @@ int NetEqImpl::GetDecision(Operations* operation,
// Already have enough data, so we do not need to extract any more.
// Or, avoid decoding more data as it might overflow the playout buffer.
// Still try preemptive expand, though.
decision_logic_->set_sample_memory(samples_left);
decision_logic_->set_prev_time_scale(true);
controller_->set_sample_memory(samples_left);
controller_->set_prev_time_scale(true);
return 0;
}
if (samples_left < static_cast<int>(samples_20_ms) &&
@ -1228,7 +1239,7 @@ int NetEqImpl::GetDecision(Operations* operation,
int extracted_samples = 0;
if (packet) {
sync_buffer_->IncreaseEndTimestamp(packet->timestamp - end_timestamp);
if (decision_logic_->CngOff()) {
if (controller_->CngOff()) {
// Adjustment of timestamp only corresponds to an actual packet loss
// if comfort noise is not played. If comfort noise was just played,
// this adjustment of timestamp is only done to get back in sync with the
@ -1238,7 +1249,7 @@ int NetEqImpl::GetDecision(Operations* operation,
if (*operation != kRfc3389Cng) {
// We are about to decode and use a non-CNG packet.
decision_logic_->SetCngOff();
controller_->SetCngOff();
}
extracted_samples = ExtractPackets(required_samples, packet_list);
@ -1249,8 +1260,8 @@ int NetEqImpl::GetDecision(Operations* operation,
if (*operation == kAccelerate || *operation == kFastAccelerate ||
*operation == kPreemptiveExpand) {
decision_logic_->set_sample_memory(samples_left + extracted_samples);
decision_logic_->set_prev_time_scale(true);
controller_->set_sample_memory(samples_left + extracted_samples);
controller_->set_prev_time_scale(true);
}
if (*operation == kAccelerate || *operation == kFastAccelerate) {
@ -2058,13 +2069,8 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
decoded_buffer_length_ = kMaxFrameSize * channels;
decoded_buffer_.reset(new int16_t[decoded_buffer_length_]);
}
// Create DecisionLogic if it is not created yet, then communicate new sample
// rate and output size to DecisionLogic object.
if (!decision_logic_.get()) {
CreateDecisionLogic();
}
decision_logic_->SetSampleRate(fs_hz_, output_size_samples_);
RTC_CHECK(controller_) << "Unexpectedly found no NetEqController";
controller_->SetSampleRate(fs_hz_, output_size_samples_);
}
NetEqImpl::OutputType NetEqImpl::LastOutputType() {
@ -2085,11 +2091,4 @@ NetEqImpl::OutputType NetEqImpl::LastOutputType() {
return OutputType::kNormalSpeech;
}
}
void NetEqImpl::CreateDecisionLogic() {
decision_logic_.reset(DecisionLogic::Create(
fs_hz_, output_size_samples_, no_time_stretching_,
decoder_database_.get(), *packet_buffer_.get(), delay_manager_.get(),
buffer_level_filter_.get(), tick_timer_.get()));
}
} // namespace webrtc