Reland "Change buffer level filter to store current level in number of samples."

This is a reland of 87977dd06e702ed517f26235c12e37bd927527c7

Original change's description:
> Change buffer level filter to store current level in number of samples.
> 
> The buffer level should not be converted back and forth between samples and packets in case of variable packet lengths.
> 
> Bug: webrtc:10736
> Change-Id: Ia08dcfac3d8104dc79fbad0704a5f6f12a050a01
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/142178
> Reviewed-by: Minyue Li <minyue@webrtc.org>
> Commit-Queue: Jakob Ivarsson <jakobi@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#28368}

Bug: webrtc:10736
Change-Id: I1ff603e65cdd31c7429f36b035dcc00a17b68f3b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/143787
Commit-Queue: Minyue Li <minyue@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28393}
This commit is contained in:
Jakob Ivarsson
2019-06-26 16:24:04 +02:00
committed by Commit Bot
parent 4d6951669c
commit 0ded32d5a3
9 changed files with 114 additions and 184 deletions

View File

@ -26,32 +26,22 @@ void BufferLevelFilter::Reset() {
level_factor_ = 253;
}
void BufferLevelFilter::Update(size_t buffer_size_packets,
int time_stretched_samples,
size_t packet_len_samples) {
void BufferLevelFilter::Update(size_t buffer_size_samples,
int time_stretched_samples) {
// Filter:
// |filtered_current_level_| = |level_factor_| * |filtered_current_level_| +
// (1 - |level_factor_|) * |buffer_size_packets|
// (1 - |level_factor_|) * |buffer_size_samples|
// |level_factor_| and |filtered_current_level_| are in Q8.
// |buffer_size_packets| is in Q0.
// |buffer_size_samples| is in Q0.
filtered_current_level_ =
((level_factor_ * filtered_current_level_) >> 8) +
((256 - level_factor_) * rtc::dchecked_cast<int>(buffer_size_packets));
((256 - level_factor_) * rtc::dchecked_cast<int>(buffer_size_samples));
// Account for time-scale operations (accelerate and pre-emptive expand).
if (time_stretched_samples && packet_len_samples > 0) {
// Time-scaling has been performed since last filter update. Subtract the
// value of |time_stretched_samples| from |filtered_current_level_| after
// converting |time_stretched_samples| from samples to packets in Q8.
// Make sure that the filtered value remains non-negative.
int64_t time_stretched_packets =
(int64_t{time_stretched_samples} * (1 << 8)) /
rtc::dchecked_cast<int64_t>(packet_len_samples);
filtered_current_level_ = rtc::saturated_cast<int>(
std::max<int64_t>(0, filtered_current_level_ - time_stretched_packets));
}
// Account for time-scale operations (accelerate and pre-emptive expand) and
// make sure that the filtered value remains non-negative.
filtered_current_level_ = rtc::saturated_cast<int>(std::max<int64_t>(
0,
filtered_current_level_ - (int64_t{time_stretched_samples} * (1 << 8))));
}
void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level) {
@ -66,8 +56,4 @@ void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level) {
}
}
int BufferLevelFilter::filtered_current_level() const {
return filtered_current_level_;
}
} // namespace webrtc

View File

@ -24,20 +24,20 @@ class BufferLevelFilter {
virtual void Reset();
// Updates the filter. Current buffer size is |buffer_size_samples| (Q0).
// If |time_stretched_samples| is non-zero, the value is converted to the
// corresponding number of packets, and is subtracted from the filtered
// value (thus bypassing the filter operation). |packet_len_samples| is the
// number of audio samples carried in each incoming packet.
virtual void Update(size_t buffer_size_packets,
int time_stretched_samples,
size_t packet_len_samples);
// |time_stretched_samples| is subtracted from the filtered value (thus
// bypassing the filter operation).
virtual void Update(size_t buffer_size_samples, int time_stretched_samples);
// Set the current target buffer level (obtained from
// Set the current target buffer level in number of packets (obtained from
// DelayManager::base_target_level()). Used to select the appropriate
// filter coefficient.
virtual void SetTargetBufferLevel(int target_buffer_level);
virtual void SetTargetBufferLevel(int target_buffer_level_packets);
virtual int filtered_current_level() const;
// Returns filtered current level in number of samples.
virtual int filtered_current_level() const {
// Round to nearest whole sample.
return (filtered_current_level_ + (1 << 7)) >> 8;
}
private:
int level_factor_; // Filter factor for the buffer level filter in Q8.

View File

@ -35,18 +35,17 @@ TEST(BufferLevelFilter, ConvergenceTest) {
ss << "times = " << times << ", value = " << value;
SCOPED_TRACE(ss.str()); // Print out the parameter values on failure.
for (int i = 0; i < times; ++i) {
filter.Update(value, 0 /* time_stretched_samples */,
160 /* packet_len_samples */);
filter.Update(value, 0 /* time_stretched_samples */);
}
// Expect the filtered value to be (theoretically)
// (1 - (251/256) ^ |times|) * |value|.
double expected_value_double = (1 - pow(251.0 / 256.0, times)) * value;
int expected_value = static_cast<int>(expected_value_double);
// filtered_current_level() returns the value in Q8.
// The actual value may differ slightly from the expected value due to
// intermediate-stage rounding errors in the filter implementation.
// This is why we have to use EXPECT_NEAR with a tolerance of +/-1.
EXPECT_NEAR(expected_value, filter.filtered_current_level() >> 8, 1);
EXPECT_NEAR(expected_value, filter.filtered_current_level(), 1);
}
}
}
@ -60,38 +59,32 @@ TEST(BufferLevelFilter, FilterFactor) {
filter.SetTargetBufferLevel(3); // Makes filter coefficient 252/256.
for (int i = 0; i < kTimes; ++i) {
filter.Update(kValue, 0 /* time_stretched_samples */,
160 /* packet_len_samples */);
filter.Update(kValue, 0 /* time_stretched_samples */);
}
// Expect the filtered value to be
// (1 - (252/256) ^ |kTimes|) * |kValue|.
int expected_value = 14;
// filtered_current_level() returns the value in Q8.
EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8);
int expected_value = 15;
EXPECT_EQ(expected_value, filter.filtered_current_level());
filter.Reset();
filter.SetTargetBufferLevel(7); // Makes filter coefficient 253/256.
for (int i = 0; i < kTimes; ++i) {
filter.Update(kValue, 0 /* time_stretched_samples */,
160 /* packet_len_samples */);
filter.Update(kValue, 0 /* time_stretched_samples */);
}
// Expect the filtered value to be
// (1 - (253/256) ^ |kTimes|) * |kValue|.
expected_value = 11;
// filtered_current_level() returns the value in Q8.
EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8);
EXPECT_EQ(expected_value, filter.filtered_current_level());
filter.Reset();
filter.SetTargetBufferLevel(8); // Makes filter coefficient 254/256.
for (int i = 0; i < kTimes; ++i) {
filter.Update(kValue, 0 /* time_stretched_samples */,
160 /* packet_len_samples */);
filter.Update(kValue, 0 /* time_stretched_samples */);
}
// Expect the filtered value to be
// (1 - (254/256) ^ |kTimes|) * |kValue|.
expected_value = 7;
// filtered_current_level() returns the value in Q8.
EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8);
expected_value = 8;
EXPECT_EQ(expected_value, filter.filtered_current_level());
}
TEST(BufferLevelFilter, TimeStretchedSamples) {
@ -100,62 +93,24 @@ TEST(BufferLevelFilter, TimeStretchedSamples) {
// Update 10 times with value 100.
const int kTimes = 10;
const int kValue = 100;
const int kPacketSizeSamples = 160;
const int kNumPacketsStretched = 2;
const int kTimeStretchedSamples = kNumPacketsStretched * kPacketSizeSamples;
const int kTimeStretchedSamples = 3;
for (int i = 0; i < kTimes; ++i) {
// Packet size set to 0. Do not expect the parameter
// |kTimeStretchedSamples| to have any effect.
filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */);
filter.Update(kValue, 0);
}
// Expect the filtered value to be
// (1 - (251/256) ^ |kTimes|) * |kValue|.
const int kExpectedValue = 17;
// filtered_current_level() returns the value in Q8.
EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
const int kExpectedValue = 18;
EXPECT_EQ(kExpectedValue, filter.filtered_current_level());
// Update filter again, now with a non-zero value for |kTimeStretchedSamples|.
// Set the current filtered value to be the input, in order to isolate the
// impact of |kTimeStretchedSamples|.
filter.Update(filter.filtered_current_level() >> 8, kTimeStretchedSamples,
kPacketSizeSamples);
EXPECT_EQ(kExpectedValue - kNumPacketsStretched,
filter.filtered_current_level() >> 8);
filter.Update(filter.filtered_current_level(), kTimeStretchedSamples);
EXPECT_EQ(kExpectedValue - kTimeStretchedSamples,
filter.filtered_current_level());
// Try negative value and verify that we come back to the previous result.
filter.Update(filter.filtered_current_level() >> 8, -kTimeStretchedSamples,
kPacketSizeSamples);
EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
}
TEST(BufferLevelFilter, TimeStretchedSamplesNegativeUnevenFrames) {
BufferLevelFilter filter;
filter.SetTargetBufferLevel(1); // Makes filter coefficient 251/256.
// Update 10 times with value 100.
const int kTimes = 10;
const int kValue = 100;
const int kPacketSizeSamples = 160;
const int kTimeStretchedSamples = -3.1415 * kPacketSizeSamples;
for (int i = 0; i < kTimes; ++i) {
// Packet size set to 0. Do not expect the parameter
// |kTimeStretchedSamples| to have any effect.
filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */);
}
// Expect the filtered value to be
// (1 - (251/256) ^ |kTimes|) * |kValue|.
const int kExpectedValue = 17;
// filtered_current_level() returns the value in Q8.
EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
// Update filter again, now with non-zero value for packet length.
// Set the current filtered value to be the input, in order to isolate the
// impact of |kTimeStretchedSamples|.
filter.Update(filter.filtered_current_level() >> 8, kTimeStretchedSamples,
kPacketSizeSamples);
EXPECT_EQ(21, filter.filtered_current_level() >> 8);
// Try negative value and verify that we come back to the previous result.
filter.Update(filter.filtered_current_level() >> 8, -kTimeStretchedSamples,
kPacketSizeSamples);
EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8);
filter.Update(filter.filtered_current_level(), -kTimeStretchedSamples);
EXPECT_EQ(kExpectedValue, filter.filtered_current_level());
}
} // namespace webrtc

View File

@ -113,11 +113,9 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
cng_state_ = kCngInternalOn;
}
const size_t samples_left =
sync_buffer.FutureLength() - expand.overlap_length();
// TODO(jakobi): Use buffer span instead of num samples.
const size_t cur_size_samples =
samples_left + packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
packet_buffer_.NumSamplesInBuffer(decoder_frame_length);
prev_time_scale_ =
prev_time_scale_ && (prev_mode == kModeAccelerateSuccess ||
@ -175,8 +173,7 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
// if the mute factor is low enough (otherwise the expansion was short enough
// to not be noticable).
// Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
size_t current_span =
samples_left + packet_buffer_.GetSpanSamples(decoder_frame_length);
size_t current_span = packet_buffer_.GetSpanSamples(decoder_frame_length);
if ((prev_mode == kModeExpand || prev_mode == kModeCodecPlc) &&
expand.MuteFactor(0) < 16384 / 2 &&
current_span < static_cast<size_t>(delay_manager_->TargetLevel() *
@ -193,9 +190,9 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
return ExpectedPacketAvailable(prev_mode, play_dtmf);
} else if (!PacketBuffer::IsObsoleteTimestamp(
available_timestamp, target_timestamp, five_seconds_samples)) {
return FuturePacketAvailable(
sync_buffer, expand, decoder_frame_length, prev_mode, target_timestamp,
available_timestamp, play_dtmf, generated_noise_samples);
return FuturePacketAvailable(decoder_frame_length, prev_mode,
target_timestamp, available_timestamp,
play_dtmf, generated_noise_samples);
} else {
// This implies that available_timestamp < target_timestamp, which can
// happen when a new stream or codec is received. Signal for a reset.
@ -215,19 +212,13 @@ void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
buffer_level_filter_->SetTargetBufferLevel(
delay_manager_->base_target_level());
size_t buffer_size_packets = 0;
if (packet_length_samples_ > 0) {
// Calculate size in packets.
buffer_size_packets = buffer_size_samples / packet_length_samples_;
}
int sample_memory_local = 0;
if (prev_time_scale_) {
sample_memory_local = sample_memory_;
timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
}
buffer_level_filter_->Update(buffer_size_packets, sample_memory_local,
packet_length_samples_);
buffer_level_filter_->Update(buffer_size_samples, sample_memory_local);
prev_time_scale_ = false;
}
@ -283,15 +274,22 @@ Operations DecisionLogic::NoPacket(bool play_dtmf) {
Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
bool play_dtmf) {
if (!disallow_time_stretching_ && prev_mode != kModeExpand && !play_dtmf) {
// Check criterion for time-stretching.
// Check criterion for time-stretching. The values are in number of packets
// in Q8.
int low_limit, high_limit;
delay_manager_->BufferLimits(&low_limit, &high_limit);
if (buffer_level_filter_->filtered_current_level() >= high_limit << 2)
int buffer_level_packets = 0;
if (packet_length_samples_ > 0) {
buffer_level_packets =
((1 << 8) * buffer_level_filter_->filtered_current_level()) /
packet_length_samples_;
}
if (buffer_level_packets >= high_limit << 2)
return kFastAccelerate;
if (TimescaleAllowed()) {
if (buffer_level_filter_->filtered_current_level() >= high_limit)
if (buffer_level_packets >= high_limit)
return kAccelerate;
if (buffer_level_filter_->filtered_current_level() < low_limit)
if (buffer_level_packets < low_limit)
return kPreemptiveExpand;
}
}
@ -299,8 +297,6 @@ Operations DecisionLogic::ExpectedPacketAvailable(Modes prev_mode,
}
Operations DecisionLogic::FuturePacketAvailable(
const SyncBuffer& sync_buffer,
const Expand& expand,
size_t decoder_frame_length,
Modes prev_mode,
uint32_t target_timestamp,
@ -327,10 +323,8 @@ Operations DecisionLogic::FuturePacketAvailable(
return kNormal;
}
const size_t samples_left =
sync_buffer.FutureLength() - expand.overlap_length();
const size_t cur_size_samples =
samples_left + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
packet_buffer_.NumPacketsInBuffer() * decoder_frame_length;
// If previous was comfort noise, then no merge is needed.
if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) {
@ -365,8 +359,13 @@ Operations DecisionLogic::FuturePacketAvailable(
}
bool DecisionLogic::UnderTargetLevel() const {
return buffer_level_filter_->filtered_current_level() <=
delay_manager_->TargetLevel();
int buffer_level_packets = 0;
if (packet_length_samples_ > 0) {
buffer_level_packets =
((1 << 8) * buffer_level_filter_->filtered_current_level()) /
packet_length_samples_;
}
return buffer_level_packets <= delay_manager_->TargetLevel();
}
bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {

View File

@ -134,9 +134,7 @@ class DecisionLogic final {
// Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand.
Operations FuturePacketAvailable(const SyncBuffer& sync_buffer,
const Expand& expand,
size_t decoder_frame_length,
Operations FuturePacketAvailable(size_t decoder_frame_length,
Modes prev_mode,
uint32_t target_timestamp,
uint32_t available_timestamp,

View File

@ -22,10 +22,8 @@ class MockBufferLevelFilter : public BufferLevelFilter {
virtual ~MockBufferLevelFilter() { Die(); }
MOCK_METHOD0(Die, void());
MOCK_METHOD0(Reset, void());
MOCK_METHOD3(Update,
void(size_t buffer_size_packets,
int time_stretched_samples,
size_t packet_len_samples));
MOCK_METHOD2(Update,
void(size_t buffer_size_samples, int time_stretched_samples));
MOCK_METHOD1(SetTargetBufferLevel, void(int target_buffer_level));
MOCK_CONST_METHOD0(filtered_current_level, int());
};

View File

@ -310,18 +310,12 @@ int NetEqImpl::TargetDelayMs() const {
int NetEqImpl::FilteredCurrentDelayMs() const {
rtc::CritScope lock(&crit_sect_);
// Calculate the filtered packet buffer level in samples. The value from
// |buffer_level_filter_| is in number of packets, represented in Q8.
const size_t packet_buffer_samples =
(buffer_level_filter_->filtered_current_level() *
decoder_frame_length_) >>
8;
// Sum up the filtered packet buffer level with the future length of the sync
// buffer, and divide the sum by the sample rate.
const size_t delay_samples =
packet_buffer_samples + sync_buffer_->FutureLength();
// buffer.
const int delay_samples = buffer_level_filter_->filtered_current_level() +
sync_buffer_->FutureLength();
// The division below will truncate. The return value is in ms.
return static_cast<int>(delay_samples) / rtc::CheckedDivExact(fs_hz_, 1000);
return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000);
}
int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {

View File

@ -458,16 +458,16 @@ TEST_F(NetEqDecodingTest, MAYBE_TestBitExactness) {
webrtc::test::ResourcePath("audio_coding/neteq_universal_new", "rtp");
const std::string output_checksum =
PlatformChecksum("9652cee1d6771a9cbfda821ae1bbdb41b0dd4dee",
"54a7e32f163663c0af35bf70bf45cefc24ad62ef", "not used",
"9652cee1d6771a9cbfda821ae1bbdb41b0dd4dee",
"79496b0a1ef0a3824f3ee04789748a461bed643f");
PlatformChecksum("998be2e5a707e636af0b6298f54bedfabe72aae1",
"61e238ece4cd3b67d66a0b7047e06b20607dcb79", "not used",
"998be2e5a707e636af0b6298f54bedfabe72aae1",
"4116ac2a6e75baac3194b712d6fabe28b384275e");
const std::string network_stats_checksum =
PlatformChecksum("c59b1f9f282b6d8733cdff975e3c150ca4a47d51",
"bca95e565996a4ffd6e2ac15736e08843bdca93b", "not used",
"c59b1f9f282b6d8733cdff975e3c150ca4a47d51",
"c59b1f9f282b6d8733cdff975e3c150ca4a47d51");
PlatformChecksum("3689c9f0ab9e50cefab3e44c37c3d7aa0de82ca4",
"0a596217fccd8d90eff7d1666b8cc63143eeda12", "not used",
"3689c9f0ab9e50cefab3e44c37c3d7aa0de82ca4",
"3689c9f0ab9e50cefab3e44c37c3d7aa0de82ca4");
DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum,
FLAG_gen_ref);
@ -486,17 +486,17 @@ TEST_F(NetEqDecodingTest, MAYBE_TestOpusBitExactness) {
// Checksum depends on libopus being compiled with or without SSE.
const std::string maybe_sse =
"14a63b3c7b925c82296be4bafc71bec85f2915c2|"
"2c05677daa968d6c68b92adf4affb7cd9bb4d363";
"eb0b68bddcac00fc85403df64f83126f8ea9bc93";
const std::string output_checksum = PlatformChecksum(
maybe_sse, "b7b7ed802b0e18ee416973bf3b9ae98599b0181d",
"5876e52dda90d5ca433c3726555b907b97c86374", maybe_sse, maybe_sse);
maybe_sse, "f95f2a220c9ca5d60b81c4653d46e0de2bee159f",
"6f288a03d34958f62496f18fa85655593eef4dbe", maybe_sse, maybe_sse);
const std::string network_stats_checksum =
PlatformChecksum("adb3272498e436d1c019cbfd71610e9510c54497",
"fa935a91abc7291db47428a2d7c5361b98713a92",
"42106aa5267300f709f63737707ef07afd9dac61",
"adb3272498e436d1c019cbfd71610e9510c54497",
"adb3272498e436d1c019cbfd71610e9510c54497");
PlatformChecksum("0b3d34baffaf651812ffaf06ea1b5ce45ea1c47a",
"a71dce66c7bea85ba22d4e29a5298f606f810444",
"7c64e1e915bace7c4bf583484efd64eaf234552f",
"0b3d34baffaf651812ffaf06ea1b5ce45ea1c47a",
"0b3d34baffaf651812ffaf06ea1b5ce45ea1c47a");
DecodeAndCompare(input_rtp_file, output_checksum, network_stats_checksum,
FLAG_gen_ref);
@ -796,7 +796,7 @@ TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDriftNetworkFreeze) {
const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
const double kNetworkFreezeTimeMs = 5000.0;
const bool kGetAudioDuringFreezeRecovery = false;
const int kDelayToleranceMs = 50;
const int kDelayToleranceMs = 60;
const int kMaxTimeToSpeechMs = 200;
LongCngWithClockDrift(kDriftFactor, kNetworkFreezeTimeMs,
kGetAudioDuringFreezeRecovery, kDelayToleranceMs,