Improve FrameDelayDeltaKalmanFilter interface and add more docs.

This CL simplifies and documents the interface of the Kalman filter better. A simple unit test verifying the filter's convergence is
added. No functional changes to the filter are intended.

Future CLs will improve the data member naming and code organization.

Bug: webrtc:14151
Change-Id: I01e60d4b783cabad3ccbdc711c5d746666dd16ca
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/265877
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Commit-Queue: Rasmus Brandt <brandtr@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#37766}
This commit is contained in:
Rasmus Brandt
2022-08-12 13:45:34 +02:00
committed by WebRTC LUCI CQ
parent 85a126ec5d
commit ae4a832730
4 changed files with 171 additions and 78 deletions

View File

@ -20,27 +20,22 @@ constexpr double kThetaLow = 0.000001;
}
FrameDelayDeltaKalmanFilter::FrameDelayDeltaKalmanFilter() {
Reset();
}
// TODO(brandtr): Is there a factor 1000 missing here?
theta_[0] = 1 / (512e3 / 8); // Unit: [1 / bytes per ms]
theta_[1] = 0; // Unit: [ms]
// Resets the JitterEstimate.
void FrameDelayDeltaKalmanFilter::Reset() {
theta_[0] = 1 / (512e3 / 8);
theta_[1] = 0;
theta_cov_[0][0] = 1e-4;
theta_cov_[1][1] = 1e2;
theta_cov_[0][0] = 1e-4; // Unit: [(1 / bytes per ms)^2]
theta_cov_[1][1] = 1e2; // Unit: [ms^2]
theta_cov_[0][1] = theta_cov_[1][0] = 0;
q_cov_[0][0] = 2.5e-10;
q_cov_[1][1] = 1e-10;
q_cov_[0][0] = 2.5e-10; // Unit: [(1 / bytes per ms)^2]
q_cov_[1][1] = 1e-10; // Unit: [ms^2]
q_cov_[0][1] = q_cov_[1][0] = 0;
}
// Updates Kalman estimate of the channel.
// The caller is expected to sanity check the inputs.
void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
TimeDelta frame_delay,
double delta_frame_size_bytes,
void FrameDelayDeltaKalmanFilter::PredictAndUpdate(
TimeDelta frame_delay_variation,
double frame_size_variation_bytes,
DataSize max_frame_size,
double var_noise) {
double Mh[2];
@ -63,21 +58,23 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
// h = [dFS 1]
// Mh = M*h'
// hMh_sigma = h*M*h' + R
Mh[0] = theta_cov_[0][0] * delta_frame_size_bytes + theta_cov_[0][1];
Mh[1] = theta_cov_[1][0] * delta_frame_size_bytes + theta_cov_[1][1];
Mh[0] = theta_cov_[0][0] * frame_size_variation_bytes + theta_cov_[0][1];
Mh[1] = theta_cov_[1][0] * frame_size_variation_bytes + theta_cov_[1][1];
// sigma weights measurements with a small deltaFS as noisy and
// measurements with large deltaFS as good
if (max_frame_size < DataSize::Bytes(1)) {
return;
}
double sigma = (300.0 * exp(-fabs(delta_frame_size_bytes) /
double sigma = (300.0 * exp(-fabs(frame_size_variation_bytes) /
(1e0 * max_frame_size.bytes())) +
1) *
sqrt(var_noise);
if (sigma < 1.0) {
sigma = 1.0;
}
hMh_sigma = delta_frame_size_bytes * Mh[0] + Mh[1] + sigma;
// TODO(brandtr): Shouldn't we add sigma^2 here? Otherwise, the dimensional
// analysis fails.
hMh_sigma = frame_size_variation_bytes * Mh[0] + Mh[1] + sigma;
if ((hMh_sigma < 1e-9 && hMh_sigma >= 0) ||
(hMh_sigma > -1e-9 && hMh_sigma <= 0)) {
RTC_DCHECK_NOTREACHED();
@ -88,8 +85,8 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
// Correction
// theta = theta + K*(dT - h*theta)
measureRes =
frame_delay.ms() - (delta_frame_size_bytes * theta_[0] + theta_[1]);
measureRes = frame_delay_variation.ms() -
(frame_size_variation_bytes * theta_[0] + theta_[1]);
theta_[0] += kalmanGain[0] * measureRes;
theta_[1] += kalmanGain[1] * measureRes;
@ -100,14 +97,14 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
// M = (I - K*h)*M
t00 = theta_cov_[0][0];
t01 = theta_cov_[0][1];
theta_cov_[0][0] = (1 - kalmanGain[0] * delta_frame_size_bytes) * t00 -
theta_cov_[0][0] = (1 - kalmanGain[0] * frame_size_variation_bytes) * t00 -
kalmanGain[0] * theta_cov_[1][0];
theta_cov_[0][1] = (1 - kalmanGain[0] * delta_frame_size_bytes) * t01 -
theta_cov_[0][1] = (1 - kalmanGain[0] * frame_size_variation_bytes) * t01 -
kalmanGain[0] * theta_cov_[1][1];
theta_cov_[1][0] = theta_cov_[1][0] * (1 - kalmanGain[1]) -
kalmanGain[1] * delta_frame_size_bytes * t00;
kalmanGain[1] * frame_size_variation_bytes * t00;
theta_cov_[1][1] = theta_cov_[1][1] * (1 - kalmanGain[1]) -
kalmanGain[1] * delta_frame_size_bytes * t01;
kalmanGain[1] * frame_size_variation_bytes * t01;
// Covariance matrix, must be positive semi-definite.
RTC_DCHECK(theta_cov_[0][0] + theta_cov_[1][1] >= 0 &&
@ -117,16 +114,18 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
theta_cov_[0][0] >= 0);
}
// Calculate difference in delay between a sample and the expected delay
// estimated by the Kalman filter
double FrameDelayDeltaKalmanFilter::DeviationFromExpectedDelay(
TimeDelta frame_delay,
double delta_frame_size_bytes) const {
return frame_delay.ms() - (theta_[0] * delta_frame_size_bytes + theta_[1]);
double FrameDelayDeltaKalmanFilter::GetFrameDelayVariationEstimateSizeBased(
double frame_size_variation_bytes) const {
// Unit: [1 / bytes per millisecond] * [bytes] = [milliseconds].
return theta_[0] * frame_size_variation_bytes;
}
double FrameDelayDeltaKalmanFilter::GetSlope() const {
return theta_[0];
double FrameDelayDeltaKalmanFilter::GetFrameDelayVariationEstimateTotal(
double frame_size_variation_bytes) const {
double frame_transmission_delay_ms =
GetFrameDelayVariationEstimateSizeBased(frame_size_variation_bytes);
double link_queuing_delay_ms = theta_[1];
return frame_transmission_delay_ms + link_queuing_delay_ms;
}
} // namespace webrtc

View File

@ -16,51 +16,82 @@
namespace webrtc {
// This class uses a linear Kalman filter (see
// https://en.wikipedia.org/wiki/Kalman_filter) to estimate the frame delay
// variation (i.e., the difference in transmission time between a frame and the
// prior frame) for a frame, given its size variation in bytes (i.e., the
// difference in size between a frame and the prior frame). The idea is that,
// given a fixed link bandwidth, a larger frame (in bytes) would take
// proportionally longer to arrive than a correspondingly smaller frame. Using
// the variations of frame delay and frame size, the underlying bandwidth and
// queuing delay variation of the network link can be estimated.
//
// The filter takes as input the frame delay variation, the difference between
// the actual inter-frame arrival time and the expected inter-frame arrival time
// (based on RTP timestamp), and frame size variation, the inter-frame size
// delta for a single frame. The frame delay variation is seen as the
// measurement and the frame size variation is used in the observation model.
// The hidden state of the filter is the link bandwidth and queuing delay
// buildup. The estimated state can be used to get the expected frame delay
// variation for a frame, given its frame size variation. This information can
// then be used to estimate the frame delay variation coming from network
// jitter.
//
// Mathematical details:
// * The state (`x` in Wikipedia notation) is a 2x1 vector comprising the
// reciprocal of link bandwidth [1 / bytes per ms] and the
// link queuing delay buildup [ms].
// * The state transition matrix (`F`) is the 2x2 identity matrix, meaning that
// link bandwidth and link queuing delay buildup are modeled as independent.
// * The measurement (`z`) is the (scalar) frame delay variation [ms].
// * The observation matrix (`H`) is a 1x2 vector set as
// `{frame_size_variation [bytes], 1.0}`.
// * The process noise covariance (`Q`) is a constant 2x2 diagonal matrix
// [(1 / bytes per ms)^2, ms^2].
// * The observation noise covariance (`r`) is a scalar [ms^2] that is
// determined externally to this class.
class FrameDelayDeltaKalmanFilter {
public:
FrameDelayDeltaKalmanFilter();
~FrameDelayDeltaKalmanFilter() = default;
// Resets the estimate to the initial state.
void Reset();
// Updates the Kalman filter for the line describing the frame size dependent
// jitter.
// Predicts and updates the filter, given a new pair of frame delay variation
// and frame size variation.
//
// Input:
// - frame_delay
// Delay-delta calculated by UTILDelayEstimate.
// - delta_frame_size_bytes
// Frame size delta, i.e. frame size at time T minus frame size
// at time T-1. (May be negative!)
// - max_frame_size
// Filtered version of the largest frame size received.
// - var_noise
// Inputs:
// `frame_delay_variation`:
// Frame delay variation as calculated by the `InterFrameDelay` estimator.
//
// `frame_size_variation_bytes`:
// Frame size variation, i.e., the current frame size minus the previous
// frame size (in bytes). Note that this quantity may be negative.
//
// `max_frame_size`:
// Filtered largest frame size received since the last reset.
//
// `var_noise`:
// Variance of the estimated random jitter.
void KalmanEstimateChannel(TimeDelta frame_delay,
double delta_frame_size_bytes,
void PredictAndUpdate(TimeDelta frame_delay_variation,
double frame_size_variation_bytes,
DataSize max_frame_size,
double var_noise);
// Calculates the difference in delay between a sample and the expected delay
// estimated by the Kalman filter.
//
// Input:
// - frame_delay : Delay-delta calculated by UTILDelayEstimate.
// - delta_frame_size_bytes : Frame size delta, i.e. frame size at
// time T minus frame size at time T-1.
//
// Return value : The delay difference in ms.
double DeviationFromExpectedDelay(TimeDelta frame_delay,
double delta_frame_size_bytes) const;
// Given a frame size variation, returns the estimated frame delay variation
// explained by the link bandwidth alone.
double GetFrameDelayVariationEstimateSizeBased(
double frame_size_variation_bytes) const;
// Returns the estimated slope.
double GetSlope() const;
// Given a frame size variation, returns the estimated frame delay variation
// explained by both link bandwidth and link queuing delay buildup.
double GetFrameDelayVariationEstimateTotal(
double frame_size_variation_bytes) const;
private:
double theta_[2]; // Estimated line parameters (slope, offset)
double theta_cov_[2][2]; // Estimate covariance
double q_cov_[2][2]; // Process noise covariance
double theta_[2]; // Estimated line parameters:
// (bandwidth [1 / bytes per ms],
// queue buildup [ms])
double theta_cov_[2][2]; // Estimate covariance.
double q_cov_[2][2]; // Process noise covariance.
};
} // namespace webrtc

View File

@ -11,18 +11,80 @@
#include "modules/video_coding/timing/frame_delay_delta_kalman_filter.h"
#include "api/units/data_size.h"
#include "api/units/frequency.h"
#include "api/units/time_delta.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
TEST(FrameDelayDeltaKalmanFilterTest, InitialBandwidthStateIs512kbps) {
// This test verifies that the initial filter state (link bandwidth, link
// propagation delay) is such that a frame of size zero would take no time to
// propagate.
TEST(FrameDelayDeltaKalmanFilterTest,
InitializedFilterWithZeroSizeFrameTakesNoTimeToPropagate) {
FrameDelayDeltaKalmanFilter filter;
// The slope corresponds to the estimated bandwidth, and the initial value
// is set in the implementation.
EXPECT_EQ(filter.GetSlope(), 1 / (512e3 / 8));
// A zero-sized frame...
double frame_size_variation_bytes = 0.0;
// ...should take no time to propagate due to it's size...
EXPECT_EQ(filter.GetFrameDelayVariationEstimateSizeBased(
frame_size_variation_bytes),
0.0);
// ...and no time due to the initial link propagation delay being zero.
EXPECT_EQ(
filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
0.0);
}
// TODO(brandtr): Look into if there is a factor 1000 missing here? It seems
// unreasonable to have an initial link bandwidth of 512 _mega_bits per second?
TEST(FrameDelayDeltaKalmanFilterTest,
InitializedFilterWithSmallSizeFrameTakesFixedTimeToPropagate) {
FrameDelayDeltaKalmanFilter filter;
// A 1000-byte frame...
double frame_size_variation_bytes = 1000.0;
// ...should take around `1000.0 / (512e3 / 8.0) = 0.015625 ms` to transmit.
double expected_frame_delay_variation_estimate_ms = 1000.0 / (512e3 / 8.0);
EXPECT_EQ(filter.GetFrameDelayVariationEstimateSizeBased(
frame_size_variation_bytes),
expected_frame_delay_variation_estimate_ms);
EXPECT_EQ(
filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
expected_frame_delay_variation_estimate_ms);
}
TEST(FrameDelayDeltaKalmanFilterTest,
VerifyConvergenceWithAlternatingDeviations) {
FrameDelayDeltaKalmanFilter filter;
// One frame every 33 ms.
int framerate_fps = 30;
// Let's assume approximately 10% delay variation.
TimeDelta frame_delay_variation = TimeDelta::Millis(3);
// With a bitrate of 512 kbps, each frame will be around 2000 bytes.
DataSize max_frame_size = DataSize::Bytes(2000);
// And again, let's assume 10% size deviation.
double frame_size_variation_bytes = 200;
double var_noise = 0.1;
int test_duration_s = 60;
for (int i = 0; i < test_duration_s * framerate_fps; ++i) {
// For simplicity, assume alternating variations.
double sign = (i % 2 == 0) ? 1.0 : -1.0;
filter.PredictAndUpdate(sign * frame_delay_variation,
sign * frame_size_variation_bytes, max_frame_size,
var_noise);
}
// Verify that the filter has converged within a margin of 0.1 ms.
EXPECT_NEAR(
filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
frame_delay_variation.ms(), 0.1);
}
} // namespace

View File

@ -78,7 +78,7 @@ void JitterEstimator::Reset() {
rtt_filter_.Reset();
fps_counter_.Reset();
kalman_filter_.Reset();
kalman_filter_ = FrameDelayDeltaKalmanFilter();
}
// Updates the estimates with the new measurements.
@ -129,7 +129,8 @@ void JitterEstimator::UpdateEstimate(TimeDelta frame_delay,
// the frame size also is large the deviation is probably due to an incorrect
// line slope.
double deviation =
kalman_filter_.DeviationFromExpectedDelay(frame_delay, delta_frame_bytes);
frame_delay.ms() -
kalman_filter_.GetFrameDelayVariationEstimateTotal(delta_frame_bytes);
if (fabs(deviation) < kNumStdDevDelayOutlier * sqrt(var_noise_) ||
frame_size.bytes() >
@ -146,7 +147,7 @@ void JitterEstimator::UpdateEstimate(TimeDelta frame_delay,
// frame.
if (delta_frame_bytes > -0.25 * max_frame_size_.bytes()) {
// Update the Kalman filter with the new data
kalman_filter_.KalmanEstimateChannel(frame_delay, delta_frame_bytes,
kalman_filter_.PredictAndUpdate(frame_delay, delta_frame_bytes,
max_frame_size_, var_noise_);
}
} else {
@ -228,8 +229,8 @@ double JitterEstimator::NoiseThreshold() const {
// Calculates the current jitter estimate from the filtered estimates.
TimeDelta JitterEstimator::CalculateEstimate() {
double retMs = kalman_filter_.GetSlope() *
(max_frame_size_.bytes() - avg_frame_size_.bytes()) +
double retMs = kalman_filter_.GetFrameDelayVariationEstimateSizeBased(
max_frame_size_.bytes() - avg_frame_size_.bytes()) +
NoiseThreshold();
TimeDelta ret = TimeDelta::Millis(retMs);