Improve FrameDelayDeltaKalmanFilter interface and add more docs.
This CL simplifies and documents the interface of the Kalman filter better. A simple unit test verifying the filter's convergence is added. No functional changes to the filter are intended. Future CLs will improve the data member naming and code organization. Bug: webrtc:14151 Change-Id: I01e60d4b783cabad3ccbdc711c5d746666dd16ca Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/265877 Reviewed-by: Philip Eliasson <philipel@webrtc.org> Commit-Queue: Rasmus Brandt <brandtr@webrtc.org> Cr-Commit-Position: refs/heads/main@{#37766}
This commit is contained in:

committed by
WebRTC LUCI CQ

parent
85a126ec5d
commit
ae4a832730
@ -20,27 +20,22 @@ constexpr double kThetaLow = 0.000001;
|
||||
}
|
||||
|
||||
FrameDelayDeltaKalmanFilter::FrameDelayDeltaKalmanFilter() {
|
||||
Reset();
|
||||
}
|
||||
// TODO(brandtr): Is there a factor 1000 missing here?
|
||||
theta_[0] = 1 / (512e3 / 8); // Unit: [1 / bytes per ms]
|
||||
theta_[1] = 0; // Unit: [ms]
|
||||
|
||||
// Resets the JitterEstimate.
|
||||
void FrameDelayDeltaKalmanFilter::Reset() {
|
||||
theta_[0] = 1 / (512e3 / 8);
|
||||
theta_[1] = 0;
|
||||
|
||||
theta_cov_[0][0] = 1e-4;
|
||||
theta_cov_[1][1] = 1e2;
|
||||
theta_cov_[0][0] = 1e-4; // Unit: [(1 / bytes per ms)^2]
|
||||
theta_cov_[1][1] = 1e2; // Unit: [ms^2]
|
||||
theta_cov_[0][1] = theta_cov_[1][0] = 0;
|
||||
q_cov_[0][0] = 2.5e-10;
|
||||
q_cov_[1][1] = 1e-10;
|
||||
|
||||
q_cov_[0][0] = 2.5e-10; // Unit: [(1 / bytes per ms)^2]
|
||||
q_cov_[1][1] = 1e-10; // Unit: [ms^2]
|
||||
q_cov_[0][1] = q_cov_[1][0] = 0;
|
||||
}
|
||||
|
||||
// Updates Kalman estimate of the channel.
|
||||
// The caller is expected to sanity check the inputs.
|
||||
void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
|
||||
TimeDelta frame_delay,
|
||||
double delta_frame_size_bytes,
|
||||
void FrameDelayDeltaKalmanFilter::PredictAndUpdate(
|
||||
TimeDelta frame_delay_variation,
|
||||
double frame_size_variation_bytes,
|
||||
DataSize max_frame_size,
|
||||
double var_noise) {
|
||||
double Mh[2];
|
||||
@ -63,21 +58,23 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
|
||||
// h = [dFS 1]
|
||||
// Mh = M*h'
|
||||
// hMh_sigma = h*M*h' + R
|
||||
Mh[0] = theta_cov_[0][0] * delta_frame_size_bytes + theta_cov_[0][1];
|
||||
Mh[1] = theta_cov_[1][0] * delta_frame_size_bytes + theta_cov_[1][1];
|
||||
Mh[0] = theta_cov_[0][0] * frame_size_variation_bytes + theta_cov_[0][1];
|
||||
Mh[1] = theta_cov_[1][0] * frame_size_variation_bytes + theta_cov_[1][1];
|
||||
// sigma weights measurements with a small deltaFS as noisy and
|
||||
// measurements with large deltaFS as good
|
||||
if (max_frame_size < DataSize::Bytes(1)) {
|
||||
return;
|
||||
}
|
||||
double sigma = (300.0 * exp(-fabs(delta_frame_size_bytes) /
|
||||
double sigma = (300.0 * exp(-fabs(frame_size_variation_bytes) /
|
||||
(1e0 * max_frame_size.bytes())) +
|
||||
1) *
|
||||
sqrt(var_noise);
|
||||
if (sigma < 1.0) {
|
||||
sigma = 1.0;
|
||||
}
|
||||
hMh_sigma = delta_frame_size_bytes * Mh[0] + Mh[1] + sigma;
|
||||
// TODO(brandtr): Shouldn't we add sigma^2 here? Otherwise, the dimensional
|
||||
// analysis fails.
|
||||
hMh_sigma = frame_size_variation_bytes * Mh[0] + Mh[1] + sigma;
|
||||
if ((hMh_sigma < 1e-9 && hMh_sigma >= 0) ||
|
||||
(hMh_sigma > -1e-9 && hMh_sigma <= 0)) {
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
@ -88,8 +85,8 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
|
||||
|
||||
// Correction
|
||||
// theta = theta + K*(dT - h*theta)
|
||||
measureRes =
|
||||
frame_delay.ms() - (delta_frame_size_bytes * theta_[0] + theta_[1]);
|
||||
measureRes = frame_delay_variation.ms() -
|
||||
(frame_size_variation_bytes * theta_[0] + theta_[1]);
|
||||
theta_[0] += kalmanGain[0] * measureRes;
|
||||
theta_[1] += kalmanGain[1] * measureRes;
|
||||
|
||||
@ -100,14 +97,14 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
|
||||
// M = (I - K*h)*M
|
||||
t00 = theta_cov_[0][0];
|
||||
t01 = theta_cov_[0][1];
|
||||
theta_cov_[0][0] = (1 - kalmanGain[0] * delta_frame_size_bytes) * t00 -
|
||||
theta_cov_[0][0] = (1 - kalmanGain[0] * frame_size_variation_bytes) * t00 -
|
||||
kalmanGain[0] * theta_cov_[1][0];
|
||||
theta_cov_[0][1] = (1 - kalmanGain[0] * delta_frame_size_bytes) * t01 -
|
||||
theta_cov_[0][1] = (1 - kalmanGain[0] * frame_size_variation_bytes) * t01 -
|
||||
kalmanGain[0] * theta_cov_[1][1];
|
||||
theta_cov_[1][0] = theta_cov_[1][0] * (1 - kalmanGain[1]) -
|
||||
kalmanGain[1] * delta_frame_size_bytes * t00;
|
||||
kalmanGain[1] * frame_size_variation_bytes * t00;
|
||||
theta_cov_[1][1] = theta_cov_[1][1] * (1 - kalmanGain[1]) -
|
||||
kalmanGain[1] * delta_frame_size_bytes * t01;
|
||||
kalmanGain[1] * frame_size_variation_bytes * t01;
|
||||
|
||||
// Covariance matrix, must be positive semi-definite.
|
||||
RTC_DCHECK(theta_cov_[0][0] + theta_cov_[1][1] >= 0 &&
|
||||
@ -117,16 +114,18 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
|
||||
theta_cov_[0][0] >= 0);
|
||||
}
|
||||
|
||||
// Calculate difference in delay between a sample and the expected delay
|
||||
// estimated by the Kalman filter
|
||||
double FrameDelayDeltaKalmanFilter::DeviationFromExpectedDelay(
|
||||
TimeDelta frame_delay,
|
||||
double delta_frame_size_bytes) const {
|
||||
return frame_delay.ms() - (theta_[0] * delta_frame_size_bytes + theta_[1]);
|
||||
double FrameDelayDeltaKalmanFilter::GetFrameDelayVariationEstimateSizeBased(
|
||||
double frame_size_variation_bytes) const {
|
||||
// Unit: [1 / bytes per millisecond] * [bytes] = [milliseconds].
|
||||
return theta_[0] * frame_size_variation_bytes;
|
||||
}
|
||||
|
||||
double FrameDelayDeltaKalmanFilter::GetSlope() const {
|
||||
return theta_[0];
|
||||
double FrameDelayDeltaKalmanFilter::GetFrameDelayVariationEstimateTotal(
|
||||
double frame_size_variation_bytes) const {
|
||||
double frame_transmission_delay_ms =
|
||||
GetFrameDelayVariationEstimateSizeBased(frame_size_variation_bytes);
|
||||
double link_queuing_delay_ms = theta_[1];
|
||||
return frame_transmission_delay_ms + link_queuing_delay_ms;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -16,51 +16,82 @@
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This class uses a linear Kalman filter (see
|
||||
// https://en.wikipedia.org/wiki/Kalman_filter) to estimate the frame delay
|
||||
// variation (i.e., the difference in transmission time between a frame and the
|
||||
// prior frame) for a frame, given its size variation in bytes (i.e., the
|
||||
// difference in size between a frame and the prior frame). The idea is that,
|
||||
// given a fixed link bandwidth, a larger frame (in bytes) would take
|
||||
// proportionally longer to arrive than a correspondingly smaller frame. Using
|
||||
// the variations of frame delay and frame size, the underlying bandwidth and
|
||||
// queuing delay variation of the network link can be estimated.
|
||||
//
|
||||
// The filter takes as input the frame delay variation, the difference between
|
||||
// the actual inter-frame arrival time and the expected inter-frame arrival time
|
||||
// (based on RTP timestamp), and frame size variation, the inter-frame size
|
||||
// delta for a single frame. The frame delay variation is seen as the
|
||||
// measurement and the frame size variation is used in the observation model.
|
||||
// The hidden state of the filter is the link bandwidth and queuing delay
|
||||
// buildup. The estimated state can be used to get the expected frame delay
|
||||
// variation for a frame, given its frame size variation. This information can
|
||||
// then be used to estimate the frame delay variation coming from network
|
||||
// jitter.
|
||||
//
|
||||
// Mathematical details:
|
||||
// * The state (`x` in Wikipedia notation) is a 2x1 vector comprising the
|
||||
// reciprocal of link bandwidth [1 / bytes per ms] and the
|
||||
// link queuing delay buildup [ms].
|
||||
// * The state transition matrix (`F`) is the 2x2 identity matrix, meaning that
|
||||
// link bandwidth and link queuing delay buildup are modeled as independent.
|
||||
// * The measurement (`z`) is the (scalar) frame delay variation [ms].
|
||||
// * The observation matrix (`H`) is a 1x2 vector set as
|
||||
// `{frame_size_variation [bytes], 1.0}`.
|
||||
// * The process noise covariance (`Q`) is a constant 2x2 diagonal matrix
|
||||
// [(1 / bytes per ms)^2, ms^2].
|
||||
// * The observation noise covariance (`r`) is a scalar [ms^2] that is
|
||||
// determined externally to this class.
|
||||
class FrameDelayDeltaKalmanFilter {
|
||||
public:
|
||||
FrameDelayDeltaKalmanFilter();
|
||||
~FrameDelayDeltaKalmanFilter() = default;
|
||||
|
||||
// Resets the estimate to the initial state.
|
||||
void Reset();
|
||||
|
||||
// Updates the Kalman filter for the line describing the frame size dependent
|
||||
// jitter.
|
||||
// Predicts and updates the filter, given a new pair of frame delay variation
|
||||
// and frame size variation.
|
||||
//
|
||||
// Input:
|
||||
// - frame_delay
|
||||
// Delay-delta calculated by UTILDelayEstimate.
|
||||
// - delta_frame_size_bytes
|
||||
// Frame size delta, i.e. frame size at time T minus frame size
|
||||
// at time T-1. (May be negative!)
|
||||
// - max_frame_size
|
||||
// Filtered version of the largest frame size received.
|
||||
// - var_noise
|
||||
// Inputs:
|
||||
// `frame_delay_variation`:
|
||||
// Frame delay variation as calculated by the `InterFrameDelay` estimator.
|
||||
//
|
||||
// `frame_size_variation_bytes`:
|
||||
// Frame size variation, i.e., the current frame size minus the previous
|
||||
// frame size (in bytes). Note that this quantity may be negative.
|
||||
//
|
||||
// `max_frame_size`:
|
||||
// Filtered largest frame size received since the last reset.
|
||||
//
|
||||
// `var_noise`:
|
||||
// Variance of the estimated random jitter.
|
||||
void KalmanEstimateChannel(TimeDelta frame_delay,
|
||||
double delta_frame_size_bytes,
|
||||
void PredictAndUpdate(TimeDelta frame_delay_variation,
|
||||
double frame_size_variation_bytes,
|
||||
DataSize max_frame_size,
|
||||
double var_noise);
|
||||
|
||||
// Calculates the difference in delay between a sample and the expected delay
|
||||
// estimated by the Kalman filter.
|
||||
//
|
||||
// Input:
|
||||
// - frame_delay : Delay-delta calculated by UTILDelayEstimate.
|
||||
// - delta_frame_size_bytes : Frame size delta, i.e. frame size at
|
||||
// time T minus frame size at time T-1.
|
||||
//
|
||||
// Return value : The delay difference in ms.
|
||||
double DeviationFromExpectedDelay(TimeDelta frame_delay,
|
||||
double delta_frame_size_bytes) const;
|
||||
// Given a frame size variation, returns the estimated frame delay variation
|
||||
// explained by the link bandwidth alone.
|
||||
double GetFrameDelayVariationEstimateSizeBased(
|
||||
double frame_size_variation_bytes) const;
|
||||
|
||||
// Returns the estimated slope.
|
||||
double GetSlope() const;
|
||||
// Given a frame size variation, returns the estimated frame delay variation
|
||||
// explained by both link bandwidth and link queuing delay buildup.
|
||||
double GetFrameDelayVariationEstimateTotal(
|
||||
double frame_size_variation_bytes) const;
|
||||
|
||||
private:
|
||||
double theta_[2]; // Estimated line parameters (slope, offset)
|
||||
double theta_cov_[2][2]; // Estimate covariance
|
||||
double q_cov_[2][2]; // Process noise covariance
|
||||
double theta_[2]; // Estimated line parameters:
|
||||
// (bandwidth [1 / bytes per ms],
|
||||
// queue buildup [ms])
|
||||
double theta_cov_[2][2]; // Estimate covariance.
|
||||
double q_cov_[2][2]; // Process noise covariance.
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -11,18 +11,80 @@
|
||||
#include "modules/video_coding/timing/frame_delay_delta_kalman_filter.h"
|
||||
|
||||
#include "api/units/data_size.h"
|
||||
#include "api/units/frequency.h"
|
||||
#include "api/units/time_delta.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
TEST(FrameDelayDeltaKalmanFilterTest, InitialBandwidthStateIs512kbps) {
|
||||
// This test verifies that the initial filter state (link bandwidth, link
|
||||
// propagation delay) is such that a frame of size zero would take no time to
|
||||
// propagate.
|
||||
TEST(FrameDelayDeltaKalmanFilterTest,
|
||||
InitializedFilterWithZeroSizeFrameTakesNoTimeToPropagate) {
|
||||
FrameDelayDeltaKalmanFilter filter;
|
||||
|
||||
// The slope corresponds to the estimated bandwidth, and the initial value
|
||||
// is set in the implementation.
|
||||
EXPECT_EQ(filter.GetSlope(), 1 / (512e3 / 8));
|
||||
// A zero-sized frame...
|
||||
double frame_size_variation_bytes = 0.0;
|
||||
|
||||
// ...should take no time to propagate due to it's size...
|
||||
EXPECT_EQ(filter.GetFrameDelayVariationEstimateSizeBased(
|
||||
frame_size_variation_bytes),
|
||||
0.0);
|
||||
|
||||
// ...and no time due to the initial link propagation delay being zero.
|
||||
EXPECT_EQ(
|
||||
filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
|
||||
0.0);
|
||||
}
|
||||
|
||||
// TODO(brandtr): Look into if there is a factor 1000 missing here? It seems
|
||||
// unreasonable to have an initial link bandwidth of 512 _mega_bits per second?
|
||||
TEST(FrameDelayDeltaKalmanFilterTest,
|
||||
InitializedFilterWithSmallSizeFrameTakesFixedTimeToPropagate) {
|
||||
FrameDelayDeltaKalmanFilter filter;
|
||||
|
||||
// A 1000-byte frame...
|
||||
double frame_size_variation_bytes = 1000.0;
|
||||
// ...should take around `1000.0 / (512e3 / 8.0) = 0.015625 ms` to transmit.
|
||||
double expected_frame_delay_variation_estimate_ms = 1000.0 / (512e3 / 8.0);
|
||||
|
||||
EXPECT_EQ(filter.GetFrameDelayVariationEstimateSizeBased(
|
||||
frame_size_variation_bytes),
|
||||
expected_frame_delay_variation_estimate_ms);
|
||||
EXPECT_EQ(
|
||||
filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
|
||||
expected_frame_delay_variation_estimate_ms);
|
||||
}
|
||||
|
||||
TEST(FrameDelayDeltaKalmanFilterTest,
|
||||
VerifyConvergenceWithAlternatingDeviations) {
|
||||
FrameDelayDeltaKalmanFilter filter;
|
||||
|
||||
// One frame every 33 ms.
|
||||
int framerate_fps = 30;
|
||||
// Let's assume approximately 10% delay variation.
|
||||
TimeDelta frame_delay_variation = TimeDelta::Millis(3);
|
||||
// With a bitrate of 512 kbps, each frame will be around 2000 bytes.
|
||||
DataSize max_frame_size = DataSize::Bytes(2000);
|
||||
// And again, let's assume 10% size deviation.
|
||||
double frame_size_variation_bytes = 200;
|
||||
double var_noise = 0.1;
|
||||
int test_duration_s = 60;
|
||||
|
||||
for (int i = 0; i < test_duration_s * framerate_fps; ++i) {
|
||||
// For simplicity, assume alternating variations.
|
||||
double sign = (i % 2 == 0) ? 1.0 : -1.0;
|
||||
filter.PredictAndUpdate(sign * frame_delay_variation,
|
||||
sign * frame_size_variation_bytes, max_frame_size,
|
||||
var_noise);
|
||||
}
|
||||
|
||||
// Verify that the filter has converged within a margin of 0.1 ms.
|
||||
EXPECT_NEAR(
|
||||
filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
|
||||
frame_delay_variation.ms(), 0.1);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -78,7 +78,7 @@ void JitterEstimator::Reset() {
|
||||
rtt_filter_.Reset();
|
||||
fps_counter_.Reset();
|
||||
|
||||
kalman_filter_.Reset();
|
||||
kalman_filter_ = FrameDelayDeltaKalmanFilter();
|
||||
}
|
||||
|
||||
// Updates the estimates with the new measurements.
|
||||
@ -129,7 +129,8 @@ void JitterEstimator::UpdateEstimate(TimeDelta frame_delay,
|
||||
// the frame size also is large the deviation is probably due to an incorrect
|
||||
// line slope.
|
||||
double deviation =
|
||||
kalman_filter_.DeviationFromExpectedDelay(frame_delay, delta_frame_bytes);
|
||||
frame_delay.ms() -
|
||||
kalman_filter_.GetFrameDelayVariationEstimateTotal(delta_frame_bytes);
|
||||
|
||||
if (fabs(deviation) < kNumStdDevDelayOutlier * sqrt(var_noise_) ||
|
||||
frame_size.bytes() >
|
||||
@ -146,7 +147,7 @@ void JitterEstimator::UpdateEstimate(TimeDelta frame_delay,
|
||||
// frame.
|
||||
if (delta_frame_bytes > -0.25 * max_frame_size_.bytes()) {
|
||||
// Update the Kalman filter with the new data
|
||||
kalman_filter_.KalmanEstimateChannel(frame_delay, delta_frame_bytes,
|
||||
kalman_filter_.PredictAndUpdate(frame_delay, delta_frame_bytes,
|
||||
max_frame_size_, var_noise_);
|
||||
}
|
||||
} else {
|
||||
@ -228,8 +229,8 @@ double JitterEstimator::NoiseThreshold() const {
|
||||
|
||||
// Calculates the current jitter estimate from the filtered estimates.
|
||||
TimeDelta JitterEstimator::CalculateEstimate() {
|
||||
double retMs = kalman_filter_.GetSlope() *
|
||||
(max_frame_size_.bytes() - avg_frame_size_.bytes()) +
|
||||
double retMs = kalman_filter_.GetFrameDelayVariationEstimateSizeBased(
|
||||
max_frame_size_.bytes() - avg_frame_size_.bytes()) +
|
||||
NoiseThreshold();
|
||||
|
||||
TimeDelta ret = TimeDelta::Millis(retMs);
|
||||
|
Reference in New Issue
Block a user