Improve FrameDelayDeltaKalmanFilter interface and add more docs.

This CL simplifies and documents the interface of the Kalman filter better. A simple unit test verifying the filter's convergence is added. No functional changes to the filter are intended. Future CLs will improve the data member naming and code organization. Bug: webrtc:14151 Change-Id: I01e60d4b783cabad3ccbdc711c5d746666dd16ca Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/265877 Reviewed-by: Philip Eliasson <philipel@webrtc.org> Commit-Queue: Rasmus Brandt <brandtr@webrtc.org> Cr-Commit-Position: refs/heads/main@{#37766}
2022-08-12 13:45:34 +02:00
parent 85a126ec5d
commit ae4a832730
4 changed files with 171 additions and 78 deletions
--- a/modules/video_coding/timing/frame_delay_delta_kalman_filter.cc
+++ b/modules/video_coding/timing/frame_delay_delta_kalman_filter.cc
@ -20,27 +20,22 @@ constexpr double kThetaLow = 0.000001;
 }

 FrameDelayDeltaKalmanFilter::FrameDelayDeltaKalmanFilter() {
-  Reset();
-}
+  // TODO(brandtr): Is there a factor 1000 missing here?
+  theta_[0] = 1 / (512e3 / 8);  // Unit: [1 / bytes per ms]
+  theta_[1] = 0;                // Unit: [ms]

-// Resets the JitterEstimate.
-void FrameDelayDeltaKalmanFilter::Reset() {
-  theta_[0] = 1 / (512e3 / 8);
-  theta_[1] = 0;
-
-  theta_cov_[0][0] = 1e-4;
-  theta_cov_[1][1] = 1e2;
+  theta_cov_[0][0] = 1e-4;  // Unit: [(1 / bytes per ms)^2]
+  theta_cov_[1][1] = 1e2;   // Unit: [ms^2]
  theta_cov_[0][1] = theta_cov_[1][0] = 0;
-  q_cov_[0][0] = 2.5e-10;
-  q_cov_[1][1] = 1e-10;
+
+  q_cov_[0][0] = 2.5e-10;  // Unit: [(1 / bytes per ms)^2]
+  q_cov_[1][1] = 1e-10;    // Unit: [ms^2]
  q_cov_[0][1] = q_cov_[1][0] = 0;
 }

-// Updates Kalman estimate of the channel.
-// The caller is expected to sanity check the inputs.
-void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
-    TimeDelta frame_delay,
-    double delta_frame_size_bytes,
+void FrameDelayDeltaKalmanFilter::PredictAndUpdate(
+    TimeDelta frame_delay_variation,
+    double frame_size_variation_bytes,
    DataSize max_frame_size,
    double var_noise) {
  double Mh[2];
@ -63,21 +58,23 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
  // h = [dFS 1]
  // Mh = M*h'
  // hMh_sigma = h*M*h' + R
-  Mh[0] = theta_cov_[0][0] * delta_frame_size_bytes + theta_cov_[0][1];
-  Mh[1] = theta_cov_[1][0] * delta_frame_size_bytes + theta_cov_[1][1];
+  Mh[0] = theta_cov_[0][0] * frame_size_variation_bytes + theta_cov_[0][1];
+  Mh[1] = theta_cov_[1][0] * frame_size_variation_bytes + theta_cov_[1][1];
  // sigma weights measurements with a small deltaFS as noisy and
  // measurements with large deltaFS as good
  if (max_frame_size < DataSize::Bytes(1)) {
    return;
  }
-  double sigma = (300.0 * exp(-fabs(delta_frame_size_bytes) /
+  double sigma = (300.0 * exp(-fabs(frame_size_variation_bytes) /
                              (1e0 * max_frame_size.bytes())) +
                  1) *
                 sqrt(var_noise);
  if (sigma < 1.0) {
    sigma = 1.0;
  }
-  hMh_sigma = delta_frame_size_bytes * Mh[0] + Mh[1] + sigma;
+  // TODO(brandtr): Shouldn't we add sigma^2 here? Otherwise, the dimensional
+  // analysis fails.
+  hMh_sigma = frame_size_variation_bytes * Mh[0] + Mh[1] + sigma;
  if ((hMh_sigma < 1e-9 && hMh_sigma >= 0) ||
      (hMh_sigma > -1e-9 && hMh_sigma <= 0)) {
    RTC_DCHECK_NOTREACHED();
@ -88,8 +85,8 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(

  // Correction
  // theta = theta + K*(dT - h*theta)
-  measureRes =
-      frame_delay.ms() - (delta_frame_size_bytes * theta_[0] + theta_[1]);
+  measureRes = frame_delay_variation.ms() -
+               (frame_size_variation_bytes * theta_[0] + theta_[1]);
  theta_[0] += kalmanGain[0] * measureRes;
  theta_[1] += kalmanGain[1] * measureRes;

@ -100,14 +97,14 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
  // M = (I - K*h)*M
  t00 = theta_cov_[0][0];
  t01 = theta_cov_[0][1];
-  theta_cov_[0][0] = (1 - kalmanGain[0] * delta_frame_size_bytes) * t00 -
+  theta_cov_[0][0] = (1 - kalmanGain[0] * frame_size_variation_bytes) * t00 -
                     kalmanGain[0] * theta_cov_[1][0];
-  theta_cov_[0][1] = (1 - kalmanGain[0] * delta_frame_size_bytes) * t01 -
+  theta_cov_[0][1] = (1 - kalmanGain[0] * frame_size_variation_bytes) * t01 -
                     kalmanGain[0] * theta_cov_[1][1];
  theta_cov_[1][0] = theta_cov_[1][0] * (1 - kalmanGain[1]) -
-                     kalmanGain[1] * delta_frame_size_bytes * t00;
+                     kalmanGain[1] * frame_size_variation_bytes * t00;
  theta_cov_[1][1] = theta_cov_[1][1] * (1 - kalmanGain[1]) -
-                     kalmanGain[1] * delta_frame_size_bytes * t01;
+                     kalmanGain[1] * frame_size_variation_bytes * t01;

  // Covariance matrix, must be positive semi-definite.
  RTC_DCHECK(theta_cov_[0][0] + theta_cov_[1][1] >= 0 &&
@ -117,16 +114,18 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel(
             theta_cov_[0][0] >= 0);
 }

-// Calculate difference in delay between a sample and the expected delay
-// estimated by the Kalman filter
-double FrameDelayDeltaKalmanFilter::DeviationFromExpectedDelay(
-    TimeDelta frame_delay,
-    double delta_frame_size_bytes) const {
-  return frame_delay.ms() - (theta_[0] * delta_frame_size_bytes + theta_[1]);
+double FrameDelayDeltaKalmanFilter::GetFrameDelayVariationEstimateSizeBased(
+    double frame_size_variation_bytes) const {
+  // Unit: [1 / bytes per millisecond] * [bytes] = [milliseconds].
+  return theta_[0] * frame_size_variation_bytes;
 }

-double FrameDelayDeltaKalmanFilter::GetSlope() const {
-  return theta_[0];
+double FrameDelayDeltaKalmanFilter::GetFrameDelayVariationEstimateTotal(
+    double frame_size_variation_bytes) const {
+  double frame_transmission_delay_ms =
+      GetFrameDelayVariationEstimateSizeBased(frame_size_variation_bytes);
+  double link_queuing_delay_ms = theta_[1];
+  return frame_transmission_delay_ms + link_queuing_delay_ms;
 }

 }  // namespace webrtc
--- a/modules/video_coding/timing/frame_delay_delta_kalman_filter.h
+++ b/modules/video_coding/timing/frame_delay_delta_kalman_filter.h
@ -16,51 +16,82 @@

 namespace webrtc {

+// This class uses a linear Kalman filter (see
+// https://en.wikipedia.org/wiki/Kalman_filter) to estimate the frame delay
+// variation (i.e., the difference in transmission time between a frame and the
+// prior frame) for a frame, given its size variation in bytes (i.e., the
+// difference in size between a frame and the prior frame). The idea is that,
+// given a fixed link bandwidth, a larger frame (in bytes) would take
+// proportionally longer to arrive than a correspondingly smaller frame. Using
+// the variations of frame delay and frame size, the underlying bandwidth and
+// queuing delay variation of the network link can be estimated.
+//
+// The filter takes as input the frame delay variation, the difference between
+// the actual inter-frame arrival time and the expected inter-frame arrival time
+// (based on RTP timestamp), and frame size variation, the inter-frame size
+// delta for a single frame. The frame delay variation is seen as the
+// measurement and the frame size variation is used in the observation model.
+// The hidden state of the filter is the link bandwidth and queuing delay
+// buildup. The estimated state can be used to get the expected frame delay
+// variation for a frame, given its frame size variation. This information can
+// then be used to estimate the frame delay variation coming from network
+// jitter.
+//
+// Mathematical details:
+//  * The state (`x` in Wikipedia notation) is a 2x1 vector comprising the
+//    reciprocal of link bandwidth [1 / bytes per ms] and the
+//    link queuing delay buildup [ms].
+//  * The state transition matrix (`F`) is the 2x2 identity matrix, meaning that
+//    link bandwidth and link queuing delay buildup are modeled as independent.
+//  * The measurement (`z`) is the (scalar) frame delay variation [ms].
+//  * The observation matrix (`H`) is a 1x2 vector set as
+//    `{frame_size_variation [bytes], 1.0}`.
+//  * The process noise covariance (`Q`) is a constant 2x2 diagonal matrix
+//    [(1 / bytes per ms)^2, ms^2].
+//  * The observation noise covariance (`r`) is a scalar [ms^2] that is
+//    determined externally to this class.
 class FrameDelayDeltaKalmanFilter {
 public:
  FrameDelayDeltaKalmanFilter();
  ~FrameDelayDeltaKalmanFilter() = default;

-  // Resets the estimate to the initial state.
-  void Reset();
-
-  // Updates the Kalman filter for the line describing the frame size dependent
-  // jitter.
+  // Predicts and updates the filter, given a new pair of frame delay variation
+  // and frame size variation.
  //
-  // Input:
-  //          - frame_delay
-  //              Delay-delta calculated by UTILDelayEstimate.
-  //          - delta_frame_size_bytes
-  //              Frame size delta, i.e. frame size at time T minus frame size
-  //              at time T-1. (May be negative!)
-  //          - max_frame_size
-  //              Filtered version of the largest frame size received.
-  //          - var_noise
+  // Inputs:
+  // `frame_delay_variation`:
+  //    Frame delay variation as calculated by the `InterFrameDelay` estimator.
+  //
+  // `frame_size_variation_bytes`:
+  //    Frame size variation, i.e., the current frame size minus the previous
+  //    frame size (in bytes). Note that this quantity may be negative.
+  //
+  // `max_frame_size`:
+  //    Filtered largest frame size received since the last reset.
+  //
+  // `var_noise`:
  //    Variance of the estimated random jitter.
-  void KalmanEstimateChannel(TimeDelta frame_delay,
-                             double delta_frame_size_bytes,
+  void PredictAndUpdate(TimeDelta frame_delay_variation,
+                        double frame_size_variation_bytes,
                        DataSize max_frame_size,
                        double var_noise);

-  // Calculates the difference in delay between a sample and the expected delay
-  // estimated by the Kalman filter.
-  //
-  // Input:
-  //          - frame_delay       : Delay-delta calculated by UTILDelayEstimate.
-  //          - delta_frame_size_bytes : Frame size delta, i.e. frame size at
-  //                                     time T minus frame size at time T-1.
-  //
-  // Return value               : The delay difference in ms.
-  double DeviationFromExpectedDelay(TimeDelta frame_delay,
-                                    double delta_frame_size_bytes) const;
+  // Given a frame size variation, returns the estimated frame delay variation
+  // explained by the link bandwidth alone.
+  double GetFrameDelayVariationEstimateSizeBased(
+      double frame_size_variation_bytes) const;

-  // Returns the estimated slope.
-  double GetSlope() const;
+  // Given a frame size variation, returns the estimated frame delay variation
+  // explained by both link bandwidth and link queuing delay buildup.
+  double GetFrameDelayVariationEstimateTotal(
+      double frame_size_variation_bytes) const;

 private:
-  double theta_[2];         // Estimated line parameters (slope, offset)
-  double theta_cov_[2][2];  // Estimate covariance
-  double q_cov_[2][2];      // Process noise covariance
+  double theta_[2];         // Estimated line parameters:
+                            // (bandwidth [1 / bytes per ms],
+                            //  queue buildup [ms])
+  double theta_cov_[2][2];  // Estimate covariance.
+  double q_cov_[2][2];      // Process noise covariance.
 };

 }  // namespace webrtc
--- a/modules/video_coding/timing/frame_delay_delta_kalman_filter_unittest.cc
+++ b/modules/video_coding/timing/frame_delay_delta_kalman_filter_unittest.cc
@ -11,18 +11,80 @@
 #include "modules/video_coding/timing/frame_delay_delta_kalman_filter.h"

 #include "api/units/data_size.h"
+#include "api/units/frequency.h"
 #include "api/units/time_delta.h"
 #include "test/gtest.h"

 namespace webrtc {
 namespace {

-TEST(FrameDelayDeltaKalmanFilterTest, InitialBandwidthStateIs512kbps) {
+// This test verifies that the initial filter state (link bandwidth, link
+// propagation delay) is such that a frame of size zero would take no time to
+// propagate.
+TEST(FrameDelayDeltaKalmanFilterTest,
+     InitializedFilterWithZeroSizeFrameTakesNoTimeToPropagate) {
  FrameDelayDeltaKalmanFilter filter;

-  // The slope corresponds to the estimated bandwidth, and the initial value
-  // is set in the implementation.
-  EXPECT_EQ(filter.GetSlope(), 1 / (512e3 / 8));
+  // A zero-sized frame...
+  double frame_size_variation_bytes = 0.0;
+
+  // ...should take no time to propagate due to it's size...
+  EXPECT_EQ(filter.GetFrameDelayVariationEstimateSizeBased(
+                frame_size_variation_bytes),
+            0.0);
+
+  // ...and no time due to the initial link propagation delay being zero.
+  EXPECT_EQ(
+      filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
+      0.0);
+}
+
+// TODO(brandtr): Look into if there is a factor 1000 missing here? It seems
+// unreasonable to have an initial link bandwidth of 512 _mega_bits per second?
+TEST(FrameDelayDeltaKalmanFilterTest,
+     InitializedFilterWithSmallSizeFrameTakesFixedTimeToPropagate) {
+  FrameDelayDeltaKalmanFilter filter;
+
+  // A 1000-byte frame...
+  double frame_size_variation_bytes = 1000.0;
+  // ...should take around `1000.0 / (512e3 / 8.0) = 0.015625 ms` to transmit.
+  double expected_frame_delay_variation_estimate_ms = 1000.0 / (512e3 / 8.0);
+
+  EXPECT_EQ(filter.GetFrameDelayVariationEstimateSizeBased(
+                frame_size_variation_bytes),
+            expected_frame_delay_variation_estimate_ms);
+  EXPECT_EQ(
+      filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
+      expected_frame_delay_variation_estimate_ms);
+}
+
+TEST(FrameDelayDeltaKalmanFilterTest,
+     VerifyConvergenceWithAlternatingDeviations) {
+  FrameDelayDeltaKalmanFilter filter;
+
+  // One frame every 33 ms.
+  int framerate_fps = 30;
+  // Let's assume approximately 10% delay variation.
+  TimeDelta frame_delay_variation = TimeDelta::Millis(3);
+  // With a bitrate of 512 kbps, each frame will be around 2000 bytes.
+  DataSize max_frame_size = DataSize::Bytes(2000);
+  // And again, let's assume 10% size deviation.
+  double frame_size_variation_bytes = 200;
+  double var_noise = 0.1;
+  int test_duration_s = 60;
+
+  for (int i = 0; i < test_duration_s * framerate_fps; ++i) {
+    // For simplicity, assume alternating variations.
+    double sign = (i % 2 == 0) ? 1.0 : -1.0;
+    filter.PredictAndUpdate(sign * frame_delay_variation,
+                            sign * frame_size_variation_bytes, max_frame_size,
+                            var_noise);
+  }
+
+  // Verify that the filter has converged within a margin of 0.1 ms.
+  EXPECT_NEAR(
+      filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes),
+      frame_delay_variation.ms(), 0.1);
 }

 }  // namespace
--- a/modules/video_coding/timing/jitter_estimator.cc
+++ b/modules/video_coding/timing/jitter_estimator.cc
@ -78,7 +78,7 @@ void JitterEstimator::Reset() {
  rtt_filter_.Reset();
  fps_counter_.Reset();

-  kalman_filter_.Reset();
+  kalman_filter_ = FrameDelayDeltaKalmanFilter();
 }

 // Updates the estimates with the new measurements.
@ -129,7 +129,8 @@ void JitterEstimator::UpdateEstimate(TimeDelta frame_delay,
  // the frame size also is large the deviation is probably due to an incorrect
  // line slope.
  double deviation =
-      kalman_filter_.DeviationFromExpectedDelay(frame_delay, delta_frame_bytes);
+      frame_delay.ms() -
+      kalman_filter_.GetFrameDelayVariationEstimateTotal(delta_frame_bytes);

  if (fabs(deviation) < kNumStdDevDelayOutlier * sqrt(var_noise_) ||
      frame_size.bytes() >
@ -146,7 +147,7 @@ void JitterEstimator::UpdateEstimate(TimeDelta frame_delay,
    // frame.
    if (delta_frame_bytes > -0.25 * max_frame_size_.bytes()) {
      // Update the Kalman filter with the new data
-      kalman_filter_.KalmanEstimateChannel(frame_delay, delta_frame_bytes,
+      kalman_filter_.PredictAndUpdate(frame_delay, delta_frame_bytes,
                                      max_frame_size_, var_noise_);
    }
  } else {
@ -228,8 +229,8 @@ double JitterEstimator::NoiseThreshold() const {

 // Calculates the current jitter estimate from the filtered estimates.
 TimeDelta JitterEstimator::CalculateEstimate() {
-  double retMs = kalman_filter_.GetSlope() *
-                     (max_frame_size_.bytes() - avg_frame_size_.bytes()) +
+  double retMs = kalman_filter_.GetFrameDelayVariationEstimateSizeBased(
+                     max_frame_size_.bytes() - avg_frame_size_.bytes()) +
                 NoiseThreshold();

  TimeDelta ret = TimeDelta::Millis(retMs);