Refactor vp8 temporal layers with inferred sync and search order

This CL introduces a few changes to the default VP8 temporal layers: * The pattern is now reset on keyframes * The sync flag is inferred rather than hard-coded * Support is added for buffer search order Bug: webrtc:9012 Change-Id: Ice19d32413d20982368a01a7d2540d155e185ad4 Reviewed-on: https://webrtc-review.googlesource.com/91863 Reviewed-by: Sergey Silkin <ssilkin@webrtc.org> Commit-Queue: Erik Språng <sprang@webrtc.org> Cr-Commit-Position: refs/heads/master@{#24288}
2018-08-13 16:05:33 +02:00
parent fa4e185684
commit b75d6b8dc3
10 changed files with 813 additions and 284 deletions
--- a/modules/video_coding/codecs/vp8/temporal_layers.h
+++ b/modules/video_coding/codecs/vp8/temporal_layers.h
@ -22,23 +22,63 @@

 namespace webrtc {

+// Some notes on the prerequisites of the TemporalLayers interface.
+// * Implementations of TemporalLayers may not contain internal synchronization
+//   so caller must make sure doing so thread safe.
+// * The encoder is assumed to encode all frames in order, and callbacks to
+//   PopulateCodecSpecific() / FrameEncoded() must happen in the same order.
+//
+// This means that in the case of pipelining encoders, it is OK to have a chain
+// of calls such as this:
+// - UpdateLayerConfig(timestampA)
+// - UpdateLayerConfig(timestampB)
+// - PopulateCodecSpecific(timestampA, ...)
+// - UpdateLayerConfig(timestampC)
+// - FrameEncoded(timestampA, 1234, ...)
+// - FrameEncoded(timestampB, 0, ...)
+// - PopulateCodecSpecific(timestampC, ...)
+// - FrameEncoded(timestampC, 1234, ...)
+// Note that UpdateLayerConfig() for a new frame can happen before
+// FrameEncoded() for a previous one, but calls themselves must be both
+// synchronized (e.g. run on a task queue) and in order (per type).
+
 struct CodecSpecificInfoVP8;
+enum class Vp8BufferReference : uint8_t {
+  kNone = 0,
+  kLast = 1,
+  kGolden = 2,
+  kAltref = 4
+};

 struct Vp8EncoderConfig {
+  // Number of active temporal layers. Set to 0 if not used.
  unsigned int ts_number_layers;
+  // Arrays of length |ts_number_layers|, indicating (cumulative) target bitrate
+  // and rate decimator (e.g. 4 if every 4th frame is in the given layer) for
+  // each active temporal layer, starting with temporal id 0.
  unsigned int ts_target_bitrate[VP8_TS_MAX_LAYERS];
  unsigned int ts_rate_decimator[VP8_TS_MAX_LAYERS];
+
+  // The periodicity of the temporal pattern. Set to 0 if not used.
  unsigned int ts_periodicity;
+  // Array of length |ts_periodicity| indicating the sequence of temporal id's
+  // to assign to incoming frames.
  unsigned int ts_layer_id[VP8_TS_MAX_PERIODICITY];
+
+  // Target bitrate, in bps.
  unsigned int rc_target_bitrate;
+
+  // Clamp QP to min/max. Use 0 to disable clamping.
  unsigned int rc_min_quantizer;
  unsigned int rc_max_quantizer;
 };

+// This interface defines a way of getting the encoder settings needed to
+// realize a temporal layer structure of predefined size.
 class TemporalLayersChecker;
 class TemporalLayers {
 public:
-  enum BufferFlags {
+  enum BufferFlags : int {
    kNone = 0,
    kReference = 1,
    kUpdate = 2,
@ -78,15 +118,15 @@ class TemporalLayers {

    bool freeze_entropy;

-    bool operator==(const FrameConfig& o) const {
-      return drop_frame == o.drop_frame &&
-             last_buffer_flags == o.last_buffer_flags &&
-             golden_buffer_flags == o.golden_buffer_flags &&
-             arf_buffer_flags == o.arf_buffer_flags &&
-             layer_sync == o.layer_sync && freeze_entropy == o.freeze_entropy &&
-             encoder_layer_id == o.encoder_layer_id &&
-             packetizer_temporal_idx == o.packetizer_temporal_idx;
-    }
+    // Indicates in which order the encoder should search the reference buffers
+    // when doing motion prediction. Set to kNone to use unspecified order. Any
+    // buffer indicated here must not have the corresponding no_ref bit set.
+    // If all three buffers can be reference, the one not listed here should be
+    // searched last.
+    Vp8BufferReference first_reference;
+    Vp8BufferReference second_reference;
+
+    bool operator==(const FrameConfig& o) const;
    bool operator!=(const FrameConfig& o) const { return !(*this == o); }

   private:
@ -96,6 +136,8 @@ class TemporalLayers {
                bool freeze_entropy);
  };

+  // Factory for TemporalLayer strategy. Default behavior is a fixed pattern
+  // of temporal layers. See default_temporal_layers.cc
  static std::unique_ptr<TemporalLayers> CreateTemporalLayers(
      const VideoCodec& codec,
      size_t spatial_id);
@ -103,13 +145,7 @@ class TemporalLayers {
      const VideoCodec& codec,
      size_t spatial_id);

-  // Factory for TemporalLayer strategy. Default behavior is a fixed pattern
-  // of temporal layers. See default_temporal_layers.cc
-  virtual ~TemporalLayers() {}
-
-  // Returns the recommended VP8 encode flags needed. May refresh the decoder
-  // and/or update the reference buffers.
-  virtual FrameConfig UpdateLayerConfig(uint32_t timestamp) = 0;
+  virtual ~TemporalLayers() = default;

  // New target bitrate, per temporal layer.
  virtual void OnRatesUpdated(const std::vector<uint32_t>& bitrates_bps,
@ -119,13 +155,35 @@ class TemporalLayers {
  // Returns true iff the configuration was actually modified.
  virtual bool UpdateConfiguration(Vp8EncoderConfig* cfg) = 0;

+  // Returns the recommended VP8 encode flags needed, and moves the temporal
+  // pattern to the next frame.
+  // The timestamp may be used as both a time and a unique identifier, and so
+  // the caller must make sure no two frames use the same timestamp.
+  // The timestamp uses a 90kHz RTP clock.
+  // After calling this method, the actual encoder should be called with the
+  // provided frame configuration, after which:
+  // * On success, call PopulateCodecSpecific() and then FrameEncoded();
+  // * On failure/ frame drop: Call FrameEncoded() with size = 0.
+  virtual FrameConfig UpdateLayerConfig(uint32_t rtp_timestamp) = 0;
+
+  // Called after successful encoding of a frame. The rtp timestamp must match
+  // the one using in UpdateLayerConfig(). Some fields in |vp8_info| may have
+  // already been populated by the encoder, check before overwriting.
+  // |tl_config| is the frame config returned by UpdateLayerConfig() for this
+  // rtp_timestamp;
+  // If |is_keyframe| is true, the flags in |tl_config| will be ignored.
  virtual void PopulateCodecSpecific(
      bool is_keyframe,
      const TemporalLayers::FrameConfig& tl_config,
      CodecSpecificInfoVP8* vp8_info,
-      uint32_t timestamp) = 0;
+      uint32_t rtp_timestamp) = 0;

-  virtual void FrameEncoded(unsigned int size, int qp) = 0;
+  // Called after an encode event. If the frame was dropped, |size_bytes| must
+  // be set to 0. The rtp timestamp must match the one using in
+  // UpdateLayerConfig()
+  virtual void FrameEncoded(uint32_t rtp_timestamp,
+                            size_t size_bytes,
+                            int qp) = 0;
 };

 // Used only inside RTC_DCHECK(). It checks correctness of temporal layers