
RtpFrameReferenceFinder. BUG=webrtc:5514 Review-Url: https://codereview.webrtc.org/1961053002 Cr-Commit-Position: refs/heads/master@{#12725}
487 lines
17 KiB
C++
487 lines
17 KiB
C++
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
#include "webrtc/modules/video_coding/rtp_frame_reference_finder.h"
|
|
|
|
#include <algorithm>
|
|
#include <limits>
|
|
|
|
#include "webrtc/base/checks.h"
|
|
#include "webrtc/base/logging.h"
|
|
#include "webrtc/modules/video_coding/frame_object.h"
|
|
#include "webrtc/modules/video_coding/packet_buffer.h"
|
|
|
|
namespace webrtc {
|
|
namespace video_coding {
|
|
|
|
// Creates a reference finder that hands every frame whose references have
// been resolved to |frame_callback|.
//
// |last_picture_id_| and |last_unwrap_| start at -1; the VP8/VP9 paths in
// this file test for -1 to detect that no picture id has been seen yet and
// seed both values from the first frame that carries one.
RtpFrameReferenceFinder::RtpFrameReferenceFinder(
    OnCompleteFrameCallback* frame_callback)
    : last_picture_id_(-1),
      last_unwrap_(-1),
      current_ss_idx_(0),
      frame_callback_(frame_callback) {}
|
|
|
|
void RtpFrameReferenceFinder::ManageFrame(
|
|
std::unique_ptr<RtpFrameObject> frame) {
|
|
rtc::CritScope lock(&crit_);
|
|
switch (frame->codec_type()) {
|
|
case kVideoCodecULPFEC:
|
|
case kVideoCodecRED:
|
|
case kVideoCodecUnknown:
|
|
RTC_NOTREACHED();
|
|
break;
|
|
case kVideoCodecVP8:
|
|
ManageFrameVp8(std::move(frame));
|
|
break;
|
|
case kVideoCodecVP9:
|
|
ManageFrameVp9(std::move(frame));
|
|
break;
|
|
case kVideoCodecH264:
|
|
case kVideoCodecI420:
|
|
case kVideoCodecGeneric:
|
|
ManageFrameGeneric(std::move(frame));
|
|
break;
|
|
}
|
|
}
|
|
|
|
void RtpFrameReferenceFinder::RetryStashedFrames() {
|
|
size_t num_stashed_frames = stashed_frames_.size();
|
|
|
|
// Clean up stashed frames if there are too many.
|
|
while (stashed_frames_.size() > kMaxStashedFrames)
|
|
stashed_frames_.pop();
|
|
|
|
// Since frames are stashed if there is not enough data to determine their
|
|
// frame references we should at most check |stashed_frames_.size()| in
|
|
// order to not pop and push frames in and endless loop.
|
|
for (size_t i = 0; i < num_stashed_frames && !stashed_frames_.empty(); ++i) {
|
|
std::unique_ptr<RtpFrameObject> frame = std::move(stashed_frames_.front());
|
|
stashed_frames_.pop();
|
|
ManageFrame(std::move(frame));
|
|
}
|
|
}
|
|
|
|
void RtpFrameReferenceFinder::ManageFrameGeneric(
|
|
std::unique_ptr<RtpFrameObject> frame) {
|
|
if (frame->frame_type() == kVideoFrameKey)
|
|
last_seq_num_gop_[frame->last_seq_num()] = frame->last_seq_num();
|
|
|
|
// We have received a frame but not yet a keyframe, stash this frame.
|
|
if (last_seq_num_gop_.empty()) {
|
|
stashed_frames_.emplace(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
// Clean up info for old keyframes but make sure to keep info
|
|
// for the last keyframe.
|
|
auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100);
|
|
if (clean_to != last_seq_num_gop_.end())
|
|
last_seq_num_gop_.erase(last_seq_num_gop_.begin(), clean_to);
|
|
|
|
// Find the last sequence number of the last frame for the keyframe
|
|
// that this frame indirectly references.
|
|
auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
|
|
seq_num_it--;
|
|
|
|
// Make sure the packet sequence numbers are continuous, otherwise stash
|
|
// this frame.
|
|
if (frame->frame_type() == kVideoFrameDelta) {
|
|
if (seq_num_it->second !=
|
|
static_cast<uint16_t>(frame->first_seq_num() - 1)) {
|
|
stashed_frames_.emplace(std::move(frame));
|
|
return;
|
|
}
|
|
}
|
|
|
|
RTC_DCHECK(AheadOrAt(frame->last_seq_num(), seq_num_it->first));
|
|
|
|
// Since keyframes can cause reordering we can't simply assign the
|
|
// picture id according to some incrementing counter.
|
|
frame->picture_id = frame->last_seq_num();
|
|
frame->num_references = frame->frame_type() == kVideoFrameDelta;
|
|
frame->references[0] = seq_num_it->second;
|
|
seq_num_it->second = frame->picture_id;
|
|
|
|
last_picture_id_ = frame->picture_id;
|
|
frame_callback_->OnCompleteFrame(std::move(frame));
|
|
RetryStashedFrames();
|
|
}
|
|
|
|
void RtpFrameReferenceFinder::ManageFrameVp8(
|
|
std::unique_ptr<RtpFrameObject> frame) {
|
|
RTPVideoTypeHeader* rtp_codec_header = frame->GetCodecHeader();
|
|
if (!rtp_codec_header)
|
|
return;
|
|
|
|
const RTPVideoHeaderVP8& codec_header = rtp_codec_header->VP8;
|
|
|
|
if (codec_header.pictureId == kNoPictureId ||
|
|
codec_header.temporalIdx == kNoTemporalIdx ||
|
|
codec_header.tl0PicIdx == kNoTl0PicIdx) {
|
|
ManageFrameGeneric(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
frame->picture_id = codec_header.pictureId % kPicIdLength;
|
|
|
|
if (last_unwrap_ == -1)
|
|
last_unwrap_ = codec_header.pictureId;
|
|
|
|
if (last_picture_id_ == -1)
|
|
last_picture_id_ = frame->picture_id;
|
|
|
|
// Find if there has been a gap in fully received frames and save the picture
|
|
// id of those frames in |not_yet_received_frames_|.
|
|
if (AheadOf<uint16_t, kPicIdLength>(frame->picture_id, last_picture_id_)) {
|
|
last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
|
|
while (last_picture_id_ != frame->picture_id) {
|
|
not_yet_received_frames_.insert(last_picture_id_);
|
|
last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
|
|
}
|
|
}
|
|
|
|
// Clean up info for base layers that are too old.
|
|
uint8_t old_tl0_pic_idx = codec_header.tl0PicIdx - kMaxLayerInfo;
|
|
auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
|
|
layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
|
|
|
|
// Clean up info about not yet received frames that are too old.
|
|
uint16_t old_picture_id =
|
|
Subtract<kPicIdLength>(frame->picture_id, kMaxNotYetReceivedFrames);
|
|
auto clean_frames_to = not_yet_received_frames_.lower_bound(old_picture_id);
|
|
not_yet_received_frames_.erase(not_yet_received_frames_.begin(),
|
|
clean_frames_to);
|
|
|
|
if (frame->frame_type() == kVideoFrameKey) {
|
|
frame->num_references = 0;
|
|
layer_info_[codec_header.tl0PicIdx].fill(-1);
|
|
CompletedFrameVp8(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
auto layer_info_it = layer_info_.find(codec_header.temporalIdx == 0
|
|
? codec_header.tl0PicIdx - 1
|
|
: codec_header.tl0PicIdx);
|
|
|
|
// If we don't have the base layer frame yet, stash this frame.
|
|
if (layer_info_it == layer_info_.end()) {
|
|
stashed_frames_.emplace(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
// A non keyframe base layer frame has been received, copy the layer info
|
|
// from the previous base layer frame and set a reference to the previous
|
|
// base layer frame.
|
|
if (codec_header.temporalIdx == 0) {
|
|
layer_info_it =
|
|
layer_info_
|
|
.insert(make_pair(codec_header.tl0PicIdx, layer_info_it->second))
|
|
.first;
|
|
frame->num_references = 1;
|
|
frame->references[0] = layer_info_it->second[0];
|
|
CompletedFrameVp8(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
// Layer sync frame, this frame only references its base layer frame.
|
|
if (codec_header.layerSync) {
|
|
frame->num_references = 1;
|
|
frame->references[0] = layer_info_it->second[0];
|
|
|
|
CompletedFrameVp8(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
// Find all references for this frame.
|
|
frame->num_references = 0;
|
|
for (uint8_t layer = 0; layer <= codec_header.temporalIdx; ++layer) {
|
|
RTC_DCHECK_NE(-1, layer_info_it->second[layer]);
|
|
|
|
// If we have not yet received a frame between this frame and the referenced
|
|
// frame then we have to wait for that frame to be completed first.
|
|
auto not_received_frame_it =
|
|
not_yet_received_frames_.upper_bound(layer_info_it->second[layer]);
|
|
if (not_received_frame_it != not_yet_received_frames_.end() &&
|
|
AheadOf<uint16_t, kPicIdLength>(frame->picture_id,
|
|
*not_received_frame_it)) {
|
|
stashed_frames_.emplace(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
++frame->num_references;
|
|
frame->references[layer] = layer_info_it->second[layer];
|
|
}
|
|
|
|
CompletedFrameVp8(std::move(frame));
|
|
}
|
|
|
|
void RtpFrameReferenceFinder::CompletedFrameVp8(
|
|
std::unique_ptr<RtpFrameObject> frame) {
|
|
RTPVideoTypeHeader* rtp_codec_header = frame->GetCodecHeader();
|
|
if (!rtp_codec_header)
|
|
return;
|
|
|
|
const RTPVideoHeaderVP8& codec_header = rtp_codec_header->VP8;
|
|
|
|
uint8_t tl0_pic_idx = codec_header.tl0PicIdx;
|
|
uint8_t temporal_index = codec_header.temporalIdx;
|
|
auto layer_info_it = layer_info_.find(tl0_pic_idx);
|
|
|
|
// Update this layer info and newer.
|
|
while (layer_info_it != layer_info_.end()) {
|
|
if (layer_info_it->second[temporal_index] != -1 &&
|
|
AheadOf<uint16_t, kPicIdLength>(layer_info_it->second[temporal_index],
|
|
frame->picture_id)) {
|
|
// The frame was not newer, then no subsequent layer info have to be
|
|
// update.
|
|
break;
|
|
}
|
|
|
|
layer_info_it->second[codec_header.temporalIdx] = frame->picture_id;
|
|
++tl0_pic_idx;
|
|
layer_info_it = layer_info_.find(tl0_pic_idx);
|
|
}
|
|
not_yet_received_frames_.erase(frame->picture_id);
|
|
|
|
for (size_t i = 0; i < frame->num_references; ++i)
|
|
frame->references[i] = UnwrapPictureId(frame->references[i]);
|
|
frame->picture_id = UnwrapPictureId(frame->picture_id);
|
|
|
|
frame_callback_->OnCompleteFrame(std::move(frame));
|
|
RetryStashedFrames();
|
|
}
|
|
|
|
void RtpFrameReferenceFinder::ManageFrameVp9(
|
|
std::unique_ptr<RtpFrameObject> frame) {
|
|
RTPVideoTypeHeader* rtp_codec_header = frame->GetCodecHeader();
|
|
if (!rtp_codec_header)
|
|
return;
|
|
|
|
const RTPVideoHeaderVP9& codec_header = rtp_codec_header->VP9;
|
|
|
|
if (codec_header.picture_id == kNoPictureId) {
|
|
ManageFrameGeneric(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
frame->spatial_layer = codec_header.spatial_idx;
|
|
frame->inter_layer_predicted = codec_header.inter_layer_predicted;
|
|
frame->picture_id = codec_header.picture_id % kPicIdLength;
|
|
|
|
if (last_unwrap_ == -1)
|
|
last_unwrap_ = codec_header.picture_id;
|
|
|
|
if (last_picture_id_ == -1)
|
|
last_picture_id_ = frame->picture_id;
|
|
|
|
if (codec_header.flexible_mode) {
|
|
frame->num_references = codec_header.num_ref_pics;
|
|
for (size_t i = 0; i < frame->num_references; ++i) {
|
|
frame->references[i] =
|
|
Subtract<1 << 16>(frame->picture_id, codec_header.pid_diff[i]);
|
|
}
|
|
|
|
CompletedFrameVp9(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
if (codec_header.ss_data_available) {
|
|
// Scalability structures can only be sent with tl0 frames.
|
|
if (codec_header.temporal_idx != 0) {
|
|
LOG(LS_WARNING) << "Received scalability structure on a non base layer"
|
|
" frame. Scalability structure ignored.";
|
|
} else {
|
|
current_ss_idx_ = Add<kMaxGofSaved>(current_ss_idx_, 1);
|
|
scalability_structures_[current_ss_idx_] = codec_header.gof;
|
|
scalability_structures_[current_ss_idx_].pid_start = frame->picture_id;
|
|
|
|
auto pid_and_gof = std::make_pair(
|
|
frame->picture_id, &scalability_structures_[current_ss_idx_]);
|
|
gof_info_.insert(std::make_pair(codec_header.tl0_pic_idx, pid_and_gof));
|
|
}
|
|
}
|
|
|
|
// Clean up info for base layers that are too old.
|
|
uint8_t old_tl0_pic_idx = codec_header.tl0_pic_idx - kMaxGofSaved;
|
|
auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx);
|
|
gof_info_.erase(gof_info_.begin(), clean_gof_info_to);
|
|
|
|
if (frame->frame_type() == kVideoFrameKey) {
|
|
// When using GOF all keyframes must include the scalability structure.
|
|
if (!codec_header.ss_data_available)
|
|
LOG(LS_WARNING) << "Received keyframe without scalability structure";
|
|
|
|
frame->num_references = 0;
|
|
GofInfoVP9* gof = gof_info_.find(codec_header.tl0_pic_idx)->second.second;
|
|
FrameReceivedVp9(frame->picture_id, *gof);
|
|
CompletedFrameVp9(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
auto gof_info_it = gof_info_.find(
|
|
(codec_header.temporal_idx == 0 && !codec_header.ss_data_available)
|
|
? codec_header.tl0_pic_idx - 1
|
|
: codec_header.tl0_pic_idx);
|
|
|
|
// Gof info for this frame is not available yet, stash this frame.
|
|
if (gof_info_it == gof_info_.end()) {
|
|
stashed_frames_.emplace(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
GofInfoVP9* gof = gof_info_it->second.second;
|
|
uint16_t picture_id_tl0 = gof_info_it->second.first;
|
|
|
|
FrameReceivedVp9(frame->picture_id, *gof);
|
|
|
|
// Make sure we don't miss any frame that could potentially have the
|
|
// up switch flag set.
|
|
if (MissingRequiredFrameVp9(frame->picture_id, *gof)) {
|
|
stashed_frames_.emplace(std::move(frame));
|
|
return;
|
|
}
|
|
|
|
if (codec_header.temporal_up_switch) {
|
|
auto pid_tidx =
|
|
std::make_pair(frame->picture_id, codec_header.temporal_idx);
|
|
up_switch_.insert(pid_tidx);
|
|
}
|
|
|
|
// If this is a base layer frame that contains a scalability structure
|
|
// then gof info has already been inserted earlier, so we only want to
|
|
// insert if we haven't done so already.
|
|
if (codec_header.temporal_idx == 0 && !codec_header.ss_data_available) {
|
|
auto pid_and_gof = std::make_pair(frame->picture_id, gof);
|
|
gof_info_.insert(std::make_pair(codec_header.tl0_pic_idx, pid_and_gof));
|
|
}
|
|
|
|
// Clean out old info about up switch frames.
|
|
uint16_t old_picture_id = Subtract<kPicIdLength>(last_picture_id_, 50);
|
|
auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id);
|
|
up_switch_.erase(up_switch_.begin(), up_switch_erase_to);
|
|
|
|
RTC_DCHECK(
|
|
(AheadOrAt<uint16_t, kPicIdLength>(frame->picture_id, picture_id_tl0)));
|
|
|
|
size_t diff =
|
|
ForwardDiff<uint16_t, kPicIdLength>(gof->pid_start, frame->picture_id);
|
|
size_t gof_idx = diff % gof->num_frames_in_gof;
|
|
|
|
// Populate references according to the scalability structure.
|
|
frame->num_references = gof->num_ref_pics[gof_idx];
|
|
for (size_t i = 0; i < frame->num_references; ++i) {
|
|
frame->references[i] =
|
|
Subtract<kPicIdLength>(frame->picture_id, gof->pid_diff[gof_idx][i]);
|
|
|
|
// If this is a reference to a frame earlier than the last up switch point,
|
|
// then ignore this reference.
|
|
if (UpSwitchInIntervalVp9(frame->picture_id, codec_header.temporal_idx,
|
|
frame->references[i])) {
|
|
--frame->num_references;
|
|
}
|
|
}
|
|
|
|
CompletedFrameVp9(std::move(frame));
|
|
}
|
|
|
|
bool RtpFrameReferenceFinder::MissingRequiredFrameVp9(uint16_t picture_id,
|
|
const GofInfoVP9& gof) {
|
|
size_t diff = ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, picture_id);
|
|
size_t gof_idx = diff % gof.num_frames_in_gof;
|
|
size_t temporal_idx = gof.temporal_idx[gof_idx];
|
|
|
|
// For every reference this frame has, check if there is a frame missing in
|
|
// the interval (|ref_pid|, |picture_id|) in any of the lower temporal
|
|
// layers. If so, we are missing a required frame.
|
|
uint8_t num_references = gof.num_ref_pics[gof_idx];
|
|
for (size_t i = 0; i < num_references; ++i) {
|
|
uint16_t ref_pid =
|
|
Subtract<kPicIdLength>(picture_id, gof.pid_diff[gof_idx][i]);
|
|
for (size_t l = 0; l < temporal_idx; ++l) {
|
|
auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
|
|
if (missing_frame_it != missing_frames_for_layer_[l].end() &&
|
|
AheadOf<uint16_t, kPicIdLength>(picture_id, *missing_frame_it)) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void RtpFrameReferenceFinder::FrameReceivedVp9(uint16_t picture_id,
|
|
const GofInfoVP9& gof) {
|
|
RTC_DCHECK_NE(-1, last_picture_id_);
|
|
|
|
// If there is a gap, find which temporal layer the missing frames
|
|
// belong to and add the frame as missing for that temporal layer.
|
|
// Otherwise, remove this frame from the set of missing frames.
|
|
if (AheadOf<uint16_t, kPicIdLength>(picture_id, last_picture_id_)) {
|
|
size_t diff =
|
|
ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, last_picture_id_);
|
|
size_t gof_idx = diff % gof.num_frames_in_gof;
|
|
|
|
last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
|
|
while (last_picture_id_ != picture_id) {
|
|
++gof_idx;
|
|
RTC_DCHECK_NE(0ul, gof_idx % gof.num_frames_in_gof);
|
|
size_t temporal_idx = gof.temporal_idx[gof_idx];
|
|
missing_frames_for_layer_[temporal_idx].insert(last_picture_id_);
|
|
last_picture_id_ = Add<kPicIdLength>(last_picture_id_, 1);
|
|
}
|
|
} else {
|
|
size_t diff =
|
|
ForwardDiff<uint16_t, kPicIdLength>(gof.pid_start, picture_id);
|
|
size_t gof_idx = diff % gof.num_frames_in_gof;
|
|
size_t temporal_idx = gof.temporal_idx[gof_idx];
|
|
missing_frames_for_layer_[temporal_idx].erase(picture_id);
|
|
}
|
|
}
|
|
|
|
// Returns true if |up_switch_| holds an entry that lies after |pid_ref| and
// before |picture_id| and whose temporal layer is lower than |temporal_idx|.
bool RtpFrameReferenceFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
                                                    uint8_t temporal_idx,
                                                    uint16_t pid_ref) {
  auto candidate = up_switch_.upper_bound(pid_ref);

  // Walk the up switch points that follow the reference, stopping at the end
  // of the container or at the first one that is not before |picture_id|.
  while (candidate != up_switch_.end() &&
         AheadOf<uint16_t, kPicIdLength>(picture_id, candidate->first)) {
    if (candidate->second < temporal_idx) {
      return true;
    }
    ++candidate;
  }

  return false;
}
|
|
|
|
void RtpFrameReferenceFinder::CompletedFrameVp9(
|
|
std::unique_ptr<RtpFrameObject> frame) {
|
|
for (size_t i = 0; i < frame->num_references; ++i)
|
|
frame->references[i] = UnwrapPictureId(frame->references[i]);
|
|
frame->picture_id = UnwrapPictureId(frame->picture_id);
|
|
|
|
frame_callback_->OnCompleteFrame(std::move(frame));
|
|
RetryStashedFrames();
|
|
}
|
|
|
|
uint16_t RtpFrameReferenceFinder::UnwrapPictureId(uint16_t picture_id) {
|
|
RTC_DCHECK_NE(-1, last_unwrap_);
|
|
|
|
uint16_t unwrap_truncated = last_unwrap_ % kPicIdLength;
|
|
uint16_t diff = MinDiff<uint16_t, kPicIdLength>(unwrap_truncated, picture_id);
|
|
|
|
if (AheadOf<uint16_t, kPicIdLength>(picture_id, unwrap_truncated))
|
|
last_unwrap_ = Add<1 << 16>(last_unwrap_, diff);
|
|
else
|
|
last_unwrap_ = Subtract<1 << 16>(last_unwrap_, diff);
|
|
|
|
return last_unwrap_;
|
|
}
|
|
|
|
} // namespace video_coding
|
|
} // namespace webrtc
|