diff --git a/modules/video_coding/codecs/test/stats.cc b/modules/video_coding/codecs/test/stats.cc index d7bc05bc4c..968d84e984 100644 --- a/modules/video_coding/codecs/test/stats.cc +++ b/modules/video_coding/codecs/test/stats.cc @@ -293,8 +293,9 @@ VideoStatistics Stats::SliceAndCalcVideoStatistic( if (frame_stat.decoding_successful) { ++video_stat.num_decoded_frames; - video_stat.width = frame_stat.decoded_width; - video_stat.height = frame_stat.decoded_height; + video_stat.width = std::max(video_stat.width, frame_stat.decoded_width); + video_stat.height = + std::max(video_stat.height, frame_stat.decoded_height); psnr_y.AddSample(frame_stat.psnr_y); psnr_u.AddSample(frame_stat.psnr_u); diff --git a/modules/video_coding/codecs/test/videoprocessor.cc b/modules/video_coding/codecs/test/videoprocessor.cc index d8df9375ff..4758f752a0 100644 --- a/modules/video_coding/codecs/test/videoprocessor.cc +++ b/modules/video_coding/codecs/test/videoprocessor.cc @@ -171,7 +171,6 @@ VideoProcessor::VideoProcessor(webrtc::VideoEncoder* encoder, config_.codec_settings)), framerate_fps_(0), encode_callback_(this), - decode_callback_(this), input_frame_reader_(input_frame_reader), merged_encoded_frames_(num_simulcast_or_spatial_layers_), encoded_frame_writers_(encoded_frame_writers), @@ -205,11 +204,20 @@ VideoProcessor::VideoProcessor(webrtc::VideoEncoder* encoder, static_cast(config_.NumberOfCores()), config_.max_payload_size_bytes), WEBRTC_VIDEO_CODEC_OK); - for (auto& decoder : *decoders_) { - RTC_CHECK_EQ(decoder->InitDecode(&config_.codec_settings, - static_cast(config_.NumberOfCores())), + + for (size_t simulcast_svc_idx = 0; + simulcast_svc_idx < num_simulcast_or_spatial_layers_; + ++simulcast_svc_idx) { + decode_callback_.push_back( + rtc::MakeUnique( + this, simulcast_svc_idx)); + RTC_CHECK_EQ(decoders_->at(simulcast_svc_idx) + ->InitDecode(&config_.codec_settings, + static_cast(config_.NumberOfCores())), WEBRTC_VIDEO_CODEC_OK); - RTC_CHECK_EQ(decoder->RegisterDecodeCompleteCallback(&decode_callback_), + RTC_CHECK_EQ(decoders_->at(simulcast_svc_idx) + ->RegisterDecodeCompleteCallback( + decode_callback_.at(simulcast_svc_idx).get()), WEBRTC_VIDEO_CODEC_OK); } } @@ -322,9 +330,6 @@ void VideoProcessor::FrameEncoded( size_t simulcast_svc_idx = 0; size_t temporal_idx = 0; GetLayerIndices(codec_specific, &simulcast_svc_idx, &temporal_idx); - const size_t frame_wxh = - encoded_image._encodedWidth * encoded_image._encodedHeight; - frame_wxh_to_simulcast_svc_idx_[frame_wxh] = simulcast_svc_idx; FrameStatistics* frame_stat = stats_->GetFrameWithTimestamp( encoded_image._timeStamp, simulcast_svc_idx); @@ -361,23 +366,52 @@ void VideoProcessor::FrameEncoded( frame_stat->frame_type = encoded_image._frameType; frame_stat->temporal_layer_idx = temporal_idx; frame_stat->simulcast_svc_idx = simulcast_svc_idx; - if (codec_type == kVideoCodecVP9) { - const CodecSpecificInfoVP9& vp9_info = codec_specific.codecSpecific.VP9; - frame_stat->inter_layer_predicted = vp9_info.inter_layer_predicted; - } frame_stat->max_nalu_size_bytes = GetMaxNaluSizeBytes(encoded_image, config_); frame_stat->qp = encoded_image.qp_; + const size_t num_spatial_layers = config_.NumberOfSpatialLayers(); + // TODO(ssilkin): Get actual value. For now assume inter-layer prediction + // is enabled for all frames. + const bool inter_layer_prediction = num_spatial_layers > 1; + bool end_of_superframe = false; + if (codec_type == kVideoCodecVP9) { + const CodecSpecificInfoVP9& vp9_info = codec_specific.codecSpecific.VP9; + frame_stat->inter_layer_predicted = vp9_info.inter_layer_predicted; + end_of_superframe = vp9_info.end_of_superframe; + } + const webrtc::EncodedImage* encoded_image_for_decode = &encoded_image; - if (config_.decode) { - if (config_.NumberOfSpatialLayers() > 1) { - encoded_image_for_decode = MergeAndStoreEncodedImageForSvcDecoding( - encoded_image, codec_type, frame_number, simulcast_svc_idx); + if (config_.decode || encoded_frame_writers_) { + if (num_spatial_layers > 1) { + encoded_image_for_decode = BuildAndStoreSuperframe( + encoded_image, codec_type, frame_number, simulcast_svc_idx, + frame_stat->inter_layer_predicted); + } + } + + if (config_.decode) { + DecodeFrame(*encoded_image_for_decode, simulcast_svc_idx); + + if (end_of_superframe && inter_layer_prediction) { + // If inter-layer prediction is enabled and upper layer was dropped then + // base layer should be passed to upper layer decoder. Otherwise decoder + // won't be able to decode next superframe. + const EncodedImage* base_image = nullptr; + for (size_t spatial_idx = 0; spatial_idx < num_spatial_layers; + ++spatial_idx) { + const bool layer_dropped = + last_decoded_frame_num_[spatial_idx] < frame_number; + + // Ensure current layer was decoded. + RTC_CHECK(layer_dropped == false || spatial_idx != simulcast_svc_idx); + + if (!layer_dropped) { + base_image = &merged_encoded_frames_[spatial_idx]; + } else if (base_image) { + DecodeFrame(*base_image, spatial_idx); + } + } } - frame_stat->decode_start_ns = rtc::TimeNanos(); - frame_stat->decode_return_code = - decoders_->at(simulcast_svc_idx) - ->Decode(*encoded_image_for_decode, false, nullptr); } else { frame_stat->decode_return_code = WEBRTC_VIDEO_CODEC_NO_OUTPUT; } @@ -395,16 +429,14 @@ void VideoProcessor::FrameEncoded( } } -void VideoProcessor::FrameDecoded(const VideoFrame& decoded_frame) { +void VideoProcessor::FrameDecoded(const VideoFrame& decoded_frame, + size_t simulcast_svc_idx) { RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_); // For the highest measurement accuracy of the decode time, the start/stop // time recordings should wrap the Decode call as tightly as possible. const int64_t decode_stop_ns = rtc::TimeNanos(); - // Layer metadata. - const size_t simulcast_svc_idx = - frame_wxh_to_simulcast_svc_idx_.at(decoded_frame.size()); FrameStatistics* frame_stat = stats_->GetFrameWithTimestamp( decoded_frame.timestamp(), simulcast_svc_idx); const size_t frame_number = frame_stat->frame_number; @@ -457,12 +489,23 @@ void VideoProcessor::FrameDecoded(const VideoFrame& decoded_frame) { } } -const webrtc::EncodedImage* -VideoProcessor::MergeAndStoreEncodedImageForSvcDecoding( +void VideoProcessor::DecodeFrame(const EncodedImage& encoded_image, + size_t simulcast_svc_idx) { + RTC_DCHECK_CALLED_SEQUENTIALLY(&sequence_checker_); + FrameStatistics* frame_stat = stats_->GetFrameWithTimestamp( + encoded_image._timeStamp, simulcast_svc_idx); + + frame_stat->decode_start_ns = rtc::TimeNanos(); + frame_stat->decode_return_code = + decoders_->at(simulcast_svc_idx)->Decode(encoded_image, false, nullptr); +} + +const webrtc::EncodedImage* VideoProcessor::BuildAndStoreSuperframe( const EncodedImage& encoded_image, const VideoCodecType codec, size_t frame_number, - size_t simulcast_svc_idx) { + size_t simulcast_svc_idx, + bool inter_layer_predicted) { // Should only be called for SVC. RTC_CHECK_GT(config_.NumberOfSpatialLayers(), 1); @@ -471,7 +514,7 @@ VideoProcessor::MergeAndStoreEncodedImageForSvcDecoding( // Each SVC layer is decoded with dedicated decoder. Find the nearest // non-dropped base frame and merge it and current frame into superframe. - if (simulcast_svc_idx > 0) { + if (inter_layer_predicted) { for (int base_idx = static_cast(simulcast_svc_idx) - 1; base_idx >= 0; --base_idx) { EncodedImage lower_layer = merged_encoded_frames_.at(base_idx); diff --git a/modules/video_coding/codecs/test/videoprocessor.h b/modules/video_coding/codecs/test/videoprocessor.h index 4f1390ad59..a56b83d669 100644 --- a/modules/video_coding/codecs/test/videoprocessor.h +++ b/modules/video_coding/codecs/test/videoprocessor.h @@ -123,8 +123,10 @@ class VideoProcessor { : public webrtc::DecodedImageCallback { public: explicit VideoProcessorDecodeCompleteCallback( - VideoProcessor* video_processor) + VideoProcessor* video_processor, + size_t simulcast_svc_idx) : video_processor_(video_processor), + simulcast_svc_idx_(simulcast_svc_idx), task_queue_(rtc::TaskQueue::Current()) { RTC_DCHECK(video_processor_); RTC_DCHECK(task_queue_); @@ -133,11 +135,12 @@ class VideoProcessor { int32_t Decoded(webrtc::VideoFrame& image) override { // Post the callback to the right task queue, if needed. if (!task_queue_->IsCurrent()) { - task_queue_->PostTask( - [this, image]() { video_processor_->FrameDecoded(image); }); + task_queue_->PostTask([this, image]() { + video_processor_->FrameDecoded(image, simulcast_svc_idx_); + }); return 0; } - video_processor_->FrameDecoded(image); + video_processor_->FrameDecoded(image, simulcast_svc_idx_); return 0; } @@ -154,6 +157,7 @@ class VideoProcessor { private: VideoProcessor* const video_processor_; + const size_t simulcast_svc_idx_; rtc::TaskQueue* const task_queue_; }; @@ -162,15 +166,18 @@ class VideoProcessor { const webrtc::CodecSpecificInfo& codec_specific); // Invoked by the callback adapter when a frame has completed decoding. - void FrameDecoded(const webrtc::VideoFrame& image); + void FrameDecoded(const webrtc::VideoFrame& image, size_t simulcast_svc_idx); + + void DecodeFrame(const EncodedImage& encoded_image, size_t simulcast_svc_idx); // In order to supply the SVC decoders with super frames containing all // lower layer frames, we merge and store the layer frames in this method. - const webrtc::EncodedImage* MergeAndStoreEncodedImageForSvcDecoding( + const webrtc::EncodedImage* BuildAndStoreSuperframe( const EncodedImage& encoded_image, const VideoCodecType codec, size_t frame_number, - size_t simulcast_svc_idx) RTC_RUN_ON(sequence_checker_); + size_t simulcast_svc_idx, + bool inter_layer_predicted) RTC_RUN_ON(sequence_checker_); // Test input/output. TestConfig config_ RTC_GUARDED_BY(sequence_checker_); @@ -186,7 +193,11 @@ class VideoProcessor { // Adapters for the codec callbacks. VideoProcessorEncodeCompleteCallback encode_callback_; - VideoProcessorDecodeCompleteCallback decode_callback_; + // Assign separate callback object to each decoder. This allows us to identify + // decoded layer in frame decode callback. + // simulcast_svc_idx -> decode callback. + std::vector> + decode_callback_; // Each call to ProcessFrame() will read one frame from |input_frame_reader_|. FrameReader* const input_frame_reader_; @@ -226,9 +237,6 @@ class VideoProcessor { std::vector first_decoded_frame_ RTC_GUARDED_BY(sequence_checker_); // simulcast_svc_idx -> frame_number. std::vector last_decoded_frame_num_ RTC_GUARDED_BY(sequence_checker_); - // frame size (pixels) -> simulcast_svc_idx. - std::map frame_wxh_to_simulcast_svc_idx_ - RTC_GUARDED_BY(sequence_checker_); // Time spent in frame encode callback. It is accumulated for layers and // reset when frame encode starts. When next layer is encoded post-encode time