diff --git a/api/video/video_source_interface.h b/api/video/video_source_interface.h index b03d7c5483..8b5823fc27 100644 --- a/api/video/video_source_interface.h +++ b/api/video/video_source_interface.h @@ -12,6 +12,7 @@ #define API_VIDEO_VIDEO_SOURCE_INTERFACE_H_ #include +#include #include "absl/types/optional.h" #include "api/video/video_sink_interface.h" @@ -22,6 +23,15 @@ namespace rtc { // VideoSinkWants is used for notifying the source of properties a video frame // should have when it is delivered to a certain sink. struct RTC_EXPORT VideoSinkWants { + struct FrameSize { + FrameSize(int width, int height) : width(width), height(height) {} + FrameSize(const FrameSize&) = default; + ~FrameSize() = default; + + int width; + int height; + }; + VideoSinkWants(); VideoSinkWants(const VideoSinkWants&); ~VideoSinkWants(); @@ -49,8 +59,34 @@ struct RTC_EXPORT VideoSinkWants { // Note that this field is unrelated to any horizontal or vertical stride // requirements the encoder has on the incoming video frame buffers. int resolution_alignment = 1; + + // The resolutions that sink is configured to consume. If the sink is an + // encoder this is what the encoder is configured to encode. In singlecast we + // only encode one resolution, but in simulcast and SVC this can mean multiple + // resolutions per frame. + // + // The sink is always configured to consume a subset of the + // webrtc::VideoFrame's resolution. In the case of encoding, we usually encode + // at webrtc::VideoFrame's resolution but this may not always be the case due + // to scaleResolutionDownBy or turning off simulcast or SVC layers. + // + // For example, we may capture at 720p and due to adaptation (e.g. applying + // |max_pixel_count| constraints) create webrtc::VideoFrames of size 480p, but + // if we do scaleResolutionDownBy:2 then the only resolution we end up + // encoding is 240p. 
In this case we still need to provide webrtc::VideoFrames + // of size 480p but we can optimize internal buffers for 240p, avoiding + // downsampling to 480p if possible. + // + // Note that the |resolutions| can change while frames are in flight and + // should only be used as a hint when constructing the webrtc::VideoFrame. + std::vector resolutions; }; +inline bool operator==(const VideoSinkWants::FrameSize& a, + const VideoSinkWants::FrameSize& b) { + return a.width == b.width && a.height == b.height; +} + template class VideoSourceInterface { public: diff --git a/call/call_perf_tests.cc b/call/call_perf_tests.cc index 6591ab596d..4cb9766c84 100644 --- a/call/call_perf_tests.cc +++ b/call/call_perf_tests.cc @@ -561,6 +561,18 @@ TEST_F(CallPerfTest, ReceivesCpuOveruseAndUnderuse) { // TODO(sprang): Add integration test for maintain-framerate mode? void OnSinkWantsChanged(rtc::VideoSinkInterface* sink, const rtc::VideoSinkWants& wants) override { + // The sink wants can change either because an adaptation happened (i.e. + // the pixels or frame rate changed) or for other reasons, such as encoded + // resolutions being communicated (happens whenever we capture a new frame + // size). In this test, we only care about adaptations. + bool did_adapt = + last_wants_.max_pixel_count != wants.max_pixel_count || + last_wants_.target_pixel_count != wants.target_pixel_count || + last_wants_.max_framerate_fps != wants.max_framerate_fps; + last_wants_ = wants; + if (!did_adapt) { + return; + } // At kStart expect CPU overuse. Then expect CPU underuse when the encoder // delay has been decreased. 
switch (test_phase_) { @@ -625,6 +637,9 @@ TEST_F(CallPerfTest, ReceivesCpuOveruseAndUnderuse) { kAdaptedDown, kAdaptedUp } test_phase_; + + private: + rtc::VideoSinkWants last_wants_; } test; RunBaseTest(&test); diff --git a/video/video_source_sink_controller.cc b/video/video_source_sink_controller.cc index 376eb85eae..4cd12d8a27 100644 --- a/video/video_source_sink_controller.cc +++ b/video/video_source_sink_controller.cc @@ -29,7 +29,14 @@ std::string WantsToString(const rtc::VideoSinkWants& wants) { << " max_pixel_count=" << wants.max_pixel_count << " target_pixel_count=" << (wants.target_pixel_count.has_value() ? std::to_string(wants.target_pixel_count.value()) - : "null"); + : "null") + << " resolutions={"; + for (size_t i = 0; i < wants.resolutions.size(); ++i) { + if (i != 0) + ss << ","; + ss << wants.resolutions[i].width << "x" << wants.resolutions[i].height; + } + ss << "}"; return ss.Release(); } @@ -104,6 +111,12 @@ int VideoSourceSinkController::resolution_alignment() const { return resolution_alignment_; } +const std::vector& +VideoSourceSinkController::resolutions() const { + RTC_DCHECK_RUN_ON(&sequence_checker_); + return resolutions_; +} + void VideoSourceSinkController::SetRestrictions( VideoSourceRestrictions restrictions) { RTC_DCHECK_RUN_ON(&sequence_checker_); @@ -133,6 +146,12 @@ void VideoSourceSinkController::SetResolutionAlignment( resolution_alignment_ = resolution_alignment; } +void VideoSourceSinkController::SetResolutions( + std::vector resolutions) { + RTC_DCHECK_RUN_ON(&sequence_checker_); + resolutions_ = std::move(resolutions); +} + // RTC_EXCLUSIVE_LOCKS_REQUIRED(sequence_checker_) rtc::VideoSinkWants VideoSourceSinkController::CurrentSettingsToSinkWants() const { @@ -161,6 +180,7 @@ rtc::VideoSinkWants VideoSourceSinkController::CurrentSettingsToSinkWants() frame_rate_upper_limit_.has_value() ? 
static_cast(frame_rate_upper_limit_.value()) : std::numeric_limits::max()); + wants.resolutions = resolutions_; return wants; } diff --git a/video/video_source_sink_controller.h b/video/video_source_sink_controller.h index 29a9588c4d..c61084f99a 100644 --- a/video/video_source_sink_controller.h +++ b/video/video_source_sink_controller.h @@ -12,6 +12,7 @@ #define VIDEO_VIDEO_SOURCE_SINK_CONTROLLER_H_ #include +#include #include "absl/types/optional.h" #include "api/sequence_checker.h" @@ -46,6 +47,7 @@ class VideoSourceSinkController { absl::optional frame_rate_upper_limit() const; bool rotation_applied() const; int resolution_alignment() const; + const std::vector& resolutions() const; // Updates the settings stored internally. In order for these settings to be // applied to the sink, PushSourceSinkSettings() must subsequently be called. @@ -55,6 +57,7 @@ class VideoSourceSinkController { void SetFrameRateUpperLimit(absl::optional frame_rate_upper_limit); void SetRotationApplied(bool rotation_applied); void SetResolutionAlignment(int resolution_alignment); + void SetResolutions(std::vector resolutions); private: rtc::VideoSinkWants CurrentSettingsToSinkWants() const @@ -79,6 +82,8 @@ class VideoSourceSinkController { RTC_GUARDED_BY(&sequence_checker_); bool rotation_applied_ RTC_GUARDED_BY(&sequence_checker_) = false; int resolution_alignment_ RTC_GUARDED_BY(&sequence_checker_) = 1; + std::vector resolutions_ + RTC_GUARDED_BY(&sequence_checker_); }; } // namespace webrtc diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc index 63770c4975..ae5872526c 100644 --- a/video/video_stream_encoder.cc +++ b/video/video_stream_encoder.cc @@ -991,14 +991,29 @@ void VideoStreamEncoder::ReconfigureEncoder() { max_framerate = std::max(stream.max_framerate, max_framerate); } - main_queue_->PostTask( - ToQueuedTask(task_safety_, [this, max_framerate, alignment]() { + // The resolutions that we're actually encoding with. 
+ std::vector<rtc::VideoSinkWants::FrameSize> encoder_resolutions; + // TODO(hbos): For the case of SVC, also make use of |codec.spatialLayers|. + // For now, SVC layers are handled by the VP9 encoder. + for (const auto& simulcastStream : codec.simulcastStream) { + if (!simulcastStream.active) + continue; + encoder_resolutions.emplace_back(simulcastStream.width, + simulcastStream.height); + } + main_queue_->PostTask(ToQueuedTask( + task_safety_, [this, max_framerate, alignment, + encoder_resolutions = std::move(encoder_resolutions)]() mutable { RTC_DCHECK_RUN_ON(main_queue_); if (max_framerate != video_source_sink_controller_.frame_rate_upper_limit() || - alignment != video_source_sink_controller_.resolution_alignment()) { + alignment != video_source_sink_controller_.resolution_alignment() || + encoder_resolutions != + video_source_sink_controller_.resolutions()) { video_source_sink_controller_.SetFrameRateUpperLimit(max_framerate); video_source_sink_controller_.SetResolutionAlignment(alignment); + video_source_sink_controller_.SetResolutions( + std::move(encoder_resolutions)); video_source_sink_controller_.PushSourceSinkSettings(); } })); diff --git a/video/video_stream_encoder_unittest.cc b/video/video_stream_encoder_unittest.cc index f7a36216c2..d74ebe84ad 100644 --- a/video/video_stream_encoder_unittest.cc +++ b/video/video_stream_encoder_unittest.cc @@ -461,6 +461,10 @@ class AdaptingFrameForwarder : public test::FrameForwarder { return adaptation_enabled_; } + // The "last wants" is a snapshot of the previous rtc::VideoSinkWants where + // the resolution or frame rate was different than it is currently. If + // something else is modified, such as encoder resolutions, but the resolution + // and frame rate stays the same, last wants is not updated. 
rtc::VideoSinkWants last_wants() const { MutexLock lock(&mutex_); return last_wants_; @@ -519,7 +523,14 @@ class AdaptingFrameForwarder : public test::FrameForwarder { void AddOrUpdateSink(rtc::VideoSinkInterface* sink, const rtc::VideoSinkWants& wants) override { MutexLock lock(&mutex_); - last_wants_ = sink_wants_locked(); + rtc::VideoSinkWants prev_wants = sink_wants_locked(); + bool did_adapt = + prev_wants.max_pixel_count != wants.max_pixel_count || + prev_wants.target_pixel_count != wants.target_pixel_count || + prev_wants.max_framerate_fps != wants.max_framerate_fps; + if (did_adapt) { + last_wants_ = prev_wants; + } adapter_.OnSinkWants(wants); test::FrameForwarder::AddOrUpdateSinkLocked(sink, wants); } @@ -7611,4 +7622,105 @@ TEST_F(VideoStreamEncoderTest, EncoderResetAccordingToParameterChange) { video_stream_encoder_->Stop(); } +TEST_F(VideoStreamEncoderTest, EncoderResolutionsExposedInSinglecast) { + const int kFrameWidth = 1280; + const int kFrameHeight = 720; + + SetUp(); + video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources( + DataRate::BitsPerSec(kTargetBitrateBps), + DataRate::BitsPerSec(kTargetBitrateBps), + DataRate::BitsPerSec(kTargetBitrateBps), 0, 0, 0); + + // Capturing a frame should reconfigure the encoder and expose the encoder + // resolution, which is the same as the input frame. + int64_t timestamp_ms = kFrameIntervalMs; + video_source_.IncomingCapturedFrame( + CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight)); + WaitForEncodedFrame(timestamp_ms); + video_stream_encoder_->WaitUntilTaskQueueIsIdle(); + EXPECT_THAT(video_source_.sink_wants().resolutions, + ::testing::ElementsAreArray( + {rtc::VideoSinkWants::FrameSize(kFrameWidth, kFrameHeight)})); + + video_stream_encoder_->Stop(); +} + +TEST_F(VideoStreamEncoderTest, EncoderResolutionsExposedInSimulcast) { + // Pick downscale factors such that we never encode at full resolution - this + // is an interesting use case. 
The frame resolution influences the encoder + // resolutions, but if no layer has |scale_resolution_down_by| == 1 then the + // encoder should not ask for the frame resolution. This allows video frames + // to have the appearance of one resolution but optimize its internal buffers + // for what is actually encoded. + const size_t kNumSimulcastLayers = 3u; + const float kDownscaleFactors[] = {8.0, 4.0, 2.0}; + const int kFrameWidth = 1280; + const int kFrameHeight = 720; + const rtc::VideoSinkWants::FrameSize kLayer0Size( + kFrameWidth / kDownscaleFactors[0], kFrameHeight / kDownscaleFactors[0]); + const rtc::VideoSinkWants::FrameSize kLayer1Size( + kFrameWidth / kDownscaleFactors[1], kFrameHeight / kDownscaleFactors[1]); + const rtc::VideoSinkWants::FrameSize kLayer2Size( + kFrameWidth / kDownscaleFactors[2], kFrameHeight / kDownscaleFactors[2]); + + VideoEncoderConfig config; + test::FillEncoderConfiguration(kVideoCodecVP8, kNumSimulcastLayers, &config); + for (size_t i = 0; i < kNumSimulcastLayers; ++i) { + config.simulcast_layers[i].scale_resolution_down_by = kDownscaleFactors[i]; + config.simulcast_layers[i].active = true; + } + config.video_stream_factory = + new rtc::RefCountedObject( + "VP8", /*max qp*/ 56, /*screencast*/ false, + /*screenshare enabled*/ false); + video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources( + DataRate::BitsPerSec(kSimulcastTargetBitrateBps), + DataRate::BitsPerSec(kSimulcastTargetBitrateBps), + DataRate::BitsPerSec(kSimulcastTargetBitrateBps), 0, 0, 0); + + // Capture a frame with all layers active. + int64_t timestamp_ms = kFrameIntervalMs; + sink_.SetNumExpectedLayers(kNumSimulcastLayers); + video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength); + video_source_.IncomingCapturedFrame( + CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight)); + WaitForEncodedFrame(timestamp_ms); + // Expect encoded resolutions to match the expected simulcast layers. 
+ video_stream_encoder_->WaitUntilTaskQueueIsIdle(); + EXPECT_THAT( + video_source_.sink_wants().resolutions, + ::testing::ElementsAreArray({kLayer0Size, kLayer1Size, kLayer2Size})); + + // Capture a frame with one of the layers inactive. + timestamp_ms += kFrameIntervalMs; + config.simulcast_layers[2].active = false; + sink_.SetNumExpectedLayers(kNumSimulcastLayers - 1); + video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength); + video_source_.IncomingCapturedFrame( + CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight)); + WaitForEncodedFrame(timestamp_ms); + + // Expect encoded resolutions to match the expected simulcast layers. + video_stream_encoder_->WaitUntilTaskQueueIsIdle(); + EXPECT_THAT(video_source_.sink_wants().resolutions, + ::testing::ElementsAreArray({kLayer0Size, kLayer1Size})); + + // Capture a frame with all but one layer turned off. + timestamp_ms += kFrameIntervalMs; + config.simulcast_layers[1].active = false; + sink_.SetNumExpectedLayers(kNumSimulcastLayers - 2); + video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength); + video_source_.IncomingCapturedFrame( + CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight)); + WaitForEncodedFrame(timestamp_ms); + + // Expect encoded resolutions to match the expected simulcast layers. + video_stream_encoder_->WaitUntilTaskQueueIsIdle(); + EXPECT_THAT(video_source_.sink_wants().resolutions, + ::testing::ElementsAreArray({kLayer0Size})); + + video_stream_encoder_->Stop(); +} + } // namespace webrtc