Communicate encoder resolutions via rtc::VideoSinkWants.

This will allow us to optimize the internal buffers of
webrtc::VideoFrame for the resolution(s) that we actually want to
encode.

Bug: webrtc:12469, chromium:1157072
Change-Id: If378b52b5e35aa9a9800c1f7dfe189437ce43253
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/208540
Reviewed-by: Niels Moller <nisse@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#33342}
This commit is contained in:
Henrik Boström
2021-02-25 10:30:39 +01:00
committed by Commit Bot
parent bb52bdf095
commit 1124ed1ab2
6 changed files with 208 additions and 5 deletions

View File

@ -12,6 +12,7 @@
#define API_VIDEO_VIDEO_SOURCE_INTERFACE_H_
#include <limits>
#include <vector>
#include "absl/types/optional.h"
#include "api/video/video_sink_interface.h"
@ -22,6 +23,15 @@ namespace rtc {
// VideoSinkWants is used for notifying the source of properties a video frame
// should have when it is delivered to a certain sink.
struct RTC_EXPORT VideoSinkWants {
// A single sink resolution (in pixels) advertised via VideoSinkWants.
struct FrameSize {
  FrameSize(int w, int h) : width(w), height(h) {}
  FrameSize(const FrameSize&) = default;
  ~FrameSize() = default;
  int width;
  int height;
};
VideoSinkWants();
VideoSinkWants(const VideoSinkWants&);
~VideoSinkWants();
@ -49,8 +59,34 @@ struct RTC_EXPORT VideoSinkWants {
// Note that this field is unrelated to any horizontal or vertical stride
// requirements the encoder has on the incoming video frame buffers.
int resolution_alignment = 1;
// The resolutions that sink is configured to consume. If the sink is an
// encoder this is what the encoder is configured to encode. In singlecast we
// only encode one resolution, but in simulcast and SVC this can mean multiple
// resolutions per frame.
//
// The sink is always configured to consume a subset of the
// webrtc::VideoFrame's resolution. In the case of encoding, we usually encode
// at webrtc::VideoFrame's resolution but this may not always be the case due
// to scaleResolutionDownBy or turning off simulcast or SVC layers.
//
// For example, we may capture at 720p and due to adaptation (e.g. applying
// |max_pixel_count| constraints) create webrtc::VideoFrames of size 480p, but
// if we do scaleResolutionDownBy:2 then the only resolution we end up
// encoding is 240p. In this case we still need to provide webrtc::VideoFrames
// of size 480p but we can optimize internal buffers for 240p, avoiding
// downsampling to 480p if possible.
//
// Note that the |resolutions| can change while frames are in flight and
// should only be used as a hint when constructing the webrtc::VideoFrame.
std::vector<FrameSize> resolutions;
};
// Two frame sizes are equal iff both dimensions match.
inline bool operator==(const VideoSinkWants::FrameSize& a,
                       const VideoSinkWants::FrameSize& b) {
  return a.width == b.width && a.height == b.height;
}

// Matching inequality operator: pre-C++20 the compiler does not synthesize
// operator!= from operator==, so provide it explicitly for symmetric use.
inline bool operator!=(const VideoSinkWants::FrameSize& a,
                       const VideoSinkWants::FrameSize& b) {
  return !(a == b);
}
template <typename VideoFrameT>
class VideoSourceInterface {
public:

View File

@ -561,6 +561,18 @@ TEST_F(CallPerfTest, ReceivesCpuOveruseAndUnderuse) {
// TODO(sprang): Add integration test for maintain-framerate mode?
void OnSinkWantsChanged(rtc::VideoSinkInterface<VideoFrame>* sink,
const rtc::VideoSinkWants& wants) override {
// The sink wants can change either because an adaptation happened (i.e.
// the pixels or frame rate changed) or for other reasons, such as encoded
// resolutions being communicated (happens whenever we capture a new frame
// size). In this test, we only care about adaptations.
bool did_adapt =
last_wants_.max_pixel_count != wants.max_pixel_count ||
last_wants_.target_pixel_count != wants.target_pixel_count ||
last_wants_.max_framerate_fps != wants.max_framerate_fps;
last_wants_ = wants;
if (!did_adapt) {
return;
}
// At kStart expect CPU overuse. Then expect CPU underuse when the encoder
// delay has been decreased.
switch (test_phase_) {
@ -625,6 +637,9 @@ TEST_F(CallPerfTest, ReceivesCpuOveruseAndUnderuse) {
kAdaptedDown,
kAdaptedUp
} test_phase_;
private:
rtc::VideoSinkWants last_wants_;
} test;
RunBaseTest(&test);

View File

@ -29,7 +29,14 @@ std::string WantsToString(const rtc::VideoSinkWants& wants) {
<< " max_pixel_count=" << wants.max_pixel_count << " target_pixel_count="
<< (wants.target_pixel_count.has_value()
? std::to_string(wants.target_pixel_count.value())
: "null");
: "null")
<< " resolutions={";
for (size_t i = 0; i < wants.resolutions.size(); ++i) {
if (i != 0)
ss << ",";
ss << wants.resolutions[i].width << "x" << wants.resolutions[i].height;
}
ss << "}";
return ss.Release();
}
@ -104,6 +111,12 @@ int VideoSourceSinkController::resolution_alignment() const {
return resolution_alignment_;
}
// Returns the encoder resolutions most recently stored via SetResolutions().
// Must run on the controller's sequence (checked by RTC_DCHECK_RUN_ON); the
// returned reference is only valid while on that sequence.
const std::vector<rtc::VideoSinkWants::FrameSize>&
VideoSourceSinkController::resolutions() const {
RTC_DCHECK_RUN_ON(&sequence_checker_);
return resolutions_;
}
void VideoSourceSinkController::SetRestrictions(
VideoSourceRestrictions restrictions) {
RTC_DCHECK_RUN_ON(&sequence_checker_);
@ -133,6 +146,12 @@ void VideoSourceSinkController::SetResolutionAlignment(
resolution_alignment_ = resolution_alignment;
}
// Stores the resolutions the sink (e.g. the encoder) is configured to
// consume. Only updates internal state; the new value reaches the source's
// VideoSinkWants when PushSourceSinkSettings() is subsequently called.
// Must run on the controller's sequence.
void VideoSourceSinkController::SetResolutions(
std::vector<rtc::VideoSinkWants::FrameSize> resolutions) {
RTC_DCHECK_RUN_ON(&sequence_checker_);
// Sink parameter taken by value so the caller's vector can be moved in.
resolutions_ = std::move(resolutions);
}
// RTC_EXCLUSIVE_LOCKS_REQUIRED(sequence_checker_)
rtc::VideoSinkWants VideoSourceSinkController::CurrentSettingsToSinkWants()
const {
@ -161,6 +180,7 @@ rtc::VideoSinkWants VideoSourceSinkController::CurrentSettingsToSinkWants()
frame_rate_upper_limit_.has_value()
? static_cast<int>(frame_rate_upper_limit_.value())
: std::numeric_limits<int>::max());
wants.resolutions = resolutions_;
return wants;
}

View File

@ -12,6 +12,7 @@
#define VIDEO_VIDEO_SOURCE_SINK_CONTROLLER_H_
#include <string>
#include <vector>
#include "absl/types/optional.h"
#include "api/sequence_checker.h"
@ -46,6 +47,7 @@ class VideoSourceSinkController {
absl::optional<double> frame_rate_upper_limit() const;
bool rotation_applied() const;
int resolution_alignment() const;
const std::vector<rtc::VideoSinkWants::FrameSize>& resolutions() const;
// Updates the settings stored internally. In order for these settings to be
// applied to the sink, PushSourceSinkSettings() must subsequently be called.
@ -55,6 +57,7 @@ class VideoSourceSinkController {
void SetFrameRateUpperLimit(absl::optional<double> frame_rate_upper_limit);
void SetRotationApplied(bool rotation_applied);
void SetResolutionAlignment(int resolution_alignment);
void SetResolutions(std::vector<rtc::VideoSinkWants::FrameSize> resolutions);
private:
rtc::VideoSinkWants CurrentSettingsToSinkWants() const
@ -79,6 +82,8 @@ class VideoSourceSinkController {
RTC_GUARDED_BY(&sequence_checker_);
bool rotation_applied_ RTC_GUARDED_BY(&sequence_checker_) = false;
int resolution_alignment_ RTC_GUARDED_BY(&sequence_checker_) = 1;
std::vector<rtc::VideoSinkWants::FrameSize> resolutions_
RTC_GUARDED_BY(&sequence_checker_);
};
} // namespace webrtc

View File

@ -991,14 +991,29 @@ void VideoStreamEncoder::ReconfigureEncoder() {
max_framerate = std::max(stream.max_framerate, max_framerate);
}
main_queue_->PostTask(
ToQueuedTask(task_safety_, [this, max_framerate, alignment]() {
// The resolutions that we're actually encoding with.
std::vector<rtc::VideoSinkWants::FrameSize> encoder_resolutions;
// TODO(hbos): For the case of SVC, also make use of |codec.spatialLayers|.
// For now, SVC layers are handled by the VP9 encoder.
for (const auto& simulcastStream : codec.simulcastStream) {
if (!simulcastStream.active)
continue;
encoder_resolutions.emplace_back(simulcastStream.width,
simulcastStream.height);
}
main_queue_->PostTask(ToQueuedTask(
task_safety_, [this, max_framerate, alignment,
encoder_resolutions = std::move(encoder_resolutions)]() {
RTC_DCHECK_RUN_ON(main_queue_);
if (max_framerate !=
video_source_sink_controller_.frame_rate_upper_limit() ||
alignment != video_source_sink_controller_.resolution_alignment()) {
alignment != video_source_sink_controller_.resolution_alignment() ||
encoder_resolutions !=
video_source_sink_controller_.resolutions()) {
video_source_sink_controller_.SetFrameRateUpperLimit(max_framerate);
video_source_sink_controller_.SetResolutionAlignment(alignment);
video_source_sink_controller_.SetResolutions(
std::move(encoder_resolutions));
video_source_sink_controller_.PushSourceSinkSettings();
}
}));

View File

@ -461,6 +461,10 @@ class AdaptingFrameForwarder : public test::FrameForwarder {
return adaptation_enabled_;
}
// The "last wants" is a snapshot of the previous rtc::VideoSinkWants where
// the resolution or frame rate was different than it is currently. If
// something else is modified, such as encoder resolutions, but the resolution
// and frame rate stays the same, last wants is not updated.
rtc::VideoSinkWants last_wants() const {
MutexLock lock(&mutex_);
return last_wants_;
@ -519,7 +523,14 @@ class AdaptingFrameForwarder : public test::FrameForwarder {
// Receives updated wants from the sink. Snapshots the previous wants into
// |last_wants_| only when the new wants represent an adaptation (a change in
// pixel count or frame rate); other changes — such as updated encoder
// resolutions — deliberately do not touch the snapshot (see last_wants()).
// The new wants are then forwarded to the adapter and the base forwarder.
void AddOrUpdateSink(rtc::VideoSinkInterface<VideoFrame>* sink,
const rtc::VideoSinkWants& wants) override {
MutexLock lock(&mutex_);
rtc::VideoSinkWants prev_wants = sink_wants_locked();
// An "adaptation" is defined here as any change to the pixel-count or
// frame-rate constraints relative to the previous wants.
bool did_adapt =
prev_wants.max_pixel_count != wants.max_pixel_count ||
prev_wants.target_pixel_count != wants.target_pixel_count ||
prev_wants.max_framerate_fps != wants.max_framerate_fps;
if (did_adapt) {
last_wants_ = prev_wants;
}
adapter_.OnSinkWants(wants);
// Locked variant used because |mutex_| is already held above.
test::FrameForwarder::AddOrUpdateSinkLocked(sink, wants);
}
@ -7611,4 +7622,105 @@ TEST_F(VideoStreamEncoderTest, EncoderResetAccordingToParameterChange) {
video_stream_encoder_->Stop();
}
// Verifies that in singlecast (one encoded stream) the encoder's resolution
// is exposed to the source through VideoSinkWants::resolutions, and that it
// equals the captured frame's resolution.
TEST_F(VideoStreamEncoderTest, EncoderResolutionsExposedInSinglecast) {
const int kFrameWidth = 1280;
const int kFrameHeight = 720;
SetUp();
video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
DataRate::BitsPerSec(kTargetBitrateBps),
DataRate::BitsPerSec(kTargetBitrateBps),
DataRate::BitsPerSec(kTargetBitrateBps), 0, 0, 0);
// Capturing a frame should reconfigure the encoder and expose the encoder
// resolution, which is the same as the input frame.
int64_t timestamp_ms = kFrameIntervalMs;
video_source_.IncomingCapturedFrame(
CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
WaitForEncodedFrame(timestamp_ms);
// Wants are updated asynchronously; drain the task queue before inspecting.
video_stream_encoder_->WaitUntilTaskQueueIsIdle();
EXPECT_THAT(video_source_.sink_wants().resolutions,
::testing::ElementsAreArray(
{rtc::VideoSinkWants::FrameSize(kFrameWidth, kFrameHeight)}));
video_stream_encoder_->Stop();
}
// Verifies that in simulcast the set of active layer resolutions is exposed
// through VideoSinkWants::resolutions, and that deactivating layers removes
// the corresponding entries.
TEST_F(VideoStreamEncoderTest, EncoderResolutionsExposedInSimulcast) {
// Pick downscale factors such that we never encode at full resolution - this
// is an interesting use case. The frame resolution influences the encoder
// resolutions, but if no layer has |scale_resolution_down_by| == 1 then the
// encoder should not ask for the frame resolution. This allows video frames
// to have the appearance of one resolution but optimize their internal
// buffers for what is actually encoded.
const size_t kNumSimulcastLayers = 3u;
const float kDownscaleFactors[] = {8.0, 4.0, 2.0};
const int kFrameWidth = 1280;
const int kFrameHeight = 720;
const rtc::VideoSinkWants::FrameSize kLayer0Size(
kFrameWidth / kDownscaleFactors[0], kFrameHeight / kDownscaleFactors[0]);
const rtc::VideoSinkWants::FrameSize kLayer1Size(
kFrameWidth / kDownscaleFactors[1], kFrameHeight / kDownscaleFactors[1]);
const rtc::VideoSinkWants::FrameSize kLayer2Size(
kFrameWidth / kDownscaleFactors[2], kFrameHeight / kDownscaleFactors[2]);
VideoEncoderConfig config;
test::FillEncoderConfiguration(kVideoCodecVP8, kNumSimulcastLayers, &config);
for (size_t i = 0; i < kNumSimulcastLayers; ++i) {
config.simulcast_layers[i].scale_resolution_down_by = kDownscaleFactors[i];
config.simulcast_layers[i].active = true;
}
config.video_stream_factory =
new rtc::RefCountedObject<cricket::EncoderStreamFactory>(
"VP8", /*max qp*/ 56, /*screencast*/ false,
/*screenshare enabled*/ false);
video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
DataRate::BitsPerSec(kSimulcastTargetBitrateBps),
DataRate::BitsPerSec(kSimulcastTargetBitrateBps),
DataRate::BitsPerSec(kSimulcastTargetBitrateBps), 0, 0, 0);
// Capture a frame with all layers active.
int64_t timestamp_ms = kFrameIntervalMs;
sink_.SetNumExpectedLayers(kNumSimulcastLayers);
video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
video_source_.IncomingCapturedFrame(
CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
WaitForEncodedFrame(timestamp_ms);
// Expect encoded resolutions to match the expected simulcast layers.
video_stream_encoder_->WaitUntilTaskQueueIsIdle();
EXPECT_THAT(
video_source_.sink_wants().resolutions,
::testing::ElementsAreArray({kLayer0Size, kLayer1Size, kLayer2Size}));
// Capture a frame with one of the layers inactive.
timestamp_ms += kFrameIntervalMs;
config.simulcast_layers[2].active = false;
sink_.SetNumExpectedLayers(kNumSimulcastLayers - 1);
video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
video_source_.IncomingCapturedFrame(
CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
WaitForEncodedFrame(timestamp_ms);
// Expect encoded resolutions to match the expected simulcast layers.
video_stream_encoder_->WaitUntilTaskQueueIsIdle();
EXPECT_THAT(video_source_.sink_wants().resolutions,
::testing::ElementsAreArray({kLayer0Size, kLayer1Size}));
// Capture a frame with all but one layer turned off.
timestamp_ms += kFrameIntervalMs;
config.simulcast_layers[1].active = false;
sink_.SetNumExpectedLayers(kNumSimulcastLayers - 2);
video_stream_encoder_->ConfigureEncoder(config.Copy(), kMaxPayloadLength);
video_source_.IncomingCapturedFrame(
CreateFrame(timestamp_ms, kFrameWidth, kFrameHeight));
WaitForEncodedFrame(timestamp_ms);
// Expect encoded resolutions to match the expected simulcast layers.
video_stream_encoder_->WaitUntilTaskQueueIsIdle();
EXPECT_THAT(video_source_.sink_wants().resolutions,
::testing::ElementsAreArray({kLayer0Size}));
video_stream_encoder_->Stop();
}
} // namespace webrtc