Reland "Add tool for aliging video files"
This is a reland of b2c0e8f60fad10e2786e5e131136a0da1299d883 Original change's description: > Add tool for aliging video files > > This class adds logic for aligning a test video to a reference video > by an algorithm that maximizes SSIM between them. Aligned videos will be > easier to run video quality metrics on. This is a generic way of > aligning videos and can be replace the intrusive barcode stamping that > we currently use. This will be done in a follow-up CL. > > Change-Id: I71cf1e2179c0f1e03eff9e4d8fc492fd5cfbbb1c > Bug: webrtc:9642 > Reviewed-on: https://webrtc-review.googlesource.com/94773 > Commit-Queue: Magnus Jedvert <magjed@webrtc.org> > Reviewed-by: Patrik Höglund <phoglund@webrtc.org> > Reviewed-by: Paulina Hensman <phensman@webrtc.org> > Cr-Commit-Position: refs/heads/master@{#24407} TBR=phensman,phoglund Bug: webrtc:9642 Change-Id: I35d6b0e598335b8d80fbfa37ba06d5c651bda4f6 Reviewed-on: https://webrtc-review.googlesource.com/98040 Commit-Queue: Magnus Jedvert <magjed@webrtc.org> Reviewed-by: Magnus Jedvert <magjed@webrtc.org> Cr-Commit-Position: refs/heads/master@{#24580}
This commit is contained in:

committed by
Commit Bot

parent
11a922086b
commit
62228c41ea
@ -77,12 +77,17 @@ rtc_static_library("video_quality_analysis") {
|
||||
sources = [
|
||||
"frame_analyzer/video_quality_analysis.cc",
|
||||
"frame_analyzer/video_quality_analysis.h",
|
||||
"frame_analyzer/video_temporal_aligner.cc",
|
||||
"frame_analyzer/video_temporal_aligner.h",
|
||||
]
|
||||
deps = [
|
||||
":video_file_reader",
|
||||
"../api/video:video_frame_i420",
|
||||
"../common_video",
|
||||
"../rtc_base:checks",
|
||||
"../rtc_base:rtc_base_approved",
|
||||
"../test:perf_test",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
"//third_party/libyuv",
|
||||
]
|
||||
}
|
||||
@ -298,6 +303,7 @@ if (rtc_include_tests) {
|
||||
}
|
||||
|
||||
tools_unittests_resources = [
|
||||
"../resources/foreman_128x96.yuv",
|
||||
"../resources/foreman_cif.yuv",
|
||||
"../resources/reference_less_video_test_file.y4m",
|
||||
]
|
||||
@ -318,6 +324,7 @@ if (rtc_include_tests) {
|
||||
sources = [
|
||||
"frame_analyzer/reference_less_video_analysis_unittest.cc",
|
||||
"frame_analyzer/video_quality_analysis_unittest.cc",
|
||||
"frame_analyzer/video_temporal_aligner_unittest.cc",
|
||||
"frame_editing/frame_editing_unittest.cc",
|
||||
"sanitizers_unittest.cc",
|
||||
"simple_command_line_parser_unittest.cc",
|
||||
|
229
rtc_tools/frame_analyzer/video_temporal_aligner.cc
Normal file
229
rtc_tools/frame_analyzer/video_temporal_aligner.cc
Normal file
@ -0,0 +1,229 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "rtc_tools/frame_analyzer/video_temporal_aligner.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <deque>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "api/video/i420_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/refcountedobject.h"
|
||||
#include "rtc_tools/frame_analyzer/video_quality_analysis.h"
|
||||
#include "third_party/libyuv/include/libyuv/compare.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace test {
|
||||
|
||||
namespace {
|
||||
|
||||
// This constant controls how many frames we look ahead while seeking for the
|
||||
// match for the next frame. Note that we may span bigger gaps than this number
|
||||
// since we reset the counter as soon as we find a better match. The seeking
|
||||
// will stop when there is no improvement in the next kNumberOfFramesLookAhead
|
||||
// frames. Typically, the SSIM will improve as we get closer and closer to the
|
||||
// real match.
|
||||
const int kNumberOfFramesLookAhead = 60;
|
||||
|
||||
// Helper class that takes a video and generates an infinite looping video.
|
||||
class LoopingVideo : public rtc::RefCountedObject<Video> {
|
||||
public:
|
||||
explicit LoopingVideo(const rtc::scoped_refptr<Video>& video)
|
||||
: video_(video) {}
|
||||
|
||||
int width() const override { return video_->width(); }
|
||||
int height() const override { return video_->height(); }
|
||||
size_t number_of_frames() const override {
|
||||
return std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<I420BufferInterface> GetFrame(
|
||||
size_t index) const override {
|
||||
return video_->GetFrame(index % video_->number_of_frames());
|
||||
}
|
||||
|
||||
private:
|
||||
const rtc::scoped_refptr<Video> video_;
|
||||
};
|
||||
|
||||
// Helper class that take a vector of frame indices and a video and produces a
|
||||
// new video where the frames have been reshuffled.
|
||||
class ReorderedVideo : public rtc::RefCountedObject<Video> {
|
||||
public:
|
||||
ReorderedVideo(const rtc::scoped_refptr<Video>& video,
|
||||
const std::vector<size_t>& indices)
|
||||
: video_(video), indices_(indices) {}
|
||||
|
||||
int width() const override { return video_->width(); }
|
||||
int height() const override { return video_->height(); }
|
||||
size_t number_of_frames() const override { return indices_.size(); }
|
||||
|
||||
rtc::scoped_refptr<I420BufferInterface> GetFrame(
|
||||
size_t index) const override {
|
||||
return video_->GetFrame(indices_.at(index));
|
||||
}
|
||||
|
||||
private:
|
||||
const rtc::scoped_refptr<Video> video_;
|
||||
const std::vector<size_t> indices_;
|
||||
};
|
||||
|
||||
// Helper class that takes a video and produces a downscaled video.
|
||||
class DownscaledVideo : public rtc::RefCountedObject<Video> {
|
||||
public:
|
||||
DownscaledVideo(float scale_factor, const rtc::scoped_refptr<Video>& video)
|
||||
: downscaled_width_(
|
||||
static_cast<int>(round(scale_factor * video->width()))),
|
||||
downscaled_height_(
|
||||
static_cast<int>(round(scale_factor * video->height()))),
|
||||
video_(video) {}
|
||||
|
||||
int width() const override { return downscaled_width_; }
|
||||
int height() const override { return downscaled_height_; }
|
||||
size_t number_of_frames() const override {
|
||||
return video_->number_of_frames();
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<I420BufferInterface> GetFrame(
|
||||
size_t index) const override {
|
||||
const rtc::scoped_refptr<I420BufferInterface> frame =
|
||||
video_->GetFrame(index);
|
||||
rtc::scoped_refptr<I420Buffer> downscaled_frame =
|
||||
I420Buffer::Create(downscaled_width_, downscaled_height_);
|
||||
downscaled_frame->ScaleFrom(*frame);
|
||||
return downscaled_frame;
|
||||
}
|
||||
|
||||
private:
|
||||
const int downscaled_width_;
|
||||
const int downscaled_height_;
|
||||
const rtc::scoped_refptr<Video> video_;
|
||||
};
|
||||
|
||||
// Helper class that takes a video and caches the latest frame access. This
|
||||
// improves performance a lot since the original source is often from a file.
|
||||
class CachedVideo : public rtc::RefCountedObject<Video> {
|
||||
public:
|
||||
CachedVideo(int max_cache_size, const rtc::scoped_refptr<Video>& video)
|
||||
: max_cache_size_(max_cache_size), video_(video) {}
|
||||
|
||||
int width() const override { return video_->width(); }
|
||||
int height() const override { return video_->height(); }
|
||||
size_t number_of_frames() const override {
|
||||
return video_->number_of_frames();
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<I420BufferInterface> GetFrame(
|
||||
size_t index) const override {
|
||||
for (const CachedFrame& cached_frame : cache_) {
|
||||
if (cached_frame.index == index)
|
||||
return cached_frame.frame;
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<I420BufferInterface> frame = video_->GetFrame(index);
|
||||
cache_.push_front({index, frame});
|
||||
if (cache_.size() > max_cache_size_)
|
||||
cache_.pop_back();
|
||||
|
||||
return frame;
|
||||
}
|
||||
|
||||
private:
|
||||
struct CachedFrame {
|
||||
size_t index;
|
||||
rtc::scoped_refptr<I420BufferInterface> frame;
|
||||
};
|
||||
|
||||
const size_t max_cache_size_;
|
||||
const rtc::scoped_refptr<Video> video_;
|
||||
mutable std::deque<CachedFrame> cache_;
|
||||
};
|
||||
|
||||
// Try matching the test frame against all frames in the reference video and
|
||||
// return the index of the best matching frame.
|
||||
size_t FindBestMatch(const rtc::scoped_refptr<I420BufferInterface>& test_frame,
|
||||
const Video& reference_video) {
|
||||
std::vector<double> ssim;
|
||||
for (const auto& ref_frame : reference_video)
|
||||
ssim.push_back(Ssim(test_frame, ref_frame));
|
||||
return std::distance(ssim.begin(),
|
||||
std::max_element(ssim.begin(), ssim.end()));
|
||||
}
|
||||
|
||||
// Find and return the index of the frame matching the test frame. The search
|
||||
// starts at the starting index and continues until there is no better match
|
||||
// within the next kNumberOfFramesLookAhead frames.
|
||||
size_t FindNextMatch(const rtc::scoped_refptr<I420BufferInterface>& test_frame,
|
||||
const Video& reference_video,
|
||||
size_t start_index) {
|
||||
const double start_ssim =
|
||||
Ssim(test_frame, reference_video.GetFrame(start_index));
|
||||
for (int i = 1; i < kNumberOfFramesLookAhead; ++i) {
|
||||
const size_t next_index = start_index + i;
|
||||
// If we find a better match, restart the search at that point.
|
||||
if (start_ssim < Ssim(test_frame, reference_video.GetFrame(next_index)))
|
||||
return FindNextMatch(test_frame, reference_video, next_index);
|
||||
}
|
||||
// The starting index was the best match.
|
||||
return start_index;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
std::vector<size_t> FindMatchingFrameIndices(
|
||||
const rtc::scoped_refptr<Video>& reference_video,
|
||||
const rtc::scoped_refptr<Video>& test_video) {
|
||||
// This is done to get a 10x speedup. We don't need the full resolution in
|
||||
// order to match frames, and we should limit file access and not read the
|
||||
// same memory tens of times.
|
||||
const float kScaleFactor = 0.25f;
|
||||
const rtc::scoped_refptr<Video> cached_downscaled_reference_video =
|
||||
new CachedVideo(kNumberOfFramesLookAhead,
|
||||
new DownscaledVideo(kScaleFactor, reference_video));
|
||||
const rtc::scoped_refptr<Video> downscaled_test_video =
|
||||
new DownscaledVideo(kScaleFactor, test_video);
|
||||
|
||||
// Assume the video is looping around.
|
||||
const rtc::scoped_refptr<Video> looping_reference_video =
|
||||
new LoopingVideo(cached_downscaled_reference_video);
|
||||
|
||||
std::vector<size_t> match_indices;
|
||||
for (const rtc::scoped_refptr<I420BufferInterface>& test_frame :
|
||||
*downscaled_test_video) {
|
||||
if (match_indices.empty()) {
|
||||
// First frame.
|
||||
match_indices.push_back(
|
||||
FindBestMatch(test_frame, *cached_downscaled_reference_video));
|
||||
} else {
|
||||
match_indices.push_back(FindNextMatch(
|
||||
test_frame, *looping_reference_video, match_indices.back()));
|
||||
}
|
||||
}
|
||||
|
||||
return match_indices;
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<Video> ReorderVideo(const rtc::scoped_refptr<Video>& video,
|
||||
const std::vector<size_t>& indices) {
|
||||
return new ReorderedVideo(video, indices);
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<Video> GenerateAlignedReferenceVideo(
|
||||
const rtc::scoped_refptr<Video>& reference_video,
|
||||
const rtc::scoped_refptr<Video>& test_video) {
|
||||
return ReorderVideo(new LoopingVideo(reference_video),
|
||||
FindMatchingFrameIndices(reference_video, test_video));
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace webrtc
|
53
rtc_tools/frame_analyzer/video_temporal_aligner.h
Normal file
53
rtc_tools/frame_analyzer/video_temporal_aligner.h
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef RTC_TOOLS_FRAME_ANALYZER_VIDEO_TEMPORAL_ALIGNER_H_
|
||||
#define RTC_TOOLS_FRAME_ANALYZER_VIDEO_TEMPORAL_ALIGNER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "rtc_tools/video_file_reader.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace test {
|
||||
|
||||
// Returns a vector with the same size as the given test video. Each index
|
||||
// corresponds to what reference frame that test frame matches to. These
|
||||
// indices are strictly increasing and might loop around the reference video,
|
||||
// e.g. their values can be bigger than the number of frames in the reference
|
||||
// video and they should be interpreted modulo that size. The matching frames
|
||||
// will be determined by maximizing SSIM.
|
||||
std::vector<size_t> FindMatchingFrameIndices(
|
||||
const rtc::scoped_refptr<Video>& reference_video,
|
||||
const rtc::scoped_refptr<Video>& test_video);
|
||||
|
||||
// Generate a new video using the frames from the original video. The returned
|
||||
// video will have the same number of frames as the size of |indices|, and
|
||||
// frame nr i in the returned video will point to frame nr indices[i] in the
|
||||
// original video.
|
||||
rtc::scoped_refptr<Video> ReorderVideo(const rtc::scoped_refptr<Video>& video,
|
||||
const std::vector<size_t>& indices);
|
||||
|
||||
// Returns a modified version of the reference video where the frames have
|
||||
// been aligned to the test video. The test video is assumed to be captured
|
||||
// during a quality measurement test where the reference video is the source.
|
||||
// The test video may start at an arbitrary position in the reference video
|
||||
// and there might be missing frames. The reference video is assumed to loop
|
||||
// over when it reaches the end. The returned result is a version of the
|
||||
// reference video where the missing frames are left out so it aligns to the
|
||||
// test video.
|
||||
rtc::scoped_refptr<Video> GenerateAlignedReferenceVideo(
|
||||
const rtc::scoped_refptr<Video>& reference_video,
|
||||
const rtc::scoped_refptr<Video>& test_video);
|
||||
|
||||
} // namespace test
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // RTC_TOOLS_FRAME_ANALYZER_VIDEO_TEMPORAL_ALIGNER_H_
|
127
rtc_tools/frame_analyzer/video_temporal_aligner_unittest.cc
Normal file
127
rtc_tools/frame_analyzer/video_temporal_aligner_unittest.cc
Normal file
@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "rtc_tools/frame_analyzer/video_temporal_aligner.h"
|
||||
|
||||
#include "rtc_tools/frame_analyzer/video_quality_analysis.h"
|
||||
#include "rtc_tools/video_file_reader.h"
|
||||
#include "test/gtest.h"
|
||||
#include "test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace test {
|
||||
|
||||
class VideoTemporalAlignerTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetUp() {
|
||||
reference_video =
|
||||
OpenYuvFile(ResourcePath("foreman_128x96", "yuv"), 128, 96);
|
||||
ASSERT_TRUE(reference_video);
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<Video> reference_video;
|
||||
};
|
||||
|
||||
TEST_F(VideoTemporalAlignerTest, FindMatchingFrameIndicesEmpty) {
|
||||
rtc::scoped_refptr<Video> empty_test_video =
|
||||
ReorderVideo(reference_video, std::vector<size_t>());
|
||||
|
||||
const std::vector<size_t> matched_indices =
|
||||
FindMatchingFrameIndices(reference_video, empty_test_video);
|
||||
|
||||
EXPECT_TRUE(matched_indices.empty());
|
||||
}
|
||||
|
||||
TEST_F(VideoTemporalAlignerTest, FindMatchingFrameIndicesIdentity) {
|
||||
const std::vector<size_t> indices =
|
||||
FindMatchingFrameIndices(reference_video, reference_video);
|
||||
|
||||
EXPECT_EQ(indices.size(), reference_video->number_of_frames());
|
||||
for (size_t i = 0; i < indices.size(); ++i)
|
||||
EXPECT_EQ(i, indices[i]);
|
||||
}
|
||||
|
||||
TEST_F(VideoTemporalAlignerTest, FindMatchingFrameIndicesDuplicateFrames) {
|
||||
const std::vector<size_t> indices = {2, 2, 2, 2};
|
||||
|
||||
// Generate a test video based on this sequence.
|
||||
rtc::scoped_refptr<Video> test_video = ReorderVideo(reference_video, indices);
|
||||
|
||||
const std::vector<size_t> matched_indices =
|
||||
FindMatchingFrameIndices(reference_video, test_video);
|
||||
|
||||
EXPECT_EQ(indices, matched_indices);
|
||||
}
|
||||
|
||||
TEST_F(VideoTemporalAlignerTest, FindMatchingFrameIndicesLoopAround) {
|
||||
std::vector<size_t> indices;
|
||||
for (size_t i = 0; i < reference_video->number_of_frames() * 2; ++i)
|
||||
indices.push_back(i % reference_video->number_of_frames());
|
||||
|
||||
// Generate a test video based on this sequence.
|
||||
rtc::scoped_refptr<Video> test_video = ReorderVideo(reference_video, indices);
|
||||
|
||||
const std::vector<size_t> matched_indices =
|
||||
FindMatchingFrameIndices(reference_video, test_video);
|
||||
|
||||
for (size_t i = 0; i < matched_indices.size(); ++i)
|
||||
EXPECT_EQ(i, matched_indices[i]);
|
||||
}
|
||||
|
||||
TEST_F(VideoTemporalAlignerTest, FindMatchingFrameIndicesStressTest) {
|
||||
std::vector<size_t> indices;
|
||||
// Arbitrary start index.
|
||||
const size_t start_index = 12345;
|
||||
// Generate some generic sequence of frames.
|
||||
indices.push_back(start_index % reference_video->number_of_frames());
|
||||
indices.push_back((start_index + 1) % reference_video->number_of_frames());
|
||||
indices.push_back((start_index + 2) % reference_video->number_of_frames());
|
||||
indices.push_back((start_index + 5) % reference_video->number_of_frames());
|
||||
indices.push_back((start_index + 10) % reference_video->number_of_frames());
|
||||
indices.push_back((start_index + 20) % reference_video->number_of_frames());
|
||||
indices.push_back((start_index + 20) % reference_video->number_of_frames());
|
||||
indices.push_back((start_index + 22) % reference_video->number_of_frames());
|
||||
indices.push_back((start_index + 32) % reference_video->number_of_frames());
|
||||
|
||||
// Generate a test video based on this sequence.
|
||||
rtc::scoped_refptr<Video> test_video = ReorderVideo(reference_video, indices);
|
||||
|
||||
const std::vector<size_t> matched_indices =
|
||||
FindMatchingFrameIndices(reference_video, test_video);
|
||||
|
||||
EXPECT_EQ(indices, matched_indices);
|
||||
}
|
||||
|
||||
TEST_F(VideoTemporalAlignerTest, GenerateAlignedReferenceVideo) {
|
||||
// Arbitrary start index.
|
||||
const size_t start_index = 12345;
|
||||
std::vector<size_t> indices;
|
||||
const size_t frame_step = 10;
|
||||
for (size_t i = 0; i < reference_video->number_of_frames() / frame_step;
|
||||
++i) {
|
||||
indices.push_back((start_index + i * frame_step) %
|
||||
reference_video->number_of_frames());
|
||||
}
|
||||
|
||||
// Generate a test video based on this sequence.
|
||||
rtc::scoped_refptr<Video> test_video = ReorderVideo(reference_video, indices);
|
||||
|
||||
rtc::scoped_refptr<Video> aligned_reference_video =
|
||||
GenerateAlignedReferenceVideo(reference_video, test_video);
|
||||
|
||||
// Assume perfect match, i.e. ssim == 1, for all frames.
|
||||
for (size_t i = 0; i < test_video->number_of_frames(); ++i) {
|
||||
EXPECT_EQ(1.0, Ssim(test_video->GetFrame(i),
|
||||
aligned_reference_video->GetFrame(i)));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace webrtc
|
Reference in New Issue
Block a user