From c8474178d66fa774474bae7dc4581dfb3f5b86ac Mon Sep 17 00:00:00 2001
From: kthelgason <kthelgason@webrtc.org>
Date: Thu, 8 Dec 2016 08:04:51 -0800
Subject: [PATCH] Reland of Add ability to scale to arbitrary factors (patchset
 #1 id:1 of https://codereview.webrtc.org/2557323002/ )

Reason for revert:
There was a bug in the implementation where the adapter could get stuck at really low resolutions. That has now been fixed.

Original issue's description:
> Revert of Add ability to scale to arbitrary factors (patchset #7 id:120001 of https://codereview.webrtc.org/2555483005/ )
>
> Reason for revert:
> Issue discovered with scaling back up.
>
> Original issue's description:
> > Add ability to scale to arbitrary factors
> >
> > This CL adds a fallback for the case when no optimized scale factor produces a low enough resolution for what was requested. It also ensures that all resolutions provided by the video adapter are divisible by four. This is required by some hardware implementations.
> >
> > BUG=webrtc:6837
> >
> > Committed: https://crrev.com/710c335d785b104bda4a912bd7909e4d27f9b04f
> > Cr-Commit-Position: refs/heads/master@{#15469}
>
> TBR=magjed@webrtc.org
> # Skipping CQ checks because original CL landed less than 1 day ago.
> NOPRESUBMIT=true
> NOTREECHECKS=true
> NOTRY=true
> BUG=webrtc:6837
>
> Committed: https://crrev.com/7722a4cc8d31e5e924e9e6c5c97412ce8bbbe59d
> Cr-Commit-Position: refs/heads/master@{#15470}

R=magjed@webrtc.org
BUG=webrtc:6837,webrtc:6848

Review-Url: https://codereview.webrtc.org/2558243003
Cr-Commit-Position: refs/heads/master@{#15485}
---
 webrtc/media/base/adaptedvideotracksource.cc  |   5 +
 webrtc/media/base/adaptedvideotracksource.h   |   3 +
 webrtc/media/base/videoadapter.cc             | 150 +++++++-----------
 webrtc/media/base/videoadapter.h              |   5 +-
 webrtc/media/base/videoadapter_unittest.cc    |  60 +++++++
 .../src/jni/androidvideotracksource.cc        |   8 +-
 6 files changed, 136 insertions(+), 95 deletions(-)
diff --git a/webrtc/media/base/adaptedvideotracksource.cc b/webrtc/media/base/adaptedvideotracksource.cc
index acc147cc89..5900439e12 100644
--- a/webrtc/media/base/adaptedvideotracksource.cc
+++ b/webrtc/media/base/adaptedvideotracksource.cc
@@ -16,6 +16,11 @@ AdaptedVideoTrackSource::AdaptedVideoTrackSource() {
   thread_checker_.DetachFromThread();
 }
 
+AdaptedVideoTrackSource::AdaptedVideoTrackSource(int required_alignment)
+    : video_adapter_(required_alignment) {
+  thread_checker_.DetachFromThread();
+}
+
 bool AdaptedVideoTrackSource::GetStats(Stats* stats) {
   rtc::CritScope lock(&stats_crit_);
 
diff --git a/webrtc/media/base/adaptedvideotracksource.h b/webrtc/media/base/adaptedvideotracksource.h
index 5b420aeb9e..9d51c692c2 100644
--- a/webrtc/media/base/adaptedvideotracksource.h
+++ b/webrtc/media/base/adaptedvideotracksource.h
@@ -28,6 +28,9 @@ class AdaptedVideoTrackSource
   AdaptedVideoTrackSource();
 
  protected:
+  // Allows derived classes to initialize |video_adapter_| with a custom
+  // alignment.
+  explicit AdaptedVideoTrackSource(int required_alignment);
   // Checks the apply_rotation() flag. If the frame needs rotation, and it is a
   // plain memory frame, it is rotated. Subclasses producing native frames must
   // handle apply_rotation() themselves.
diff --git a/webrtc/media/base/videoadapter.cc b/webrtc/media/base/videoadapter.cc
index acb0e2c1a1..8f7288dc59 100644
--- a/webrtc/media/base/videoadapter.cc
+++ b/webrtc/media/base/videoadapter.cc
@@ -11,116 +11,73 @@
 #include "webrtc/media/base/videoadapter.h"
 
 #include <algorithm>
+#include <cmath>
 #include <cstdlib>
 #include <limits>
 
+#include "webrtc/base/arraysize.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/logging.h"
+#include "webrtc/base/optional.h"
 #include "webrtc/media/base/mediaconstants.h"
 #include "webrtc/media/base/videocommon.h"
 
 namespace {
-
 struct Fraction {
   int numerator;
   int denominator;
 };
 
-// Scale factors optimized for in libYUV that we accept.
-// Must be sorted in decreasing scale factors for FindScaleLargerThan to work.
-const Fraction kScaleFractions[] = {
-  {1, 1},
-  {3, 4},
-  {1, 2},
-  {3, 8},
-  {1, 4},
-  {3, 16},
-};
-
-// Round |valueToRound| to a multiple of |multiple|. Prefer rounding upwards,
-// but never more than |maxValue|.
-int roundUp(int valueToRound, int multiple, int maxValue) {
-  const int roundedValue = (valueToRound + multiple - 1) / multiple * multiple;
-  return roundedValue <= maxValue ? roundedValue
-                                  : (maxValue / multiple * multiple);
+// Round |value_to_round| to a multiple of |multiple|. Prefer rounding upwards,
+// but never more than |max_value|.
+int roundUp(int value_to_round, int multiple, int max_value) {
+  const int rounded_value =
+      (value_to_round + multiple - 1) / multiple * multiple;
+  return rounded_value <= max_value ? rounded_value
+                                    : (max_value / multiple * multiple);
 }
 
-Fraction FindScaleLessThanOrEqual(int input_num_pixels, int target_num_pixels) {
-  float best_distance = std::numeric_limits<float>::max();
-  Fraction best_scale = {0, 1};  // Default to 0 if nothing matches.
-  for (const auto& fraction : kScaleFractions) {
-    const float scale =
-        fraction.numerator / static_cast<float>(fraction.denominator);
-    float test_num_pixels = input_num_pixels * scale * scale;
-    float diff = target_num_pixels - test_num_pixels;
-    if (diff < 0) {
-      continue;
-    }
-    if (diff < best_distance) {
-      best_distance = diff;
-      best_scale = fraction;
-      if (best_distance == 0) {  // Found exact match.
-        break;
-      }
+// Walks the scale sequence 1, 3/4, 1/2, 3/8, ... and returns the first scale
+// whose pixel count is <= |target_num_pixels|, or one step larger if |step_up|.
+Fraction FindScale(int input_num_pixels, int target_num_pixels, bool step_up) {
+  // This function only makes sense for a positive target.
+  RTC_DCHECK_GT(target_num_pixels, 0);
+  Fraction best_scale = Fraction{1, 1};
+  Fraction last_scale = Fraction{1, 1};
+  const float target_scale =
+      std::sqrt(target_num_pixels / static_cast<float>(input_num_pixels));
+  while (best_scale.numerator > (target_scale * best_scale.denominator)) {
+    last_scale = best_scale;
+    if (best_scale.numerator % 3 == 0 && best_scale.denominator % 2 == 0) {
+      // Multiply by 2/3
+      best_scale.numerator /= 3;
+      best_scale.denominator /= 2;
+    } else {
+      // Multiply by 3/4
+      best_scale.numerator *= 3;
+      best_scale.denominator *= 4;
     }
   }
+  if (step_up)
+    return last_scale;
   return best_scale;
 }
-
-Fraction FindScaleLargerThan(int input_num_pixels,
-                             int target_num_pixels,
-                             int* resulting_number_of_pixels) {
-  float best_distance = std::numeric_limits<float>::max();
-  Fraction best_scale = {1, 1};  // Default to unscaled if nothing matches.
-  // Default to input number of pixels.
-  float best_number_of_pixels = input_num_pixels;
-  for (const auto& fraction : kScaleFractions) {
-    const float scale =
-        fraction.numerator / static_cast<float>(fraction.denominator);
-    float test_num_pixels = input_num_pixels * scale * scale;
-    float diff = test_num_pixels - target_num_pixels;
-    if (diff <= 0) {
-      break;
-    }
-    if (diff < best_distance) {
-      best_distance = diff;
-      best_scale = fraction;
-      best_number_of_pixels = test_num_pixels;
-    }
-  }
-
-  *resulting_number_of_pixels = static_cast<int>(best_number_of_pixels + .5f);
-  return best_scale;
-}
-
-Fraction FindScale(int input_num_pixels,
-                   int max_pixel_count_step_up,
-                   int max_pixel_count) {
-  // Try scale just above |max_pixel_count_step_up_|.
-  if (max_pixel_count_step_up > 0) {
-    int resulting_pixel_count;
-    const Fraction scale = FindScaleLargerThan(
-        input_num_pixels, max_pixel_count_step_up, &resulting_pixel_count);
-    if (resulting_pixel_count <= max_pixel_count)
-      return scale;
-  }
-  // Return largest scale below |max_pixel_count|.
-  return FindScaleLessThanOrEqual(input_num_pixels, max_pixel_count);
-}
-
 }  // namespace
 
 namespace cricket {
 
-VideoAdapter::VideoAdapter()
+VideoAdapter::VideoAdapter(int required_resolution_alignment)
     : frames_in_(0),
       frames_out_(0),
       frames_scaled_(0),
       adaption_changes_(0),
       previous_width_(0),
       previous_height_(0),
+      required_resolution_alignment_(required_resolution_alignment),
       resolution_request_max_pixel_count_(std::numeric_limits<int>::max()),
-      resolution_request_max_pixel_count_step_up_(0) {}
+      step_up_(false) {}
+
+VideoAdapter::VideoAdapter() : VideoAdapter(1) {}
 
 VideoAdapter::~VideoAdapter() {}
 
@@ -167,12 +124,17 @@ bool VideoAdapter::AdaptFrameResolution(int in_width,
   // OnOutputFormatRequest and OnResolutionRequest.
   int max_pixel_count = resolution_request_max_pixel_count_;
   if (requested_format_) {
+    // TODO(kthelgason): remove the - |step_up_| hack when we change how
+    // resolution is requested from VideoSourceProxy.
+    // This is required because we must not scale above the requested
+    // format so we subtract one when scaling up.
     max_pixel_count = std::min(
-        max_pixel_count, requested_format_->width * requested_format_->height);
+        max_pixel_count, requested_format_->width * requested_format_->height -
+                             static_cast<int>(step_up_));
   }
 
   // Drop the input frame if necessary.
-  if (max_pixel_count == 0 || !KeepFrame(in_timestamp_ns)) {
+  if (max_pixel_count <= 0 || !KeepFrame(in_timestamp_ns)) {
     // Show VAdapt log every 90 frames dropped. (3 seconds)
     if ((frames_in_ - frames_out_) % 90 == 0) {
       // TODO(fbarchard): Reduce to LS_VERBOSE when adapter info is not needed
@@ -211,22 +173,25 @@ bool VideoAdapter::AdaptFrameResolution(int in_width,
     *cropped_height =
         std::min(in_height, static_cast<int>(in_width / requested_aspect));
   }
-
-  // Find best scale factor.
   const Fraction scale =
-      FindScale(*cropped_width * *cropped_height,
-                resolution_request_max_pixel_count_step_up_, max_pixel_count);
-
+      FindScale(*cropped_width * *cropped_height, max_pixel_count, step_up_);
   // Adjust cropping slightly to get even integer output size and a perfect
-  // scale factor.
-  *cropped_width = roundUp(*cropped_width, scale.denominator, in_width);
-  *cropped_height = roundUp(*cropped_height, scale.denominator, in_height);
+  // scale factor. Make sure the resulting dimensions are aligned correctly
+  // to be nice to hardware encoders.
+  *cropped_width =
+      roundUp(*cropped_width,
+              scale.denominator * required_resolution_alignment_, in_width);
+  *cropped_height =
+      roundUp(*cropped_height,
+              scale.denominator * required_resolution_alignment_, in_height);
   RTC_DCHECK_EQ(0, *cropped_width % scale.denominator);
   RTC_DCHECK_EQ(0, *cropped_height % scale.denominator);
 
   // Calculate final output size.
   *out_width = *cropped_width / scale.denominator * scale.numerator;
   *out_height = *cropped_height / scale.denominator * scale.numerator;
+  RTC_DCHECK_EQ(0, *out_width % required_resolution_alignment_);
+  RTC_DCHECK_EQ(0, *out_height % required_resolution_alignment_);
 
   ++frames_out_;
   if (scale.numerator != scale.denominator)
@@ -260,10 +225,9 @@ void VideoAdapter::OnResolutionRequest(
     rtc::Optional<int> max_pixel_count,
     rtc::Optional<int> max_pixel_count_step_up) {
   rtc::CritScope cs(&critical_section_);
-  resolution_request_max_pixel_count_ =
-      max_pixel_count.value_or(std::numeric_limits<int>::max());
-  resolution_request_max_pixel_count_step_up_ =
-      max_pixel_count_step_up.value_or(0);
+  resolution_request_max_pixel_count_ = max_pixel_count.value_or(
+      max_pixel_count_step_up.value_or(std::numeric_limits<int>::max()));
+  step_up_ = static_cast<bool>(max_pixel_count_step_up);
 }
 
 }  // namespace cricket
diff --git a/webrtc/media/base/videoadapter.h b/webrtc/media/base/videoadapter.h
index 9d17f5ce15..553c085774 100644
--- a/webrtc/media/base/videoadapter.h
+++ b/webrtc/media/base/videoadapter.h
@@ -25,6 +25,7 @@ namespace cricket {
 class VideoAdapter {
  public:
   VideoAdapter();
+  explicit VideoAdapter(int required_resolution_alignment);
   virtual ~VideoAdapter();
 
   // Return the adapted resolution and cropping parameters given the
@@ -63,6 +64,8 @@ class VideoAdapter {
   int adaption_changes_;  // Number of changes in scale factor.
   int previous_width_;    // Previous adapter output width.
   int previous_height_;   // Previous adapter output height.
+  // Resolution must be divisible by this factor.
+  const int required_resolution_alignment_;
   // The target timestamp for the next frame based on requested format.
   rtc::Optional<int64_t> next_frame_timestamp_ns_ GUARDED_BY(critical_section_);
 
@@ -71,7 +74,7 @@ class VideoAdapter {
   // The adapted output format is the minimum of these.
   rtc::Optional<VideoFormat> requested_format_ GUARDED_BY(critical_section_);
   int resolution_request_max_pixel_count_ GUARDED_BY(critical_section_);
-  int resolution_request_max_pixel_count_step_up_ GUARDED_BY(critical_section_);
+  bool step_up_ GUARDED_BY(critical_section_);
 
   // The critical section to protect the above variables.
   rtc::CriticalSection critical_section_;
diff --git a/webrtc/media/base/videoadapter_unittest.cc b/webrtc/media/base/videoadapter_unittest.cc
index 7f660ef37f..782cd2f74e 100644
--- a/webrtc/media/base/videoadapter_unittest.cc
+++ b/webrtc/media/base/videoadapter_unittest.cc
@@ -951,4 +951,64 @@ TEST_F(VideoAdapterTest, TestCroppingOddResolution) {
   EXPECT_EQ(69, out_height_);
 }
 
+TEST_F(VideoAdapterTest, TestAdaptToVerySmallResolution) {
+  // Ask for 1920x1080 (16:9 aspect), with 1/16 scaling.
+  const int w = 1920;
+  const int h = 1080;
+  adapter_.OnOutputFormatRequest(VideoFormat(w, h, 0, FOURCC_I420));
+  adapter_.OnResolutionRequest(rtc::Optional<int>(w * h * 1 / 16 * 1 / 16),
+                               rtc::Optional<int>());
+
+  // Send 1920x1080 (16:9 aspect).
+  EXPECT_TRUE(adapter_.AdaptFrameResolution(
+      w, h, 0, &cropped_width_, &cropped_height_, &out_width_, &out_height_));
+
+  // Rather than the exact 16:9 crop of 1920x1080, the cropped height is rounded
+  // down to a multiple of 16 so that the 1/16 scale factor divides it evenly.
+  EXPECT_EQ(1920, cropped_width_);
+  EXPECT_EQ(1072, cropped_height_);
+  EXPECT_EQ(120, out_width_);
+  EXPECT_EQ(67, out_height_);
+
+  // Adapt back up one step to 3/32.
+  adapter_.OnResolutionRequest(rtc::Optional<int>(),
+                               rtc::Optional<int>(w * h * 1 / 16 * 1 / 16));
+
+  // Send 1920x1080 (16:9 aspect).
+  EXPECT_TRUE(adapter_.AdaptFrameResolution(
+      w, h, 0, &cropped_width_, &cropped_height_, &out_width_, &out_height_));
+
+  EXPECT_EQ(180, out_width_);
+  EXPECT_EQ(99, out_height_);
+}
+
+TEST_F(VideoAdapterTest, AdaptFrameResolutionDropWithResolutionRequest) {
+  VideoFormat output_format = capture_format_;
+  output_format.width = 0;
+  output_format.height = 0;
+  adapter_.OnOutputFormatRequest(output_format);
+  EXPECT_FALSE(adapter_.AdaptFrameResolution(
+      capture_format_.width, capture_format_.height, 0,
+      &cropped_width_, &cropped_height_,
+      &out_width_, &out_height_));
+
+  adapter_.OnResolutionRequest(rtc::Optional<int>(),
+                               rtc::Optional<int>(640 * 480));
+
+  // Still dropped: the 0x0 format caps the pixel count, even when stepping up.
+  EXPECT_FALSE(adapter_.AdaptFrameResolution(
+      capture_format_.width, capture_format_.height, 0,
+      &cropped_width_, &cropped_height_,
+      &out_width_, &out_height_));
+
+  adapter_.OnResolutionRequest(rtc::Optional<int>(640 * 480 - 1),
+                               rtc::Optional<int>());
+
+  // Still dropped: a smaller pixel-count request cannot lift the 0x0 cap.
+  EXPECT_FALSE(adapter_.AdaptFrameResolution(
+      capture_format_.width, capture_format_.height, 0,
+      &cropped_width_, &cropped_height_,
+      &out_width_, &out_height_));
+}
+
 }  // namespace cricket
diff --git a/webrtc/sdk/android/src/jni/androidvideotracksource.cc b/webrtc/sdk/android/src/jni/androidvideotracksource.cc
index 57bbc4a312..1a4a276ae2 100644
--- a/webrtc/sdk/android/src/jni/androidvideotracksource.cc
+++ b/webrtc/sdk/android/src/jni/androidvideotracksource.cc
@@ -12,13 +12,19 @@
 
 #include <utility>
 
+namespace {
+// MediaCodec wants resolution to be divisible by 2.
+const int kRequiredResolutionAlignment = 2;
+}  // namespace
+
 namespace webrtc {
 
 AndroidVideoTrackSource::AndroidVideoTrackSource(rtc::Thread* signaling_thread,
                                                  JNIEnv* jni,
                                                  jobject j_egl_context,
                                                  bool is_screencast)
-    : signaling_thread_(signaling_thread),
+    : AdaptedVideoTrackSource(kRequiredResolutionAlignment),
+      signaling_thread_(signaling_thread),
       surface_texture_helper_(webrtc_jni::SurfaceTextureHelper::create(
           jni,
           "Camera SurfaceTextureHelper",