From fa0befe13bb030adb61eb55c5557df1bf6e29205 Mon Sep 17 00:00:00 2001
From: jackychen <jackychen@webrtc.org>
Date: Fri, 1 Apr 2016 07:46:58 -0700
Subject: [PATCH] External denoiser based on noise estimation and moving object
 detection.

Improve the existing external denoiser in WebRTC: the filter strength
now adapts to the noise level of the whole frame and to the result of
moving object detection. The adaptive filter effectively removes the
artifacts seen in the previous version, such as trailing and blockiness
on moving objects.
The external denoiser is off by default for now.

BUG=

Review URL: https://codereview.webrtc.org/1822333003

Cr-Commit-Position: refs/heads/master@{#12198}
---
 webrtc/modules/video_processing/BUILD.gn      |   2 +
 .../video_processing/frame_preprocessor.cc    |   4 +-
 .../video_processing/frame_preprocessor.h     |   1 +
 .../video_processing/test/denoiser_test.cc    |  61 ++--
 .../video_processing/util/denoiser_filter.cc  |  13 +-
 .../video_processing/util/denoiser_filter.h   |   8 +-
 .../util/denoiser_filter_c.cc                 |  10 +-
 .../video_processing/util/denoiser_filter_c.h |   3 +-
 .../util/denoiser_filter_neon.cc              |  12 +-
 .../util/denoiser_filter_neon.h               |   3 +-
 .../util/denoiser_filter_sse2.cc              |  11 +-
 .../util/denoiser_filter_sse2.h               |   3 +-
 .../video_processing/util/noise_estimation.cc |  95 ++++++
 .../video_processing/util/noise_estimation.h  |  56 ++++
 .../video_processing/video_denoiser.cc        | 306 ++++++++++++++----
 .../modules/video_processing/video_denoiser.h |  20 +-
 .../video_processing/video_processing.gypi    |   2 +
 17 files changed, 488 insertions(+), 122 deletions(-)
 create mode 100644 webrtc/modules/video_processing/util/noise_estimation.cc
 create mode 100644 webrtc/modules/video_processing/util/noise_estimation.h

diff --git a/webrtc/modules/video_processing/BUILD.gn b/webrtc/modules/video_processing/BUILD.gn
index 6d411edda1..43a8de1255 100644
--- a/webrtc/modules/video_processing/BUILD.gn
+++ b/webrtc/modules/video_processing/BUILD.gn
@@ -29,6 +29,8 @@ source_set("video_processing") {
     "util/denoiser_filter.h",
     "util/denoiser_filter_c.cc",
     "util/denoiser_filter_c.h",
+    "util/noise_estimation.cc",
+    "util/noise_estimation.h",
     "util/skin_detection.cc",
     "util/skin_detection.h",
     "video_decimator.cc",
diff --git a/webrtc/modules/video_processing/frame_preprocessor.cc b/webrtc/modules/video_processing/frame_preprocessor.cc
index d91cacfe39..fd0d0efb97 100644
--- a/webrtc/modules/video_processing/frame_preprocessor.cc
+++ b/webrtc/modules/video_processing/frame_preprocessor.cc
@@ -22,6 +22,7 @@ VPMFramePreprocessor::VPMFramePreprocessor()
   spatial_resampler_ = new VPMSimpleSpatialResampler();
   ca_ = new VPMContentAnalysis(true);
   vd_ = new VPMVideoDecimator();
+  EnableDenosing(false);
 }
 
 VPMFramePreprocessor::~VPMFramePreprocessor() {
@@ -115,7 +116,8 @@ const VideoFrame* VPMFramePreprocessor::PreprocessFrame(
 
   const VideoFrame* current_frame = &frame;
   if (denoiser_) {
-    denoiser_->DenoiseFrame(*current_frame, &denoised_frame_);
+    denoiser_->DenoiseFrame(*current_frame, &denoised_frame_,
+                            &denoised_frame_prev_, 0);
     current_frame = &denoised_frame_;
   }
 
diff --git a/webrtc/modules/video_processing/frame_preprocessor.h b/webrtc/modules/video_processing/frame_preprocessor.h
index 6dd3693dbf..c35dd0d7af 100644
--- a/webrtc/modules/video_processing/frame_preprocessor.h
+++ b/webrtc/modules/video_processing/frame_preprocessor.h
@@ -71,6 +71,7 @@ class VPMFramePreprocessor {
 
   VideoContentMetrics* content_metrics_;
   VideoFrame denoised_frame_;
+  VideoFrame denoised_frame_prev_;
   VideoFrame resampled_frame_;
   VPMSpatialResampler* spatial_resampler_;
   VPMContentAnalysis* ca_;
diff --git a/webrtc/modules/video_processing/test/denoiser_test.cc b/webrtc/modules/video_processing/test/denoiser_test.cc
index c71097ac26..a45f933bb5 100644
--- a/webrtc/modules/video_processing/test/denoiser_test.cc
+++ b/webrtc/modules/video_processing/test/denoiser_test.cc
@@ -21,8 +21,9 @@
 namespace webrtc {
 
 TEST_F(VideoProcessingTest, CopyMem) {
-  std::unique_ptr<DenoiserFilter> df_c(DenoiserFilter::Create(false));
-  std::unique_ptr<DenoiserFilter> df_sse_neon(DenoiserFilter::Create(true));
+  std::unique_ptr<DenoiserFilter> df_c(DenoiserFilter::Create(false, nullptr));
+  std::unique_ptr<DenoiserFilter> df_sse_neon(
+      DenoiserFilter::Create(true, nullptr));
   uint8_t src[16 * 16], dst[16 * 16];
   for (int i = 0; i < 16; ++i) {
     for (int j = 0; j < 16; ++j) {
@@ -48,8 +49,9 @@ TEST_F(VideoProcessingTest, CopyMem) {
 }
 
 TEST_F(VideoProcessingTest, Variance) {
-  std::unique_ptr<DenoiserFilter> df_c(DenoiserFilter::Create(false));
-  std::unique_ptr<DenoiserFilter> df_sse_neon(DenoiserFilter::Create(true));
+  std::unique_ptr<DenoiserFilter> df_c(DenoiserFilter::Create(false, nullptr));
+  std::unique_ptr<DenoiserFilter> df_sse_neon(
+      DenoiserFilter::Create(true, nullptr));
   uint8_t src[16 * 16], dst[16 * 16];
   uint32_t sum = 0, sse = 0, var;
   for (int i = 0; i < 16; ++i) {
@@ -71,51 +73,53 @@ TEST_F(VideoProcessingTest, Variance) {
 }
 
 TEST_F(VideoProcessingTest, MbDenoise) {
-  std::unique_ptr<DenoiserFilter> df_c(DenoiserFilter::Create(false));
-  std::unique_ptr<DenoiserFilter> df_sse_neon(DenoiserFilter::Create(true));
-  uint8_t running_src[16 * 16], src[16 * 16], dst[16 * 16], dst_ref[16 * 16];
+  std::unique_ptr<DenoiserFilter> df_c(DenoiserFilter::Create(false, nullptr));
+  std::unique_ptr<DenoiserFilter> df_sse_neon(
+      DenoiserFilter::Create(true, nullptr));
+  uint8_t running_src[16 * 16], src[16 * 16];
+  uint8_t dst[16 * 16], dst_sse_neon[16 * 16];
 
   // Test case: |diff| <= |3 + shift_inc1|
   for (int i = 0; i < 16; ++i) {
     for (int j = 0; j < 16; ++j) {
       running_src[i * 16 + j] = i * 11 + j;
       src[i * 16 + j] = i * 11 + j + 2;
-      dst_ref[i * 16 + j] = running_src[i * 16 + j];
     }
   }
   memset(dst, 0, 16 * 16);
-  df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1);
-  EXPECT_EQ(0, memcmp(dst, dst_ref, 16 * 16));
+  df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false);
+  memset(dst_sse_neon, 0, 16 * 16);
+  df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1,
+                         false);
+  EXPECT_EQ(0, memcmp(dst, dst_sse_neon, 16 * 16));
 
   // Test case: |diff| >= |4 + shift_inc1|
   for (int i = 0; i < 16; ++i) {
     for (int j = 0; j < 16; ++j) {
       running_src[i * 16 + j] = i * 11 + j;
       src[i * 16 + j] = i * 11 + j + 5;
-      dst_ref[i * 16 + j] = src[i * 16 + j] - 2;
     }
   }
   memset(dst, 0, 16 * 16);
-  df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1);
-  EXPECT_EQ(0, memcmp(dst, dst_ref, 16 * 16));
-  memset(dst, 0, 16 * 16);
-  df_sse_neon->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1);
-  EXPECT_EQ(0, memcmp(dst, dst_ref, 16 * 16));
+  df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false);
+  memset(dst_sse_neon, 0, 16 * 16);
+  df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1,
+                         false);
+  EXPECT_EQ(0, memcmp(dst, dst_sse_neon, 16 * 16));
 
   // Test case: |diff| >= 8
   for (int i = 0; i < 16; ++i) {
     for (int j = 0; j < 16; ++j) {
       running_src[i * 16 + j] = i * 11 + j;
       src[i * 16 + j] = i * 11 + j + 8;
-      dst_ref[i * 16 + j] = src[i * 16 + j] - 6;
     }
   }
   memset(dst, 0, 16 * 16);
-  df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1);
-  EXPECT_EQ(0, memcmp(dst, dst_ref, 16 * 16));
-  memset(dst, 0, 16 * 16);
-  df_sse_neon->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1);
-  EXPECT_EQ(0, memcmp(dst, dst_ref, 16 * 16));
+  df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false);
+  memset(dst_sse_neon, 0, 16 * 16);
+  df_sse_neon->MbDenoise(running_src, 16, dst_sse_neon, 16, src, 16, 0, 1,
+                         false);
+  EXPECT_EQ(0, memcmp(dst, dst_sse_neon, 16 * 16));
 
   // Test case: |diff| > 15
   for (int i = 0; i < 16; ++i) {
@@ -126,9 +130,10 @@ TEST_F(VideoProcessingTest, MbDenoise) {
   }
   memset(dst, 0, 16 * 16);
   DenoiserDecision decision =
-      df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1);
+      df_c->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false);
   EXPECT_EQ(COPY_BLOCK, decision);
-  decision = df_sse_neon->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1);
+  decision =
+      df_sse_neon->MbDenoise(running_src, 16, dst, 16, src, 16, 0, 1, false);
   EXPECT_EQ(COPY_BLOCK, decision);
 }
 
@@ -138,7 +143,9 @@ TEST_F(VideoProcessingTest, Denoiser) {
   // Create SSE or NEON denoiser.
   VideoDenoiser denoiser_sse_neon(true);
   VideoFrame denoised_frame_c;
+  VideoFrame denoised_frame_track_c;
   VideoFrame denoised_frame_sse_neon;
+  VideoFrame denoised_frame_track_sse_neon;
 
   std::unique_ptr<uint8_t[]> video_buffer(new uint8_t[frame_length_]);
   while (fread(video_buffer.get(), 1, frame_length_, source_file_) ==
@@ -147,8 +154,10 @@ TEST_F(VideoProcessingTest, Denoiser) {
     EXPECT_EQ(0, ConvertToI420(kI420, video_buffer.get(), 0, 0, width_, height_,
                                0, kVideoRotation_0, &video_frame_));
 
-    denoiser_c.DenoiseFrame(video_frame_, &denoised_frame_c);
-    denoiser_sse_neon.DenoiseFrame(video_frame_, &denoised_frame_sse_neon);
+    denoiser_c.DenoiseFrame(video_frame_, &denoised_frame_c,
+                            &denoised_frame_track_c, -1);
+    denoiser_sse_neon.DenoiseFrame(video_frame_, &denoised_frame_sse_neon,
+                                   &denoised_frame_track_sse_neon, -1);
 
     // Denoising results should be the same for C and SSE/NEON denoiser.
     ASSERT_TRUE(test::FramesEqual(denoised_frame_c, denoised_frame_sse_neon));
diff --git a/webrtc/modules/video_processing/util/denoiser_filter.cc b/webrtc/modules/video_processing/util/denoiser_filter.cc
index a9c6f005d9..b111a0e412 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter.cc
+++ b/webrtc/modules/video_processing/util/denoiser_filter.cc
@@ -18,13 +18,16 @@
 namespace webrtc {
 
 const int kMotionMagnitudeThreshold = 8 * 3;
-const int kSumDiffThreshold = 16 * 16 * 2;
-const int kSumDiffThresholdHigh = 600;
+const int kSumDiffThreshold = 96;
+const int kSumDiffThresholdHigh = 448;
 
 std::unique_ptr<DenoiserFilter> DenoiserFilter::Create(
-    bool runtime_cpu_detection) {
+    bool runtime_cpu_detection,
+    CpuType* cpu_type) {
   std::unique_ptr<DenoiserFilter> filter;
 
+  if (cpu_type != nullptr)
+    *cpu_type = CPU_NOT_NEON;
   if (runtime_cpu_detection) {
 // If we know the minimum architecture at compile time, avoid CPU detection.
 #if defined(WEBRTC_ARCH_X86_FAMILY)
@@ -40,9 +43,13 @@ std::unique_ptr<DenoiserFilter> DenoiserFilter::Create(
 #endif
 #elif defined(WEBRTC_HAS_NEON)
     filter.reset(new DenoiserFilterNEON());
+    if (cpu_type != nullptr)
+      *cpu_type = CPU_NEON;
 #elif defined(WEBRTC_DETECT_NEON)
     if (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) {
       filter.reset(new DenoiserFilterNEON());
+      if (cpu_type != nullptr)
+        *cpu_type = CPU_NEON;
     } else {
       filter.reset(new DenoiserFilterC());
     }
diff --git a/webrtc/modules/video_processing/util/denoiser_filter.h b/webrtc/modules/video_processing/util/denoiser_filter.h
index 40745853de..f2c7570083 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter.h
+++ b/webrtc/modules/video_processing/util/denoiser_filter.h
@@ -11,6 +11,7 @@
 #ifndef WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_H_
 #define WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_DENOISER_FILTER_H_
 
+#include <climits>
 #include <memory>
 
 #include "webrtc/modules/include/module_common_types.h"
@@ -23,6 +24,7 @@ extern const int kSumDiffThreshold;
 extern const int kSumDiffThresholdHigh;
 
 enum DenoiserDecision { COPY_BLOCK, FILTER_BLOCK };
+enum CpuType { CPU_NEON, CPU_NOT_NEON };
 struct DenoiseMetrics {
   uint32_t var;
   uint32_t sad;
@@ -32,7 +34,8 @@ struct DenoiseMetrics {
 
 class DenoiserFilter {
  public:
-  static std::unique_ptr<DenoiserFilter> Create(bool runtime_cpu_detection);
+  static std::unique_ptr<DenoiserFilter> Create(bool runtime_cpu_detection,
+                                                CpuType* cpu_type);
 
   virtual ~DenoiserFilter() {}
 
@@ -56,7 +59,8 @@ class DenoiserFilter {
                                      const uint8_t* sig,
                                      int sig_stride,
                                      uint8_t motion_magnitude,
-                                     int increase_denoising) = 0;
+                                     int increase_denoising,
+                                     bool denoise_always) = 0;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_c.cc b/webrtc/modules/video_processing/util/denoiser_filter_c.cc
index 6323980e18..8c84f4989c 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter_c.cc
+++ b/webrtc/modules/video_processing/util/denoiser_filter_c.cc
@@ -66,7 +66,8 @@ DenoiserDecision DenoiserFilterC::MbDenoise(uint8_t* mc_running_avg_y,
                                             const uint8_t* sig,
                                             int sig_stride,
                                             uint8_t motion_magnitude,
-                                            int increase_denoising) {
+                                            int increase_denoising,
+                                            bool denoise_always) {
   int sum_diff_thresh = 0;
   int sum_diff = 0;
   int adj_val[3] = {3, 4, 6};
@@ -136,9 +137,12 @@ DenoiserDecision DenoiserFilterC::MbDenoise(uint8_t* mc_running_avg_y,
     sum_diff += col_sum[c];
   }
 
-  sum_diff_thresh = kSumDiffThreshold;
-  if (increase_denoising)
+  if (denoise_always)
+    sum_diff_thresh = INT_MAX;
+  else if (increase_denoising)
     sum_diff_thresh = kSumDiffThresholdHigh;
+  else
+    sum_diff_thresh = kSumDiffThreshold;
   if (abs(sum_diff) > sum_diff_thresh) {
     int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1;
     // Only apply the adjustment for max delta up to 3.
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_c.h b/webrtc/modules/video_processing/util/denoiser_filter_c.h
index fe46ac38ec..3e52c3e47c 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter_c.h
+++ b/webrtc/modules/video_processing/util/denoiser_filter_c.h
@@ -38,7 +38,8 @@ class DenoiserFilterC : public DenoiserFilter {
                              const uint8_t* sig,
                              int sig_stride,
                              uint8_t motion_magnitude,
-                             int increase_denoising) override;
+                             int increase_denoising,
+                             bool denoise_always) override;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc
index b522bf002b..2920305f71 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter_neon.cc
+++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.cc
@@ -106,13 +106,15 @@ DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y,
                                                const uint8_t* sig,
                                                int sig_stride,
                                                uint8_t motion_magnitude,
-                                               int increase_denoising) {
+                                               int increase_denoising,
+                                               bool denoise_always) {
   // If motion_magnitude is small, making the denoiser more aggressive by
   // increasing the adjustment for each level, level1 adjustment is
   // increased, the deltas stay the same.
   int shift_inc =
       (increase_denoising && motion_magnitude <= kMotionMagnitudeThreshold) ? 1
                                                                             : 0;
+  int sum_diff_thresh = 0;
   const uint8x16_t v_level1_adjustment = vmovq_n_u8(
       (motion_magnitude <= kMotionMagnitudeThreshold) ? 4 + shift_inc : 3);
   const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1);
@@ -192,10 +194,12 @@ DenoiserDecision DenoiserFilterNEON::MbDenoise(uint8_t* mc_running_avg_y,
     int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total),
                             vget_low_s64(v_sum_diff_total));
     int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0);
-    int sum_diff_thresh = kSumDiffThreshold;
-
-    if (increase_denoising)
+    if (denoise_always)
+      sum_diff_thresh = INT_MAX;
+    else if (increase_denoising)
       sum_diff_thresh = kSumDiffThresholdHigh;
+    else
+      sum_diff_thresh = kSumDiffThreshold;
     if (sum_diff > sum_diff_thresh) {
       // Before returning to copy the block (i.e., apply no denoising),
       // checK if we can still apply some (weaker) temporal filtering to
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_neon.h b/webrtc/modules/video_processing/util/denoiser_filter_neon.h
index bc87ba788e..2e3ea26829 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter_neon.h
+++ b/webrtc/modules/video_processing/util/denoiser_filter_neon.h
@@ -38,7 +38,8 @@ class DenoiserFilterNEON : public DenoiserFilter {
                              const uint8_t* sig,
                              int sig_stride,
                              uint8_t motion_magnitude,
-                             int increase_denoising) override;
+                             int increase_denoising,
+                             bool denoise_always) override;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc b/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc
index 903d7b1ec6..614b6c9485 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc
+++ b/webrtc/modules/video_processing/util/denoiser_filter_sse2.cc
@@ -139,7 +139,9 @@ DenoiserDecision DenoiserFilterSSE2::MbDenoise(uint8_t* mc_running_avg_y,
                                                const uint8_t* sig,
                                                int sig_stride,
                                                uint8_t motion_magnitude,
-                                               int increase_denoising) {
+                                               int increase_denoising,
+                                               bool denoise_always) {
+  unsigned int sum_diff_thresh = 0;
   int shift_inc =
       (increase_denoising && motion_magnitude <= kMotionMagnitudeThreshold) ? 1
                                                                             : 0;
@@ -211,9 +213,12 @@ DenoiserDecision DenoiserFilterSSE2::MbDenoise(uint8_t* mc_running_avg_y,
   {
     // Compute the sum of all pixel differences of this MB.
     unsigned int abs_sum_diff = AbsSumDiff16x1(acc_diff);
-    unsigned int sum_diff_thresh = kSumDiffThreshold;
-    if (increase_denoising)
+    if (denoise_always)
+      sum_diff_thresh = INT_MAX;
+    else if (increase_denoising)
       sum_diff_thresh = kSumDiffThresholdHigh;
+    else
+      sum_diff_thresh = kSumDiffThreshold;
     if (abs_sum_diff > sum_diff_thresh) {
       // Before returning to copy the block (i.e., apply no denoising),
       // check if we can still apply some (weaker) temporal filtering to
diff --git a/webrtc/modules/video_processing/util/denoiser_filter_sse2.h b/webrtc/modules/video_processing/util/denoiser_filter_sse2.h
index 31d8510902..395fa10eca 100644
--- a/webrtc/modules/video_processing/util/denoiser_filter_sse2.h
+++ b/webrtc/modules/video_processing/util/denoiser_filter_sse2.h
@@ -38,7 +38,8 @@ class DenoiserFilterSSE2 : public DenoiserFilter {
                              const uint8_t* sig,
                              int sig_stride,
                              uint8_t motion_magnitude,
-                             int increase_denoising) override;
+                             int increase_denoising,
+                             bool denoise_always) override;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/video_processing/util/noise_estimation.cc b/webrtc/modules/video_processing/util/noise_estimation.cc
new file mode 100644
index 0000000000..87beac38ae
--- /dev/null
+++ b/webrtc/modules/video_processing/util/noise_estimation.cc
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/video_processing/util/noise_estimation.h"
+
+namespace webrtc {
+
+void NoiseEstimation::Init(int width, int height, CpuType cpu_type) {
+  int mb_cols = width >> 4;
+  int mb_rows = height >> 4;
+  consec_low_var_.reset(new uint32_t[mb_cols * mb_rows]());
+  width_ = width;
+  height_ = height;
+  mb_cols_ = width_ >> 4;
+  mb_rows_ = height_ >> 4;
+  cpu_type_ = cpu_type;
+}
+
+void NoiseEstimation::GetNoise(int mb_index, uint32_t var, uint32_t luma) {
+  consec_low_var_[mb_index]++;
+  num_static_block_++;
+  if (consec_low_var_[mb_index] >= kConsecLowVarFrame &&
+      (luma >> 8) < kAverageLumaMax && (luma >> 8) > kAverageLumaMin) {
+    // Normalize var by the average luma value; this gives more weight to
+    // darker blocks.
+    int nor_var = var / (luma >> 12);
+    noise_var_ +=
+        nor_var > kBlockSelectionVarMax ? kBlockSelectionVarMax : nor_var;
+    num_noisy_block_++;
+  }
+}
+
+void NoiseEstimation::ResetConsecLowVar(int mb_index) {
+  consec_low_var_[mb_index] = 0;
+}
+
+void NoiseEstimation::UpdateNoiseLevel() {
+  // TODO(jackychen): Tune a threshold for num_noisy_block_ > T to make the
+  // condition more reasonable.
+  // Not enough samples implies camera motion or too many moving objects in
+  // the frame.
+  if (num_static_block_ < (0.65 * mb_cols_ * mb_rows_) || !num_noisy_block_) {
+    noise_var_ = 0;
+    noise_var_accum_ = 0;
+    num_static_block_ = 0;
+    num_noisy_block_ = 0;
+#if DISPLAY
+    printf("Not enough samples.\n");
+#endif
+    return;
+  } else {
+    // Normalized by the number of noisy blocks.
+    noise_var_ /= num_noisy_block_;
+    // Get the percentage of static blocks.
+    percent_static_block_ =
+        static_cast<double>(num_static_block_) / (mb_cols_ * mb_rows_);
+#if DISPLAY
+    printf("%d %d fraction = %.3f\n", num_static_block_, mb_cols_ * mb_rows_,
+           percent_static_block_);
+#endif
+    num_noisy_block_ = 0;
+    num_static_block_ = 0;
+  }
+  // For the first frame just update the value with current noise_var_,
+  // otherwise, use the averaging window.
+  if (noise_var_accum_ == 0) {
+    noise_var_accum_ = noise_var_;
+  } else {
+    noise_var_accum_ = (noise_var_accum_ * 15 + noise_var_) / 16;
+  }
+  // Reset noise_var_ for the next frame.
+  noise_var_ = 0;
+#if DISPLAY
+  printf("noise_var_accum_ = %.1f, noise_var_ = %d.\n", noise_var_accum_,
+         noise_var_);
+#endif
+}
+
+uint8_t NoiseEstimation::GetNoiseLevel() {
+  int noise_thr = cpu_type_ ? kNoiseThreshold : kNoiseThresholdNeon;
+  UpdateNoiseLevel();
+  if (noise_var_accum_ > noise_thr) {
+    return 1;
+  }
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/video_processing/util/noise_estimation.h b/webrtc/modules/video_processing/util/noise_estimation.h
new file mode 100644
index 0000000000..ca5cc2324f
--- /dev/null
+++ b/webrtc/modules/video_processing/util/noise_estimation.h
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_NOISE_ESTIMATION_H_
+#define WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_NOISE_ESTIMATION_H_
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/modules/include/module_common_types.h"
+#include "webrtc/modules/video_processing/include/video_processing_defines.h"
+#include "webrtc/modules/video_processing/util/denoiser_filter.h"
+
+namespace webrtc {
+
+#define EXPERIMENTAL 0
+#define DISPLAY 0
+
+const int kNoiseThreshold = 200;
+const int kNoiseThresholdNeon = 70;
+const int kConsecLowVarFrame = 6;
+const int kAverageLumaMin = 20;
+const int kAverageLumaMax = 220;
+const int kBlockSelectionVarMax = kNoiseThreshold << 1;
+
+class NoiseEstimation {
+ public:
+  void Init(int width, int height, CpuType cpu_type);
+  void GetNoise(int mb_index, uint32_t var, uint32_t luma);
+  void ResetConsecLowVar(int mb_index);
+  void UpdateNoiseLevel();
+  // 0: low noise, 1: high noise
+  uint8_t GetNoiseLevel();
+
+ private:
+  int width_;
+  int height_;
+  int mb_rows_;
+  int mb_cols_;
+  CpuType cpu_type_;
+  uint32_t noise_var_;
+  double noise_var_accum_;
+  int num_noisy_block_;
+  int num_static_block_;
+  double percent_static_block_;
+  rtc::scoped_ptr<uint32_t[]> consec_low_var_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_VIDEO_PROCESSING_UTIL_NOISE_ESTIMATION_H_
diff --git a/webrtc/modules/video_processing/video_denoiser.cc b/webrtc/modules/video_processing/video_denoiser.cc
index 3951381d22..b00da5c90a 100644
--- a/webrtc/modules/video_processing/video_denoiser.cc
+++ b/webrtc/modules/video_processing/video_denoiser.cc
@@ -16,50 +16,144 @@ namespace webrtc {
 VideoDenoiser::VideoDenoiser(bool runtime_cpu_detection)
     : width_(0),
       height_(0),
-      filter_(DenoiserFilter::Create(runtime_cpu_detection)) {}
+      filter_(DenoiserFilter::Create(runtime_cpu_detection, &cpu_type_)),
+      ne_(new NoiseEstimation()) {}
 
-void VideoDenoiser::TrailingReduction(int mb_rows,
-                                      int mb_cols,
-                                      const uint8_t* y_src,
-                                      int stride_y,
-                                      uint8_t* y_dst) {
-  for (int mb_row = 1; mb_row < mb_rows - 1; ++mb_row) {
-    for (int mb_col = 1; mb_col < mb_cols - 1; ++mb_col) {
-      int mb_index = mb_row * mb_cols + mb_col;
-      uint8_t* mb_dst = y_dst + (mb_row << 4) * stride_y + (mb_col << 4);
-      const uint8_t* mb_src = y_src + (mb_row << 4) * stride_y + (mb_col << 4);
-      // If the number of denoised neighbors is less than a threshold,
-      // do NOT denoise for the block. Set different threshold for skin MB.
-      // The change of denoising status will not propagate.
-      if (metrics_[mb_index].is_skin) {
-        // The threshold is high (more strict) for non-skin MB where the
-        // trailing usually happen.
-        if (metrics_[mb_index].denoise &&
-            metrics_[mb_index + 1].denoise + metrics_[mb_index - 1].denoise +
-                    metrics_[mb_index + mb_cols].denoise +
-                    metrics_[mb_index - mb_cols].denoise <=
-                2) {
-          metrics_[mb_index].denoise = 0;
-          filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y);
-        }
-      } else if (metrics_[mb_index].denoise &&
-                 metrics_[mb_index + 1].denoise +
-                         metrics_[mb_index - 1].denoise +
-                         metrics_[mb_index + mb_cols + 1].denoise +
-                         metrics_[mb_index + mb_cols - 1].denoise +
-                         metrics_[mb_index - mb_cols + 1].denoise +
-                         metrics_[mb_index - mb_cols - 1].denoise +
-                         metrics_[mb_index + mb_cols].denoise +
-                         metrics_[mb_index - mb_cols].denoise <=
-                     7) {
-        filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y);
-      }
+#if EXPERIMENTAL
+// Check the mb position (1: close to the center, 3: close to the border).
+static int PositionCheck(int mb_row, int mb_col, int mb_rows, int mb_cols) {
+  if ((mb_row >= (mb_rows >> 3)) && (mb_row <= (7 * mb_rows >> 3)) &&
+      (mb_col >= (mb_cols >> 3)) && (mb_col <= (7 * mb_cols >> 3)))
+    return 1;
+  else if ((mb_row >= (mb_rows >> 4)) && (mb_row <= (15 * mb_rows >> 4)) &&
+           (mb_col >= (mb_cols >> 4)) && (mb_col <= (15 * mb_cols >> 4)))
+    return 2;
+  else
+    return 3;
+}
+
+static void ReduceFalseDetection(const std::unique_ptr<uint8_t[]>& d_status,
+                                 std::unique_ptr<uint8_t[]>* d_status_tmp1,
+                                 std::unique_ptr<uint8_t[]>* d_status_tmp2,
+                                 int noise_level,
+                                 int mb_rows,
+                                 int mb_cols) {
+  // Draft. This can be optimized. This code block is to reduce false detection
+  // in moving object detection.
+  int mb_row_min = noise_level ? mb_rows >> 3 : 1;
+  int mb_col_min = noise_level ? mb_cols >> 3 : 1;
+  int mb_row_max = noise_level ? (7 * mb_rows >> 3) : mb_rows - 2;
+  int mb_col_max = noise_level ? (7 * mb_cols >> 3) : mb_cols - 2;
+  memcpy((*d_status_tmp1).get(), d_status.get(), mb_rows * mb_cols);
+  // Up left.
+  for (int mb_row = mb_row_min; mb_row <= mb_row_max; ++mb_row) {
+    for (int mb_col = mb_col_min; mb_col <= mb_col_max; ++mb_col) {
+      (*d_status_tmp1)[mb_row * mb_cols + mb_col] |=
+          ((*d_status_tmp1)[(mb_row - 1) * mb_cols + mb_col] |
+           (*d_status_tmp1)[mb_row * mb_cols + mb_col - 1]);
+    }
+  }
+  memcpy((*d_status_tmp2).get(), (*d_status_tmp1).get(), mb_rows * mb_cols);
+  memcpy((*d_status_tmp1).get(), d_status.get(), mb_rows * mb_cols);
+  // Bottom left.
+  for (int mb_row = mb_row_max; mb_row >= mb_row_min; --mb_row) {
+    for (int mb_col = mb_col_min; mb_col <= mb_col_max; ++mb_col) {
+      (*d_status_tmp1)[mb_row * mb_cols + mb_col] |=
+          ((*d_status_tmp1)[(mb_row + 1) * mb_cols + mb_col] |
+           (*d_status_tmp1)[mb_row * mb_cols + mb_col - 1]);
+      (*d_status_tmp2)[mb_row * mb_cols + mb_col] &=
+          (*d_status_tmp1)[mb_row * mb_cols + mb_col];
+    }
+  }
+  memcpy((*d_status_tmp1).get(), d_status.get(), mb_rows * mb_cols);
+  // Up right.
+  for (int mb_row = mb_row_min; mb_row <= mb_row_max; ++mb_row) {
+    for (int mb_col = mb_col_max; mb_col >= mb_col_min; --mb_col) {
+      (*d_status_tmp1)[mb_row * mb_cols + mb_col] |=
+          ((*d_status_tmp1)[(mb_row - 1) * mb_cols + mb_col] |
+           (*d_status_tmp1)[mb_row * mb_cols + mb_col + 1]);
+      (*d_status_tmp2)[mb_row * mb_cols + mb_col] &=
+          (*d_status_tmp1)[mb_row * mb_cols + mb_col];
+    }
+  }
+  memcpy((*d_status_tmp1).get(), d_status.get(), mb_rows * mb_cols);
+  // Bottom right.
+  for (int mb_row = mb_row_max; mb_row >= mb_row_min; --mb_row) {
+    for (int mb_col = mb_col_max; mb_col >= mb_col_min; --mb_col) {
+      (*d_status_tmp1)[mb_row * mb_cols + mb_col] |=
+          ((*d_status_tmp1)[(mb_row + 1) * mb_cols + mb_col] |
+           (*d_status_tmp1)[mb_row * mb_cols + mb_col + 1]);
+      (*d_status_tmp2)[mb_row * mb_cols + mb_col] &=
+          (*d_status_tmp1)[mb_row * mb_cols + mb_col];
     }
   }
 }
 
+static bool TrailingBlock(const std::unique_ptr<uint8_t[]>& d_status,
+                          int mb_row,
+                          int mb_col,
+                          int mb_rows,
+                          int mb_cols) {
+  int mb_index = mb_row * mb_cols + mb_col;
+  if (!mb_row || !mb_col || mb_row == mb_rows - 1 || mb_col == mb_cols - 1)
+    return false;
+  return d_status[mb_index + 1] || d_status[mb_index - 1] ||
+         d_status[mb_index + mb_cols] || d_status[mb_index - mb_cols];
+}
+#endif
+
+#if DISPLAY
+void ShowRect(const std::unique_ptr<DenoiserFilter>& filter,
+              const std::unique_ptr<uint8_t[]>& d_status,
+              const std::unique_ptr<uint8_t[]>& d_status_tmp2,
+              const std::unique_ptr<uint8_t[]>& x_density,
+              const std::unique_ptr<uint8_t[]>& y_density,
+              const uint8_t* u_src,
+              const uint8_t* v_src,
+              uint8_t* u_dst,
+              uint8_t* v_dst,
+              int mb_rows,
+              int mb_cols,
+              int stride_u,
+              int stride_v) {
+  for (int mb_row = 0; mb_row < mb_rows; ++mb_row) {
+    for (int mb_col = 0; mb_col < mb_cols; ++mb_col) {
+      int mb_index = mb_row * mb_cols + mb_col;
+      const uint8_t* mb_src_u =
+          u_src + (mb_row << 3) * stride_u + (mb_col << 3);
+      const uint8_t* mb_src_v =
+          v_src + (mb_row << 3) * stride_v + (mb_col << 3);
+      uint8_t* mb_dst_u = u_dst + (mb_row << 3) * stride_u + (mb_col << 3);
+      uint8_t* mb_dst_v = v_dst + (mb_row << 3) * stride_v + (mb_col << 3);
+      uint8_t y_tmp_255[8 * 8];
+      memset(y_tmp_255, 200, 8 * 8);
+      // x_density_[mb_col] * y_density_[mb_row]
+      if (d_status[mb_index] == 1) {
+        // Paint to red.
+        filter->CopyMem8x8(mb_src_u, stride_u, mb_dst_u, stride_u);
+        filter->CopyMem8x8(y_tmp_255, 8, mb_dst_v, stride_v);
+#if EXPERIMENTAL
+      } else if (d_status_tmp2[mb_row * mb_cols + mb_col] &&
+                 x_density[mb_col] * y_density[mb_row]) {
+#else
+      } else if (x_density[mb_col] * y_density[mb_row]) {
+#endif
+        // Paint to blue.
+        filter->CopyMem8x8(y_tmp_255, 8, mb_dst_u, stride_u);
+        filter->CopyMem8x8(mb_src_v, stride_v, mb_dst_v, stride_v);
+      } else {
+        filter->CopyMem8x8(mb_src_u, stride_u, mb_dst_u, stride_u);
+        filter->CopyMem8x8(mb_src_v, stride_v, mb_dst_v, stride_v);
+      }
+    }
+  }
+}
+#endif
+
 void VideoDenoiser::DenoiseFrame(const VideoFrame& frame,
-                                 VideoFrame* denoised_frame) {
+                                 VideoFrame* denoised_frame,
+                                 VideoFrame* denoised_frame_prev,
+                                 int noise_level_prev) {
   int stride_y = frame.stride(kYPlane);
   int stride_u = frame.stride(kUPlane);
   int stride_v = frame.stride(kVPlane);
@@ -71,9 +165,13 @@ void VideoDenoiser::DenoiseFrame(const VideoFrame& frame,
     denoised_frame->CreateFrame(frame.buffer(kYPlane), frame.buffer(kUPlane),
                                 frame.buffer(kVPlane), width_, height_,
                                 stride_y, stride_u, stride_v, kVideoRotation_0);
+    denoised_frame_prev->CreateFrame(
+        frame.buffer(kYPlane), frame.buffer(kUPlane), frame.buffer(kVPlane),
+        width_, height_, stride_y, stride_u, stride_v, kVideoRotation_0);
     // Setting time parameters to the output frame.
     denoised_frame->set_timestamp(frame.timestamp());
     denoised_frame->set_render_time_ms(frame.render_time_ms());
+    ne_->Init(width_, height_, cpu_type_);
     return;
   }
   // For 16x16 block.
@@ -81,62 +179,128 @@ void VideoDenoiser::DenoiseFrame(const VideoFrame& frame,
   int mb_rows = height_ >> 4;
   if (metrics_.get() == nullptr)
     metrics_.reset(new DenoiseMetrics[mb_cols * mb_rows]());
+  if (d_status_.get() == nullptr) {
+    d_status_.reset(new uint8_t[mb_cols * mb_rows]());
+#if EXPERIMENTAL
+    d_status_tmp1_.reset(new uint8_t[mb_cols * mb_rows]());
+    d_status_tmp2_.reset(new uint8_t[mb_cols * mb_rows]());
+#endif
+    x_density_.reset(new uint8_t[mb_cols]());
+    y_density_.reset(new uint8_t[mb_rows]());
+  }
+
   // Denoise on Y plane.
   uint8_t* y_dst = denoised_frame->buffer(kYPlane);
   uint8_t* u_dst = denoised_frame->buffer(kUPlane);
   uint8_t* v_dst = denoised_frame->buffer(kVPlane);
+  uint8_t* y_dst_prev = denoised_frame_prev->buffer(kYPlane);
   const uint8_t* y_src = frame.buffer(kYPlane);
   const uint8_t* u_src = frame.buffer(kUPlane);
   const uint8_t* v_src = frame.buffer(kVPlane);
+  uint8_t noise_level = noise_level_prev == -1 ? 0 : ne_->GetNoiseLevel();
   // Temporary buffer to store denoising result.
   uint8_t y_tmp[16 * 16] = {0};
+  memset(x_density_.get(), 0, mb_cols);
+  memset(y_density_.get(), 0, mb_rows);
+
+  // Loop over blocks to accumulate/extract noise level and update x/y_density
+  // factors for moving object detection.
+  for (int mb_row = 0; mb_row < mb_rows; ++mb_row) {
+    for (int mb_col = 0; mb_col < mb_cols; ++mb_col) {
+      const uint8_t* mb_src = y_src + (mb_row << 4) * stride_y + (mb_col << 4);
+      uint8_t* mb_dst_prev =
+          y_dst_prev + (mb_row << 4) * stride_y + (mb_col << 4);
+      int mb_index = mb_row * mb_cols + mb_col;
+#if EXPERIMENTAL
+      int pos_factor = PositionCheck(mb_row, mb_col, mb_rows, mb_cols);
+      uint32_t thr_var_adp = 16 * 16 * 5 * (noise_level ? pos_factor : 1);
+#else
+      uint32_t thr_var_adp = 16 * 16 * 5;
+#endif
+      int brightness = 0;
+      for (int i = 0; i < 16; ++i) {
+        for (int j = 0; j < 16; ++j) {
+          brightness += mb_src[i * stride_y + j];
+        }
+      }
+
+      // Get the denoised block.
+      filter_->MbDenoise(mb_dst_prev, stride_y, y_tmp, 16, mb_src, stride_y, 0,
+                         1, true);
+      // The variance is based on the denoised blocks in time T and T-1.
+      metrics_[mb_index].var = filter_->Variance16x8(
+          mb_dst_prev, stride_y, y_tmp, 16, &metrics_[mb_index].sad);
+
+      if (metrics_[mb_index].var > thr_var_adp) {
+        ne_->ResetConsecLowVar(mb_index);
+        d_status_[mb_index] = 1;
+#if EXPERIMENTAL
+        if (noise_level == 0 || pos_factor < 3) {
+          x_density_[mb_col] += 1;
+          y_density_[mb_row] += 1;
+        }
+#else
+        x_density_[mb_col] += 1;
+        y_density_[mb_row] += 1;
+#endif
+      } else {
+        uint32_t sse_t = 0;
+        // The variance is based on the src blocks in time T and denoised block
+        // in time T-1.
+        uint32_t noise_var = filter_->Variance16x8(mb_dst_prev, stride_y,
+                                                   mb_src, stride_y, &sse_t);
+        ne_->GetNoise(mb_index, noise_var, brightness);
+        d_status_[mb_index] = 0;
+      }
+      // Track denoised frame.
+      filter_->CopyMem16x16(y_tmp, 16, mb_dst_prev, stride_y);
+    }
+  }
+
+#if EXPERIMENTAL
+  ReduceFalseDetection(d_status_, &d_status_tmp1_, &d_status_tmp2_, noise_level,
+                       mb_rows, mb_cols);
+#endif
+
+  // Denoise each MB based on the result of moving object detection.
   for (int mb_row = 0; mb_row < mb_rows; ++mb_row) {
     for (int mb_col = 0; mb_col < mb_cols; ++mb_col) {
       const uint8_t* mb_src = y_src + (mb_row << 4) * stride_y + (mb_col << 4);
       uint8_t* mb_dst = y_dst + (mb_row << 4) * stride_y + (mb_col << 4);
-      int mb_index = mb_row * mb_cols + mb_col;
-      // Denoise each MB at the very start and save the result to a temporary
-      // buffer.
-      if (filter_->MbDenoise(mb_dst, stride_y, y_tmp, 16, mb_src, stride_y, 0,
-                             1) == FILTER_BLOCK) {
-        uint32_t thr_var = 0;
-        // Save var and sad to the buffer.
-        metrics_[mb_index].var = filter_->Variance16x8(
-            mb_dst, stride_y, y_tmp, 16, &metrics_[mb_index].sad);
-        // Get skin map.
-        metrics_[mb_index].is_skin = MbHasSkinColor(
-            y_src, u_src, v_src, stride_y, stride_u, stride_v, mb_row, mb_col);
-        // Variance threshold for skin/non-skin MB is different.
-        // Skin MB use a small threshold to reduce blockiness.
-        thr_var = metrics_[mb_index].is_skin ? 128 : 12 * 128;
-        if (metrics_[mb_index].var > thr_var) {
-          metrics_[mb_index].denoise = 0;
-          // Use the source MB.
-          filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y);
-        } else {
-          metrics_[mb_index].denoise = 1;
-          // Use the denoised MB.
-          filter_->CopyMem16x16(y_tmp, 16, mb_dst, stride_y);
-        }
-      } else {
-        metrics_[mb_index].denoise = 0;
-        filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y);
-      }
-      // Copy source U/V plane.
       const uint8_t* mb_src_u =
           u_src + (mb_row << 3) * stride_u + (mb_col << 3);
       const uint8_t* mb_src_v =
           v_src + (mb_row << 3) * stride_v + (mb_col << 3);
       uint8_t* mb_dst_u = u_dst + (mb_row << 3) * stride_u + (mb_col << 3);
       uint8_t* mb_dst_v = v_dst + (mb_row << 3) * stride_v + (mb_col << 3);
+#if EXPERIMENTAL
+      if ((!d_status_tmp2_[mb_row * mb_cols + mb_col] ||
+           x_density_[mb_col] * y_density_[mb_row] == 0) &&
+          !TrailingBlock(d_status_, mb_row, mb_col, mb_rows, mb_cols)) {
+#else
+      if (x_density_[mb_col] * y_density_[mb_row] == 0) {
+#endif
+        if (filter_->MbDenoise(mb_dst, stride_y, y_tmp, 16, mb_src, stride_y, 0,
+                               noise_level, false) == FILTER_BLOCK) {
+          filter_->CopyMem16x16(y_tmp, 16, mb_dst, stride_y);
+        } else {
+          // Copy y source.
+          filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y);
+        }
+      } else {
+        // Copy y source.
+        filter_->CopyMem16x16(mb_src, stride_y, mb_dst, stride_y);
+      }
       filter_->CopyMem8x8(mb_src_u, stride_u, mb_dst_u, stride_u);
       filter_->CopyMem8x8(mb_src_v, stride_v, mb_dst_v, stride_v);
     }
   }
-  // Second round.
-  // This is to reduce the trailing artifact and blockiness by referring
-  // neighbors' denoising status.
-  TrailingReduction(mb_rows, mb_cols, y_src, stride_y, y_dst);
+
+#if DISPLAY  // Rectangle diagnostics
+  // Show rectangular region: paint the detection result into the U/V planes.
+  ShowRect(filter_, d_status_, d_status_tmp2_, x_density_, y_density_, u_src,
+           v_src, u_dst, v_dst, mb_rows, mb_cols, stride_u, stride_v);
+#endif
 
   // Setting time parameters to the output frame.
   denoised_frame->set_timestamp(frame.timestamp());
diff --git a/webrtc/modules/video_processing/video_denoiser.h b/webrtc/modules/video_processing/video_denoiser.h
index 0fe2053639..03b30d91c7 100644
--- a/webrtc/modules/video_processing/video_denoiser.h
+++ b/webrtc/modules/video_processing/video_denoiser.h
@@ -14,6 +14,7 @@
 #include <memory>
 
 #include "webrtc/modules/video_processing/util/denoiser_filter.h"
+#include "webrtc/modules/video_processing/util/noise_estimation.h"
 #include "webrtc/modules/video_processing/util/skin_detection.h"
 
 namespace webrtc {
@@ -21,18 +22,25 @@ namespace webrtc {
 class VideoDenoiser {
  public:
   explicit VideoDenoiser(bool runtime_cpu_detection);
-  void DenoiseFrame(const VideoFrame& frame, VideoFrame* denoised_frame);
+  void DenoiseFrame(const VideoFrame& frame,
+                    VideoFrame* denoised_frame,
+                    VideoFrame* denoised_frame_prev,
+                    int noise_level_prev);  // -1 => noise level treated as 0.
 
  private:
-  void TrailingReduction(int mb_rows,
-                         int mb_cols,
-                         const uint8_t* y_src,
-                         int stride_y,
-                         uint8_t* y_dst);
   int width_;
   int height_;
+  CpuType cpu_type_;  // Forwarded to NoiseEstimation::Init().
   std::unique_ptr<DenoiseMetrics[]> metrics_;
   std::unique_ptr<DenoiserFilter> filter_;
+  std::unique_ptr<NoiseEstimation> ne_;  // Frame noise-level estimator.
+  std::unique_ptr<uint8_t[]> d_status_;  // Per-MB flag: 1 = high variance.
+#if EXPERIMENTAL  // Extra buffers used with ReduceFalseDetection().
+  std::unique_ptr<uint8_t[]> d_status_tmp1_;
+  std::unique_ptr<uint8_t[]> d_status_tmp2_;
+#endif
+  std::unique_ptr<uint8_t[]> x_density_;  // Flagged MBs per MB column.
+  std::unique_ptr<uint8_t[]> y_density_;  // Flagged MBs per MB row.
 };
 
 }  // namespace webrtc
diff --git a/webrtc/modules/video_processing/video_processing.gypi b/webrtc/modules/video_processing/video_processing.gypi
index 7418c455a2..5bf0ea36c3 100644
--- a/webrtc/modules/video_processing/video_processing.gypi
+++ b/webrtc/modules/video_processing/video_processing.gypi
@@ -40,6 +40,8 @@
         'util/denoiser_filter.h',
         'util/denoiser_filter_c.cc',
         'util/denoiser_filter_c.h',
+        'util/noise_estimation.cc',
+        'util/noise_estimation.h',
         'util/skin_detection.cc',
         'util/skin_detection.h',
       ],