From 67e43c8b95057a889ba9946e47d50a265e1e9ac9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Per=20=C3=85hgren?= <peah@webrtc.org>
Date: Mon, 12 Aug 2019 17:41:45 +0200
Subject: [PATCH] Correct conversion between float and fixed formats
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This CL changes the way that values are converted
between fixed and floating point in order to
- Avoid the former asymmetric conversion, which caused
  nonlinear distortions.
- Reduce the complexity.

Bug: webrtc:6594
Change-Id: I64d0cc31c5d16f397686a59a062cfbc4b336d94d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/132783
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28867}
---
 common_audio/audio_util.cc                    |  5 --
 common_audio/audio_util_unittest.cc           | 41 ++++----------
 common_audio/include/audio_util.h             | 36 +++++-------
 .../resampler/push_sinc_resampler_unittest.cc | 18 +++++-
 .../echo_cancellation_bit_exact_unittest.cc   |  4 +-
 .../low_cut_filter_unittest.cc                | 36 ++++++------
 .../noise_suppression_unittest.cc             | 56 +++++++++----------
 7 files changed, 87 insertions(+), 109 deletions(-)

diff --git a/common_audio/audio_util.cc b/common_audio/audio_util.cc
index 735ba5f188..eb132ca633 100644
--- a/common_audio/audio_util.cc
+++ b/common_audio/audio_util.cc
@@ -12,11 +12,6 @@
 
 namespace webrtc {
 
-void FloatToS16(const float* src, size_t size, int16_t* dest) {
-  for (size_t i = 0; i < size; ++i)
-    dest[i] = FloatToS16(src[i]);
-}
-
 void S16ToFloat(const int16_t* src, size_t size, float* dest) {
   for (size_t i = 0; i < size; ++i)
     dest[i] = S16ToFloat(src[i]);
diff --git a/common_audio/audio_util_unittest.cc b/common_audio/audio_util_unittest.cc
index cf85a2d46c..a215a123b1 100644
--- a/common_audio/audio_util_unittest.cc
+++ b/common_audio/audio_util_unittest.cc
@@ -31,25 +31,6 @@ void ExpectArraysEq(const float* ref, const float* test, size_t length) {
   }
 }
 
-TEST(AudioUtilTest, FloatToS16) {
-  static constexpr float kInput[] = {0.f,
-                                     0.4f / 32767.f,
-                                     0.6f / 32767.f,
-                                     -0.4f / 32768.f,
-                                     -0.6f / 32768.f,
-                                     1.f,
-                                     -1.f,
-                                     1.1f,
-                                     -1.1f};
-  static constexpr int16_t kReference[] = {0,     0,      1,     0,     -1,
-                                           32767, -32768, 32767, -32768};
-  static constexpr size_t kSize = arraysize(kInput);
-  static_assert(arraysize(kReference) == kSize, "");
-  int16_t output[kSize];
-  FloatToS16(kInput, kSize, output);
-  ExpectArraysEq(kReference, output, kSize);
-}
-
 TEST(AudioUtilTest, S16ToFloat) {
   static constexpr int16_t kInput[] = {0, 1, -1, 16384, -16384, 32767, -32768};
   static constexpr float kReference[] = {
@@ -74,16 +55,16 @@ TEST(AudioUtilTest, FloatS16ToS16) {
 
 TEST(AudioUtilTest, FloatToFloatS16) {
   static constexpr float kInput[] = {0.f,
-                                     0.4f / 32767.f,
-                                     0.6f / 32767.f,
+                                     0.4f / 32768.f,
+                                     0.6f / 32768.f,
                                      -0.4f / 32768.f,
                                      -0.6f / 32768.f,
                                      1.f,
                                      -1.f,
-                                     1.1f,
-                                     -1.1f};
+                                     1.f,
+                                     -1.f};
   static constexpr float kReference[] = {
-      0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
+      0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32768.f, -32768.f, 32768.f, -32768.f};
   static constexpr size_t kSize = arraysize(kInput);
   static_assert(arraysize(kReference) == kSize, "");
   float output[kSize];
@@ -92,17 +73,17 @@ TEST(AudioUtilTest, FloatToFloatS16) {
 }
 
 TEST(AudioUtilTest, FloatS16ToFloat) {
-  static constexpr float kInput[] = {
-      0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
+  static constexpr float kInput[] = {0.f,     0.4f,     0.6f,    -0.4f,   -0.6f,
+                                     32768.f, -32768.f, 32768.f, -32768.f};
   static constexpr float kReference[] = {0.f,
-                                         0.4f / 32767.f,
-                                         0.6f / 32767.f,
+                                         0.4f / 32768.f,
+                                         0.6f / 32768.f,
                                          -0.4f / 32768.f,
                                          -0.6f / 32768.f,
                                          1.f,
                                          -1.f,
-                                         1.1f,
-                                         -1.1f};
+                                         1.f,
+                                         -1.f};
   static constexpr size_t kSize = arraysize(kInput);
   static_assert(arraysize(kReference) == kSize, "");
   float output[kSize];
diff --git a/common_audio/include/audio_util.h b/common_audio/include/audio_util.h
index 50c9cf282c..255abcc0f7 100644
--- a/common_audio/include/audio_util.h
+++ b/common_audio/include/audio_util.h
@@ -27,45 +27,35 @@ typedef std::numeric_limits<int16_t> limits_int16;
 // The conversion functions use the following naming convention:
 // S16:      int16_t [-32768, 32767]
 // Float:    float   [-1.0, 1.0]
-// FloatS16: float   [-32768.0, 32767.0]
+// FloatS16: float   [-32768.0, 32768.0]
 // Dbfs: float [-20.0*log(10, 32768), 0] = [-90.3, 0]
 // The ratio conversion functions use this naming convention:
 // Ratio: float (0, +inf)
 // Db: float (-inf, +inf)
-static inline int16_t FloatToS16(float v) {
-  if (v > 0)
-    return v >= 1 ? limits_int16::max()
-                  : static_cast<int16_t>(v * limits_int16::max() + 0.5f);
-  return v <= -1 ? limits_int16::min()
-                 : static_cast<int16_t>(-v * limits_int16::min() - 0.5f);
-}
-
 static inline float S16ToFloat(int16_t v) {
-  static const float kMaxInt16Inverse = 1.f / limits_int16::max();
-  static const float kMinInt16Inverse = 1.f / limits_int16::min();
-  return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
+  constexpr float kScaling = 1.f / 32768.f;  // One factor for both signs.
+  return v * kScaling;  // Result lies in [-1, 32767/32768].
 }
 
 static inline int16_t FloatS16ToS16(float v) {
-  static const float kMaxRound = limits_int16::max() - 0.5f;
-  static const float kMinRound = limits_int16::min() + 0.5f;
-  if (v > 0)
-    return v >= kMaxRound ? limits_int16::max()
-                          : static_cast<int16_t>(v + 0.5f);
-  return v <= kMinRound ? limits_int16::min() : static_cast<int16_t>(v - 0.5f);
+  v = std::min(v, 32767.f);  // Saturate to the int16_t range before rounding.
+  v = std::max(v, -32768.f);
+  return static_cast<int16_t>(v + std::copysign(0.5f, v));  // Round to nearest.
 }
 
 static inline float FloatToFloatS16(float v) {
-  return v * (v > 0 ? limits_int16::max() : -limits_int16::min());
+  RTC_DCHECK_LE(v, 1.f);  // Input is expected in [-1, 1]; it is not clamped.
+  RTC_DCHECK_GE(v, -1.f);
+  return v * 32768.f;  // Same gain for positive and negative samples.
 }
 
 static inline float FloatS16ToFloat(float v) {
-  static const float kMaxInt16Inverse = 1.f / limits_int16::max();
-  static const float kMinInt16Inverse = 1.f / limits_int16::min();
-  return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
+  RTC_DCHECK_LE(v, 32768.f);  // Input is expected in [-32768, 32768].
+  RTC_DCHECK_GE(v, -32768.f);
+  constexpr float kScaling = 1.f / 32768.f;
+  return v * kScaling;  // Inverse of the 32768 gain in FloatToFloatS16.
 }
 
-void FloatToS16(const float* src, size_t size, int16_t* dest);
 void S16ToFloat(const int16_t* src, size_t size, float* dest);
 void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
 void FloatToFloatS16(const float* src, size_t size, float* dest);
diff --git a/common_audio/resampler/push_sinc_resampler_unittest.cc b/common_audio/resampler/push_sinc_resampler_unittest.cc
index 1a25a8c3e8..f9943b3cc8 100644
--- a/common_audio/resampler/push_sinc_resampler_unittest.cc
+++ b/common_audio/resampler/push_sinc_resampler_unittest.cc
@@ -33,6 +33,18 @@ T DBFS(T x) {
   return 20 * std::log10(x);
 }
 
+// Float [-1, 1] -> S16; saturates at the ends, truncates toward zero inside.
+void FloatToS16(const float* src, size_t size, int16_t* dest) {
+  for (size_t i = 0; i < size; ++i) {
+    RTC_DCHECK_GE(32767.f, src[i]);
+    RTC_DCHECK_LE(-32768.f, src[i]);
+    // One expression, so a saturated value is never overwritten by the cast.
+    dest[i] = static_cast<int16_t>(src[i] >= 1.f    ? 32767.f
+                                   : src[i] <= -1.f ? -32768.f
+                                                    : src[i] * 32767.5f);
+  }
+}
+
 }  // namespace
 
 class PushSincResamplerTest : public ::testing::TestWithParam<
@@ -322,14 +334,14 @@ INSTANTIATE_TEST_SUITE_P(
         ::testing::make_tuple(32000, 16000, -18.48, -28.59),
         ::testing::make_tuple(44100, 16000, -19.30, -19.67),
         ::testing::make_tuple(48000, 16000, -19.81, -18.11),
-        ::testing::make_tuple(96000, 16000, -20.95, -10.96),
+        ::testing::make_tuple(96000, 16000, -20.95, -10.9596),
 
         // To 32 kHz
         ::testing::make_tuple(8000, 32000, kResamplingRMSError, -70.30),
         ::testing::make_tuple(16000, 32000, kResamplingRMSError, -75.51),
         ::testing::make_tuple(32000, 32000, kResamplingRMSError, -75.51),
-        ::testing::make_tuple(44100, 32000, -16.44, -51.10),
-        ::testing::make_tuple(48000, 32000, -16.90, -44.03),
+        ::testing::make_tuple(44100, 32000, -16.44, -51.0349),
+        ::testing::make_tuple(48000, 32000, -16.90, -43.9967),
         ::testing::make_tuple(96000, 32000, -19.61, -18.04),
         ::testing::make_tuple(192000, 32000, -21.02, -10.94)));
 
diff --git a/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc b/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc
index 80f36a8c0e..d44483c4bc 100644
--- a/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc
+++ b/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc
@@ -324,8 +324,8 @@ TEST(EchoCancellationBitExactnessTest,
      DISABLED_Stereo32kHz_HighLevel_NoDrift_StreamDelay0) {
 #endif
 #if defined(WEBRTC_MAC)
-  const float kOutputReference[] = {-0.000458f, 0.000244f, 0.000153f,
-                                    -0.000458f, 0.000244f, 0.000153f};
+  const float kOutputReference[] = {-0.000458f, 0.000214f, 0.000122f,
+                                    -0.000458f, 0.000214f, 0.000122f};
 #else
   const float kOutputReference[] = {-0.000427f, 0.000183f, 0.000183f,
                                     -0.000427f, 0.000183f, 0.000183f};
diff --git a/modules/audio_processing/low_cut_filter_unittest.cc b/modules/audio_processing/low_cut_filter_unittest.cc
index ea4fb6711b..fb950da640 100644
--- a/modules/audio_processing/low_cut_filter_unittest.cc
+++ b/modules/audio_processing/low_cut_filter_unittest.cc
@@ -110,9 +110,9 @@ TEST(LowCutFilterBitExactnessTest, Mono8kHzInitial) {
       0.349705f,  0.173054f,  0.016750f,  -0.415957f, -0.461001f, -0.557111f,
       0.738711f,  0.275720f};
 
-  const float kReference[] = {0.142277f,  -0.418518f, -0.028229f, -0.102112f,
-                              0.141270f,  0.137791f,  0.124577f,  -0.088715f,
-                              -0.142273f, -0.125885f, 0.266640f,  -0.468079f};
+  const float kReference[] = {0.142273f,  -0.418518f, -0.028229f, -0.102112f,
+                              0.141266f,  0.137787f,  0.124573f,  -0.088715f,
+                              -0.142273f, -0.125885f, 0.266663f,  -0.468109f};
 
   RunBitexactnessTest(
       8000, 1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
@@ -162,9 +162,9 @@ TEST(LowCutFilterBitExactnessTest, Mono8kHzConverged) {
       0.036391f,  -0.408991f, 0.369330f,  0.399785f,  -0.471419f, 0.551138f,
       -0.307569f, 0.064315f,  0.311605f,  0.041736f,  0.650943f,  0.780496f};
 
-  const float kReference[] = {-0.173553f, -0.265778f, 0.158757f,  -0.259399f,
-                              -0.176361f, 0.192877f,  0.056825f,  0.171453f,
-                              0.050752f,  -0.194580f, -0.208679f, 0.153722f};
+  const float kReference[] = {-0.173584f, -0.265778f, 0.158783f,  -0.259430f,
+                              -0.176361f, 0.192841f,  0.056854f,  0.171448f,
+                              0.050751f,  -0.194580f, -0.208710f, 0.153717f};
 
   RunBitexactnessTest(
       8000, 1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
@@ -202,10 +202,10 @@ TEST(LowCutFilterBitExactnessTest, Stereo8kHzInitial) {
       0.768778f,  -0.122021f, 0.563445f,  -0.703070f};
 
   const float kReference[] = {
-      0.733329f,  0.084109f,  0.072695f,  0.566210f,  -1.000000f, 0.652120f,
-      -0.297424f, -0.964020f, 0.438551f,  -0.698364f, -0.654449f, 0.266243f,
-      0.454115f,  0.684774f,  -0.586823f, -0.747345f, -0.503021f, -0.222961f,
-      -0.314972f, 0.907224f,  -0.796265f, 0.284280f,  -0.533417f, 0.773980f};
+      0.733307f,  0.084106f,  0.072693f,  0.566193f,  -1.000000f, 0.652130f,
+      -0.297424f, -0.964020f, 0.438568f,  -0.698364f, -0.654449f, 0.266205f,
+      0.454102f,  0.684784f,  -0.586823f, -0.747375f, -0.503021f, -0.222961f,
+      -0.314972f, 0.907196f,  -0.796295f, 0.284271f,  -0.533417f, 0.773956f};
 
   RunBitexactnessTest(
       8000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
@@ -296,10 +296,10 @@ TEST(LowCutFilterBitExactnessTest, Stereo8kHzConverged) {
       -0.034654f, -0.743470f, -0.494178f, 0.767923f,  -0.607446f, -0.757293f};
 
   const float kReference[] = {
-      -0.544495f, 0.264199f, 0.647938f,  0.565569f,  0.496231f,  0.271340f,
-      0.519944f,  0.318094f, -0.792999f, 0.733421f,  -1.000000f, 0.103977f,
-      0.981719f,  0.314859f, 0.476882f,  0.514267f,  -0.196381f, -0.425781f,
-      -0.783081f, 0.101108f, 0.419782f,  -0.291718f, 0.183355f,  -0.332489f};
+      -0.544525f, 0.264221f, 0.647919f,  0.565552f,  0.496185f,  0.271332f,
+      0.519958f,  0.318085f, -0.792999f, 0.733429f,  -1.000000f, 0.103973f,
+      0.981720f,  0.314850f, 0.476837f,  0.514252f,  -0.196411f, -0.425812f,
+      -0.783112f, 0.101105f, 0.419739f,  -0.291718f, 0.183350f,  -0.332489f};
 
   RunBitexactnessTest(
       8000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
@@ -669,10 +669,10 @@ TEST(LowCutFilterBitExactnessTest, Stereo16kHzConverged) {
       0.462557f,  0.807713f,  -0.095536f, -0.858625f, -0.517444f, 0.463730f};
 
   const float kReference[] = {
-      -0.816528f, 0.085421f,  0.739647f,  -0.922089f, 0.669301f,  -0.048187f,
-      -0.290039f, -0.818085f, -0.596008f, -0.177826f, -0.002197f, -0.350647f,
-      -0.064301f, 0.337291f,  -0.621765f, 0.115909f,  0.311899f,  -0.915924f,
-      0.020478f,  0.836055f,  -0.714020f, -0.037140f, 0.391125f,  -0.340118f};
+      -0.816559f, 0.085419f,  0.739655f,  -0.922089f, 0.669312f,  -0.048218f,
+      -0.290039f, -0.818085f, -0.596039f, -0.177856f, -0.002197f, -0.350647f,
+      -0.064331f, 0.337280f,  -0.621765f, 0.115906f,  0.311890f,  -0.915924f,
+      0.020477f,  0.836029f,  -0.714020f, -0.037140f, 0.391113f,  -0.340118f};
 
   RunBitexactnessTest(
       16000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
diff --git a/modules/audio_processing/noise_suppression_unittest.cc b/modules/audio_processing/noise_suppression_unittest.cc
index b2074859b0..29aae8b90f 100644
--- a/modules/audio_processing/noise_suppression_unittest.cc
+++ b/modules/audio_processing/noise_suppression_unittest.cc
@@ -106,10 +106,10 @@ TEST(NoiseSuppresionBitExactnessTest, Mono8kHzLow) {
                                            7677.521973f};
   const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f};
 #else
-  const float kSpeechProbabilityReference = 0.73421317f;
-  const float kNoiseEstimateReference[] = {1175.266113f, 3289.305908f,
-                                           7532.991211f};
-  const float kOutputReference[] = {0.003263f, 0.004402f, 0.004537f};
+  const float kSpeechProbabilityReference = 0.73650402f;
+  const float kNoiseEstimateReference[] = {1176.856812f, 3287.490967f,
+                                           7525.964844f};
+  const float kOutputReference[] = {0.003306f, 0.004442f, 0.004574f};
 #endif
 
   RunBitexactnessTest(8000, 1, NoiseSuppression::Level::kLow,
@@ -129,9 +129,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzLow) {
                                            14367.499023f};
   const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f};
 #else
-  const float kSpeechProbabilityReference = 0.71672988f;
-  const float kNoiseEstimateReference[] = {2151.313965f, 6509.765137f,
-                                           15658.848633f};
+  const float kSpeechProbabilityReference = 0.71743423f;
+  const float kNoiseEstimateReference[] = {2179.853027f, 6507.995117f,
+                                           15652.758789f};
   const float kOutputReference[] = {0.003574f, 0.004494f, 0.004499f};
 #endif
 
@@ -166,18 +166,18 @@ TEST(NoiseSuppresionBitExactnessTest, Mono32kHzLow) {
 TEST(NoiseSuppresionBitExactnessTest, Mono48kHzLow) {
 #if defined(WEBRTC_ARCH_ARM64)
   const float kSpeechProbabilityReference = -4.0f;
-  const float kNoiseEstimateReference[] = {2564.605713f, 6213.656250f,
-                                           13372.284180f};
-  const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f};
+  const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f,
+                                           14647.632812f};
+  const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f};
 #elif defined(WEBRTC_ARCH_ARM)
   const float kSpeechProbabilityReference = -4.0f;
-  const float kNoiseEstimateReference[] = {2564.605713f, 6213.656250f,
-                                           13372.284180f};
-  const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f};
+  const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f,
+                                           14647.632812f};
+  const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f};
 #else
-  const float kSpeechProbabilityReference = 0.70645678f;
-  const float kNoiseEstimateReference[] = {2168.783203f, 6902.895508f,
-                                           13190.677734f};
+  const float kSpeechProbabilityReference = 0.70737761f;
+  const float kNoiseEstimateReference[] = {2187.394043f, 6913.306641f,
+                                           13182.945312f};
   const float kOutputReference[] = {-0.013062f, -0.012657f, -0.011934f};
 #endif
 
@@ -200,9 +200,9 @@ TEST(NoiseSuppresionBitExactnessTest, Stereo16kHzLow) {
   const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f,
                                     -0.002472f, 0.000916f,  -0.003235f};
 #else
-  const float kSpeechProbabilityReference = 0.67230678f;
-  const float kNoiseEstimateReference[] = {9771.250000f, 11329.377930f,
-                                           10503.052734f};
+  const float kSpeechProbabilityReference = 0.67285913f;
+  const float kNoiseEstimateReference[] = {9753.257812f, 11515.603516f,
+                                           10503.309570f};
   const float kOutputReference[] = {-0.011459f, -0.008110f, -0.012728f,
                                     -0.002399f, 0.001018f,  -0.003189f};
 #endif
@@ -224,9 +224,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzModerate) {
                                            16726.523438f};
   const float kOutputReference[] = {0.004669f, 0.005615f, 0.005585f};
 #else
-  const float kSpeechProbabilityReference = 0.70897013f;
-  const float kNoiseEstimateReference[] = {2171.490723f, 6553.567871f,
-                                           15626.562500f};
+  const float kSpeechProbabilityReference = 0.70916927f;
+  const float kNoiseEstimateReference[] = {2172.830566f, 6552.661133f,
+                                           15624.025391f};
   const float kOutputReference[] = {0.004513f, 0.005590f, 0.005614f};
 #endif
 
@@ -247,9 +247,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzHigh) {
                                            16920.960938f};
   const float kOutputReference[] = {0.004547f, 0.005432f, 0.005402f};
 #else
-  const float kSpeechProbabilityReference = 0.70106733f;
-  const float kNoiseEstimateReference[] = {2224.968506f, 6712.025879f,
-                                           15785.087891f};
+  const float kSpeechProbabilityReference = 0.70104003f;
+  const float kNoiseEstimateReference[] = {2225.081055f, 6711.529785f,
+                                           15785.949219f};
   const float kOutputReference[] = {0.004394f, 0.005406f, 0.005416f};
 #endif
 
@@ -270,9 +270,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzVeryHigh) {
                                            14365.744141f};
   const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f};
 #else
-  const float kSpeechProbabilityReference = 0.70281971f;
-  const float kNoiseEstimateReference[] = {2254.347900f, 6723.699707f,
-                                           15771.625977f};
+  const float kSpeechProbabilityReference = 0.70290041f;
+  const float kNoiseEstimateReference[] = {2254.921875f, 6723.172852f,
+                                           15770.559570f};
   const float kOutputReference[] = {0.004321f, 0.005247f, 0.005263f};
 #endif