Correct conversion between float and fixed formats

This CL changes the way that values are converted between fixed-point and
floating-point formats in order to:
- Avoid the former asymmetric conversion, which caused nonlinear distortions.
- Reduce the complexity.

Bug: webrtc:6594
Change-Id: I64d0cc31c5d16f397686a59a062cfbc4b336d94d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/132783
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28867}
2019-08-12 17:41:45 +02:00
parent a1351271e6
commit 67e43c8b95
7 changed files with 87 additions and 109 deletions
--- a/common_audio/audio_util.cc
+++ b/common_audio/audio_util.cc
@@ -12,11 +12,6 @@

 namespace webrtc {

-void FloatToS16(const float* src, size_t size, int16_t* dest) {
-  for (size_t i = 0; i < size; ++i)
-    dest[i] = FloatToS16(src[i]);
-}
-
 void S16ToFloat(const int16_t* src, size_t size, float* dest) {
  for (size_t i = 0; i < size; ++i)
    dest[i] = S16ToFloat(src[i]);
--- a/common_audio/audio_util_unittest.cc
+++ b/common_audio/audio_util_unittest.cc
@@ -31,25 +31,6 @@ void ExpectArraysEq(const float* ref, const float* test, size_t length) {
  }
 }

-TEST(AudioUtilTest, FloatToS16) {
-  static constexpr float kInput[] = {0.f,
-                                     0.4f / 32767.f,
-                                     0.6f / 32767.f,
-                                     -0.4f / 32768.f,
-                                     -0.6f / 32768.f,
-                                     1.f,
-                                     -1.f,
-                                     1.1f,
-                                     -1.1f};
-  static constexpr int16_t kReference[] = {0,     0,      1,     0,     -1,
-                                           32767, -32768, 32767, -32768};
-  static constexpr size_t kSize = arraysize(kInput);
-  static_assert(arraysize(kReference) == kSize, "");
-  int16_t output[kSize];
-  FloatToS16(kInput, kSize, output);
-  ExpectArraysEq(kReference, output, kSize);
-}
-
 TEST(AudioUtilTest, S16ToFloat) {
  static constexpr int16_t kInput[] = {0, 1, -1, 16384, -16384, 32767, -32768};
  static constexpr float kReference[] = {
@@ -74,16 +55,16 @@ TEST(AudioUtilTest, FloatS16ToS16) {

 TEST(AudioUtilTest, FloatToFloatS16) {
  static constexpr float kInput[] = {0.f,
-                                     0.4f / 32767.f,
-                                     0.6f / 32767.f,
+                                     0.4f / 32768.f,
+                                     0.6f / 32768.f,
                                     -0.4f / 32768.f,
                                     -0.6f / 32768.f,
                                     1.f,
                                     -1.f,
-                                     1.1f,
-                                     -1.1f};
+                                     1.f,
+                                     -1.f};
  static constexpr float kReference[] = {
-      0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
+      0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32768.f, -32768.f, 32768.f, -32768.f};
  static constexpr size_t kSize = arraysize(kInput);
  static_assert(arraysize(kReference) == kSize, "");
  float output[kSize];
@@ -92,17 +73,17 @@ TEST(AudioUtilTest, FloatToFloatS16) {
 }

 TEST(AudioUtilTest, FloatS16ToFloat) {
-  static constexpr float kInput[] = {
-      0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
+  static constexpr float kInput[] = {0.f,     0.4f,     0.6f,    -0.4f,   -0.6f,
+                                     32767.f, -32768.f, 32767.f, -32768.f};
  static constexpr float kReference[] = {0.f,
-                                         0.4f / 32767.f,
-                                         0.6f / 32767.f,
+                                         0.4f / 32768.f,
+                                         0.6f / 32768.f,
                                         -0.4f / 32768.f,
                                         -0.6f / 32768.f,
                                         1.f,
                                         -1.f,
-                                         1.1f,
-                                         -1.1f};
+                                         1.f,
+                                         -1.f};
  static constexpr size_t kSize = arraysize(kInput);
  static_assert(arraysize(kReference) == kSize, "");
  float output[kSize];
--- a/common_audio/include/audio_util.h
+++ b/common_audio/include/audio_util.h
@@ -27,45 +27,35 @@ typedef std::numeric_limits<int16_t> limits_int16;
 // The conversion functions use the following naming convention:
 // S16:      int16_t [-32768, 32767]
 // Float:    float   [-1.0, 1.0]
-// FloatS16: float   [-32768.0, 32767.0]
+// FloatS16: float   [-32768.0, 32768.0]
 // Dbfs: float [-20.0*log(10, 32768), 0] = [-90.3, 0]
 // The ratio conversion functions use this naming convention:
 // Ratio: float (0, +inf)
 // Db: float (-inf, +inf)
-static inline int16_t FloatToS16(float v) {
-  if (v > 0)
-    return v >= 1 ? limits_int16::max()
-                  : static_cast<int16_t>(v * limits_int16::max() + 0.5f);
-  return v <= -1 ? limits_int16::min()
-                 : static_cast<int16_t>(-v * limits_int16::min() - 0.5f);
-}
-
 static inline float S16ToFloat(int16_t v) {
-  static const float kMaxInt16Inverse = 1.f / limits_int16::max();
-  static const float kMinInt16Inverse = 1.f / limits_int16::min();
-  return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
+  constexpr float kScaling = 1.f / 32768.f;
+  return v * kScaling;
 }

 static inline int16_t FloatS16ToS16(float v) {
-  static const float kMaxRound = limits_int16::max() - 0.5f;
-  static const float kMinRound = limits_int16::min() + 0.5f;
-  if (v > 0)
-    return v >= kMaxRound ? limits_int16::max()
-                          : static_cast<int16_t>(v + 0.5f);
-  return v <= kMinRound ? limits_int16::min() : static_cast<int16_t>(v - 0.5f);
+  v = std::min(v, 32767.f);
+  v = std::max(v, -32768.f);
+  return static_cast<int16_t>(v + std::copysign(0.5f, v));
 }

 static inline float FloatToFloatS16(float v) {
-  return v * (v > 0 ? limits_int16::max() : -limits_int16::min());
+  RTC_DCHECK_LE(v, 1.f);
+  RTC_DCHECK_GE(v, -1.f);
+  return v * 32768.f;
 }

 static inline float FloatS16ToFloat(float v) {
-  static const float kMaxInt16Inverse = 1.f / limits_int16::max();
-  static const float kMinInt16Inverse = 1.f / limits_int16::min();
-  return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
+  RTC_DCHECK_LE(v, 32768.f);
+  RTC_DCHECK_GE(v, -32768.f);
+  constexpr float kScaling = 1.f / 32768.f;
+  return v * kScaling;
 }

-void FloatToS16(const float* src, size_t size, int16_t* dest);
 void S16ToFloat(const int16_t* src, size_t size, float* dest);
 void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
 void FloatToFloatS16(const float* src, size_t size, float* dest);
--- a/common_audio/resampler/push_sinc_resampler_unittest.cc
+++ b/common_audio/resampler/push_sinc_resampler_unittest.cc
@@ -33,6 +33,18 @@ T DBFS(T x) {
  return 20 * std::log10(x);
 }

+// Test helper: converts floats in [-1, 1] to int16_t, saturating at the limits.
+void FloatToS16(const float* src, size_t size, int16_t* dest) {
+  for (size_t i = 0; i < size; ++i) {
+    RTC_DCHECK_GE(32767.f, src[i]);
+    RTC_DCHECK_LE(-32768.f, src[i]);
+    // Saturate first so the scaled value is only cast when it fits in int16_t.
+    dest[i] = static_cast<int16_t>(src[i] >= 1.f    ? 32767.f
+                                   : src[i] <= -1.f ? -32768.f
+                                                    : src[i] * 32767.5f);
+  }
+}
+
 }  // namespace

 class PushSincResamplerTest : public ::testing::TestWithParam<
@@ -322,14 +334,14 @@ INSTANTIATE_TEST_SUITE_P(
        ::testing::make_tuple(32000, 16000, -18.48, -28.59),
        ::testing::make_tuple(44100, 16000, -19.30, -19.67),
        ::testing::make_tuple(48000, 16000, -19.81, -18.11),
-        ::testing::make_tuple(96000, 16000, -20.95, -10.96),
+        ::testing::make_tuple(96000, 16000, -20.95, -10.9596),

        // To 32 kHz
        ::testing::make_tuple(8000, 32000, kResamplingRMSError, -70.30),
        ::testing::make_tuple(16000, 32000, kResamplingRMSError, -75.51),
        ::testing::make_tuple(32000, 32000, kResamplingRMSError, -75.51),
-        ::testing::make_tuple(44100, 32000, -16.44, -51.10),
-        ::testing::make_tuple(48000, 32000, -16.90, -44.03),
+        ::testing::make_tuple(44100, 32000, -16.44, -51.0349),
+        ::testing::make_tuple(48000, 32000, -16.90, -43.9967),
        ::testing::make_tuple(96000, 32000, -19.61, -18.04),
        ::testing::make_tuple(192000, 32000, -21.02, -10.94)));

--- a/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc
+++ b/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc
@@ -324,8 +324,8 @@ TEST(EchoCancellationBitExactnessTest,
     DISABLED_Stereo32kHz_HighLevel_NoDrift_StreamDelay0) {
 #endif
 #if defined(WEBRTC_MAC)
-  const float kOutputReference[] = {-0.000458f, 0.000244f, 0.000153f,
-                                    -0.000458f, 0.000244f, 0.000153f};
+  const float kOutputReference[] = {-0.000458f, 0.000214f, 0.000122f,
+                                    -0.000458f, 0.000214f, 0.000122f};
 #else
  const float kOutputReference[] = {-0.000427f, 0.000183f, 0.000183f,
                                    -0.000427f, 0.000183f, 0.000183f};
--- a/modules/audio_processing/low_cut_filter_unittest.cc
+++ b/modules/audio_processing/low_cut_filter_unittest.cc
@@ -110,9 +110,9 @@ TEST(LowCutFilterBitExactnessTest, Mono8kHzInitial) {
      0.349705f,  0.173054f,  0.016750f,  -0.415957f, -0.461001f, -0.557111f,
      0.738711f,  0.275720f};

-  const float kReference[] = {0.142277f,  -0.418518f, -0.028229f, -0.102112f,
-                              0.141270f,  0.137791f,  0.124577f,  -0.088715f,
-                              -0.142273f, -0.125885f, 0.266640f,  -0.468079f};
+  const float kReference[] = {0.142273f,  -0.418518f, -0.028229f, -0.102112f,
+                              0.141266f,  0.137787f,  0.124573f,  -0.088715f,
+                              -0.142273f, -0.125885f, 0.266663f,  -0.468109f};

  RunBitexactnessTest(
      8000, 1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
@@ -162,9 +162,9 @@ TEST(LowCutFilterBitExactnessTest, Mono8kHzConverged) {
      0.036391f,  -0.408991f, 0.369330f,  0.399785f,  -0.471419f, 0.551138f,
      -0.307569f, 0.064315f,  0.311605f,  0.041736f,  0.650943f,  0.780496f};

-  const float kReference[] = {-0.173553f, -0.265778f, 0.158757f,  -0.259399f,
-                              -0.176361f, 0.192877f,  0.056825f,  0.171453f,
-                              0.050752f,  -0.194580f, -0.208679f, 0.153722f};
+  const float kReference[] = {-0.173584f, -0.265778f, 0.158783f,  -0.259430f,
+                              -0.176361f, 0.192841f,  0.056854f,  0.171448f,
+                              0.050751f,  -0.194580f, -0.208710f, 0.153717f};

  RunBitexactnessTest(
      8000, 1, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
@@ -202,10 +202,10 @@ TEST(LowCutFilterBitExactnessTest, Stereo8kHzInitial) {
      0.768778f,  -0.122021f, 0.563445f,  -0.703070f};

  const float kReference[] = {
-      0.733329f,  0.084109f,  0.072695f,  0.566210f,  -1.000000f, 0.652120f,
-      -0.297424f, -0.964020f, 0.438551f,  -0.698364f, -0.654449f, 0.266243f,
-      0.454115f,  0.684774f,  -0.586823f, -0.747345f, -0.503021f, -0.222961f,
-      -0.314972f, 0.907224f,  -0.796265f, 0.284280f,  -0.533417f, 0.773980f};
+      0.733307f,  0.084106f,  0.072693f,  0.566193f,  -1.000000f, 0.652130f,
+      -0.297424f, -0.964020f, 0.438568f,  -0.698364f, -0.654449f, 0.266205f,
+      0.454102f,  0.684784f,  -0.586823f, -0.747375f, -0.503021f, -0.222961f,
+      -0.314972f, 0.907196f,  -0.796295f, 0.284271f,  -0.533417f, 0.773956f};

  RunBitexactnessTest(
      8000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
@@ -296,10 +296,10 @@ TEST(LowCutFilterBitExactnessTest, Stereo8kHzConverged) {
      -0.034654f, -0.743470f, -0.494178f, 0.767923f,  -0.607446f, -0.757293f};

  const float kReference[] = {
-      -0.544495f, 0.264199f, 0.647938f,  0.565569f,  0.496231f,  0.271340f,
-      0.519944f,  0.318094f, -0.792999f, 0.733421f,  -1.000000f, 0.103977f,
-      0.981719f,  0.314859f, 0.476882f,  0.514267f,  -0.196381f, -0.425781f,
-      -0.783081f, 0.101108f, 0.419782f,  -0.291718f, 0.183355f,  -0.332489f};
+      -0.544525f, 0.264221f, 0.647919f,  0.565552f,  0.496185f,  0.271332f,
+      0.519958f,  0.318085f, -0.792999f, 0.733429f,  -1.000000f, 0.103973f,
+      0.981720f,  0.314850f, 0.476837f,  0.514252f,  -0.196411f, -0.425812f,
+      -0.783112f, 0.101105f, 0.419739f,  -0.291718f, 0.183350f,  -0.332489f};

  RunBitexactnessTest(
      8000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
@@ -669,10 +669,10 @@ TEST(LowCutFilterBitExactnessTest, Stereo16kHzConverged) {
      0.462557f,  0.807713f,  -0.095536f, -0.858625f, -0.517444f, 0.463730f};

  const float kReference[] = {
-      -0.816528f, 0.085421f,  0.739647f,  -0.922089f, 0.669301f,  -0.048187f,
-      -0.290039f, -0.818085f, -0.596008f, -0.177826f, -0.002197f, -0.350647f,
-      -0.064301f, 0.337291f,  -0.621765f, 0.115909f,  0.311899f,  -0.915924f,
-      0.020478f,  0.836055f,  -0.714020f, -0.037140f, 0.391125f,  -0.340118f};
+      -0.816559f, 0.085419f,  0.739655f,  -0.922089f, 0.669312f,  -0.048218f,
+      -0.290039f, -0.818085f, -0.596039f, -0.177856f, -0.002197f, -0.350647f,
+      -0.064331f, 0.337280f,  -0.621765f, 0.115906f,  0.311890f,  -0.915924f,
+      0.020477f,  0.836029f,  -0.714020f, -0.037140f, 0.391113f,  -0.340118f};

  RunBitexactnessTest(
      16000, 2, CreateVector(rtc::ArrayView<const float>(kReferenceInput)),
--- a/modules/audio_processing/noise_suppression_unittest.cc
+++ b/modules/audio_processing/noise_suppression_unittest.cc
@@ -106,10 +106,10 @@ TEST(NoiseSuppresionBitExactnessTest, Mono8kHzLow) {
                                           7677.521973f};
  const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f};
 #else
-  const float kSpeechProbabilityReference = 0.73421317f;
-  const float kNoiseEstimateReference[] = {1175.266113f, 3289.305908f,
-                                           7532.991211f};
-  const float kOutputReference[] = {0.003263f, 0.004402f, 0.004537f};
+  const float kSpeechProbabilityReference = 0.73650402f;
+  const float kNoiseEstimateReference[] = {1176.856812f, 3287.490967f,
+                                           7525.964844f};
+  const float kOutputReference[] = {0.003306f, 0.004442f, 0.004574f};
 #endif

  RunBitexactnessTest(8000, 1, NoiseSuppression::Level::kLow,
@@ -129,9 +129,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzLow) {
                                           14367.499023f};
  const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f};
 #else
-  const float kSpeechProbabilityReference = 0.71672988f;
-  const float kNoiseEstimateReference[] = {2151.313965f, 6509.765137f,
-                                           15658.848633f};
+  const float kSpeechProbabilityReference = 0.71743423f;
+  const float kNoiseEstimateReference[] = {2179.853027f, 6507.995117f,
+                                           15652.758789f};
  const float kOutputReference[] = {0.003574f, 0.004494f, 0.004499f};
 #endif

@@ -166,18 +166,18 @@ TEST(NoiseSuppresionBitExactnessTest, Mono32kHzLow) {
 TEST(NoiseSuppresionBitExactnessTest, Mono48kHzLow) {
 #if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
-  const float kNoiseEstimateReference[] = {2564.605713f, 6213.656250f,
-                                           13372.284180f};
-  const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f};
+  const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f,
+                                           14647.632812f};
+  const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f};
 #elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
-  const float kNoiseEstimateReference[] = {2564.605713f, 6213.656250f,
-                                           13372.284180f};
-  const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f};
+  const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f,
+                                           14647.632812f};
+  const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f};
 #else
-  const float kSpeechProbabilityReference = 0.70645678f;
-  const float kNoiseEstimateReference[] = {2168.783203f, 6902.895508f,
-                                           13190.677734f};
+  const float kSpeechProbabilityReference = 0.70737761f;
+  const float kNoiseEstimateReference[] = {2187.394043f, 6913.306641f,
+                                           13182.945312f};
  const float kOutputReference[] = {-0.013062f, -0.012657f, -0.011934f};
 #endif

@@ -200,9 +200,9 @@ TEST(NoiseSuppresionBitExactnessTest, Stereo16kHzLow) {
  const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f,
                                    -0.002472f, 0.000916f,  -0.003235f};
 #else
-  const float kSpeechProbabilityReference = 0.67230678f;
-  const float kNoiseEstimateReference[] = {9771.250000f, 11329.377930f,
-                                           10503.052734f};
+  const float kSpeechProbabilityReference = 0.67285913f;
+  const float kNoiseEstimateReference[] = {9753.257812f, 11515.603516f,
+                                           10503.309570f};
  const float kOutputReference[] = {-0.011459f, -0.008110f, -0.012728f,
                                    -0.002399f, 0.001018f,  -0.003189f};
 #endif
@@ -224,9 +224,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzModerate) {
                                           16726.523438f};
  const float kOutputReference[] = {0.004669f, 0.005615f, 0.005585f};
 #else
-  const float kSpeechProbabilityReference = 0.70897013f;
-  const float kNoiseEstimateReference[] = {2171.490723f, 6553.567871f,
-                                           15626.562500f};
+  const float kSpeechProbabilityReference = 0.70916927f;
+  const float kNoiseEstimateReference[] = {2172.830566f, 6552.661133f,
+                                           15624.025391f};
  const float kOutputReference[] = {0.004513f, 0.005590f, 0.005614f};
 #endif

@@ -247,9 +247,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzHigh) {
                                           16920.960938f};
  const float kOutputReference[] = {0.004547f, 0.005432f, 0.005402f};
 #else
-  const float kSpeechProbabilityReference = 0.70106733f;
-  const float kNoiseEstimateReference[] = {2224.968506f, 6712.025879f,
-                                           15785.087891f};
+  const float kSpeechProbabilityReference = 0.70104003f;
+  const float kNoiseEstimateReference[] = {2225.081055f, 6711.529785f,
+                                           15785.949219};
  const float kOutputReference[] = {0.004394f, 0.005406f, 0.005416f};
 #endif

@@ -270,9 +270,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzVeryHigh) {
                                           14365.744141f};
  const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f};
 #else
-  const float kSpeechProbabilityReference = 0.70281971f;
-  const float kNoiseEstimateReference[] = {2254.347900f, 6723.699707f,
-                                           15771.625977f};
+  const float kSpeechProbabilityReference = 0.70290041f;
+  const float kNoiseEstimateReference[] = {2254.921875f, 6723.172852f,
+                                           15770.559570f};
  const float kOutputReference[] = {0.004321f, 0.005247f, 0.005263f};
 #endif