From 67e43c8b95057a889ba9946e47d50a265e1e9ac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Mon, 12 Aug 2019 17:41:45 +0200 Subject: [PATCH] Correct conversion between float and fixed formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL changes the way that values are converted between fixed and floating point to -Avoid the former asymmetric conversion causing nonlinear distortions. -Reduce the complexity. Bug: webrtc:6594 Change-Id: I64d0cc31c5d16f397686a59a062cfbc4b336d94d Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/132783 Reviewed-by: Henrik Lundin Reviewed-by: Gustaf Ullberg Commit-Queue: Per Åhgren Cr-Commit-Position: refs/heads/master@{#28867} --- common_audio/audio_util.cc | 5 -- common_audio/audio_util_unittest.cc | 41 ++++---------- common_audio/include/audio_util.h | 36 +++++------- .../resampler/push_sinc_resampler_unittest.cc | 18 +++++- .../echo_cancellation_bit_exact_unittest.cc | 4 +- .../low_cut_filter_unittest.cc | 36 ++++++------ .../noise_suppression_unittest.cc | 56 +++++++++---------- 7 files changed, 87 insertions(+), 109 deletions(-) diff --git a/common_audio/audio_util.cc b/common_audio/audio_util.cc index 735ba5f188..eb132ca633 100644 --- a/common_audio/audio_util.cc +++ b/common_audio/audio_util.cc @@ -12,11 +12,6 @@ namespace webrtc { -void FloatToS16(const float* src, size_t size, int16_t* dest) { - for (size_t i = 0; i < size; ++i) - dest[i] = FloatToS16(src[i]); -} - void S16ToFloat(const int16_t* src, size_t size, float* dest) { for (size_t i = 0; i < size; ++i) dest[i] = S16ToFloat(src[i]); diff --git a/common_audio/audio_util_unittest.cc b/common_audio/audio_util_unittest.cc index cf85a2d46c..a215a123b1 100644 --- a/common_audio/audio_util_unittest.cc +++ b/common_audio/audio_util_unittest.cc @@ -31,25 +31,6 @@ void ExpectArraysEq(const float* ref, const float* test, size_t length) { } } -TEST(AudioUtilTest, FloatToS16) { - static constexpr 
float kInput[] = {0.f, - 0.4f / 32767.f, - 0.6f / 32767.f, - -0.4f / 32768.f, - -0.6f / 32768.f, - 1.f, - -1.f, - 1.1f, - -1.1f}; - static constexpr int16_t kReference[] = {0, 0, 1, 0, -1, - 32767, -32768, 32767, -32768}; - static constexpr size_t kSize = arraysize(kInput); - static_assert(arraysize(kReference) == kSize, ""); - int16_t output[kSize]; - FloatToS16(kInput, kSize, output); - ExpectArraysEq(kReference, output, kSize); -} - TEST(AudioUtilTest, S16ToFloat) { static constexpr int16_t kInput[] = {0, 1, -1, 16384, -16384, 32767, -32768}; static constexpr float kReference[] = { @@ -74,16 +55,16 @@ TEST(AudioUtilTest, FloatS16ToS16) { TEST(AudioUtilTest, FloatToFloatS16) { static constexpr float kInput[] = {0.f, - 0.4f / 32767.f, - 0.6f / 32767.f, + 0.4f / 32768.f, + 0.6f / 32768.f, -0.4f / 32768.f, -0.6f / 32768.f, 1.f, -1.f, - 1.1f, - -1.1f}; + 1.f, + -1.f}; static constexpr float kReference[] = { - 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f}; + 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32768.f, -32768.f, 32768.f, -32768.f}; static constexpr size_t kSize = arraysize(kInput); static_assert(arraysize(kReference) == kSize, ""); float output[kSize]; @@ -92,17 +73,17 @@ TEST(AudioUtilTest, FloatToFloatS16) { } TEST(AudioUtilTest, FloatS16ToFloat) { - static constexpr float kInput[] = { - 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f}; + static constexpr float kInput[] = {0.f, 0.4f, 0.6f, -0.4f, -0.6f, + 32767.f, -32768.f, 32767.f, -32768.f}; static constexpr float kReference[] = {0.f, - 0.4f / 32767.f, - 0.6f / 32767.f, + 0.4f / 32768.f, + 0.6f / 32768.f, -0.4f / 32768.f, -0.6f / 32768.f, 1.f, -1.f, - 1.1f, - -1.1f}; + 1.f, + -1.f}; static constexpr size_t kSize = arraysize(kInput); static_assert(arraysize(kReference) == kSize, ""); float output[kSize]; diff --git a/common_audio/include/audio_util.h b/common_audio/include/audio_util.h index 50c9cf282c..255abcc0f7 100644 --- a/common_audio/include/audio_util.h +++ 
b/common_audio/include/audio_util.h @@ -27,45 +27,35 @@ typedef std::numeric_limits limits_int16; // The conversion functions use the following naming convention: // S16: int16_t [-32768, 32767] // Float: float [-1.0, 1.0] -// FloatS16: float [-32768.0, 32767.0] +// FloatS16: float [-32768.0, 32768.0] // Dbfs: float [-20.0*log(10, 32768), 0] = [-90.3, 0] // The ratio conversion functions use this naming convention: // Ratio: float (0, +inf) // Db: float (-inf, +inf) -static inline int16_t FloatToS16(float v) { - if (v > 0) - return v >= 1 ? limits_int16::max() - : static_cast(v * limits_int16::max() + 0.5f); - return v <= -1 ? limits_int16::min() - : static_cast(-v * limits_int16::min() - 0.5f); -} - static inline float S16ToFloat(int16_t v) { - static const float kMaxInt16Inverse = 1.f / limits_int16::max(); - static const float kMinInt16Inverse = 1.f / limits_int16::min(); - return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse); + constexpr float kScaling = 1.f / 32768.f; + return v * kScaling; } static inline int16_t FloatS16ToS16(float v) { - static const float kMaxRound = limits_int16::max() - 0.5f; - static const float kMinRound = limits_int16::min() + 0.5f; - if (v > 0) - return v >= kMaxRound ? limits_int16::max() - : static_cast(v + 0.5f); - return v <= kMinRound ? limits_int16::min() : static_cast(v - 0.5f); + v = std::min(v, 32767.f); + v = std::max(v, -32768.f); + return static_cast(v + std::copysign(0.5f, v)); } static inline float FloatToFloatS16(float v) { - return v * (v > 0 ? limits_int16::max() : -limits_int16::min()); + RTC_DCHECK_LE(v, 1.f); + RTC_DCHECK_GE(v, -1.f); + return v * 32768.f; } static inline float FloatS16ToFloat(float v) { - static const float kMaxInt16Inverse = 1.f / limits_int16::max(); - static const float kMinInt16Inverse = 1.f / limits_int16::min(); - return v * (v > 0 ? 
kMaxInt16Inverse : -kMinInt16Inverse); + RTC_DCHECK_LE(v, 32768.f); + RTC_DCHECK_GE(v, -32768.f); + constexpr float kScaling = 1.f / 32768.f; + return v * kScaling; } -void FloatToS16(const float* src, size_t size, int16_t* dest); void S16ToFloat(const int16_t* src, size_t size, float* dest); void FloatS16ToS16(const float* src, size_t size, int16_t* dest); void FloatToFloatS16(const float* src, size_t size, float* dest); diff --git a/common_audio/resampler/push_sinc_resampler_unittest.cc b/common_audio/resampler/push_sinc_resampler_unittest.cc index 1a25a8c3e8..f9943b3cc8 100644 --- a/common_audio/resampler/push_sinc_resampler_unittest.cc +++ b/common_audio/resampler/push_sinc_resampler_unittest.cc @@ -33,6 +33,18 @@ T DBFS(T x) { return 20 * std::log10(x); } +void FloatToS16(const float* src, size_t size, int16_t* dest) { + for (size_t i = 0; i < size; ++i) { + RTC_DCHECK_GE(32767.f, src[i]); + RTC_DCHECK_LE(-32768.f, src[i]); + if (src[i] >= 1.f) + dest[i] = 32767; + if (src[i] <= -1.f) + dest[i] = -32768; + dest[i] = static_cast(src[i] * 32767.5f); + } +} + } // namespace class PushSincResamplerTest : public ::testing::TestWithParam< @@ -322,14 +334,14 @@ INSTANTIATE_TEST_SUITE_P( ::testing::make_tuple(32000, 16000, -18.48, -28.59), ::testing::make_tuple(44100, 16000, -19.30, -19.67), ::testing::make_tuple(48000, 16000, -19.81, -18.11), - ::testing::make_tuple(96000, 16000, -20.95, -10.96), + ::testing::make_tuple(96000, 16000, -20.95, -10.9596), // To 32 kHz ::testing::make_tuple(8000, 32000, kResamplingRMSError, -70.30), ::testing::make_tuple(16000, 32000, kResamplingRMSError, -75.51), ::testing::make_tuple(32000, 32000, kResamplingRMSError, -75.51), - ::testing::make_tuple(44100, 32000, -16.44, -51.10), - ::testing::make_tuple(48000, 32000, -16.90, -44.03), + ::testing::make_tuple(44100, 32000, -16.44, -51.0349), + ::testing::make_tuple(48000, 32000, -16.90, -43.9967), ::testing::make_tuple(96000, 32000, -19.61, -18.04), ::testing::make_tuple(192000, 32000, 
-21.02, -10.94))); diff --git a/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc b/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc index 80f36a8c0e..d44483c4bc 100644 --- a/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc +++ b/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc @@ -324,8 +324,8 @@ TEST(EchoCancellationBitExactnessTest, DISABLED_Stereo32kHz_HighLevel_NoDrift_StreamDelay0) { #endif #if defined(WEBRTC_MAC) - const float kOutputReference[] = {-0.000458f, 0.000244f, 0.000153f, - -0.000458f, 0.000244f, 0.000153f}; + const float kOutputReference[] = {-0.000458f, 0.000214f, 0.000122f, + -0.000458f, 0.000214f, 0.000122f}; #else const float kOutputReference[] = {-0.000427f, 0.000183f, 0.000183f, -0.000427f, 0.000183f, 0.000183f}; diff --git a/modules/audio_processing/low_cut_filter_unittest.cc b/modules/audio_processing/low_cut_filter_unittest.cc index ea4fb6711b..fb950da640 100644 --- a/modules/audio_processing/low_cut_filter_unittest.cc +++ b/modules/audio_processing/low_cut_filter_unittest.cc @@ -110,9 +110,9 @@ TEST(LowCutFilterBitExactnessTest, Mono8kHzInitial) { 0.349705f, 0.173054f, 0.016750f, -0.415957f, -0.461001f, -0.557111f, 0.738711f, 0.275720f}; - const float kReference[] = {0.142277f, -0.418518f, -0.028229f, -0.102112f, - 0.141270f, 0.137791f, 0.124577f, -0.088715f, - -0.142273f, -0.125885f, 0.266640f, -0.468079f}; + const float kReference[] = {0.142273f, -0.418518f, -0.028229f, -0.102112f, + 0.141266f, 0.137787f, 0.124573f, -0.088715f, + -0.142273f, -0.125885f, 0.266663f, -0.468109f}; RunBitexactnessTest( 8000, 1, CreateVector(rtc::ArrayView(kReferenceInput)), @@ -162,9 +162,9 @@ TEST(LowCutFilterBitExactnessTest, Mono8kHzConverged) { 0.036391f, -0.408991f, 0.369330f, 0.399785f, -0.471419f, 0.551138f, -0.307569f, 0.064315f, 0.311605f, 0.041736f, 0.650943f, 0.780496f}; - const float kReference[] = {-0.173553f, -0.265778f, 0.158757f, -0.259399f, - -0.176361f, 0.192877f, 0.056825f, 
0.171453f, - 0.050752f, -0.194580f, -0.208679f, 0.153722f}; + const float kReference[] = {-0.173584f, -0.265778f, 0.158783f, -0.259430f, + -0.176361f, 0.192841f, 0.056854f, 0.171448f, + 0.050751f, -0.194580f, -0.208710f, 0.153717f}; RunBitexactnessTest( 8000, 1, CreateVector(rtc::ArrayView(kReferenceInput)), @@ -202,10 +202,10 @@ TEST(LowCutFilterBitExactnessTest, Stereo8kHzInitial) { 0.768778f, -0.122021f, 0.563445f, -0.703070f}; const float kReference[] = { - 0.733329f, 0.084109f, 0.072695f, 0.566210f, -1.000000f, 0.652120f, - -0.297424f, -0.964020f, 0.438551f, -0.698364f, -0.654449f, 0.266243f, - 0.454115f, 0.684774f, -0.586823f, -0.747345f, -0.503021f, -0.222961f, - -0.314972f, 0.907224f, -0.796265f, 0.284280f, -0.533417f, 0.773980f}; + 0.733307f, 0.084106f, 0.072693f, 0.566193f, -1.000000f, 0.652130f, + -0.297424f, -0.964020f, 0.438568f, -0.698364f, -0.654449f, 0.266205f, + 0.454102f, 0.684784f, -0.586823f, -0.747375f, -0.503021f, -0.222961f, + -0.314972f, 0.907196f, -0.796295f, 0.284271f, -0.533417f, 0.773956f}; RunBitexactnessTest( 8000, 2, CreateVector(rtc::ArrayView(kReferenceInput)), @@ -296,10 +296,10 @@ TEST(LowCutFilterBitExactnessTest, Stereo8kHzConverged) { -0.034654f, -0.743470f, -0.494178f, 0.767923f, -0.607446f, -0.757293f}; const float kReference[] = { - -0.544495f, 0.264199f, 0.647938f, 0.565569f, 0.496231f, 0.271340f, - 0.519944f, 0.318094f, -0.792999f, 0.733421f, -1.000000f, 0.103977f, - 0.981719f, 0.314859f, 0.476882f, 0.514267f, -0.196381f, -0.425781f, - -0.783081f, 0.101108f, 0.419782f, -0.291718f, 0.183355f, -0.332489f}; + -0.544525f, 0.264221f, 0.647919f, 0.565552f, 0.496185f, 0.271332f, + 0.519958f, 0.318085f, -0.792999f, 0.733429f, -1.000000f, 0.103973f, + 0.981720f, 0.314850f, 0.476837f, 0.514252f, -0.196411f, -0.425812f, + -0.783112f, 0.101105f, 0.419739f, -0.291718f, 0.183350f, -0.332489f}; RunBitexactnessTest( 8000, 2, CreateVector(rtc::ArrayView(kReferenceInput)), @@ -669,10 +669,10 @@ TEST(LowCutFilterBitExactnessTest, 
Stereo16kHzConverged) { 0.462557f, 0.807713f, -0.095536f, -0.858625f, -0.517444f, 0.463730f}; const float kReference[] = { - -0.816528f, 0.085421f, 0.739647f, -0.922089f, 0.669301f, -0.048187f, - -0.290039f, -0.818085f, -0.596008f, -0.177826f, -0.002197f, -0.350647f, - -0.064301f, 0.337291f, -0.621765f, 0.115909f, 0.311899f, -0.915924f, - 0.020478f, 0.836055f, -0.714020f, -0.037140f, 0.391125f, -0.340118f}; + -0.816559f, 0.085419f, 0.739655f, -0.922089f, 0.669312f, -0.048218f, + -0.290039f, -0.818085f, -0.596039f, -0.177856f, -0.002197f, -0.350647f, + -0.064331f, 0.337280f, -0.621765f, 0.115906f, 0.311890f, -0.915924f, + 0.020477f, 0.836029f, -0.714020f, -0.037140f, 0.391113f, -0.340118f}; RunBitexactnessTest( 16000, 2, CreateVector(rtc::ArrayView(kReferenceInput)), diff --git a/modules/audio_processing/noise_suppression_unittest.cc b/modules/audio_processing/noise_suppression_unittest.cc index b2074859b0..29aae8b90f 100644 --- a/modules/audio_processing/noise_suppression_unittest.cc +++ b/modules/audio_processing/noise_suppression_unittest.cc @@ -106,10 +106,10 @@ TEST(NoiseSuppresionBitExactnessTest, Mono8kHzLow) { 7677.521973f}; const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f}; #else - const float kSpeechProbabilityReference = 0.73421317f; - const float kNoiseEstimateReference[] = {1175.266113f, 3289.305908f, - 7532.991211f}; - const float kOutputReference[] = {0.003263f, 0.004402f, 0.004537f}; + const float kSpeechProbabilityReference = 0.73650402f; + const float kNoiseEstimateReference[] = {1176.856812f, 3287.490967f, + 7525.964844f}; + const float kOutputReference[] = {0.003306f, 0.004442f, 0.004574f}; #endif RunBitexactnessTest(8000, 1, NoiseSuppression::Level::kLow, @@ -129,9 +129,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzLow) { 14367.499023f}; const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f}; #else - const float kSpeechProbabilityReference = 0.71672988f; - const float kNoiseEstimateReference[] = {2151.313965f, 
6509.765137f, - 15658.848633f}; + const float kSpeechProbabilityReference = 0.71743423f; + const float kNoiseEstimateReference[] = {2179.853027f, 6507.995117f, + 15652.758789f}; const float kOutputReference[] = {0.003574f, 0.004494f, 0.004499f}; #endif @@ -166,18 +166,18 @@ TEST(NoiseSuppresionBitExactnessTest, Mono32kHzLow) { TEST(NoiseSuppresionBitExactnessTest, Mono48kHzLow) { #if defined(WEBRTC_ARCH_ARM64) const float kSpeechProbabilityReference = -4.0f; - const float kNoiseEstimateReference[] = {2564.605713f, 6213.656250f, - 13372.284180f}; - const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f}; + const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f, + 14647.632812f}; + const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f}; #elif defined(WEBRTC_ARCH_ARM) const float kSpeechProbabilityReference = -4.0f; - const float kNoiseEstimateReference[] = {2564.605713f, 6213.656250f, - 13372.284180f}; - const float kOutputReference[] = {-0.013185f, -0.012769f, -0.012023f}; + const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f, + 14647.632812f}; + const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f}; #else - const float kSpeechProbabilityReference = 0.70645678f; - const float kNoiseEstimateReference[] = {2168.783203f, 6902.895508f, - 13190.677734f}; + const float kSpeechProbabilityReference = 0.70737761f; + const float kNoiseEstimateReference[] = {2187.394043f, 6913.306641f, + 13182.945312f}; const float kOutputReference[] = {-0.013062f, -0.012657f, -0.011934f}; #endif @@ -200,9 +200,9 @@ TEST(NoiseSuppresionBitExactnessTest, Stereo16kHzLow) { const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f, -0.002472f, 0.000916f, -0.003235f}; #else - const float kSpeechProbabilityReference = 0.67230678f; - const float kNoiseEstimateReference[] = {9771.250000f, 11329.377930f, - 10503.052734f}; + const float kSpeechProbabilityReference = 0.67285913f; + const float kNoiseEstimateReference[] 
= {9753.257812f, 11515.603516f, + 10503.309570f}; const float kOutputReference[] = {-0.011459f, -0.008110f, -0.012728f, -0.002399f, 0.001018f, -0.003189f}; #endif @@ -224,9 +224,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzModerate) { 16726.523438f}; const float kOutputReference[] = {0.004669f, 0.005615f, 0.005585f}; #else - const float kSpeechProbabilityReference = 0.70897013f; - const float kNoiseEstimateReference[] = {2171.490723f, 6553.567871f, - 15626.562500f}; + const float kSpeechProbabilityReference = 0.70916927f; + const float kNoiseEstimateReference[] = {2172.830566f, 6552.661133f, + 15624.025391f}; const float kOutputReference[] = {0.004513f, 0.005590f, 0.005614f}; #endif @@ -247,9 +247,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzHigh) { 16920.960938f}; const float kOutputReference[] = {0.004547f, 0.005432f, 0.005402f}; #else - const float kSpeechProbabilityReference = 0.70106733f; - const float kNoiseEstimateReference[] = {2224.968506f, 6712.025879f, - 15785.087891f}; + const float kSpeechProbabilityReference = 0.70104003f; + const float kNoiseEstimateReference[] = {2225.081055f, 6711.529785f, + 15785.949219}; const float kOutputReference[] = {0.004394f, 0.005406f, 0.005416f}; #endif @@ -270,9 +270,9 @@ TEST(NoiseSuppresionBitExactnessTest, Mono16kHzVeryHigh) { 14365.744141f}; const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f}; #else - const float kSpeechProbabilityReference = 0.70281971f; - const float kNoiseEstimateReference[] = {2254.347900f, 6723.699707f, - 15771.625977f}; + const float kSpeechProbabilityReference = 0.70290041f; + const float kNoiseEstimateReference[] = {2254.921875f, 6723.172852f, + 15770.559570f}; const float kOutputReference[] = {0.004321f, 0.005247f, 0.005263f}; #endif