Add decibel conversion functions to //common_audio:common_audio

The functions replace some existing code and will be used in the
new AutomaticGainController.

Bug: webrtc:7949
Change-Id: I9a32132d4a4699a507b8548a2eac10972a2f3fd6
Reviewed-on: https://webrtc-review.googlesource.com/53141
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22045}
This commit is contained in:
Alex Loiko
2018-02-16 10:42:48 +01:00
committed by Commit Bot
parent e4be6dad65
commit 6df09f6f6a
8 changed files with 121 additions and 52 deletions

View File

@ -9,6 +9,8 @@
*/ */
#include "common_audio/include/audio_util.h" #include "common_audio/include/audio_util.h"
#include "rtc_base/arraysize.h"
#include "test/gmock.h" #include "test/gmock.h"
#include "test/gtest.h" #include "test/gtest.h"
#include "typedefs.h" // NOLINT(build/include) #include "typedefs.h" // NOLINT(build/include)
@ -26,84 +28,121 @@ void ExpectArraysEq(const int16_t* ref, const int16_t* test, size_t length) {
void ExpectArraysEq(const float* ref, const float* test, size_t length) { void ExpectArraysEq(const float* ref, const float* test, size_t length) {
for (size_t i = 0; i < length; ++i) { for (size_t i = 0; i < length; ++i) {
EXPECT_FLOAT_EQ(ref[i], test[i]); EXPECT_NEAR(ref[i], test[i], 0.01f);
} }
} }
TEST(AudioUtilTest, FloatToS16) { TEST(AudioUtilTest, FloatToS16) {
const size_t kSize = 9; static constexpr float kInput[] = {0.f,
const float kInput[kSize] = {0.f, 0.4f / 32767.f,
0.4f / 32767.f, 0.6f / 32767.f,
0.6f / 32767.f, -0.4f / 32768.f,
-0.4f / 32768.f, -0.6f / 32768.f,
-0.6f / 32768.f, 1.f,
1.f, -1.f,
-1.f, 1.1f,
1.1f, -1.1f};
-1.1f}; static constexpr int16_t kReference[] = {0, 0, 1, 0, -1,
const int16_t kReference[kSize] = {0, 0, 1, 0, -1, 32767, -32768, 32767, -32768};
32767, -32768, 32767, -32768}; static constexpr size_t kSize = arraysize(kInput);
static_assert(arraysize(kReference) == kSize, "");
int16_t output[kSize]; int16_t output[kSize];
FloatToS16(kInput, kSize, output); FloatToS16(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize); ExpectArraysEq(kReference, output, kSize);
} }
TEST(AudioUtilTest, S16ToFloat) { TEST(AudioUtilTest, S16ToFloat) {
const size_t kSize = 7; static constexpr int16_t kInput[] = {0, 1, -1, 16384, -16384, 32767, -32768};
const int16_t kInput[kSize] = {0, 1, -1, 16384, -16384, 32767, -32768}; static constexpr float kReference[] = {
const float kReference[kSize] = {
0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f}; 0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f};
static constexpr size_t kSize = arraysize(kInput);
static_assert(arraysize(kReference) == kSize, "");
float output[kSize]; float output[kSize];
S16ToFloat(kInput, kSize, output); S16ToFloat(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize); ExpectArraysEq(kReference, output, kSize);
} }
TEST(AudioUtilTest, FloatS16ToS16) { TEST(AudioUtilTest, FloatS16ToS16) {
const size_t kSize = 7; static constexpr float kInput[] = {0.f, 0.4f, 0.5f, -0.4f,
const float kInput[kSize] = {0.f, 0.4f, 0.5f, -0.4f, -0.5f, 32768.f, -32769.f};
-0.5f, 32768.f, -32769.f}; static constexpr int16_t kReference[] = {0, 0, 1, 0, -1, 32767, -32768};
const int16_t kReference[kSize] = {0, 0, 1, 0, -1, 32767, -32768}; static constexpr size_t kSize = arraysize(kInput);
static_assert(arraysize(kReference) == kSize, "");
int16_t output[kSize]; int16_t output[kSize];
FloatS16ToS16(kInput, kSize, output); FloatS16ToS16(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize); ExpectArraysEq(kReference, output, kSize);
} }
TEST(AudioUtilTest, FloatToFloatS16) { TEST(AudioUtilTest, FloatToFloatS16) {
const size_t kSize = 9; static constexpr float kInput[] = {0.f,
const float kInput[kSize] = {0.f, 0.4f / 32767.f,
0.4f / 32767.f, 0.6f / 32767.f,
0.6f / 32767.f, -0.4f / 32768.f,
-0.4f / 32768.f, -0.6f / 32768.f,
-0.6f / 32768.f, 1.f,
1.f, -1.f,
-1.f, 1.1f,
1.1f, -1.1f};
-1.1f}; static constexpr float kReference[] = {
const float kReference[kSize] = {0.f, 0.4f, 0.6f, -0.4f, -0.6f, 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
32767.f, -32768.f, 36043.7f, -36044.8f}; static constexpr size_t kSize = arraysize(kInput);
static_assert(arraysize(kReference) == kSize, "");
float output[kSize]; float output[kSize];
FloatToFloatS16(kInput, kSize, output); FloatToFloatS16(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize); ExpectArraysEq(kReference, output, kSize);
} }
TEST(AudioUtilTest, FloatS16ToFloat) { TEST(AudioUtilTest, FloatS16ToFloat) {
const size_t kSize = 9; static constexpr float kInput[] = {
const float kInput[kSize] = {0.f, 0.4f, 0.6f, -0.4f, -0.6f, 0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
32767.f, -32768.f, 36043.7f, -36044.8f}; static constexpr float kReference[] = {0.f,
const float kReference[kSize] = {0.f, 0.4f / 32767.f,
0.4f / 32767.f, 0.6f / 32767.f,
0.6f / 32767.f, -0.4f / 32768.f,
-0.4f / 32768.f, -0.6f / 32768.f,
-0.6f / 32768.f, 1.f,
1.f, -1.f,
-1.f, 1.1f,
1.1f, -1.1f};
-1.1f}; static constexpr size_t kSize = arraysize(kInput);
static_assert(arraysize(kReference) == kSize, "");
float output[kSize]; float output[kSize];
FloatS16ToFloat(kInput, kSize, output); FloatS16ToFloat(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize); ExpectArraysEq(kReference, output, kSize);
} }
// Feeds a table of dBFS levels through DbfsToFloatS16 one value at a time and
// compares the results with precomputed FloatS16 amplitudes.
TEST(AudioUtilTest, DbfsToFloatS16) {
  static constexpr float kLevelsDbfs[] = {-90.f, -70.f, -30.f, -20.f, -10.f,
                                          -5.f, -1.f, 0.f, 1.f};
  static constexpr float kExpected[] = {
      1.036215186f, 10.36215115f, 1036.215088f, 3276.800049f, 10362.15137f,
      18426.80078f, 29204.51172f, 32768.f, 36766.30078f};
  static constexpr size_t kCount = arraysize(kLevelsDbfs);
  // Both tables must describe the same number of samples.
  static_assert(arraysize(kExpected) == kCount, "");

  float actual[kCount];
  size_t index = 0;
  for (const float level_dbfs : kLevelsDbfs) {
    actual[index++] = DbfsToFloatS16(level_dbfs);
  }
  ExpectArraysEq(kExpected, actual, kCount);
}
// Feeds a table of FloatS16 amplitudes through FloatS16ToDbfs one value at a
// time and compares the results with the expected dBFS levels.
TEST(AudioUtilTest, FloatS16ToDbfs) {
  static constexpr float kAmplitudes[] = {1.036215143f, 10.36215143f, 1036.215143f,
                                          3276.8f, 10362.151436f, 18426.800543f,
                                          29204.51074f, 32768.0f, 36766.30071f};
  static constexpr float kExpectedDbfs[] = {
      -90.f, -70.f, -30.f, -20.f, -10.f, -5.f, -1.f, 0.f, 0.9999923706f};
  static constexpr size_t kCount = arraysize(kAmplitudes);
  // Both tables must describe the same number of samples.
  static_assert(arraysize(kExpectedDbfs) == kCount, "");

  float actual[kCount];
  size_t index = 0;
  for (const float amplitude : kAmplitudes) {
    actual[index++] = FloatS16ToDbfs(amplitude);
  }
  ExpectArraysEq(kExpectedDbfs, actual, kCount);
}
TEST(AudioUtilTest, InterleavingStereo) { TEST(AudioUtilTest, InterleavingStereo) {
const int16_t kInterleaved[] = {2, 3, 4, 9, 8, 27, 16, 81}; const int16_t kInterleaved[] = {2, 3, 4, 9, 8, 27, 16, 81};
const size_t kSamplesPerChannel = 4; const size_t kSamplesPerChannel = 4;

View File

@ -12,8 +12,9 @@
#define COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ #define COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
#include <algorithm> #include <algorithm>
#include <limits> #include <cmath>
#include <cstring> #include <cstring>
#include <limits>
#include "rtc_base/checks.h" #include "rtc_base/checks.h"
#include "typedefs.h" // NOLINT(build/include) #include "typedefs.h" // NOLINT(build/include)
@ -26,6 +27,10 @@ typedef std::numeric_limits<int16_t> limits_int16;
// S16: int16_t [-32768, 32767] // S16: int16_t [-32768, 32767]
// Float: float [-1.0, 1.0] // Float: float [-1.0, 1.0]
// FloatS16: float [-32768.0, 32767.0] // FloatS16: float [-32768.0, 32767.0]
// Dbfs: float [-20.0*log10(32768), 0] = [-90.3, 0]
// The ratio conversion functions use this naming convention:
// Ratio: float (0, +inf)
// Db: float (-inf, +inf)
static inline int16_t FloatToS16(float v) { static inline int16_t FloatToS16(float v) {
if (v > 0) if (v > 0)
return v >= 1 ? limits_int16::max() return v >= 1 ? limits_int16::max()
@ -65,6 +70,27 @@ void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
void FloatToFloatS16(const float* src, size_t size, float* dest); void FloatToFloatS16(const float* src, size_t size, float* dest);
void FloatS16ToFloat(const float* src, size_t size, float* dest); void FloatS16ToFloat(const float* src, size_t size, float* dest);
// Converts a value in decibels to the equivalent linear ratio, computed as
// 10^(v / 20).
inline float DbToRatio(float v) {
  const float exponent = v / 20.0f;
  return std::pow(10.0f, exponent);
}
// Converts a level in dBFS to a FloatS16 amplitude by scaling the linear
// ratio for |v| with the magnitude of the most negative int16_t value.
inline float DbfsToFloatS16(float v) {
  // -limits_int16::min() is evaluated after integer promotion, so it does not
  // overflow int16_t.
  static constexpr float kFullScaleFloatS16 = -limits_int16::min();
  const float ratio = DbToRatio(v);
  return ratio * kFullScaleFloatS16;
}
// Converts a non-negative FloatS16 amplitude to a level in dBFS. Amplitudes
// of 1.0 or less all map to the same floor value, kMinDbfs.
inline float FloatS16ToDbfs(float v) {
  RTC_DCHECK_GE(v, 0);
  // Floor of the scale: -20.0 * log10(-limits_int16::min()).
  static constexpr float kMinDbfs = -90.30899869919436f;
  // For larger amplitudes the result equals
  // 20 * log10(v / (-limits_int16::min())), written as a sum of logarithms.
  return (v <= 1.0f) ? kMinDbfs : 20.0f * std::log10(v) + kMinDbfs;
}
// Copy audio from |src| channels to |dest| channels unless |src| and |dest| // Copy audio from |src| channels to |dest| channels unless |src| and |dest|
// point to the same address. |src| and |dest| must have the same number of // point to the same address. |src| and |dest| must have the same number of
// channels, and there must be sufficient space allocated in |dest|. // channels, and there must be sufficient space allocated in |dest|.

View File

@ -14,6 +14,7 @@
#include <math.h> #include <math.h>
#include "common_audio/include/audio_util.h"
#include "modules/audio_coding/neteq/audio_multi_vector.h" #include "modules/audio_coding/neteq/audio_multi_vector.h"
#include "test/gtest.h" #include "test/gtest.h"
@ -79,7 +80,7 @@ class DtmfToneGeneratorTest : public ::testing::Test {
EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal)); EXPECT_EQ(kNumSamples, tone_gen_.Generate(kNumSamples, &signal));
for (int n = 0; n < kNumSamples; ++n) { for (int n = 0; n < kNumSamples; ++n) {
double attenuation_factor = double attenuation_factor =
pow(10, -static_cast<double>(attenuation) / 20); DbToRatio(-static_cast<float>(attenuation));
// Verify that the attenuation is correct. // Verify that the attenuation is correct.
for (int channel = 0; channel < channels; ++channel) { for (int channel = 0; channel < channels; ++channel) {
EXPECT_NEAR(attenuation_factor * ref_signal[channel][n], EXPECT_NEAR(attenuation_factor * ref_signal[channel][n],

View File

@ -12,6 +12,7 @@
#include <cmath> #include <cmath>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/audio_buffer.h" #include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h" #include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomicops.h" #include "rtc_base/atomicops.h"
@ -56,7 +57,7 @@ void GainController2::Process(AudioBuffer* audio) {
void GainController2::ApplyConfig( void GainController2::ApplyConfig(
const AudioProcessing::Config::GainController2& config) { const AudioProcessing::Config::GainController2& config) {
RTC_DCHECK(Validate(config)); RTC_DCHECK(Validate(config));
fixed_gain_ = std::pow(10.f, config.fixed_gain_db / 20.f); fixed_gain_ = DbToRatio(config.fixed_gain_db);
} }
bool GainController2::Validate( bool GainController2::Validate(

View File

@ -1262,7 +1262,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
int gain_db = public_submodules_->gain_control->is_enabled() ? int gain_db = public_submodules_->gain_control->is_enabled() ?
public_submodules_->gain_control->compression_gain_db() : public_submodules_->gain_control->compression_gain_db() :
0; 0;
float gain = std::pow(10.f, gain_db / 20.f); float gain = DbToRatio(gain_db);
gain *= capture_nonlocked_.level_controller_enabled ? gain *= capture_nonlocked_.level_controller_enabled ?
private_submodules_->level_controller->GetLastGain() : private_submodules_->level_controller->GetLastGain() :
1.f; 1.f;

View File

@ -12,6 +12,7 @@
#include <algorithm> #include <algorithm>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/audio_buffer.h" #include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h" #include "modules/audio_processing/logging/apm_data_dumper.h"
@ -32,8 +33,7 @@ void PeakLevelEstimator::Initialize(float initial_peak_level_dbfs) {
RTC_DCHECK_LE(-100.f, initial_peak_level_dbfs); RTC_DCHECK_LE(-100.f, initial_peak_level_dbfs);
RTC_DCHECK_GE(0.f, initial_peak_level_dbfs); RTC_DCHECK_GE(0.f, initial_peak_level_dbfs);
peak_level_ = std::pow(10.f, initial_peak_level_dbfs / 20.f) * 32768.f; peak_level_ = std::max(DbfsToFloatS16(initial_peak_level_dbfs), kMinLevel);
peak_level_ = std::max(peak_level_, kMinLevel);
hold_counter_ = 0; hold_counter_ = 0;
initialization_phase_ = true; initialization_phase_ = true;

View File

@ -18,6 +18,7 @@
#include <vector> #include <vector>
#include "api/array_view.h" #include "api/array_view.h"
#include "common_audio/include/audio_util.h"
#include "common_audio/wav_file.h" #include "common_audio/wav_file.h"
#include "modules/audio_processing/test/conversational_speech/wavreader_interface.h" #include "modules/audio_processing/test/conversational_speech/wavreader_interface.h"
#include "rtc_base/constructormagic.h" #include "rtc_base/constructormagic.h"
@ -165,7 +166,7 @@ void PadRightWrite(WavWriter* wav_writer, size_t pad_samples) {
void ScaleSignal(rtc::ArrayView<const int16_t> source_samples, void ScaleSignal(rtc::ArrayView<const int16_t> source_samples,
int gain, int gain,
rtc::ArrayView<int16_t> output_samples) { rtc::ArrayView<int16_t> output_samples) {
const float gain_linear = pow(10.0, gain / 20.0); const float gain_linear = DbToRatio(gain);
RTC_DCHECK_EQ(source_samples.size(), output_samples.size()); RTC_DCHECK_EQ(source_samples.size(), output_samples.size());
std::transform(source_samples.begin(), source_samples.end(), std::transform(source_samples.begin(), source_samples.end(),
output_samples.begin(), [gain_linear](int16_t x) -> int16_t { output_samples.begin(), [gain_linear](int16_t x) -> int16_t {

View File

@ -11,6 +11,7 @@
#include <cmath> #include <cmath>
#include <fstream> #include <fstream>
#include "common_audio/include/audio_util.h"
#include "common_audio/wav_file.h" #include "common_audio/wav_file.h"
#include "rtc_base/flags.h" #include "rtc_base/flags.h"
#include "rtc_base/logging.h" #include "rtc_base/logging.h"
@ -23,7 +24,7 @@ constexpr int kMaxSampleRate = 48000;
constexpr uint8_t kMaxFrameLenMs = 30; constexpr uint8_t kMaxFrameLenMs = 30;
constexpr size_t kMaxFrameLen = kMaxFrameLenMs * kMaxSampleRate / 1000; constexpr size_t kMaxFrameLen = kMaxFrameLenMs * kMaxSampleRate / 1000;
const double kOneDbReduction = std::pow(10.0, -1.0 / 20.0); const double kOneDbReduction = DbToRatio(-1.0);
DEFINE_string(i, "", "Input wav file"); DEFINE_string(i, "", "Input wav file");
DEFINE_string(oc, "", "Config output file"); DEFINE_string(oc, "", "Config output file");