From 7bd5f253bc09fe18bc4190fe88dbba9792ce32ae Mon Sep 17 00:00:00 2001 From: aluebs Date: Tue, 21 Jun 2016 11:30:25 -0700 Subject: [PATCH] Fine tune the IntelligibilityEnhancer Label less chunks as speech, adapt slower and be more conservative with the maximum gain it can apply. Review-Url: https://codereview.webrtc.org/2087623003 Cr-Commit-Position: refs/heads/master@{#13242} --- .../intelligibility/intelligibility_enhancer.cc | 8 ++++---- .../intelligibility/intelligibility_enhancer_unittest.cc | 2 +- .../intelligibility/intelligibility_utils.cc | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc index a10d10a75e..243bd12229 100644 --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc @@ -31,11 +31,11 @@ const float kClipFreqKhz = 0.2f; const float kKbdAlpha = 1.5f; const float kLambdaBot = -1.f; // Extreme values in bisection const float kLambdaTop = -1e-5f; // search for lamda. -const float kVoiceProbabilityThreshold = 0.02f; +const float kVoiceProbabilityThreshold = 0.5f; // Number of chunks after voice activity which is still considered speech. -const size_t kSpeechOffsetDelay = 80; -const float kDecayRate = 0.994f; // Power estimation decay rate. -const float kMaxRelativeGainChange = 0.006f; +const size_t kSpeechOffsetDelay = 10; +const float kDecayRate = 0.995f; // Power estimation decay rate. +const float kMaxRelativeGainChange = 0.005f; const float kRho = 0.0004f; // Default production and interpretation SNR. 
const float kPowerNormalizationFactor = 1.f / (1 << 30); const float kMaxActiveSNR = 128.f; // 21dB diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc index 30035ab16e..852357e2da 100644 --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc @@ -409,7 +409,7 @@ TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) { TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) { const int kGainDB = 6; const float kGainFactor = std::pow(10.f, kGainDB / 20.f); - const float kTolerance = 0.003f; + const float kTolerance = 0.007f; std::vector<float> noise(kNumNoiseBins); std::vector<float> noise_psd(kNumNoiseBins); std::generate(noise.begin(), noise.end(), float_rand); diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc index 3675f66caf..dfa912b31f 100644 --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc @@ -23,7 +23,7 @@ namespace intelligibility { namespace { const float kMinFactor = 0.01f; -const float kMaxFactor = 1000.f; +const float kMaxFactor = 100.f; // Return |current| changed towards |target|, with the relative change being at // most |limit|.