Use VAD to get a better speech power estimation in the IntelligibilityEnhancer

R=henrik.lundin@webrtc.org, turaj@webrtc.org

Review URL: https://codereview.webrtc.org/1693823004 .

Cr-Commit-Position: refs/heads/master@{#11713}
2016-02-22 15:57:38 -08:00
parent 67b81f92f4
commit 18fcbcf48c
9 changed files with 330 additions and 306 deletions
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@ -1184,8 +1184,7 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
 }

 bool AudioProcessingImpl::is_rev_processed() const {
-  return constants_.intelligibility_enabled &&
-         public_submodules_->intelligibility_enhancer->active();
+  return constants_.intelligibility_enabled;
 }

 bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
@ -1236,12 +1235,9 @@ void AudioProcessingImpl::InitializeBeamformer() {

 void AudioProcessingImpl::InitializeIntelligibility() {
  if (constants_.intelligibility_enabled) {
-    IntelligibilityEnhancer::Config config;
-    config.sample_rate_hz = capture_nonlocked_.split_rate;
-    config.num_capture_channels = capture_.capture_audio->num_channels();
-    config.num_render_channels = render_.render_audio->num_channels();
    public_submodules_->intelligibility_enhancer.reset(
-        new IntelligibilityEnhancer(config));
+        new IntelligibilityEnhancer(capture_nonlocked_.split_rate,
+                                    render_.render_audio->num_channels()));
  }
 }

--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@ -27,11 +27,16 @@ namespace {
 const size_t kErbResolution = 2;
 const int kWindowSizeMs = 16;
 const int kChunkSizeMs = 10;  // Size provided by APM.
-const float kClipFreq = 200.0f;
-const float kConfigRho = 0.02f;  // Default production and interpretation SNR.
+const float kClipFreqKhz = 0.2f;
 const float kKbdAlpha = 1.5f;
 const float kLambdaBot = -1.0f;      // Extreme values in bisection
 const float kLambdaTop = -10e-18f;  // search for lambda.
+const float kVoiceProbabilityThreshold = 0.02f;
+// Number of chunks after voice activity that are still considered speech.
+const size_t kSpeechOffsetDelay = 80;
+const float kDecayRate = 0.98f;              // Power estimation decay rate.
+const float kMaxRelativeGainChange = 0.04f;  // Maximum relative change in gain.
+const float kRho = 0.0004f;  // Default production and interpretation SNR.

 // Returns dot product of vectors |a| and |b| with size |length|.
 float DotProduct(const float* a, const float* b, size_t length) {
@ -72,61 +77,46 @@ void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
  }
 }

-IntelligibilityEnhancer::IntelligibilityEnhancer()
-    : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {
-}
-
-IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
+IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
+                                                 size_t num_render_channels)
    : freqs_(RealFourier::ComplexLength(
-          RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
-      window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),
-      chunk_length_(
-          static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)),
-      bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
-      sample_rate_hz_(config.sample_rate_hz),
-      erb_resolution_(kErbResolution),
-      num_capture_channels_(config.num_capture_channels),
-      num_render_channels_(config.num_render_channels),
-      analysis_rate_(config.analysis_rate),
-      active_(true),
-      clear_power_(freqs_, config.decay_rate),
-      noise_power_(freqs_, 0.f),
+          RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
+      chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
+      bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
+      sample_rate_hz_(sample_rate_hz),
+      num_render_channels_(num_render_channels),
+      clear_power_estimator_(freqs_, kDecayRate),
+      noise_power_estimator_(
+          new intelligibility::PowerEstimator<float>(freqs_, kDecayRate)),
      filtered_clear_pow_(new float[bank_size_]),
      filtered_noise_pow_(new float[bank_size_]),
      center_freqs_(new float[bank_size_]),
      render_filter_bank_(CreateErbBank(freqs_)),
-      rho_(new float[bank_size_]),
      gains_eq_(new float[bank_size_]),
-      gain_applier_(freqs_, config.gain_change_limit),
+      gain_applier_(freqs_, kMaxRelativeGainChange),
      temp_render_out_buffer_(chunk_length_, num_render_channels_),
-      kbd_window_(new float[window_size_]),
      render_callback_(this),
-      block_count_(0),
-      analysis_step_(0) {
-  RTC_DCHECK_LE(config.rho, 1.0f);
+      audio_s16_(chunk_length_),
+      chunks_since_voice_(kSpeechOffsetDelay),
+      is_speech_(false) {
+  RTC_DCHECK_LE(kRho, 1.f);

-  memset(filtered_clear_pow_.get(),
-         0,
+  memset(filtered_clear_pow_.get(), 0,
         bank_size_ * sizeof(filtered_clear_pow_[0]));
-  memset(filtered_noise_pow_.get(),
-         0,
+  memset(filtered_noise_pow_.get(), 0,
         bank_size_ * sizeof(filtered_noise_pow_[0]));

-  // Assumes all rho equal.
-  for (size_t i = 0; i < bank_size_; ++i) {
-    rho_[i] = config.rho * config.rho;
-  }
+  const size_t erb_index = static_cast<size_t>(
+      ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) +
+            43.f));
+  start_freq_ = std::max(static_cast<size_t>(1), erb_index * kErbResolution);

-  float freqs_khz = kClipFreq / 1000.0f;
-  size_t erb_index = static_cast<size_t>(ceilf(
-      11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
-  start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_);
-
-  WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
-                                       kbd_window_.get());
+  size_t window_size = static_cast<size_t>(1 << RealFourier::FftOrder(freqs_));
+  std::vector<float> kbd_window(window_size);
+  WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, &kbd_window[0]);
  render_mangler_.reset(new LappedTransform(
-      num_render_channels_, num_render_channels_, chunk_length_,
-      kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));
+      num_render_channels_, num_render_channels_, chunk_length_, &kbd_window[0],
+      window_size, window_size / 2, &render_callback_));
 }

 void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
@ -134,13 +124,10 @@ void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
  if (capture_filter_bank_.size() != bank_size_ ||
      capture_filter_bank_[0].size() != noise.size()) {
    capture_filter_bank_ = CreateErbBank(noise.size());
+    noise_power_estimator_.reset(
+        new intelligibility::PowerEstimator<float>(noise.size(), kDecayRate));
  }
-  if (noise.size() != noise_power_.size()) {
-    noise_power_.resize(noise.size());
-  }
-  for (size_t i = 0; i < noise.size(); ++i) {
-    noise_power_[i] = noise[i] * noise[i];
-  }
+  noise_power_estimator_->Step(&noise[0]);
 }

 void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
@ -148,54 +135,29 @@ void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
                                                 size_t num_channels) {
  RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
  RTC_CHECK_EQ(num_render_channels_, num_channels);
-
-  if (active_) {
+  is_speech_ = IsSpeech(audio[0]);
  render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels());
-  }
-
-  if (active_) {
  for (size_t i = 0; i < num_render_channels_; ++i) {
    memcpy(audio[i], temp_render_out_buffer_.channels()[i],
           chunk_length_ * sizeof(**audio));
  }
-  }
 }

 void IntelligibilityEnhancer::ProcessClearBlock(
    const std::complex<float>* in_block,
    std::complex<float>* out_block) {
-  if (block_count_ < 2) {
-    memset(out_block, 0, freqs_ * sizeof(*out_block));
-    ++block_count_;
-    return;
+  if (is_speech_) {
+    clear_power_estimator_.Step(in_block);
  }
-
-  // TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary.
-  if (true) {
-    clear_power_.Step(in_block);
-    if (block_count_ % analysis_rate_ == analysis_rate_ - 1) {
-      AnalyzeClearBlock();
-      ++analysis_step_;
-    }
-    ++block_count_;
-  }
-
-  if (active_) {
-    gain_applier_.Apply(in_block, out_block);
-  }
-}
-
-void IntelligibilityEnhancer::AnalyzeClearBlock() {
-  const float* clear_power = clear_power_.Power();
-  MapToErbBands(clear_power,
-                render_filter_bank_,
+  const std::vector<float>& clear_power = clear_power_estimator_.power();
+  const std::vector<float>& noise_power = noise_power_estimator_->power();
+  MapToErbBands(&clear_power[0], render_filter_bank_,
                filtered_clear_pow_.get());
-  MapToErbBands(&noise_power_[0],
-                capture_filter_bank_,
+  MapToErbBands(&noise_power[0], capture_filter_bank_,
                filtered_noise_pow_.get());
  SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
-  const float power_target = std::accumulate(
-          clear_power, clear_power + freqs_, 0.f);
+  const float power_target =
+      std::accumulate(&clear_power[0], &clear_power[0] + freqs_, 0.f);
  const float power_top =
      DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
  SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
@ -205,6 +167,7 @@ void IntelligibilityEnhancer::AnalyzeClearBlock() {
    SolveForLambda(power_target, power_bot, power_top);
    UpdateErbGains();
  }  // Else experiencing power underflow, so do nothing.
+  gain_applier_.Apply(in_block, out_block);
 }

 void IntelligibilityEnhancer::SolveForLambda(float power_target,
@ -217,11 +180,10 @@ void IntelligibilityEnhancer::SolveForLambda(float power_target,
      1.f / (power_target + std::numeric_limits<float>::epsilon());
  float lambda_bot = kLambdaBot;
  float lambda_top = kLambdaTop;
-  float power_ratio = 2.0f;  // Ratio of achieved power to target power.
+  float power_ratio = 2.f;  // Ratio of achieved power to target power.
  int iters = 0;
-  while (std::fabs(power_ratio - 1.0f) > kConvergeThresh &&
-         iters <= kMaxIters) {
-    const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;
+  while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) {
+    const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.f;
    SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
    const float power =
        DotProduct(gains_eq_.get(), filtered_clear_pow_.get(), bank_size_);
@ -239,7 +201,7 @@ void IntelligibilityEnhancer::UpdateErbGains() {
  // (ERB gain) = filterbank' * (freq gain)
  float* gains = gain_applier_.target();
  for (size_t i = 0; i < freqs_; ++i) {
-    gains[i] = 0.0f;
+    gains[i] = 0.f;
    for (size_t j = 0; j < bank_size_; ++j) {
      gains[i] = fmaf(render_filter_bank_[j][i], gains_eq_[j], gains[i]);
    }
@ -248,9 +210,9 @@ void IntelligibilityEnhancer::UpdateErbGains() {

 size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
                                            size_t erb_resolution) {
-  float freq_limit = sample_rate / 2000.0f;
+  float freq_limit = sample_rate / 2000.f;
  size_t erb_scale = static_cast<size_t>(ceilf(
-      11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f));
+      11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.f));
  return erb_scale * erb_resolution;
 }

@ -260,7 +222,7 @@ std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
  size_t lf = 1, rf = 4;

  for (size_t i = 0; i < bank_size_; ++i) {
-    float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));
+    float abs_temp = fabsf((i + 1.f) / static_cast<float>(kErbResolution));
    center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
    center_freqs_[i] -= 14678.49f;
  }
@ -274,48 +236,43 @@ std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
  }

  for (size_t i = 1; i <= bank_size_; ++i) {
-    size_t lll, ll, rr, rrr;
    static const size_t kOne = 1;  // Avoids repeated static_cast<>s below.
-    lll = static_cast<size_t>(round(
-        center_freqs_[std::max(kOne, i - lf) - 1] * num_freqs /
-            (0.5f * sample_rate_hz_)));
-    ll = static_cast<size_t>(round(
-        center_freqs_[std::max(kOne, i) - 1] * num_freqs /
-            (0.5f * sample_rate_hz_)));
+    size_t lll =
+        static_cast<size_t>(round(center_freqs_[std::max(kOne, i - lf) - 1] *
+                                  num_freqs / (0.5f * sample_rate_hz_)));
+    size_t ll = static_cast<size_t>(round(center_freqs_[std::max(kOne, i) - 1] *
+                                   num_freqs / (0.5f * sample_rate_hz_)));
    lll = std::min(num_freqs, std::max(lll, kOne)) - 1;
    ll = std::min(num_freqs, std::max(ll, kOne)) - 1;

-    rrr = static_cast<size_t>(round(
-        center_freqs_[std::min(bank_size_, i + rf) - 1] * num_freqs /
+    size_t rrr = static_cast<size_t>(
+        round(center_freqs_[std::min(bank_size_, i + rf) - 1] * num_freqs /
              (0.5f * sample_rate_hz_)));
-    rr = static_cast<size_t>(round(
-        center_freqs_[std::min(bank_size_, i + 1) - 1] * num_freqs /
+    size_t rr = static_cast<size_t>(
+        round(center_freqs_[std::min(bank_size_, i + 1) - 1] * num_freqs /
              (0.5f * sample_rate_hz_)));
    rrr = std::min(num_freqs, std::max(rrr, kOne)) - 1;
    rr = std::min(num_freqs, std::max(rr, kOne)) - 1;

-    float step, element;
-
-    step = ll == lll ? 0.f : 1.f / (ll - lll);
-    element = 0.0f;
+    float step = ll == lll ? 0.f : 1.f / (ll - lll);
+    float element = 0.f;
    for (size_t j = lll; j <= ll; ++j) {
      filter_bank[i - 1][j] = element;
      element += step;
    }
    step = rr == rrr ? 0.f : 1.f / (rrr - rr);
-    element = 1.0f;
+    element = 1.f;
    for (size_t j = rr; j <= rrr; ++j) {
      filter_bank[i - 1][j] = element;
      element -= step;
    }
    for (size_t j = ll; j <= rr; ++j) {
-      filter_bank[i - 1][j] = 1.0f;
+      filter_bank[i - 1][j] = 1.f;
    }
  }

-  float sum;
  for (size_t i = 0; i < num_freqs; ++i) {
-    sum = 0.0f;
+    float sum = 0.f;
    for (size_t j = 0; j < bank_size_; ++j) {
      sum += filter_bank[j][i];
    }
@ -329,22 +286,22 @@ std::vector<std::vector<float>> IntelligibilityEnhancer::CreateErbBank(
 void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
                                                       size_t start_freq,
                                                       float* sols) {
-  bool quadratic = (kConfigRho < 1.0f);
+  bool quadratic = (kRho < 1.f);
  const float* pow_x0 = filtered_clear_pow_.get();
  const float* pow_n0 = filtered_noise_pow_.get();

  for (size_t n = 0; n < start_freq; ++n) {
-    sols[n] = 1.0f;
+    sols[n] = 1.f;
  }

  // Analytic solution for optimal gains. See paper for derivation.
  for (size_t n = start_freq - 1; n < bank_size_; ++n) {
    float alpha0, beta0, gamma0;
-    gamma0 = 0.5f * rho_[n] * pow_x0[n] * pow_n0[n] +
+    gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] +
             lambda * pow_x0[n] * pow_n0[n] * pow_n0[n];
-    beta0 = lambda * pow_x0[n] * (2 - rho_[n]) * pow_x0[n] * pow_n0[n];
+    beta0 = lambda * pow_x0[n] * (2 - kRho) * pow_x0[n] * pow_n0[n];
    if (quadratic) {
-      alpha0 = lambda * pow_x0[n] * (1 - rho_[n]) * pow_x0[n] * pow_x0[n];
+      alpha0 = lambda * pow_x0[n] * (1 - kRho) * pow_x0[n] * pow_x0[n];
      sols[n] =
          (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) /
          (2 * alpha0 + std::numeric_limits<float>::epsilon());
@ -355,8 +312,15 @@ void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
  }
 }

-bool IntelligibilityEnhancer::active() const {
-  return active_;
+bool IntelligibilityEnhancer::IsSpeech(const float* audio) {
+  FloatToS16(audio, chunk_length_, &audio_s16_[0]);
+  vad_.ProcessChunk(&audio_s16_[0], chunk_length_, sample_rate_hz_);
+  if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) {
+    chunks_since_voice_ = 0;
+  } else if (chunks_since_voice_ < kSpeechOffsetDelay) {
+    ++chunks_since_voice_;
+  }
+  return chunks_since_voice_ < kSpeechOffsetDelay;
 }

 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
@ -18,6 +18,7 @@
 #include "webrtc/common_audio/lapped_transform.h"
 #include "webrtc/common_audio/channel_buffer.h"
 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
+#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"

 namespace webrtc {

@ -28,28 +29,7 @@ namespace webrtc {
 // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
 class IntelligibilityEnhancer {
 public:
-  struct Config {
-    // TODO(bercic): the |decay_rate|, |analysis_rate| and |gain_limit|
-    // parameters should probably go away once fine tuning is done.
-    Config()
-        : sample_rate_hz(16000),
-          num_capture_channels(1),
-          num_render_channels(1),
-          decay_rate(0.9f),
-          analysis_rate(60),
-          gain_change_limit(0.1f),
-          rho(0.02f) {}
-    int sample_rate_hz;
-    size_t num_capture_channels;
-    size_t num_render_channels;
-    float decay_rate;
-    int analysis_rate;
-    float gain_change_limit;
-    float rho;
-  };
-
-  explicit IntelligibilityEnhancer(const Config& config);
-  IntelligibilityEnhancer();  // Initialize with default config.
+  IntelligibilityEnhancer(int sample_rate_hz, size_t num_render_channels);

  // Sets the capture noise magnitude spectrum estimate.
  void SetCaptureNoiseEstimate(std::vector<float> noise);
@ -86,9 +66,6 @@ class IntelligibilityEnhancer {
  void ProcessClearBlock(const std::complex<float>* in_block,
                         std::complex<float>* out_block);

-  // Computes and sets modified gains.
-  void AnalyzeClearBlock();
-
  // Bisection search for optimal |lambda|.
  void SolveForLambda(float power_target, float power_bot, float power_top);

@ -105,29 +82,25 @@ class IntelligibilityEnhancer {
  // Negative gains are set to 0. Stores the results in |sols|.
  void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);

+  // Returns true if the audio is speech.
+  bool IsSpeech(const float* audio);
+
  const size_t freqs_;         // Num frequencies in frequency domain.
-  const size_t window_size_;   // Window size in samples; also the block size.
  const size_t chunk_length_;  // Chunk size in samples.
  const size_t bank_size_;     // Num ERB filters.
  const int sample_rate_hz_;
-  const int erb_resolution_;
-  const size_t num_capture_channels_;
  const size_t num_render_channels_;
-  const int analysis_rate_;    // Num blocks before gains recalculated.

-  const bool active_;          // Whether render gains are being updated.
-                               // TODO(ekm): Add logic for updating |active_|.
-
-  intelligibility::PowerEstimator clear_power_;
-  std::vector<float> noise_power_;
+  intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
+  std::unique_ptr<intelligibility::PowerEstimator<float>>
+      noise_power_estimator_;
  std::unique_ptr<float[]> filtered_clear_pow_;
  std::unique_ptr<float[]> filtered_noise_pow_;
  std::unique_ptr<float[]> center_freqs_;
  std::vector<std::vector<float>> capture_filter_bank_;
  std::vector<std::vector<float>> render_filter_bank_;
  size_t start_freq_;
-  std::unique_ptr<float[]> rho_;  // Production and interpretation SNR.
-                                  // for each ERB band.
+
  std::unique_ptr<float[]> gains_eq_;  // Pre-filter modified gains.
  intelligibility::GainApplier gain_applier_;

@ -135,11 +108,13 @@ class IntelligibilityEnhancer {
  // the original input array with modifications.
  ChannelBuffer<float> temp_render_out_buffer_;

-  std::unique_ptr<float[]> kbd_window_;
  TransformCallback render_callback_;
  std::unique_ptr<LappedTransform> render_mangler_;
-  int block_count_;
-  int analysis_step_;
+
+  VoiceActivityDetector vad_;
+  std::vector<int16_t> audio_s16_;
+  size_t chunks_since_voice_;
+  bool is_speech_;
 };

 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc
@ -26,54 +26,184 @@ namespace {

 // Target output for ERB create test. Generated with matlab.
 const float kTestCenterFreqs[] = {
-    13.169f, 26.965f, 41.423f, 56.577f, 72.461f, 89.113f, 106.57f, 124.88f,
-    144.08f, 164.21f, 185.34f, 207.5f,  230.75f, 255.16f, 280.77f, 307.66f,
-    335.9f,  365.56f, 396.71f, 429.44f, 463.84f, 500.f};
-const float kTestFilterBank[][9] = {
-    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.5f},
-    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.5f}};
+    14.5213f, 29.735f,  45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f,
+    137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f,
+    309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f,
+    551.371f, 593.293f, 637.386f, 683.77f,  732.581f, 783.96f,  838.06f,
+    895.046f, 955.09f,  1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f,
+    1391.22f, 1478.83f, 1571.5f,  1669.55f, 1773.37f, 1883.37f, 2000.f};
+const float kTestFilterBank[][33] = {
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,  0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,   0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,   0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f, 0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f},
+    {0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f,
+     0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f,
+     0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,       0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f, 0.f,
+     0.f, 0.f, 0.f,       0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f, 0.f,
+     0.f, 0.f, 0.f, 0.f,       0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f,
+     0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,
+     0.f,       0.f, 0.f, 0.f, 0.f, 0.f},
+    {0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f,
+     0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f, 0.f, 0.f},
+    {0.f,       0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,       0.f,        0.f, 0.f, 0.f, 0.f, 0.333333f,
+     0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,       0.f,        0.f, 0.f},
+    {0.f,       0.f,       0.f,   0.f,       0.f,        0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,   0.f,       0.f,        0.f, 0.f, 0.f, 0.f,
+     0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f,
+     0.f,       0.f,       0.f,   0.f,       0.f,        0.f},
+    {0.f,       0.f,   0.f,       0.f,        0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f, 0.307692f, 0.333333f,
+     0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f,       0.f,
+     0.f,       0.f,   0.f,       0.f,        0.f},
+    {0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,       0.f,       0.166667f,  0.363636f, 0.333333f, 0.242424f,
+     0.190476f, 0.133333f, 0.0689655f, 0.f,       0.f,       0.f,
+     0.f,       0.f,       0.f},
+    {0.f,        0.f, 0.f, 0.f, 0.f,       0.f,      0.f,       0.f,  0.f,
+     0.f,        0.f, 0.f, 0.f, 0.f,       0.f,      0.f,       0.f,  0.f,
+     0.f,        0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f,
+     0.0714286f, 0.f, 0.f, 0.f, 0.f,       0.f},
+    {0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.f,      0.f,      0.f,       0.f,       0.f,
+     0.f,    0.f,        0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f,
+     0.125f, 0.0655738f, 0.f,      0.f,      0.f},
+    {0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.f,        0.f,       0.f,       0.f,
+     0.f,   0.f,       0.f,       0.15873f,   0.333333f, 0.344828f, 0.357143f,
+     0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f},
+    {0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.f,       0.f,
+     0.f,     0.f,       0.f,       0.f,       0.f, 0.172414f, 0.357143f,
+     0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f},
+    {0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.f,     0.f,       0.f,       0.f,       0.f, 0.f, 0.f, 0.f,
+     0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,       0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f},
+    {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,       0.f,
+     0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}};
 static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank),
              "Test filterbank badly initialized.");

 // Target output for gain solving test. Generated with matlab.
 const size_t kTestStartFreq = 12;  // Lowest integral frequency for ERBs.
-const float kTestZeroVar[] = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f,
-                              1.f, 1.f, 1.f, 0.f, 0.f, 0.f, 0.f, 0.f,
-                              0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
+const float kTestZeroVar[] = {
+    1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0};
 static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestZeroVar),
              "Power test data badly initialized.");
 const float kTestNonZeroVarLambdaTop[] = {
-    1.f,     1.f,     1.f,     1.f,     1.f,     1.f,     1.f,     1.f,
-    1.f,     1.f,     1.f,     0.f,     0.f,     0.0351f, 0.0636f, 0.0863f,
-    0.1037f, 0.1162f, 0.1236f, 0.1251f, 0.1189f, 0.0993f};
+    1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+    0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0};
 static_assert(arraysize(kTestCenterFreqs) ==
                  arraysize(kTestNonZeroVarLambdaTop),
              "Power test data badly initialized.");
 const float kMaxTestError = 0.005f;

 // Enhancer initialization parameters.
-const int kSamples = 2000;
-const int kSampleRate = 1000;
+const int kSamples = 1000;
+const int kSampleRate = 4000;
 const int kNumChannels = 1;
 const int kFragmentSize = kSampleRate / 100;

@ -83,13 +213,11 @@ class IntelligibilityEnhancerTest : public ::testing::Test {
 protected:
  IntelligibilityEnhancerTest()
      : clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) {
-    config_.sample_rate_hz = kSampleRate;
-    enh_.reset(new IntelligibilityEnhancer(config_));
+    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels));  // Rate in Hz, one render channel.
  }

  bool CheckUpdate() {
-    config_.sample_rate_hz = kSampleRate;
-    enh_.reset(new IntelligibilityEnhancer(config_));
+    enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels));
    float* clear_cursor = &clear_data_[0];
    float* noise_cursor = &noise_data_[0];
    for (int i = 0; i < kSamples; i += kFragmentSize) {
@ -105,7 +233,6 @@ class IntelligibilityEnhancerTest : public ::testing::Test {
    return false;
  }

-  IntelligibilityEnhancer::Config config_;
  std::unique_ptr<IntelligibilityEnhancer> enh_;
  std::vector<float> clear_data_;
  std::vector<float> noise_data_;
@ -115,9 +242,9 @@ class IntelligibilityEnhancerTest : public ::testing::Test {
 // For each class of generated data, tests that render stream is updated when
 // it should be.
 TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) {
-  std::fill(noise_data_.begin(), noise_data_.end(), 0.0f);
-  std::fill(orig_data_.begin(), orig_data_.end(), 0.0f);
-  std::fill(clear_data_.begin(), clear_data_.end(), 0.0f);
+  std::fill(noise_data_.begin(), noise_data_.end(), 0.f);
+  std::fill(orig_data_.begin(), orig_data_.end(), 0.f);
+  std::fill(clear_data_.begin(), clear_data_.end(), 0.f);
  EXPECT_FALSE(CheckUpdate());
  std::srand(1);
  auto float_rand = []() { return std::rand() * 2.f / RAND_MAX - 1; };
@ -148,9 +275,8 @@ TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
  std::vector<float> sols(enh_->bank_size_);
  float lambda = -0.001f;
  for (size_t i = 0; i < enh_->bank_size_; i++) {
-    enh_->filtered_clear_pow_[i] = 0.0f;
-    enh_->filtered_noise_pow_[i] = 0.0f;
-    enh_->rho_[i] = 0.02f;
+    enh_->filtered_clear_pow_[i] = 0.f;
+    enh_->filtered_noise_pow_[i] = 0.f;
  }
  enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, &sols[0]);
  for (size_t i = 0; i < enh_->bank_size_; i++) {
@ -164,7 +290,7 @@ TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) {
  for (size_t i = 0; i < enh_->bank_size_; i++) {
    EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError);
  }
-  lambda = -1.0;
+  lambda = -1.f;
  enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, &sols[0]);
  for (size_t i = 0; i < enh_->bank_size_; i++) {
    EXPECT_NEAR(kTestZeroVar[i], sols[i], kMaxTestError);
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc
@ -14,6 +14,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <algorithm>
+#include <limits>

 namespace webrtc {

@ -21,45 +22,38 @@ namespace intelligibility {

 namespace {

-// Return |current| changed towards |target|, with the change being at most
-// |limit|.
+// Return |current| moved towards |target|; the multiplicative step is clamped
+// to [1 - |limit|, 1 + |limit|], and a float epsilon is added to the result.
 float UpdateFactor(float target, float current, float limit) {
-  float delta = fabsf(target - current);
-  float sign = copysign(1.f, target - current);
-  return current + sign * fminf(delta, limit);
+  float gain = target / (current + std::numeric_limits<float>::epsilon());  // Epsilon avoids 0 / 0 when |current| is 0.
+  if (gain < 1.f - limit) {  // Requested decrease is too steep: clamp.
+    gain = 1.f - limit;
+  } else if (gain > 1.f + limit) {  // Requested increase is too steep: clamp.
+    gain = 1.f + limit;
+  }
+  return current * gain + std::numeric_limits<float>::epsilon();  // NOTE(review): presumably keeps a zero value able to grow again - confirm.
 }

 }  // namespace

-PowerEstimator::PowerEstimator(size_t num_freqs,
-                               float decay)
-    : magnitude_(new float[num_freqs]()),
-      power_(new float[num_freqs]()),
-      num_freqs_(num_freqs),
-      decay_(decay) {
-  memset(magnitude_.get(), 0, sizeof(*magnitude_.get()) * num_freqs_);
-  memset(power_.get(), 0, sizeof(*power_.get()) * num_freqs_);
-}
+template<typename T>  // T is the element type later passed to Step().
+PowerEstimator<T>::PowerEstimator(size_t num_freqs, float decay)
+    : power_(num_freqs, 0.f), decay_(decay) {}  // |num_freqs| power values, all starting at zero.

-// Compute the magnitude from the beginning, with exponential decaying of the
-// series data.
-void PowerEstimator::Step(const std::complex<float>* data) {
-  for (size_t i = 0; i < num_freqs_; ++i) {
-    magnitude_[i] = decay_ * magnitude_[i] +
-                (1.f - decay_) * std::abs(data[i]);
+template<typename T>
+void PowerEstimator<T>::Step(const T* data) {  // Exponentially smooths |data[i]|^2 into power_[i]; reads power_.size() entries.
+  for (size_t i = 0; i < power_.size(); ++i) {
+    const auto magnitude = std::abs(data[i]);  // Evaluate |data[i]| once; same product order as before.
+    power_[i] = decay_ * power_[i] + (1.f - decay_) * magnitude * magnitude;
  }
 }

-const float* PowerEstimator::Power() {
-  for (size_t i = 0; i < num_freqs_; ++i) {
-    power_[i] = magnitude_[i] * magnitude_[i];
-  }
-  return &power_[0];
-}
+template class PowerEstimator<float>;  // Explicit instantiation: member definitions live in this .cc file.
+template class PowerEstimator<std::complex<float>>;  // Explicit instantiation for complex input.

-GainApplier::GainApplier(size_t freqs, float change_limit)
+GainApplier::GainApplier(size_t freqs, float relative_change_limit)
    : num_freqs_(freqs),
-      change_limit_(change_limit),
+      relative_change_limit_(relative_change_limit),
      target_(new float[freqs]()),
      current_(new float[freqs]()) {
  for (size_t i = 0; i < freqs; ++i) {
@ -71,12 +65,8 @@ GainApplier::GainApplier(size_t freqs, float change_limit)
 void GainApplier::Apply(const std::complex<float>* in_block,
                        std::complex<float>* out_block) {
  for (size_t i = 0; i < num_freqs_; ++i) {
-    float factor = sqrtf(fabsf(current_[i]));
-    if (!std::isnormal(factor)) {
-      factor = 1.f;
-    }
-    out_block[i] = factor * in_block[i];
-    current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);
+    current_[i] = UpdateFactor(target_[i], current_[i], relative_change_limit_);  // Step the gain first, then apply it.
+    out_block[i] = sqrtf(fabsf(current_[i])) * in_block[i];  // Scale by the square root of |current_[i]|.
  }
 }

--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h
@ -13,6 +13,7 @@

 #include <complex>
 #include <memory>
+#include <vector>

 namespace webrtc {

@ -21,6 +22,7 @@ namespace intelligibility {
 // Internal helper for computing the power of a stream of arrays.
 // The result is an array of power per position: the i-th power is the power of
 // the stream of data on the i-th positions in the input arrays.
+template <typename T>
 class PowerEstimator {
 public:
  // Construct an instance for the given input array length (|freqs|), with the
@ -28,31 +30,24 @@ class PowerEstimator {
  PowerEstimator(size_t freqs, float decay);

  // Add a new data point to the series.
-  void Step(const std::complex<float>* data);
+  void Step(const T* data);

  // The current power array.
-  const float* Power();
+  const std::vector<float>& power() const { return power_; }  // Read-only view of power_; callable on const objects.

 private:
-  // TODO(ekmeyerson): Switch the following running means
-  // and histories from std::unique_ptr to std::vector.
-  std::unique_ptr<std::complex<float>[]> running_mean_sq_;
-
-  // The current magnitude array.
-  std::unique_ptr<float[]> magnitude_;
  // The current power array.
-  std::unique_ptr<float[]> power_;
+  std::vector<float> power_;

-  const size_t num_freqs_;
  const float decay_;
 };

 // Helper class for smoothing gain changes. On each application step, the
 // currently used gains are changed towards a set of settable target gains,
-// constrained by a limit on the magnitude of the changes.
+// constrained by a limit on the relative changes.
 class GainApplier {
 public:
-  GainApplier(size_t freqs, float change_limit);
+  GainApplier(size_t freqs, float relative_change_limit);

  // Copy |in_block| to |out_block|, multiplied by the current set of gains,
  // and step the current set of gains towards the target set.
@ -64,7 +59,7 @@ class GainApplier {

 private:
  const size_t num_freqs_;
-  const float change_limit_;
+  const float relative_change_limit_;
  std::unique_ptr<float[]> target_;
  std::unique_ptr<float[]> current_;
 };
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc
@ -39,17 +39,16 @@ TEST(IntelligibilityUtilsTest, TestPowerEstimator) {
  const float kDecay = 0.5f;
  const std::vector<std::vector<std::complex<float>>> test_data(
      GenerateTestData(kFreqs, kSamples));
-  PowerEstimator power_estimator(kFreqs, kDecay);
-  EXPECT_EQ(0, power_estimator.Power()[0]);
+  PowerEstimator<std::complex<float>> power_estimator(kFreqs, kDecay);
+  EXPECT_EQ(0, power_estimator.power()[0]);

  // Makes sure Step is doing something.
  power_estimator.Step(&test_data[0][0]);
  for (size_t i = 1; i < kSamples; ++i) {
    power_estimator.Step(&test_data[i][0]);
    for (size_t j = 0; j < kFreqs; ++j) {
-      const float* power = power_estimator.Power();
-      EXPECT_GE(power[j], 0.f);
-      EXPECT_LE(power[j], 1.f);
+      EXPECT_GE(power_estimator.power()[j], 0.f);
+      EXPECT_LE(power_estimator.power()[j], 1.f);
    }
  }
 }
@ -62,8 +61,8 @@ TEST(IntelligibilityUtilsTest, TestGainApplier) {
  GainApplier gain_applier(kFreqs, kChangeLimit);
  const std::vector<std::vector<std::complex<float>>> in_data(
      GenerateTestData(kFreqs, kSamples));
-  std::vector<std::vector<std::complex<float>>> out_data(GenerateTestData(
-      kFreqs, kSamples));
+  std::vector<std::vector<std::complex<float>>> out_data(
+      GenerateTestData(kFreqs, kSamples));
  for (size_t i = 0; i < kSamples; ++i) {
    gain_applier.Apply(&in_data[i][0], &out_data[i][0]);
    for (size_t j = 0; j < kFreqs; ++j) {
--- a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
+++ b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc
@ -30,44 +30,24 @@ using std::complex;
 namespace webrtc {
 namespace {

-DEFINE_double(clear_alpha, 0.9, "Power decay factor for clear data.");
-DEFINE_int32(sample_rate,
-             16000,
-             "Audio sample rate used in the input and output files.");
-DEFINE_int32(ana_rate,
-             60,
-             "Analysis rate; gains recalculated every N blocks.");
-DEFINE_double(gain_limit, 1000.0, "Maximum gain change in one block.");
-
 DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
 DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
 DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");

-const size_t kNumChannels = 1;
-
 // void function for gtest
 void void_main(int argc, char* argv[]) {
  google::SetUsageMessage(
      "\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
  google::ParseCommandLineFlags(&argc, &argv, true);

-  size_t samples;        // Number of samples in input PCM file
-  size_t fragment_size;  // Number of samples to process at a time
-                         // to simulate APM stream processing
-
  // Load settings and wav input.
-
-  fragment_size = FLAGS_sample_rate / 100;  // Mirror real time APM chunk size.
-                                            // Duplicates chunk_length_ in
-                                            // IntelligibilityEnhancer.
-
  struct stat in_stat, noise_stat;
  ASSERT_EQ(stat(FLAGS_clear_file.c_str(), &in_stat), 0)
      << "Empty speech file.";
  ASSERT_EQ(stat(FLAGS_noise_file.c_str(), &noise_stat), 0)
      << "Empty noise file.";

-  samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;
+  const size_t samples = std::min(in_stat.st_size, noise_stat.st_size) / 2;

  WavReader in_file(FLAGS_clear_file);
  std::vector<float> in_fpcm(samples);
@ -80,23 +60,19 @@ void void_main(int argc, char* argv[]) {
  FloatS16ToFloat(&noise_fpcm[0], samples, &noise_fpcm[0]);

  // Run intelligibility enhancement.
-  IntelligibilityEnhancer::Config config;
-  config.sample_rate_hz = FLAGS_sample_rate;
-  config.decay_rate = static_cast<float>(FLAGS_clear_alpha);
-  config.analysis_rate = FLAGS_ana_rate;
-  config.gain_change_limit = FLAGS_gain_limit;
-  IntelligibilityEnhancer enh(config);
+  IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels());
  rtc::CriticalSection crit;
  NoiseSuppressionImpl ns(&crit);
-  ns.Initialize(kNumChannels, FLAGS_sample_rate);
+  ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
  ns.Enable(true);

-  AudioBuffer capture_audio(fragment_size,
-                            kNumChannels,
-                            fragment_size,
-                            kNumChannels,
+  // Mirror real time APM chunk size. Duplicates chunk_length_ in
+  // IntelligibilityEnhancer.
+  size_t fragment_size = in_file.sample_rate() / 100;
+  AudioBuffer capture_audio(fragment_size, noise_file.num_channels(),
+                            fragment_size, noise_file.num_channels(),
                            fragment_size);
-  StreamConfig stream_config(FLAGS_sample_rate, kNumChannels);
+  StreamConfig stream_config(in_file.sample_rate(), noise_file.num_channels());

  // Slice the input into smaller chunks, as the APM would do, and feed them
  // through the enhancer.
@ -108,14 +84,17 @@ void void_main(int argc, char* argv[]) {
    ns.AnalyzeCaptureAudio(&capture_audio);
    ns.ProcessCaptureAudio(&capture_audio);
    enh.SetCaptureNoiseEstimate(ns.NoiseEstimate());
-    enh.ProcessRenderAudio(&clear_cursor, FLAGS_sample_rate, kNumChannels);
+    enh.ProcessRenderAudio(&clear_cursor, in_file.sample_rate(),
+                           in_file.num_channels());
    clear_cursor += fragment_size;
    noise_cursor += fragment_size;
  }

  FloatToFloatS16(&in_fpcm[0], samples, &in_fpcm[0]);

-  WavWriter out_file(FLAGS_out_file, FLAGS_sample_rate, kNumChannels);
+  WavWriter out_file(FLAGS_out_file,
+                     in_file.sample_rate(),
+                     in_file.num_channels());
  out_file.WriteSamples(&in_fpcm[0], samples);
 }

--- a/webrtc/modules/audio_processing/noise_suppression_impl.cc
+++ b/webrtc/modules/audio_processing/noise_suppression_impl.cc
@ -182,8 +182,8 @@ std::vector<float> NoiseSuppressionImpl::NoiseEstimate() {
  for (auto& suppressor : suppressors_) {
    const float* noise = WebRtcNs_noise_estimate(suppressor->state());
    for (size_t i = 0; i < noise_estimate.size(); ++i) {
-      noise_estimate[i] += kNormalizationFactor *
-          noise[i] / suppressors_.size();
+      noise_estimate[i] +=
+          kNormalizationFactor * noise[i] / suppressors_.size();
    }
  }
 #elif defined(WEBRTC_NS_FIXED)