Allow AGC2 level estimation in AgcManagerDirect.

This CL does the following:

1. Adds a new AdaptiveModeLevelEstimatorAgc implementation of the Agc
   interface. The new implementation differs from webrtc::Agc in that it
   1. uses the AGC2 speech level estimator in GetRmsErrorDb, whereas
      webrtc::Agc implements its own with the help of
      webrtc::LoudnessHistogram.
   2. doesn't forget its past at every GetRmsErrorDb call.
2. Makes AgcManagerDirect use AdaptiveModeLevelEstimatorAgc instead of
   webrtc::Agc if the use_agc2_level_estimation flag is set.

Bug: webrtc:7494
Change-Id: I8df3f52e322d433eb5ce5297f4236af2f1877b04
Reviewed-on: https://webrtc-review.googlesource.com/86603
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23875}
This commit is contained in:
Alex Loiko
2018-07-06 15:35:42 +02:00
committed by Commit Bot
parent 9a94057a79
commit 2ffafa8244
7 changed files with 164 additions and 20 deletions

View File

@ -26,6 +26,7 @@ rtc_source_set("agc") {
"../../../rtc_base:macromagic",
"../../../rtc_base:safe_minmax",
"../../../system_wrappers:metrics_api",
"../agc2:level_estimation_agc",
"../vad",
]
}

View File

@ -17,6 +17,7 @@
#endif
#include "modules/audio_processing/agc/gain_map_internal.h"
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
#include "modules/audio_processing/include/gain_control.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
@ -164,7 +165,10 @@ AgcManagerDirect::AgcManagerDirect(Agc* agc,
file_postproc_(new DebugFile("agc_postproc.pcm")) {
instance_counter_++;
if (use_agc2_level_estimation_) {
RTC_NOTREACHED() << "Agc2 level estimation not implemented.";
RTC_DCHECK(!agc);
agc_.reset(new AdaptiveModeLevelEstimatorAgc(data_dumper_.get()));
} else {
RTC_DCHECK(agc);
}
if (use_agc2_digital_adaptive_) {
RTC_NOTREACHED() << "Agc2 digital adaptive not implemented.";

View File

@ -52,22 +52,6 @@ class AgcManagerDirect final {
bool use_agc2_level_estimation,
bool use_agc2_digital_adaptive);
// Dependency injection for testing. Don't delete |agc| as the memory is owned
// by the manager.
AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level,
int clipped_level_min);
// Most general c-tor.
AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level,
int clipped_level_min,
bool use_agc2_level_estimation,
bool use_agc2_digital_adaptive);
~AgcManagerDirect();
int Initialize();
@ -85,6 +69,25 @@ class AgcManagerDirect final {
float voice_probability();
private:
friend class AgcManagerDirectTest;
// Dependency injection for testing. Don't delete |agc| as the memory is owned
// by the manager.
AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level,
int clipped_level_min);
// Most general c-tor.
AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level,
int clipped_level_min,
bool use_agc2_level_estimation,
bool use_agc2_digital_adaptive);
// Sets a new microphone level, after first checking that it hasn't been
// updated by the user, in which case no action is taken.
void SetLevel(int new_level);

View File

@ -15,6 +15,32 @@ group("agc2") {
]
}
# Source set containing the AdaptiveModeLevelEstimatorAgc sources
# (adaptive_mode_level_estimator_agc.{cc,h}).
# NOTE(review): the dependency list is broad; whether every entry is needed by
# these two sources cannot be verified from this target alone.
rtc_source_set("level_estimation_agc") {
sources = [
"adaptive_mode_level_estimator_agc.cc",
"adaptive_mode_level_estimator_agc.h",
]
configs += [ "..:apm_debug_dump" ]
deps = [
":adaptive_digital",
":common",
":gain_applier",
":noise_level_estimator",
":rnn_vad_with_level",
"..:aec_core",
"..:apm_logging",
"..:audio_frame_view",
"../../..:typedefs",
"../../../api:array_view",
"../../../common_audio",
"../../../rtc_base:checks",
"../../../rtc_base:rtc_base_approved",
"../../../rtc_base:safe_minmax",
"../agc:level_estimation",
"../vad",
]
}
rtc_source_set("adaptive_digital") {
sources = [
"adaptive_agc.cc",

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
#include "modules/audio_processing/include/audio_frame_view.h"
namespace webrtc {
// Constructs the AGC. |apm_data_dumper| is handed to the level estimator.
// NOTE(review): ownership and required lifetime of |apm_data_dumper| are not
// visible here; confirm against AdaptiveModeLevelEstimator.
AdaptiveModeLevelEstimatorAgc::AdaptiveModeLevelEstimatorAgc(
ApmDataDumper* apm_data_dumper)
: level_estimator_(apm_data_dumper) {
// Start out with the default target level (kDefaultLevelDbfs).
set_target_level_dbfs(kDefaultLevelDbfs);
}
// Feeds one frame of 16-bit samples to the AGC2 VAD and to the speech level
// estimator, and records the frame's speech probability.
//
// |audio| must be mono; in a multi-channel stream, provide the first (usually
// left) channel. |length| is the number of samples in |audio|.
// |sample_rate_hz| is not used by this implementation.
void AdaptiveModeLevelEstimatorAgc::Process(const int16_t* audio,
                                            size_t length,
                                            int sample_rate_hz) {
  // Element-wise int16 -> float conversion; sample values are not rescaled.
  std::vector<float> float_audio_frame(audio, audio + length);
  // data() is well defined for an empty vector, whereas indexing element 0 of
  // an empty vector (when |length| is 0) is undefined behavior.
  const float* const first_channel = float_audio_frame.data();
  AudioFrameView<const float> frame_view(&first_channel, 1 /* num channels */,
                                         length);
  const auto vad_prob = agc2_vad_.AnalyzeFrame(frame_view);
  latest_voice_probability_ = vad_prob.speech_probability;
  // Only frames classified as speech count towards the time that must elapse
  // before GetRmsErrorDb() reports an estimate.
  if (latest_voice_probability_ > kVadConfidenceThreshold) {
    // NOTE(review): presumably every call carries exactly kFrameDurationMs of
    // audio; confirm against the callers.
    time_in_ms_since_last_estimate_ += kFrameDurationMs;
  }
  level_estimator_.UpdateEstimation(vad_prob);
}
// Reports how far the latest speech level estimate is from the target level.
//
// When more than kTimeUntilConfidentMs of speech time has accumulated since
// the last report, writes (target level - latest level estimate), in dB and
// rounded via floor(x + 0.5), to |error|, restarts the accumulated time and
// returns true. Otherwise returns false and leaves |error| untouched, in which
// case the caller should take no action.
bool AdaptiveModeLevelEstimatorAgc::GetRmsErrorDb(int* error) {
  const bool confident =
      time_in_ms_since_last_estimate_ > kTimeUntilConfidentMs;
  if (!confident) {
    return false;
  }
  const auto level_gap_db =
      target_level_dbfs() - level_estimator_.LatestLevelEstimate();
  *error = static_cast<int>(std::floor(level_gap_db + 0.5f));
  time_in_ms_since_last_estimate_ = 0;
  return true;
}
// Resets the speech level estimator.
// NOTE(review): the accumulated speech time and the latest voice probability
// are not cleared here; confirm that this is intended.
void AdaptiveModeLevelEstimatorAgc::Reset() {
level_estimator_.Reset();
}
// Returns the speech probability stored by the most recent Process() call
// (0 if Process() has not been called yet).
float AdaptiveModeLevelEstimatorAgc::voice_probability() const {
return latest_voice_probability_;
}
} // namespace webrtc

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
#include <vector>
#include "modules/audio_processing/agc/agc.h"
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
namespace webrtc {
// Agc implementation that derives the RMS error from an
// AdaptiveModeLevelEstimator fed by a VadWithLevel.
class AdaptiveModeLevelEstimatorAgc : public Agc {
public:
// |apm_data_dumper| is passed on to the level estimator.
explicit AdaptiveModeLevelEstimatorAgc(ApmDataDumper* apm_data_dumper);
// |audio| must be mono; in a multi-channel stream, provide the first (usually
// left) channel.
void Process(const int16_t* audio,
size_t length,
int sample_rate_hz) override;
// Retrieves the difference between the target RMS level and the current
// signal RMS level in dB. Returns true if an update is available and false
// otherwise, in which case |error| should be ignored and no action taken.
bool GetRmsErrorDb(int* error) override;
// Resets the level estimator.
void Reset() override;
// Returns the speech probability of the most recently processed frame.
float voice_probability() const override;
private:
// GetRmsErrorDb() reports an error only once more than this much speech
// time (in ms) has accumulated since its previous report.
static constexpr int kTimeUntilConfidentMs = 700;
// Target level installed by the constructor.
static constexpr int kDefaultLevelDbfs = 0;
// Speech time (in ms) accumulated by Process() since GetRmsErrorDb() last
// returned true.
int32_t time_in_ms_since_last_estimate_ = 0;
// Speech level estimator updated on every Process() call.
AdaptiveModeLevelEstimator level_estimator_;
// Voice activity detector that analyzes each processed frame.
VadWithLevel agc2_vad_;
// Speech probability reported by the VAD for the latest frame.
float latest_voice_probability_ = 0.f;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_

View File

@ -38,9 +38,9 @@ constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
// This parameter must be tuned together with the noise estimator.
constexpr float kMaxNoiseLevelDbfs = -50.f;
// Used in the Level Estimator for deciding when to update the speech
// level estimate. Also used in the adaptive digital gain applier to
// decide when to allow target gain reduction.
// This is the threshold for speech. Speech frames are used for updating the
// speech level, measuring the amount of speech, and deciding when to allow
// target gain reduction.
constexpr float kVadConfidenceThreshold = 0.4f;
// The amount of 'memory' of the Level Estimator. Decides leak factors.