Allow AGC2 level estimation in AgcManagerDirect.
This CL does the following: 1. Adds a new AdaptiveModeLevelEstimatorAgc implementation of the Agc interface. The new implementation differs from webrtc::Agc in two ways: (a) it uses the AGC2 speech level estimator in GetRmsErrorDb, whereas webrtc::Agc implements its own estimate with the help of webrtc::LoudnessHistogram; (b) it doesn't forget its past at every GetRmsErrorDb call. 2. Makes AgcManagerDirect use AdaptiveModeLevelEstimatorAgc instead of webrtc::Agc if the use_agc2_level_estimation flag is set. Bug: webrtc:7494 Change-Id: I8df3f52e322d433eb5ce5297f4236af2f1877b04 Reviewed-on: https://webrtc-review.googlesource.com/86603 Commit-Queue: Alex Loiko <aleloi@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Cr-Commit-Position: refs/heads/master@{#23875}
This commit is contained in:
@ -26,6 +26,7 @@ rtc_source_set("agc") {
|
||||
"../../../rtc_base:macromagic",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
"../../../system_wrappers:metrics_api",
|
||||
"../agc2:level_estimation_agc",
|
||||
"../vad",
|
||||
]
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
#endif
|
||||
|
||||
#include "modules/audio_processing/agc/gain_map_internal.h"
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
|
||||
#include "modules/audio_processing/include/gain_control.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
@ -164,7 +165,10 @@ AgcManagerDirect::AgcManagerDirect(Agc* agc,
|
||||
file_postproc_(new DebugFile("agc_postproc.pcm")) {
|
||||
instance_counter_++;
|
||||
if (use_agc2_level_estimation_) {
|
||||
RTC_NOTREACHED() << "Agc2 level estimation not implemented.";
|
||||
RTC_DCHECK(!agc);
|
||||
agc_.reset(new AdaptiveModeLevelEstimatorAgc(data_dumper_.get()));
|
||||
} else {
|
||||
RTC_DCHECK(agc);
|
||||
}
|
||||
if (use_agc2_digital_adaptive_) {
|
||||
RTC_NOTREACHED() << "Agc2 digital adaptive not implemented.";
|
||||
|
@ -52,22 +52,6 @@ class AgcManagerDirect final {
|
||||
bool use_agc2_level_estimation,
|
||||
bool use_agc2_digital_adaptive);
|
||||
|
||||
// Dependency injection for testing. Don't delete |agc| as the memory is owned
|
||||
// by the manager.
|
||||
AgcManagerDirect(Agc* agc,
|
||||
GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks,
|
||||
int startup_min_level,
|
||||
int clipped_level_min);
|
||||
|
||||
// Most general c-tor.
|
||||
AgcManagerDirect(Agc* agc,
|
||||
GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks,
|
||||
int startup_min_level,
|
||||
int clipped_level_min,
|
||||
bool use_agc2_level_estimation,
|
||||
bool use_agc2_digital_adaptive);
|
||||
~AgcManagerDirect();
|
||||
|
||||
int Initialize();
|
||||
@ -85,6 +69,25 @@ class AgcManagerDirect final {
|
||||
float voice_probability();
|
||||
|
||||
private:
|
||||
friend class AgcManagerDirectTest;
|
||||
|
||||
// Dependency injection for testing. Don't delete |agc| as the memory is owned
|
||||
// by the manager.
|
||||
AgcManagerDirect(Agc* agc,
|
||||
GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks,
|
||||
int startup_min_level,
|
||||
int clipped_level_min);
|
||||
|
||||
// Most general c-tor.
|
||||
AgcManagerDirect(Agc* agc,
|
||||
GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks,
|
||||
int startup_min_level,
|
||||
int clipped_level_min,
|
||||
bool use_agc2_level_estimation,
|
||||
bool use_agc2_digital_adaptive);
|
||||
|
||||
// Sets a new microphone level, after first checking that it hasn't been
|
||||
// updated by the user, in which case no action is taken.
|
||||
void SetLevel(int new_level);
|
||||
|
@ -15,6 +15,32 @@ group("agc2") {
|
||||
]
|
||||
}
|
||||
|
||||
# Build target for adaptive_mode_level_estimator_agc.{cc,h}. It is built with
# the "..:apm_debug_dump" config and is listed as a dependency of the "agc"
# target as "../agc2:level_estimation_agc".
rtc_source_set("level_estimation_agc") {
|
||||
sources = [
|
||||
"adaptive_mode_level_estimator_agc.cc",
|
||||
"adaptive_mode_level_estimator_agc.h",
|
||||
]
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
deps = [
|
||||
":adaptive_digital",
|
||||
":common",
|
||||
":gain_applier",
|
||||
":noise_level_estimator",
|
||||
":rnn_vad_with_level",
|
||||
"..:aec_core",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../..:typedefs",
|
||||
"../../../api:array_view",
|
||||
"../../../common_audio",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
"../agc:level_estimation",
|
||||
"../vad",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("adaptive_digital") {
|
||||
sources = [
|
||||
"adaptive_agc.cc",
|
||||
|
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"

#include <cmath>
#include <vector>

#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Hands |apm_data_dumper| to the level estimator and then applies
// kDefaultLevelDbfs as the target level (the setter is presumably inherited
// from Agc; it is not declared in this class).
AdaptiveModeLevelEstimatorAgc::AdaptiveModeLevelEstimatorAgc(
    ApmDataDumper* apm_data_dumper)
    : level_estimator_(apm_data_dumper) {
  this->set_target_level_dbfs(kDefaultLevelDbfs);
}
|
||||
|
||||
// |audio| must be mono; in a multi-channel stream, provide the first (usually
|
||||
// left) channel.
|
||||
void AdaptiveModeLevelEstimatorAgc::Process(const int16_t* audio,
|
||||
size_t length,
|
||||
int sample_rate_hz) {
|
||||
std::vector<float> float_audio_frame(audio, audio + length);
|
||||
const float* const first_channel = &float_audio_frame[0];
|
||||
AudioFrameView<const float> frame_view(&first_channel, 1 /* num channels */,
|
||||
length);
|
||||
const auto vad_prob = agc2_vad_.AnalyzeFrame(frame_view);
|
||||
latest_voice_probability_ = vad_prob.speech_probability;
|
||||
if (latest_voice_probability_ > kVadConfidenceThreshold) {
|
||||
time_in_ms_since_last_estimate_ += kFrameDurationMs;
|
||||
}
|
||||
level_estimator_.UpdateEstimation(vad_prob);
|
||||
}
|
||||
|
||||
// Retrieves the difference between the target RMS level and the current
|
||||
// signal RMS level in dB. Returns true if an update is available and false
|
||||
// otherwise, in which case |error| should be ignored and no action taken.
|
||||
bool AdaptiveModeLevelEstimatorAgc::GetRmsErrorDb(int* error) {
|
||||
if (time_in_ms_since_last_estimate_ <= kTimeUntilConfidentMs) {
|
||||
return false;
|
||||
}
|
||||
*error = std::floor(target_level_dbfs() -
|
||||
level_estimator_.LatestLevelEstimate() + 0.5f);
|
||||
time_in_ms_since_last_estimate_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
void AdaptiveModeLevelEstimatorAgc::Reset() {
|
||||
level_estimator_.Reset();
|
||||
}
|
||||
|
||||
float AdaptiveModeLevelEstimatorAgc::voice_probability() const {
|
||||
return latest_voice_probability_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/agc/agc.h"
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
|
||||
#include "modules/audio_processing/agc2/vad_with_level.h"
|
||||
|
||||
namespace webrtc {
|
||||
class AdaptiveModeLevelEstimatorAgc : public Agc {
|
||||
public:
|
||||
explicit AdaptiveModeLevelEstimatorAgc(ApmDataDumper* apm_data_dumper);
|
||||
|
||||
// |audio| must be mono; in a multi-channel stream, provide the first (usually
|
||||
// left) channel.
|
||||
void Process(const int16_t* audio,
|
||||
size_t length,
|
||||
int sample_rate_hz) override;
|
||||
|
||||
// Retrieves the difference between the target RMS level and the current
|
||||
// signal RMS level in dB. Returns true if an update is available and false
|
||||
// otherwise, in which case |error| should be ignored and no action taken.
|
||||
bool GetRmsErrorDb(int* error) override;
|
||||
void Reset() override;
|
||||
|
||||
float voice_probability() const override;
|
||||
|
||||
private:
|
||||
static constexpr int kTimeUntilConfidentMs = 700;
|
||||
static constexpr int kDefaultLevelDbfs = 0;
|
||||
int32_t time_in_ms_since_last_estimate_ = 0;
|
||||
AdaptiveModeLevelEstimator level_estimator_;
|
||||
VadWithLevel agc2_vad_;
|
||||
float latest_voice_probability_ = 0.f;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
|
@ -38,9 +38,9 @@ constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
|
||||
// This parameter must be tuned together with the noise estimator.
|
||||
constexpr float kMaxNoiseLevelDbfs = -50.f;
|
||||
|
||||
// Used in the Level Estimator for deciding when to update the speech
|
||||
// level estimate. Also used in the adaptive digital gain applier to
|
||||
// decide when to allow target gain reduction.
|
||||
// This is the threshold for speech. Speech frames are used for updating the
|
||||
// speech level, measuring the amount of speech, and decide when to allow target
|
||||
// gain reduction.
|
||||
constexpr float kVadConfidenceThreshold = 0.4f;
|
||||
|
||||
// The amount of 'memory' of the Level Estimator. Decides leak factors.
|
||||
|
Reference in New Issue
Block a user