Removing deprecated legacy noise suppressor
This CL removes the code for the deprecated legacy noise. Bug: webrtc:5298 Change-Id: If287d8967a3079ef96bff4790afa31f37d178823 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/167922 Reviewed-by: Sam Zackrisson <saza@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#30434}
This commit is contained in:
@ -137,6 +137,7 @@ rtc_library("audio_processing") {
|
||||
"transient/transient_detector.h",
|
||||
"transient/transient_suppressor.cc",
|
||||
"transient/transient_suppressor.h",
|
||||
"transient/windows_private.h",
|
||||
"transient/wpd_node.cc",
|
||||
"transient/wpd_node.h",
|
||||
"transient/wpd_tree.cc",
|
||||
@ -183,8 +184,6 @@ rtc_library("audio_processing") {
|
||||
"agc2:adaptive_digital",
|
||||
"agc2:fixed_digital",
|
||||
"agc2:gain_applier",
|
||||
"legacy_ns:legacy_ns",
|
||||
"legacy_ns:legacy_ns_c",
|
||||
"ns",
|
||||
"vad",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
@ -376,7 +375,6 @@ if (rtc_include_tests) {
|
||||
"agc2:rnn_vad_with_level_unittests",
|
||||
"agc2:test_utils",
|
||||
"agc2/rnn_vad:unittests",
|
||||
"legacy_ns:legacy_ns",
|
||||
"test/conversational_speech:unittest",
|
||||
"utility:legacy_delay_estimator_unittest",
|
||||
"utility:pffft_wrapper_unittest",
|
||||
|
@ -70,11 +70,6 @@ bool SampleRateSupportsMultiBand(int sample_rate_hz) {
|
||||
sample_rate_hz == AudioProcessing::kSampleRate48kHz;
|
||||
}
|
||||
|
||||
// Checks whether the legacy ns functionality should be enforced.
|
||||
bool DetectLegacyNsEnforcement() {
|
||||
return field_trial::IsEnabled("WebRTC-NewNoiseSuppressionKillSwitch");
|
||||
}
|
||||
|
||||
// Checks whether the high-pass filter should be done in the full-band.
|
||||
bool EnforceSplitBandHpf() {
|
||||
return field_trial::IsEnabled("WebRTC-FullBandHpfKillSwitch");
|
||||
@ -106,23 +101,6 @@ int SuitableProcessRate(int minimum_rate,
|
||||
return uppermost_native_rate;
|
||||
}
|
||||
|
||||
NoiseSuppression::Level NsConfigLevelToInterfaceLevel(
|
||||
AudioProcessing::Config::NoiseSuppression::Level level) {
|
||||
using NsConfig = AudioProcessing::Config::NoiseSuppression;
|
||||
switch (level) {
|
||||
case NsConfig::kLow:
|
||||
return NoiseSuppression::Level::kLow;
|
||||
case NsConfig::kModerate:
|
||||
return NoiseSuppression::Level::kModerate;
|
||||
case NsConfig::kHigh:
|
||||
return NoiseSuppression::Level::kHigh;
|
||||
case NsConfig::kVeryHigh:
|
||||
return NoiseSuppression::Level::kVeryHigh;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
}
|
||||
}
|
||||
|
||||
GainControl::Mode Agc1ConfigModeToInterfaceMode(
|
||||
AudioProcessing::Config::GainController1::Mode mode) {
|
||||
using Agc1Config = AudioProcessing::Config::GainController1;
|
||||
@ -319,7 +297,6 @@ AudioProcessingImpl::AudioProcessingImpl(
|
||||
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
enforced_usage_of_legacy_ns_(DetectLegacyNsEnforcement()),
|
||||
use_setup_specific_default_aec3_config_(
|
||||
UseSetupSpecificDefaultAec3Congfig()),
|
||||
capture_runtime_settings_(kRuntimeSettingQueueSize),
|
||||
@ -1220,16 +1197,11 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
||||
RETURN_ON_ERR(
|
||||
submodules_.gain_control->AnalyzeCaptureAudio(*capture_buffer));
|
||||
}
|
||||
RTC_DCHECK(
|
||||
!(submodules_.legacy_noise_suppressor && submodules_.noise_suppressor));
|
||||
|
||||
if (!config_.noise_suppression.analyze_linear_aec_output_when_available ||
|
||||
!linear_aec_buffer || submodules_.echo_control_mobile) {
|
||||
if (submodules_.noise_suppressor) {
|
||||
if ((!config_.noise_suppression.analyze_linear_aec_output_when_available ||
|
||||
!linear_aec_buffer || submodules_.echo_control_mobile) &&
|
||||
submodules_.noise_suppressor) {
|
||||
submodules_.noise_suppressor->Analyze(*capture_buffer);
|
||||
} else if (submodules_.legacy_noise_suppressor) {
|
||||
submodules_.legacy_noise_suppressor->AnalyzeCaptureAudio(capture_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
if (submodules_.echo_control_mobile) {
|
||||
@ -1241,9 +1213,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
||||
|
||||
if (submodules_.noise_suppressor) {
|
||||
submodules_.noise_suppressor->Process(capture_buffer);
|
||||
} else if (submodules_.legacy_noise_suppressor) {
|
||||
submodules_.echo_control_mobile->CopyLowPassReference(capture_buffer);
|
||||
submodules_.legacy_noise_suppressor->ProcessCaptureAudio(capture_buffer);
|
||||
}
|
||||
|
||||
RETURN_ON_ERR(submodules_.echo_control_mobile->ProcessCaptureAudio(
|
||||
@ -1261,19 +1230,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
||||
}
|
||||
|
||||
if (config_.noise_suppression.analyze_linear_aec_output_when_available &&
|
||||
linear_aec_buffer) {
|
||||
if (submodules_.noise_suppressor) {
|
||||
linear_aec_buffer && submodules_.noise_suppressor) {
|
||||
submodules_.noise_suppressor->Analyze(*linear_aec_buffer);
|
||||
} else if (submodules_.legacy_noise_suppressor) {
|
||||
submodules_.legacy_noise_suppressor->AnalyzeCaptureAudio(
|
||||
linear_aec_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
if (submodules_.noise_suppressor) {
|
||||
submodules_.noise_suppressor->Process(capture_buffer);
|
||||
} else if (submodules_.legacy_noise_suppressor) {
|
||||
submodules_.legacy_noise_suppressor->ProcessCaptureAudio(capture_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1682,8 +1644,7 @@ AudioProcessing::Config AudioProcessingImpl::GetConfig() const {
|
||||
bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
|
||||
return submodule_states_.Update(
|
||||
config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile,
|
||||
config_.residual_echo_detector.enabled,
|
||||
!!submodules_.legacy_noise_suppressor || !!submodules_.noise_suppressor,
|
||||
config_.residual_echo_detector.enabled, !!submodules_.noise_suppressor,
|
||||
!!submodules_.gain_control, !!submodules_.gain_controller2,
|
||||
config_.pre_amplifier.enabled, capture_nonlocked_.echo_controller_enabled,
|
||||
config_.voice_detection.enabled, !!submodules_.transient_suppressor);
|
||||
@ -1896,14 +1857,9 @@ void AudioProcessingImpl::InitializeGainController2() {
|
||||
}
|
||||
|
||||
void AudioProcessingImpl::InitializeNoiseSuppressor() {
|
||||
submodules_.legacy_noise_suppressor.reset();
|
||||
submodules_.noise_suppressor.reset();
|
||||
|
||||
if (config_.noise_suppression.enabled) {
|
||||
const bool use_legacy_ns =
|
||||
config_.noise_suppression.use_legacy_ns || enforced_usage_of_legacy_ns_;
|
||||
|
||||
if (!use_legacy_ns) {
|
||||
auto map_level =
|
||||
[](AudioProcessing::Config::NoiseSuppression::Level level) {
|
||||
using NoiseSuppresionConfig =
|
||||
@ -1926,12 +1882,6 @@ void AudioProcessingImpl::InitializeNoiseSuppressor() {
|
||||
cfg.target_level = map_level(config_.noise_suppression.level);
|
||||
submodules_.noise_suppressor = std::make_unique<NoiseSuppressor>(
|
||||
cfg, proc_sample_rate_hz(), num_proc_channels());
|
||||
} else {
|
||||
auto ns_level =
|
||||
NsConfigLevelToInterfaceLevel(config_.noise_suppression.level);
|
||||
submodules_.legacy_noise_suppressor = std::make_unique<NoiseSuppression>(
|
||||
num_proc_channels(), proc_sample_rate_hz(), ns_level);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -27,7 +27,6 @@
|
||||
#include "modules/audio_processing/include/aec_dump.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
#include "modules/audio_processing/include/audio_processing_statistics.h"
|
||||
#include "modules/audio_processing/legacy_ns/legacy_noise_suppression.h"
|
||||
#include "modules/audio_processing/level_estimator.h"
|
||||
#include "modules/audio_processing/ns/noise_suppressor.h"
|
||||
#include "modules/audio_processing/render_queue_item_verifier.h"
|
||||
@ -152,7 +151,6 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
static int instance_count_;
|
||||
const bool enforced_usage_of_legacy_ns_;
|
||||
const bool use_setup_specific_default_aec3_config_;
|
||||
|
||||
SwapQueue<RuntimeSetting> capture_runtime_settings_;
|
||||
@ -346,7 +344,6 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
rtc::scoped_refptr<EchoDetector> echo_detector;
|
||||
std::unique_ptr<EchoControl> echo_controller;
|
||||
std::unique_ptr<EchoControlMobileImpl> echo_control_mobile;
|
||||
std::unique_ptr<NoiseSuppression> legacy_noise_suppressor;
|
||||
std::unique_ptr<NoiseSuppressor> noise_suppressor;
|
||||
std::unique_ptr<TransientSuppressor> transient_suppressor;
|
||||
std::unique_ptr<CustomProcessing> capture_post_processor;
|
||||
|
@ -220,16 +220,6 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) {
|
||||
RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size());
|
||||
reference_copied_ = true;
|
||||
for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
|
||||
FloatS16ToS16(audio->split_bands_const(capture)[kBand0To8kHz],
|
||||
audio->num_frames_per_band(),
|
||||
low_pass_reference_[capture].data());
|
||||
}
|
||||
}
|
||||
|
||||
int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
|
||||
if (MapSetting(mode) == -1) {
|
||||
return AudioProcessing::kBadParameterError;
|
||||
|
@ -54,7 +54,6 @@ class EchoControlMobileImpl {
|
||||
|
||||
void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio);
|
||||
int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
|
||||
void CopyLowPassReference(AudioBuffer* audio);
|
||||
|
||||
void Initialize(int sample_rate_hz,
|
||||
size_t num_reverse_channels,
|
||||
|
@ -250,8 +250,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
|
||||
enum Level { kLow, kModerate, kHigh, kVeryHigh };
|
||||
Level level = kModerate;
|
||||
bool analyze_linear_aec_output_when_available = false;
|
||||
// Recommended not to use. Will be removed in the future.
|
||||
bool use_legacy_ns = false;
|
||||
} noise_suppression;
|
||||
|
||||
// Enables transient suppression.
|
||||
|
@ -1,105 +0,0 @@
|
||||
# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
import("../../../webrtc.gni")
|
||||
|
||||
rtc_library("legacy_ns") {
|
||||
visibility = [ "*" ] # Only targets in this file can depend on this.
|
||||
sources = [
|
||||
"legacy_noise_suppression.cc",
|
||||
"legacy_noise_suppression.h",
|
||||
]
|
||||
|
||||
if (rtc_prefer_fixed_point) {
|
||||
defines = [ "WEBRTC_NS_FIXED" ]
|
||||
} else {
|
||||
defines = [ "WEBRTC_NS_FLOAT" ]
|
||||
}
|
||||
|
||||
deps = [
|
||||
":legacy_ns_c",
|
||||
"..:audio_buffer",
|
||||
"../../../common_audio",
|
||||
"../../../common_audio:common_audio_c",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../system_wrappers:cpu_features_api",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("legacy_ns_c") {
|
||||
visibility = [ "*" ] # Only targets in this file can depend on this.
|
||||
sources = [ "windows_private.h" ]
|
||||
|
||||
if (rtc_prefer_fixed_point) {
|
||||
sources += [
|
||||
"noise_suppression_x.c",
|
||||
"noise_suppression_x.h",
|
||||
"nsx_core.c",
|
||||
"nsx_core.h",
|
||||
"nsx_core_c.c",
|
||||
"nsx_defines.h",
|
||||
]
|
||||
} else {
|
||||
sources += [
|
||||
"defines.h",
|
||||
"noise_suppression.c",
|
||||
"noise_suppression.h",
|
||||
"ns_core.c",
|
||||
"ns_core.h",
|
||||
]
|
||||
}
|
||||
|
||||
if (rtc_prefer_fixed_point) {
|
||||
defines = [ "WEBRTC_NS_FIXED" ]
|
||||
} else {
|
||||
defines = [ "WEBRTC_NS_FLOAT" ]
|
||||
}
|
||||
|
||||
deps = [
|
||||
"..:audio_buffer",
|
||||
"../../../common_audio",
|
||||
"../../../common_audio:common_audio_c",
|
||||
"../../../common_audio/third_party/fft4g",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../system_wrappers:cpu_features_api",
|
||||
]
|
||||
|
||||
if (rtc_build_with_neon) {
|
||||
sources += [ "nsx_core_neon.c" ]
|
||||
|
||||
if (current_cpu != "arm64") {
|
||||
# Enable compilation for the NEON instruction set.
|
||||
suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ]
|
||||
cflags = [ "-mfpu=neon" ]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rtc_include_tests) {
|
||||
rtc_source_set("legacy_ns_unittests") {
|
||||
testonly = true
|
||||
|
||||
configs += []
|
||||
sources = [ "legacy_noise_suppression_unittest.cc" ]
|
||||
|
||||
deps = [
|
||||
"..:audio_buffer",
|
||||
"..:audioproc_test_utils",
|
||||
"../../../api:array_view",
|
||||
"../../../test:test_support",
|
||||
]
|
||||
|
||||
defines = []
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
sources += []
|
||||
}
|
||||
}
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_DEFINES_H_
|
||||
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_DEFINES_H_
|
||||
|
||||
#define BLOCKL_MAX 160 // max processing block length: 160
|
||||
#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256
|
||||
#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1
|
||||
#define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2
|
||||
|
||||
#define QUANTILE 0.25f
|
||||
|
||||
#define SIMULT 3
|
||||
#define END_STARTUP_LONG 200
|
||||
#define END_STARTUP_SHORT 50
|
||||
#define FACTOR 40.f
|
||||
#define WIDTH 0.01f
|
||||
|
||||
// Length of fft work arrays.
|
||||
#define IP_LENGTH \
|
||||
(ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2))
|
||||
#define W_LENGTH (ANAL_BLOCKL_MAX >> 1)
|
||||
|
||||
// PARAMETERS FOR NEW METHOD
|
||||
#define DD_PR_SNR 0.98f // DD update of prior SNR
|
||||
#define LRT_TAVG 0.5f // tavg parameter for LRT (previously 0.90)
|
||||
#define SPECT_FL_TAVG 0.30f // tavg parameter for spectral flatness measure
|
||||
#define SPECT_DIFF_TAVG 0.30f // tavg parameter for spectral difference measure
|
||||
#define PRIOR_UPDATE 0.1f // update parameter of prior model
|
||||
#define NOISE_UPDATE 0.9f // update parameter for noise
|
||||
#define SPEECH_UPDATE 0.99f // update parameter when likely speech
|
||||
#define WIDTH_PR_MAP 4.0f // width parameter in sigmoid map for prior model
|
||||
#define LRT_FEATURE_THR 0.5f // default threshold for LRT feature
|
||||
#define SF_FEATURE_THR 0.5f // default threshold for Spectral Flatness feature
|
||||
#define SD_FEATURE_THR \
|
||||
0.5f // default threshold for Spectral Difference feature
|
||||
#define PROB_RANGE \
|
||||
0.2f // probability threshold for noise state in
|
||||
// speech/noise likelihood
|
||||
#define HIST_PAR_EST 1000 // histogram size for estimation of parameters
|
||||
#define GAMMA_PAUSE 0.05f // update for conservative noise estimate
|
||||
//
|
||||
#define B_LIM 0.5f // threshold in final energy gain factor calculation
|
||||
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_DEFINES_H_
|
@ -1,172 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/legacy_ns/legacy_noise_suppression.h"
|
||||
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#if defined(WEBRTC_NS_FLOAT)
|
||||
#include "modules/audio_processing/legacy_ns/noise_suppression.h"
|
||||
|
||||
#define NS_CREATE WebRtcNs_Create
|
||||
#define NS_FREE WebRtcNs_Free
|
||||
#define NS_INIT WebRtcNs_Init
|
||||
#define NS_SET_POLICY WebRtcNs_set_policy
|
||||
typedef NsHandle NsState;
|
||||
#elif defined(WEBRTC_NS_FIXED)
|
||||
#include "modules/audio_processing/legacy_ns/noise_suppression_x.h"
|
||||
|
||||
#define NS_CREATE WebRtcNsx_Create
|
||||
#define NS_FREE WebRtcNsx_Free
|
||||
#define NS_INIT WebRtcNsx_Init
|
||||
#define NS_SET_POLICY WebRtcNsx_set_policy
|
||||
typedef NsxHandle NsState;
|
||||
#endif
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
int NoiseSuppressionLevelToPolicy(NoiseSuppression::Level level) {
|
||||
switch (level) {
|
||||
case NoiseSuppression::Level::kLow:
|
||||
return 0;
|
||||
case NoiseSuppression::Level::kModerate:
|
||||
return 1;
|
||||
case NoiseSuppression::Level::kHigh:
|
||||
return 2;
|
||||
case NoiseSuppression::Level::kVeryHigh:
|
||||
return 3;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
class NoiseSuppression::Suppressor {
|
||||
public:
|
||||
explicit Suppressor(int sample_rate_hz) {
|
||||
state_ = NS_CREATE();
|
||||
RTC_CHECK(state_);
|
||||
int error = NS_INIT(state_, sample_rate_hz);
|
||||
RTC_DCHECK_EQ(0, error);
|
||||
}
|
||||
~Suppressor() { NS_FREE(state_); }
|
||||
|
||||
Suppressor(Suppressor&) = delete;
|
||||
Suppressor& operator=(Suppressor&) = delete;
|
||||
|
||||
NsState* state() { return state_; }
|
||||
|
||||
private:
|
||||
NsState* state_ = nullptr;
|
||||
};
|
||||
|
||||
NoiseSuppression::NoiseSuppression(size_t channels,
|
||||
int sample_rate_hz,
|
||||
Level level) {
|
||||
const int policy = NoiseSuppressionLevelToPolicy(level);
|
||||
for (size_t i = 0; i < channels; ++i) {
|
||||
suppressors_.push_back(std::make_unique<Suppressor>(sample_rate_hz));
|
||||
int error = NS_SET_POLICY(suppressors_[i]->state(), policy);
|
||||
RTC_DCHECK_EQ(0, error);
|
||||
}
|
||||
}
|
||||
|
||||
NoiseSuppression::~NoiseSuppression() {}
|
||||
|
||||
void NoiseSuppression::AnalyzeCaptureAudio(AudioBuffer* audio) {
|
||||
RTC_DCHECK(audio);
|
||||
#if defined(WEBRTC_NS_FLOAT)
|
||||
RTC_DCHECK_GE(160, audio->num_frames_per_band());
|
||||
RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels());
|
||||
for (size_t i = 0; i < suppressors_.size(); i++) {
|
||||
WebRtcNs_Analyze(suppressors_[i]->state(),
|
||||
audio->split_bands_const(i)[kBand0To8kHz]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void NoiseSuppression::ProcessCaptureAudio(AudioBuffer* audio) {
|
||||
RTC_DCHECK(audio);
|
||||
RTC_DCHECK_GE(160, audio->num_frames_per_band());
|
||||
RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels());
|
||||
for (size_t i = 0; i < suppressors_.size(); i++) {
|
||||
#if defined(WEBRTC_NS_FLOAT)
|
||||
WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const(i),
|
||||
audio->num_bands(), audio->split_bands(i));
|
||||
#elif defined(WEBRTC_NS_FIXED)
|
||||
int16_t split_band_data[AudioBuffer::kMaxNumBands]
|
||||
[AudioBuffer::kMaxSplitFrameLength];
|
||||
int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
|
||||
split_band_data[0], split_band_data[1], split_band_data[2]};
|
||||
audio->ExportSplitChannelData(i, split_bands);
|
||||
|
||||
WebRtcNsx_Process(suppressors_[i]->state(), split_bands, audio->num_bands(),
|
||||
split_bands);
|
||||
|
||||
audio->ImportSplitChannelData(i, split_bands);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
float NoiseSuppression::speech_probability() const {
|
||||
#if defined(WEBRTC_NS_FLOAT)
|
||||
float probability_average = 0.0f;
|
||||
for (auto& suppressor : suppressors_) {
|
||||
probability_average +=
|
||||
WebRtcNs_prior_speech_probability(suppressor->state());
|
||||
}
|
||||
if (!suppressors_.empty()) {
|
||||
probability_average /= suppressors_.size();
|
||||
}
|
||||
return probability_average;
|
||||
#elif defined(WEBRTC_NS_FIXED)
|
||||
// TODO(peah): Returning error code as a float! Remove this.
|
||||
// Currently not available for the fixed point implementation.
|
||||
return AudioProcessing::kUnsupportedFunctionError;
|
||||
#endif
|
||||
}
|
||||
|
||||
std::vector<float> NoiseSuppression::NoiseEstimate() {
|
||||
std::vector<float> noise_estimate;
|
||||
#if defined(WEBRTC_NS_FLOAT)
|
||||
const float kNumChannelsFraction = 1.f / suppressors_.size();
|
||||
noise_estimate.assign(WebRtcNs_num_freq(), 0.f);
|
||||
for (auto& suppressor : suppressors_) {
|
||||
const float* noise = WebRtcNs_noise_estimate(suppressor->state());
|
||||
for (size_t i = 0; i < noise_estimate.size(); ++i) {
|
||||
noise_estimate[i] += kNumChannelsFraction * noise[i];
|
||||
}
|
||||
}
|
||||
#elif defined(WEBRTC_NS_FIXED)
|
||||
noise_estimate.assign(WebRtcNsx_num_freq(), 0.f);
|
||||
for (auto& suppressor : suppressors_) {
|
||||
int q_noise;
|
||||
const uint32_t* noise =
|
||||
WebRtcNsx_noise_estimate(suppressor->state(), &q_noise);
|
||||
const float kNormalizationFactor =
|
||||
1.f / ((1 << q_noise) * suppressors_.size());
|
||||
for (size_t i = 0; i < noise_estimate.size(); ++i) {
|
||||
noise_estimate[i] += kNormalizationFactor * noise[i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return noise_estimate;
|
||||
}
|
||||
|
||||
size_t NoiseSuppression::num_noise_bins() {
|
||||
#if defined(WEBRTC_NS_FLOAT)
|
||||
return WebRtcNs_num_freq();
|
||||
#elif defined(WEBRTC_NS_FIXED)
|
||||
return WebRtcNsx_num_freq();
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -1,57 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_LEGACY_NOISE_SUPPRESSION_H_
|
||||
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_LEGACY_NOISE_SUPPRESSION_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioBuffer;
|
||||
|
||||
// The noise suppression (NS) component attempts to remove noise while
|
||||
// retaining speech. Recommended to be enabled on the client-side.
|
||||
class NoiseSuppression {
|
||||
public:
|
||||
// Determines the aggressiveness of the suppression. Increasing the level
|
||||
// will reduce the noise level at the expense of a higher speech distortion.
|
||||
enum class Level { kLow, kModerate, kHigh, kVeryHigh };
|
||||
|
||||
NoiseSuppression(size_t channels, int sample_rate_hz, Level level);
|
||||
~NoiseSuppression();
|
||||
|
||||
NoiseSuppression(NoiseSuppression&) = delete;
|
||||
NoiseSuppression& operator=(NoiseSuppression&) = delete;
|
||||
|
||||
void AnalyzeCaptureAudio(AudioBuffer* audio);
|
||||
void ProcessCaptureAudio(AudioBuffer* audio);
|
||||
|
||||
// LEGACY: Returns the internally computed prior speech probability of current
|
||||
// frame averaged over output channels. This is not supported in fixed point,
|
||||
// for which |kUnsupportedFunctionError| is returned.
|
||||
float speech_probability() const;
|
||||
|
||||
// LEGACY: Returns the size of the noise vector returned by NoiseEstimate().
|
||||
static size_t num_noise_bins();
|
||||
|
||||
// LEGACY: Returns the noise estimate per frequency bin averaged over all
|
||||
// channels.
|
||||
std::vector<float> NoiseEstimate();
|
||||
|
||||
private:
|
||||
class Suppressor;
|
||||
|
||||
std::vector<std::unique_ptr<Suppressor>> suppressors_;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_LEGACY_NOISE_SUPPRESSION_H_
|
@ -1,279 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/legacy_noise_suppression.h"
|
||||
#include "modules/audio_processing/test/audio_buffer_tools.h"
|
||||
#include "modules/audio_processing/test/bitexactness_tools.h"
|
||||
#include "test/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
const int kNumFramesToProcess = 1000;
|
||||
|
||||
// Process one frame of data and produce the output.
|
||||
void ProcessOneFrame(int sample_rate_hz,
|
||||
AudioBuffer* capture_buffer,
|
||||
NoiseSuppression* noise_suppressor) {
|
||||
if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
|
||||
capture_buffer->SplitIntoFrequencyBands();
|
||||
}
|
||||
|
||||
noise_suppressor->AnalyzeCaptureAudio(capture_buffer);
|
||||
noise_suppressor->ProcessCaptureAudio(capture_buffer);
|
||||
|
||||
if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
|
||||
capture_buffer->MergeFrequencyBands();
|
||||
}
|
||||
}
|
||||
|
||||
// Processes a specified amount of frames, verifies the results and reports
|
||||
// any errors.
|
||||
void RunBitexactnessTest(int sample_rate_hz,
|
||||
size_t num_channels,
|
||||
NoiseSuppression::Level level,
|
||||
float speech_probability_reference,
|
||||
rtc::ArrayView<const float> noise_estimate_reference,
|
||||
rtc::ArrayView<const float> output_reference) {
|
||||
NoiseSuppression noise_suppressor(num_channels, sample_rate_hz, level);
|
||||
|
||||
int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
|
||||
const StreamConfig capture_config(sample_rate_hz, num_channels, false);
|
||||
AudioBuffer capture_buffer(
|
||||
capture_config.sample_rate_hz(), capture_config.num_channels(),
|
||||
capture_config.sample_rate_hz(), capture_config.num_channels(),
|
||||
capture_config.sample_rate_hz(), capture_config.num_channels());
|
||||
test::InputAudioFile capture_file(
|
||||
test::GetApmCaptureTestVectorFileName(sample_rate_hz));
|
||||
std::vector<float> capture_input(samples_per_channel * num_channels);
|
||||
for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
|
||||
ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
|
||||
&capture_file, capture_input);
|
||||
|
||||
test::CopyVectorToAudioBuffer(capture_config, capture_input,
|
||||
&capture_buffer);
|
||||
|
||||
ProcessOneFrame(sample_rate_hz, &capture_buffer, &noise_suppressor);
|
||||
}
|
||||
|
||||
// Extract test results.
|
||||
std::vector<float> capture_output;
|
||||
test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
|
||||
&capture_output);
|
||||
float speech_probability = noise_suppressor.speech_probability();
|
||||
std::vector<float> noise_estimate = noise_suppressor.NoiseEstimate();
|
||||
|
||||
const float kVectorElementErrorBound = 1.0f / 32768.0f;
|
||||
EXPECT_FLOAT_EQ(speech_probability_reference, speech_probability);
|
||||
EXPECT_TRUE(test::VerifyArray(noise_estimate_reference, noise_estimate,
|
||||
kVectorElementErrorBound));
|
||||
|
||||
// Compare the output with the reference. Only the first values of the output
|
||||
// from last frame processed are compared in order not having to specify all
|
||||
// preceeding frames as testvectors. As the algorithm being tested has a
|
||||
// memory, testing only the last frame implicitly also tests the preceeding
|
||||
// frames.
|
||||
EXPECT_TRUE(test::VerifyDeinterleavedArray(
|
||||
capture_config.num_frames(), capture_config.num_channels(),
|
||||
output_reference, capture_output, kVectorElementErrorBound));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(LegacyNoiseSuppresionBitExactnessTest, Mono8kHzLow) {
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {1432.341431f, 3321.919922f,
|
||||
7677.521973f};
|
||||
const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f};
|
||||
#elif defined(WEBRTC_ARCH_ARM)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {1432.341431f, 3321.919922f,
|
||||
7677.521973f};
|
||||
const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f};
|
||||
#else
|
||||
const float kSpeechProbabilityReference = 0.73650402f;
|
||||
const float kNoiseEstimateReference[] = {1176.856812f, 3287.490967f,
|
||||
7525.964844f};
|
||||
const float kOutputReference[] = {0.003306f, 0.004442f, 0.004574f};
|
||||
#endif
|
||||
|
||||
RunBitexactnessTest(8000, 1, NoiseSuppression::Level::kLow,
|
||||
kSpeechProbabilityReference, kNoiseEstimateReference,
|
||||
kOutputReference);
|
||||
}
|
||||
|
||||
TEST(LegacyNoiseSuppresionBitExactnessTest, Mono16kHzLow) {
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2534.461914f, 6277.638672f,
|
||||
14367.499023f};
|
||||
const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f};
|
||||
#elif defined(WEBRTC_ARCH_ARM)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2534.461914f, 6277.638672f,
|
||||
14367.499023f};
|
||||
const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f};
|
||||
#else
|
||||
const float kSpeechProbabilityReference = 0.71743423f;
|
||||
const float kNoiseEstimateReference[] = {2179.853027f, 6507.995117f,
|
||||
15652.758789f};
|
||||
const float kOutputReference[] = {0.003574f, 0.004494f, 0.004499f};
|
||||
#endif
|
||||
|
||||
RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kLow,
|
||||
kSpeechProbabilityReference, kNoiseEstimateReference,
|
||||
kOutputReference);
|
||||
}
|
||||
|
||||
TEST(LegacyNoiseSuppresionBitExactnessTest, Mono32kHzLow) {
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2540.059082f, 6317.822754f,
|
||||
14440.845703f};
|
||||
const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f};
|
||||
#elif defined(WEBRTC_ARCH_ARM)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2540.059082f, 6317.822754f,
|
||||
14440.845703f};
|
||||
const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f};
|
||||
#else
|
||||
const float kSpeechProbabilityReference = 0.67999554f;
|
||||
const float kNoiseEstimateReference[] = {2149.780518f, 7076.936035f,
|
||||
14939.945312f};
|
||||
const float kOutputReference[] = {0.001221f, 0.001984f, 0.002228f};
|
||||
#endif
|
||||
|
||||
RunBitexactnessTest(32000, 1, NoiseSuppression::Level::kLow,
|
||||
kSpeechProbabilityReference, kNoiseEstimateReference,
|
||||
kOutputReference);
|
||||
}
|
||||
|
||||
TEST(LegacyNoiseSuppresionBitExactnessTest, Mono48kHzLow) {
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f,
|
||||
14647.632812f};
|
||||
const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f};
|
||||
#elif defined(WEBRTC_ARCH_ARM)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f,
|
||||
14647.632812f};
|
||||
const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f};
|
||||
#else
|
||||
const float kSpeechProbabilityReference = 0.70737761f;
|
||||
const float kNoiseEstimateReference[] = {2187.394043f, 6913.306641f,
|
||||
13182.945312f};
|
||||
const float kOutputReference[] = {-0.013062f, -0.012657f, -0.011934f};
|
||||
#endif
|
||||
|
||||
RunBitexactnessTest(48000, 1, NoiseSuppression::Level::kLow,
|
||||
kSpeechProbabilityReference, kNoiseEstimateReference,
|
||||
kOutputReference);
|
||||
}
|
||||
|
||||
TEST(LegacyNoiseSuppresionBitExactnessTest, Stereo16kHzLow) {
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {9992.127930f, 12689.569336f,
|
||||
11589.296875f};
|
||||
const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f,
|
||||
-0.002441f, 0.000855f, -0.003204f};
|
||||
#elif defined(WEBRTC_ARCH_ARM)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {10321.353516f, 12133.852539f,
|
||||
10923.060547f};
|
||||
const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f,
|
||||
-0.002472f, 0.000916f, -0.003235f};
|
||||
#else
|
||||
const float kSpeechProbabilityReference = 0.67285913f;
|
||||
const float kNoiseEstimateReference[] = {9753.257812f, 11515.603516f,
|
||||
10503.309570f};
|
||||
const float kOutputReference[] = {-0.011459f, -0.008110f, -0.012728f,
|
||||
-0.002399f, 0.001018f, -0.003189f};
|
||||
#endif
|
||||
|
||||
RunBitexactnessTest(16000, 2, NoiseSuppression::Level::kLow,
|
||||
kSpeechProbabilityReference, kNoiseEstimateReference,
|
||||
kOutputReference);
|
||||
}
|
||||
|
||||
TEST(LegacyNoiseSuppresionBitExactnessTest, Mono16kHzModerate) {
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2057.085938f, 7601.055176f,
|
||||
19666.187500f};
|
||||
const float kOutputReference[] = {0.004669f, 0.005524f, 0.005432f};
|
||||
#elif defined(WEBRTC_ARCH_ARM)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2244.497803f, 6864.164062f,
|
||||
16726.523438f};
|
||||
const float kOutputReference[] = {0.004669f, 0.005615f, 0.005585f};
|
||||
#else
|
||||
const float kSpeechProbabilityReference = 0.70916927f;
|
||||
const float kNoiseEstimateReference[] = {2172.830566f, 6552.661133f,
|
||||
15624.025391f};
|
||||
const float kOutputReference[] = {0.004513f, 0.005590f, 0.005614f};
|
||||
#endif
|
||||
|
||||
RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kModerate,
|
||||
kSpeechProbabilityReference, kNoiseEstimateReference,
|
||||
kOutputReference);
|
||||
}
|
||||
|
||||
TEST(LegacyNoiseSuppresionBitExactnessTest, Mono16kHzHigh) {
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2095.148193f, 7698.553711f,
|
||||
19689.533203f};
|
||||
const float kOutputReference[] = {0.004639f, 0.005402f, 0.005310f};
|
||||
#elif defined(WEBRTC_ARCH_ARM)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2282.515625f, 6984.408203f,
|
||||
16920.960938f};
|
||||
const float kOutputReference[] = {0.004547f, 0.005432f, 0.005402f};
|
||||
#else
|
||||
const float kSpeechProbabilityReference = 0.70104003f;
|
||||
const float kNoiseEstimateReference[] = {2225.081055f, 6711.529785f,
|
||||
15785.949219};
|
||||
const float kOutputReference[] = {0.004394f, 0.005406f, 0.005416f};
|
||||
#endif
|
||||
|
||||
RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kHigh,
|
||||
kSpeechProbabilityReference, kNoiseEstimateReference,
|
||||
kOutputReference);
|
||||
}
|
||||
|
||||
TEST(LegacyNoiseSuppresionBitExactnessTest, Mono16kHzVeryHigh) {
|
||||
#if defined(WEBRTC_ARCH_ARM64)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2677.733398f, 6186.987305f,
|
||||
14365.744141f};
|
||||
const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f};
|
||||
#elif defined(WEBRTC_ARCH_ARM)
|
||||
const float kSpeechProbabilityReference = -4.0f;
|
||||
const float kNoiseEstimateReference[] = {2677.733398f, 6186.987305f,
|
||||
14365.744141f};
|
||||
const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f};
|
||||
#else
|
||||
const float kSpeechProbabilityReference = 0.70290041f;
|
||||
const float kNoiseEstimateReference[] = {2254.921875f, 6723.172852f,
|
||||
15770.559570f};
|
||||
const float kOutputReference[] = {0.004321f, 0.005247f, 0.005263f};
|
||||
#endif
|
||||
|
||||
RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kVeryHigh,
|
||||
kSpeechProbabilityReference, kNoiseEstimateReference,
|
||||
kOutputReference);
|
||||
}
|
||||
} // namespace webrtc
|
@ -1,71 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/legacy_ns/noise_suppression.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "modules/audio_processing/legacy_ns/defines.h"
|
||||
#include "modules/audio_processing/legacy_ns/ns_core.h"
|
||||
|
||||
NsHandle* WebRtcNs_Create() {
|
||||
NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC));
|
||||
self->initFlag = 0;
|
||||
return (NsHandle*)self;
|
||||
}
|
||||
|
||||
void WebRtcNs_Free(NsHandle* NS_inst) {
|
||||
free(NS_inst);
|
||||
}
|
||||
|
||||
int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) {
|
||||
return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs);
|
||||
}
|
||||
|
||||
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
|
||||
return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode);
|
||||
}
|
||||
|
||||
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
|
||||
WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe);
|
||||
}
|
||||
|
||||
void WebRtcNs_Process(NsHandle* NS_inst,
|
||||
const float* const* spframe,
|
||||
size_t num_bands,
|
||||
float* const* outframe) {
|
||||
WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands,
|
||||
outframe);
|
||||
}
|
||||
|
||||
float WebRtcNs_prior_speech_probability(NsHandle* handle) {
|
||||
NoiseSuppressionC* self = (NoiseSuppressionC*)handle;
|
||||
if (handle == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (self->initFlag == 0) {
|
||||
return -1;
|
||||
}
|
||||
return self->priorSpeechProb;
|
||||
}
|
||||
|
||||
const float* WebRtcNs_noise_estimate(const NsHandle* handle) {
|
||||
const NoiseSuppressionC* self = (const NoiseSuppressionC*)handle;
|
||||
if (handle == NULL || self->initFlag == 0) {
|
||||
return NULL;
|
||||
}
|
||||
return self->noise;
|
||||
}
|
||||
|
||||
size_t WebRtcNs_num_freq() {
|
||||
return HALF_ANAL_BLOCKL;
|
||||
}
|
@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_H_
|
||||
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct NsHandleT NsHandle;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This function creates an instance of the floating point Noise Suppression.
|
||||
*/
|
||||
NsHandle* WebRtcNs_Create(void);
|
||||
|
||||
/*
|
||||
* This function frees the dynamic memory of a specified noise suppression
|
||||
* instance.
|
||||
*
|
||||
* Input:
|
||||
* - NS_inst : Pointer to NS instance that should be freed
|
||||
*/
|
||||
void WebRtcNs_Free(NsHandle* NS_inst);
|
||||
|
||||
/*
|
||||
* This function initializes a NS instance and has to be called before any other
|
||||
* processing is made.
|
||||
*
|
||||
* Input:
|
||||
* - NS_inst : Instance that should be initialized
|
||||
* - fs : sampling frequency
|
||||
*
|
||||
* Output:
|
||||
* - NS_inst : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs);
|
||||
|
||||
/*
|
||||
* This changes the aggressiveness of the noise suppression method.
|
||||
*
|
||||
* Input:
|
||||
* - NS_inst : Noise suppression instance.
|
||||
* - mode : 0: Mild, 1: Medium , 2: Aggressive
|
||||
*
|
||||
* Output:
|
||||
* - NS_inst : Updated instance.
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
|
||||
|
||||
/*
|
||||
* This functions estimates the background noise for the inserted speech frame.
|
||||
* The input and output signals should always be 10ms (80 or 160 samples).
|
||||
*
|
||||
* Input
|
||||
* - NS_inst : Noise suppression instance.
|
||||
* - spframe : Pointer to speech frame buffer for L band
|
||||
*
|
||||
* Output:
|
||||
* - NS_inst : Updated NS instance
|
||||
*/
|
||||
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
|
||||
|
||||
/*
|
||||
* This functions does Noise Suppression for the inserted speech frame. The
|
||||
* input and output signals should always be 10ms (80 or 160 samples).
|
||||
*
|
||||
* Input
|
||||
* - NS_inst : Noise suppression instance.
|
||||
* - spframe : Pointer to speech frame buffer for each band
|
||||
* - num_bands : Number of bands
|
||||
*
|
||||
* Output:
|
||||
* - NS_inst : Updated NS instance
|
||||
* - outframe : Pointer to output frame for each band
|
||||
*/
|
||||
void WebRtcNs_Process(NsHandle* NS_inst,
|
||||
const float* const* spframe,
|
||||
size_t num_bands,
|
||||
float* const* outframe);
|
||||
|
||||
/* Returns the internally used prior speech probability of the current frame.
|
||||
* There is a frequency bin based one as well, with which this should not be
|
||||
* confused.
|
||||
*
|
||||
* Input
|
||||
* - handle : Noise suppression instance.
|
||||
*
|
||||
* Return value : Prior speech probability in interval [0.0, 1.0].
|
||||
* -1 - NULL pointer or uninitialized instance.
|
||||
*/
|
||||
float WebRtcNs_prior_speech_probability(NsHandle* handle);
|
||||
|
||||
/* Returns a pointer to the noise estimate per frequency bin. The number of
|
||||
* frequency bins can be provided using WebRtcNs_num_freq().
|
||||
*
|
||||
* Input
|
||||
* - handle : Noise suppression instance.
|
||||
*
|
||||
* Return value : Pointer to the noise estimate per frequency bin.
|
||||
* Returns NULL if the input is a NULL pointer or an
|
||||
* uninitialized instance.
|
||||
*/
|
||||
const float* WebRtcNs_noise_estimate(const NsHandle* handle);
|
||||
|
||||
/* Returns the number of frequency bins, which is the length of the noise
|
||||
* estimate for example.
|
||||
*
|
||||
* Return value : Number of frequency bins.
|
||||
*/
|
||||
size_t WebRtcNs_num_freq(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_H_
|
@ -1,60 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/legacy_ns/noise_suppression_x.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "common_audio/signal_processing/include/real_fft.h"
|
||||
#include "modules/audio_processing/legacy_ns/nsx_core.h"
|
||||
#include "modules/audio_processing/legacy_ns/nsx_defines.h"
|
||||
|
||||
NsxHandle* WebRtcNsx_Create() {
|
||||
NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC));
|
||||
self->real_fft = NULL;
|
||||
self->initFlag = 0;
|
||||
return (NsxHandle*)self;
|
||||
}
|
||||
|
||||
void WebRtcNsx_Free(NsxHandle* nsxInst) {
|
||||
WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft);
|
||||
free(nsxInst);
|
||||
}
|
||||
|
||||
int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) {
|
||||
return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs);
|
||||
}
|
||||
|
||||
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
|
||||
return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode);
|
||||
}
|
||||
|
||||
void WebRtcNsx_Process(NsxHandle* nsxInst,
|
||||
const int16_t* const* speechFrame,
|
||||
int num_bands,
|
||||
int16_t* const* outFrame) {
|
||||
WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame,
|
||||
num_bands, outFrame);
|
||||
}
|
||||
|
||||
const uint32_t* WebRtcNsx_noise_estimate(const NsxHandle* nsxInst,
|
||||
int* q_noise) {
|
||||
*q_noise = 11;
|
||||
const NoiseSuppressionFixedC* self = (const NoiseSuppressionFixedC*)nsxInst;
|
||||
if (nsxInst == NULL || self->initFlag == 0) {
|
||||
return NULL;
|
||||
}
|
||||
*q_noise += self->prevQNoise;
|
||||
return self->prevNoiseU32;
|
||||
}
|
||||
|
||||
size_t WebRtcNsx_num_freq() {
|
||||
return HALF_ANAL_BLOCKL;
|
||||
}
|
@ -1,112 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_X_H_
|
||||
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_X_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct NsxHandleT NsxHandle;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This function creates an instance of the fixed point Noise Suppression.
|
||||
*/
|
||||
NsxHandle* WebRtcNsx_Create(void);
|
||||
|
||||
/*
|
||||
* This function frees the dynamic memory of a specified Noise Suppression
|
||||
* instance.
|
||||
*
|
||||
* Input:
|
||||
* - nsxInst : Pointer to NS instance that should be freed
|
||||
*/
|
||||
void WebRtcNsx_Free(NsxHandle* nsxInst);
|
||||
|
||||
/*
|
||||
* This function initializes a NS instance
|
||||
*
|
||||
* Input:
|
||||
* - nsxInst : Instance that should be initialized
|
||||
* - fs : sampling frequency
|
||||
*
|
||||
* Output:
|
||||
* - nsxInst : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs);
|
||||
|
||||
/*
|
||||
* This changes the aggressiveness of the noise suppression method.
|
||||
*
|
||||
* Input:
|
||||
* - nsxInst : Instance that should be initialized
|
||||
* - mode : 0: Mild, 1: Medium , 2: Aggressive
|
||||
*
|
||||
* Output:
|
||||
* - nsxInst : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
|
||||
|
||||
/*
|
||||
* This functions does noise suppression for the inserted speech frame. The
|
||||
* input and output signals should always be 10ms (80 or 160 samples).
|
||||
*
|
||||
* Input
|
||||
* - nsxInst : NSx instance. Needs to be initiated before call.
|
||||
* - speechFrame : Pointer to speech frame buffer for each band
|
||||
* - num_bands : Number of bands
|
||||
*
|
||||
* Output:
|
||||
* - nsxInst : Updated NSx instance
|
||||
* - outFrame : Pointer to output frame for each band
|
||||
*/
|
||||
void WebRtcNsx_Process(NsxHandle* nsxInst,
|
||||
const int16_t* const* speechFrame,
|
||||
int num_bands,
|
||||
int16_t* const* outFrame);
|
||||
|
||||
/* Returns a pointer to the noise estimate per frequency bin. The number of
|
||||
* frequency bins can be provided using WebRtcNsx_num_freq().
|
||||
*
|
||||
* Input
|
||||
* - nsxInst : NSx instance. Needs to be initiated before call.
|
||||
* - q_noise : Q value of the noise estimate, which is the number of
|
||||
* bits that it needs to be right-shifted to be
|
||||
* normalized.
|
||||
*
|
||||
* Return value : Pointer to the noise estimate per frequency bin.
|
||||
* Returns NULL if the input is a NULL pointer or an
|
||||
* uninitialized instance.
|
||||
*/
|
||||
const uint32_t* WebRtcNsx_noise_estimate(const NsxHandle* nsxInst,
|
||||
int* q_noise);
|
||||
|
||||
/* Returns the number of frequency bins, which is the length of the noise
|
||||
* estimate for example.
|
||||
*
|
||||
* Return value : Number of frequency bins.
|
||||
*/
|
||||
size_t WebRtcNsx_num_freq(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_X_H_
|
File diff suppressed because it is too large
Load Diff
@ -1,188 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NS_CORE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NS_CORE_H_
|
||||
|
||||
#include "modules/audio_processing/legacy_ns/defines.h"
|
||||
|
||||
typedef struct NSParaExtract_ {
|
||||
// Bin size of histogram.
|
||||
float binSizeLrt;
|
||||
float binSizeSpecFlat;
|
||||
float binSizeSpecDiff;
|
||||
// Range of histogram over which LRT threshold is computed.
|
||||
float rangeAvgHistLrt;
|
||||
// Scale parameters: multiply dominant peaks of the histograms by scale factor
|
||||
// to obtain thresholds for prior model.
|
||||
float factor1ModelPars; // For LRT and spectral difference.
|
||||
float factor2ModelPars; // For spectral_flatness: used when noise is flatter
|
||||
// than speech.
|
||||
// Peak limit for spectral flatness (varies between 0 and 1).
|
||||
float thresPosSpecFlat;
|
||||
// Limit on spacing of two highest peaks in histogram: spacing determined by
|
||||
// bin size.
|
||||
float limitPeakSpacingSpecFlat;
|
||||
float limitPeakSpacingSpecDiff;
|
||||
// Limit on relevance of second peak.
|
||||
float limitPeakWeightsSpecFlat;
|
||||
float limitPeakWeightsSpecDiff;
|
||||
// Limit on fluctuation of LRT feature.
|
||||
float thresFluctLrt;
|
||||
// Limit on the max and min values for the feature thresholds.
|
||||
float maxLrt;
|
||||
float minLrt;
|
||||
float maxSpecFlat;
|
||||
float minSpecFlat;
|
||||
float maxSpecDiff;
|
||||
float minSpecDiff;
|
||||
// Criteria of weight of histogram peak to accept/reject feature.
|
||||
int thresWeightSpecFlat;
|
||||
int thresWeightSpecDiff;
|
||||
} NSParaExtract;
|
||||
|
||||
typedef struct NoiseSuppressionC_ {
|
||||
uint32_t fs;
|
||||
size_t blockLen;
|
||||
size_t windShift;
|
||||
size_t anaLen;
|
||||
size_t magnLen;
|
||||
int aggrMode;
|
||||
const float* window;
|
||||
float analyzeBuf[ANAL_BLOCKL_MAX];
|
||||
float dataBuf[ANAL_BLOCKL_MAX];
|
||||
float syntBuf[ANAL_BLOCKL_MAX];
|
||||
|
||||
int initFlag;
|
||||
// Parameters for quantile noise estimation.
|
||||
float density[SIMULT * HALF_ANAL_BLOCKL];
|
||||
float lquantile[SIMULT * HALF_ANAL_BLOCKL];
|
||||
float quantile[HALF_ANAL_BLOCKL];
|
||||
int counter[SIMULT];
|
||||
int updates;
|
||||
// Parameters for Wiener filter.
|
||||
float smooth[HALF_ANAL_BLOCKL];
|
||||
float overdrive;
|
||||
float denoiseBound;
|
||||
int gainmap;
|
||||
// FFT work arrays.
|
||||
size_t ip[IP_LENGTH];
|
||||
float wfft[W_LENGTH];
|
||||
|
||||
// Parameters for new method: some not needed, will reduce/cleanup later.
|
||||
int32_t blockInd; // Frame index counter.
|
||||
int modelUpdatePars[4]; // Parameters for updating or estimating.
|
||||
// Thresholds/weights for prior model.
|
||||
float priorModelPars[7]; // Parameters for prior model.
|
||||
float noise[HALF_ANAL_BLOCKL]; // Noise spectrum from current frame.
|
||||
float noisePrev[HALF_ANAL_BLOCKL]; // Noise spectrum from previous frame.
|
||||
// Magnitude spectrum of previous analyze frame.
|
||||
float magnPrevAnalyze[HALF_ANAL_BLOCKL];
|
||||
// Magnitude spectrum of previous process frame.
|
||||
float magnPrevProcess[HALF_ANAL_BLOCKL];
|
||||
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; // Log LRT factor with time-smoothing.
|
||||
float priorSpeechProb; // Prior speech/noise probability.
|
||||
float featureData[7];
|
||||
// Conservative noise spectrum estimate.
|
||||
float magnAvgPause[HALF_ANAL_BLOCKL];
|
||||
float signalEnergy; // Energy of |magn|.
|
||||
float sumMagn;
|
||||
float whiteNoiseLevel; // Initial noise estimate.
|
||||
float initMagnEst[HALF_ANAL_BLOCKL]; // Initial magnitude spectrum estimate.
|
||||
float pinkNoiseNumerator; // Pink noise parameter: numerator.
|
||||
float pinkNoiseExp; // Pink noise parameter: power of frequencies.
|
||||
float parametricNoise[HALF_ANAL_BLOCKL];
|
||||
// Parameters for feature extraction.
|
||||
NSParaExtract featureExtractionParams;
|
||||
// Histograms for parameter estimation.
|
||||
int histLrt[HIST_PAR_EST];
|
||||
int histSpecFlat[HIST_PAR_EST];
|
||||
int histSpecDiff[HIST_PAR_EST];
|
||||
// Quantities for high band estimate.
|
||||
float speechProb[HALF_ANAL_BLOCKL]; // Final speech/noise prob: prior + LRT.
|
||||
// Buffering data for HB.
|
||||
float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
|
||||
} NoiseSuppressionC;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcNs_InitCore(...)
|
||||
*
|
||||
* This function initializes a noise suppression instance
|
||||
*
|
||||
* Input:
|
||||
* - self : Instance that should be initialized
|
||||
* - fs : Sampling frequency
|
||||
*
|
||||
* Output:
|
||||
* - self : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcNs_set_policy_core(...)
|
||||
*
|
||||
* This changes the aggressiveness of the noise suppression method.
|
||||
*
|
||||
* Input:
|
||||
* - self : Instance that should be initialized
|
||||
* - mode : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB)
|
||||
*
|
||||
* Output:
|
||||
* - self : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcNs_AnalyzeCore
|
||||
*
|
||||
* Estimate the background noise.
|
||||
*
|
||||
* Input:
|
||||
* - self : Instance that should be initialized
|
||||
* - speechFrame : Input speech frame for lower band
|
||||
*
|
||||
* Output:
|
||||
* - self : Updated instance
|
||||
*/
|
||||
void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcNs_ProcessCore
|
||||
*
|
||||
* Do noise suppression.
|
||||
*
|
||||
* Input:
|
||||
* - self : Instance that should be initialized
|
||||
* - inFrame : Input speech frame for each band
|
||||
* - num_bands : Number of bands
|
||||
*
|
||||
* Output:
|
||||
* - self : Updated instance
|
||||
* - outFrame : Output speech frame for each band
|
||||
*/
|
||||
void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
|
||||
const float* const* inFrame,
|
||||
size_t num_bands,
|
||||
float* const* outFrame);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_NS_CORE_H_
|
File diff suppressed because it is too large
Load Diff
@ -1,261 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_CORE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_CORE_H_
|
||||
|
||||
#ifdef NS_FILEDEBUG
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "modules/audio_processing/legacy_ns/nsx_defines.h"
|
||||
|
||||
typedef struct NoiseSuppressionFixedC_ {
|
||||
uint32_t fs;
|
||||
|
||||
const int16_t* window;
|
||||
int16_t analysisBuffer[ANAL_BLOCKL_MAX];
|
||||
int16_t synthesisBuffer[ANAL_BLOCKL_MAX];
|
||||
uint16_t noiseSupFilter[HALF_ANAL_BLOCKL];
|
||||
uint16_t overdrive; /* Q8 */
|
||||
uint16_t denoiseBound; /* Q14 */
|
||||
const int16_t* factor2Table;
|
||||
int16_t noiseEstLogQuantile[SIMULT * HALF_ANAL_BLOCKL];
|
||||
int16_t noiseEstDensity[SIMULT * HALF_ANAL_BLOCKL];
|
||||
int16_t noiseEstCounter[SIMULT];
|
||||
int16_t noiseEstQuantile[HALF_ANAL_BLOCKL];
|
||||
|
||||
size_t anaLen;
|
||||
size_t anaLen2;
|
||||
size_t magnLen;
|
||||
int aggrMode;
|
||||
int stages;
|
||||
int initFlag;
|
||||
int gainMap;
|
||||
|
||||
int32_t maxLrt;
|
||||
int32_t minLrt;
|
||||
// Log LRT factor with time-smoothing in Q8.
|
||||
int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL];
|
||||
int32_t featureLogLrt;
|
||||
int32_t thresholdLogLrt;
|
||||
int16_t weightLogLrt;
|
||||
|
||||
uint32_t featureSpecDiff;
|
||||
uint32_t thresholdSpecDiff;
|
||||
int16_t weightSpecDiff;
|
||||
|
||||
uint32_t featureSpecFlat;
|
||||
uint32_t thresholdSpecFlat;
|
||||
int16_t weightSpecFlat;
|
||||
|
||||
// Conservative estimate of noise spectrum.
|
||||
int32_t avgMagnPause[HALF_ANAL_BLOCKL];
|
||||
uint32_t magnEnergy;
|
||||
uint32_t sumMagn;
|
||||
uint32_t curAvgMagnEnergy;
|
||||
uint32_t timeAvgMagnEnergy;
|
||||
uint32_t timeAvgMagnEnergyTmp;
|
||||
|
||||
uint32_t whiteNoiseLevel; // Initial noise estimate.
|
||||
// Initial magnitude spectrum estimate.
|
||||
uint32_t initMagnEst[HALF_ANAL_BLOCKL];
|
||||
// Pink noise parameters:
|
||||
int32_t pinkNoiseNumerator; // Numerator.
|
||||
int32_t pinkNoiseExp; // Power of freq.
|
||||
int minNorm; // Smallest normalization factor.
|
||||
int zeroInputSignal; // Zero input signal flag.
|
||||
|
||||
// Noise spectrum from previous frame.
|
||||
uint32_t prevNoiseU32[HALF_ANAL_BLOCKL];
|
||||
// Magnitude spectrum from previous frame.
|
||||
uint16_t prevMagnU16[HALF_ANAL_BLOCKL];
|
||||
// Prior speech/noise probability in Q14.
|
||||
int16_t priorNonSpeechProb;
|
||||
|
||||
int blockIndex; // Frame index counter.
|
||||
// Parameter for updating or estimating thresholds/weights for prior model.
|
||||
int modelUpdate;
|
||||
int cntThresUpdate;
|
||||
|
||||
// Histograms for parameter estimation.
|
||||
int16_t histLrt[HIST_PAR_EST];
|
||||
int16_t histSpecFlat[HIST_PAR_EST];
|
||||
int16_t histSpecDiff[HIST_PAR_EST];
|
||||
|
||||
// Quantities for high band estimate.
|
||||
int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
|
||||
|
||||
int qNoise;
|
||||
int prevQNoise;
|
||||
int prevQMagn;
|
||||
size_t blockLen10ms;
|
||||
|
||||
int16_t real[ANAL_BLOCKL_MAX];
|
||||
int16_t imag[ANAL_BLOCKL_MAX];
|
||||
int32_t energyIn;
|
||||
int scaleEnergyIn;
|
||||
int normData;
|
||||
|
||||
struct RealFFT* real_fft;
|
||||
} NoiseSuppressionFixedC;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcNsx_InitCore(...)
|
||||
*
|
||||
* This function initializes a noise suppression instance
|
||||
*
|
||||
* Input:
|
||||
* - inst : Instance that should be initialized
|
||||
* - fs : Sampling frequency
|
||||
*
|
||||
* Output:
|
||||
* - inst : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcNsx_set_policy_core(...)
|
||||
*
|
||||
* This changes the aggressiveness of the noise suppression method.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Instance that should be initialized
|
||||
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
|
||||
*
|
||||
* Output:
|
||||
* - inst : Initialized instance
|
||||
*
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode);
|
||||
|
||||
/****************************************************************************
|
||||
* WebRtcNsx_ProcessCore
|
||||
*
|
||||
* Do noise suppression.
|
||||
*
|
||||
* Input:
|
||||
* - inst : Instance that should be initialized
|
||||
* - inFrame : Input speech frame for each band
|
||||
* - num_bands : Number of bands
|
||||
*
|
||||
* Output:
|
||||
* - inst : Updated instance
|
||||
* - outFrame : Output speech frame for each band
|
||||
*/
|
||||
void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
|
||||
const int16_t* const* inFrame,
|
||||
int num_bands,
|
||||
int16_t* const* outFrame);
|
||||
|
||||
/****************************************************************************
|
||||
* Some function pointers, for internal functions shared by ARM NEON and
|
||||
* generic C code.
|
||||
*/
|
||||
// Noise Estimation.
|
||||
typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst,
|
||||
uint16_t* magn,
|
||||
uint32_t* noise,
|
||||
int16_t* q_noise);
|
||||
extern NoiseEstimation WebRtcNsx_NoiseEstimation;
|
||||
|
||||
// Filter the data in the frequency domain, and create spectrum.
|
||||
typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst,
|
||||
int16_t* freq_buff);
|
||||
extern PrepareSpectrum WebRtcNsx_PrepareSpectrum;
|
||||
|
||||
// For the noise supression process, synthesis, read out fully processed
|
||||
// segment, and update synthesis buffer.
|
||||
typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst,
|
||||
int16_t* out_frame,
|
||||
int16_t gain_factor);
|
||||
extern SynthesisUpdate WebRtcNsx_SynthesisUpdate;
|
||||
|
||||
// Update analysis buffer for lower band, and window data before FFT.
|
||||
typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst,
|
||||
int16_t* out,
|
||||
int16_t* new_speech);
|
||||
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
|
||||
|
||||
// Denormalize the real-valued signal |in|, the output from inverse FFT.
|
||||
typedef void (*Denormalize)(NoiseSuppressionFixedC* inst,
|
||||
int16_t* in,
|
||||
int factor);
|
||||
extern Denormalize WebRtcNsx_Denormalize;
|
||||
|
||||
// Normalize the real-valued signal |in|, the input to forward FFT.
|
||||
typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst,
|
||||
const int16_t* in,
|
||||
int16_t* out);
|
||||
extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
|
||||
|
||||
// Compute speech/noise probability.
|
||||
// Intended to be private.
|
||||
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
|
||||
uint16_t* nonSpeechProbFinal,
|
||||
uint32_t* priorLocSnr,
|
||||
uint32_t* postLocSnr);
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
// For the above function pointers, functions for generic platforms are declared
|
||||
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
|
||||
// are declared below and defined in file nsx_core_neon.c.
|
||||
void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
|
||||
uint16_t* magn,
|
||||
uint32_t* noise,
|
||||
int16_t* q_noise);
|
||||
void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
|
||||
int16_t* out_frame,
|
||||
int16_t gain_factor);
|
||||
void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
|
||||
int16_t* out,
|
||||
int16_t* new_speech);
|
||||
void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
|
||||
int16_t* freq_buff);
|
||||
#endif
|
||||
|
||||
#if defined(MIPS32_LE)
|
||||
// For the above function pointers, functions for generic platforms are declared
|
||||
// and defined as static in file nsx_core.c, while those for MIPS platforms
|
||||
// are declared below and defined in file nsx_core_mips.c.
|
||||
void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
|
||||
int16_t* out_frame,
|
||||
int16_t gain_factor);
|
||||
void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
|
||||
int16_t* out,
|
||||
int16_t* new_speech);
|
||||
void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
|
||||
int16_t* freq_buff);
|
||||
void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
|
||||
const int16_t* in,
|
||||
int16_t* out);
|
||||
#if defined(MIPS_DSP_R1_LE)
|
||||
void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
|
||||
int16_t* in,
|
||||
int factor);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_CORE_H_
|
@ -1,259 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "modules/audio_processing/legacy_ns/noise_suppression_x.h"
|
||||
#include "modules/audio_processing/legacy_ns/nsx_core.h"
|
||||
#include "modules/audio_processing/legacy_ns/nsx_defines.h"
|
||||
|
||||
static const int16_t kIndicatorTable[17] = {
|
||||
0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
|
||||
7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
|
||||
};
|
||||
|
||||
// Compute speech/noise probability
|
||||
// speech/noise probability is returned in: probSpeechFinal
|
||||
//snrLocPrior is the prior SNR for each frequency (in Q11)
|
||||
//snrLocPost is the post SNR for each frequency (in Q11)
|
||||
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
|
||||
uint16_t* nonSpeechProbFinal,
|
||||
uint32_t* priorLocSnr,
|
||||
uint32_t* postLocSnr) {
|
||||
uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
|
||||
int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
|
||||
int32_t frac32, logTmp;
|
||||
int32_t logLrtTimeAvgKsumFX;
|
||||
int16_t indPriorFX16;
|
||||
int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
|
||||
size_t i;
|
||||
int normTmp, normTmp2, nShifts;
|
||||
|
||||
// compute feature based on average LR factor
|
||||
// this is the average over all frequencies of the smooth log LRT
|
||||
logLrtTimeAvgKsumFX = 0;
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
|
||||
normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
|
||||
num = postLocSnr[i] << normTmp; // Q(11+normTmp)
|
||||
if (normTmp > 10) {
|
||||
den = priorLocSnr[i] << (normTmp - 11); // Q(normTmp)
|
||||
} else {
|
||||
den = priorLocSnr[i] >> (11 - normTmp); // Q(normTmp)
|
||||
}
|
||||
if (den > 0) {
|
||||
besselTmpFX32 -= num / den; // Q11
|
||||
} else {
|
||||
besselTmpFX32 = 0;
|
||||
}
|
||||
|
||||
// inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
|
||||
// - inst->logLrtTimeAvg[i]);
|
||||
// Here, LRT_TAVG = 0.5
|
||||
zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
|
||||
frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
|
||||
tmp32 = (frac32 * frac32 * -43) >> 19;
|
||||
tmp32 += ((int16_t)frac32 * 5412) >> 12;
|
||||
frac32 = tmp32 + 37;
|
||||
// tmp32 = log2(priorLocSnr[i])
|
||||
tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12
|
||||
logTmp = (tmp32 * 178) >> 8; // log2(priorLocSnr[i])*log(2)
|
||||
// tmp32no1 = LRT_TAVG * (log(snrLocPrior) + inst->logLrtTimeAvg[i]) in Q12.
|
||||
tmp32no1 = (logTmp + inst->logLrtTimeAvgW32[i]) / 2;
|
||||
inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12
|
||||
|
||||
logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12
|
||||
}
|
||||
inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
|
||||
(inst->stages + 11);
|
||||
|
||||
// done with computation of LR factor
|
||||
|
||||
//
|
||||
//compute the indicator functions
|
||||
//
|
||||
|
||||
// average LRT feature
|
||||
// FLOAT code
|
||||
// indicator0 = 0.5 * (tanh(widthPrior *
|
||||
// (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
|
||||
tmpIndFX = 16384; // Q14(1.0)
|
||||
tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
|
||||
nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
|
||||
//use larger width in tanh map for pause regions
|
||||
if (tmp32no1 < 0) {
|
||||
tmpIndFX = 0;
|
||||
tmp32no1 = -tmp32no1;
|
||||
//widthPrior = widthPrior * 2.0;
|
||||
nShifts++;
|
||||
}
|
||||
tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
|
||||
// compute indicator function: sigmoid map
|
||||
if (tmp32no1 < (16 << 14) && tmp32no1 >= 0) {
|
||||
tableIndex = (int16_t)(tmp32no1 >> 14);
|
||||
tmp16no2 = kIndicatorTable[tableIndex];
|
||||
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
|
||||
frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
|
||||
tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
|
||||
if (tmpIndFX == 0) {
|
||||
tmpIndFX = 8192 - tmp16no2; // Q14
|
||||
} else {
|
||||
tmpIndFX = 8192 + tmp16no2; // Q14
|
||||
}
|
||||
}
|
||||
indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14
|
||||
|
||||
//spectral flatness feature
|
||||
if (inst->weightSpecFlat) {
|
||||
tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
|
||||
tmpIndFX = 16384; // Q14(1.0)
|
||||
//use larger width in tanh map for pause regions
|
||||
tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
|
||||
nShifts = 4;
|
||||
if (inst->thresholdSpecFlat < tmpU32no1) {
|
||||
tmpIndFX = 0;
|
||||
tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
|
||||
//widthPrior = widthPrior * 2.0;
|
||||
nShifts++;
|
||||
}
|
||||
tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); // Q14
|
||||
// compute indicator function: sigmoid map
|
||||
// FLOAT code
|
||||
// indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
|
||||
// (threshPrior1 - tmpFloat1)) + 1.0);
|
||||
if (tmpU32no1 < (16 << 14)) {
|
||||
tableIndex = (int16_t)(tmpU32no1 >> 14);
|
||||
tmp16no2 = kIndicatorTable[tableIndex];
|
||||
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
|
||||
frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
|
||||
tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
|
||||
if (tmpIndFX) {
|
||||
tmpIndFX = 8192 + tmp16no2; // Q14
|
||||
} else {
|
||||
tmpIndFX = 8192 - tmp16no2; // Q14
|
||||
}
|
||||
}
|
||||
indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14
|
||||
}
|
||||
|
||||
//for template spectral-difference
|
||||
if (inst->weightSpecDiff) {
|
||||
tmpU32no1 = 0;
|
||||
if (inst->featureSpecDiff) {
|
||||
normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
|
||||
WebRtcSpl_NormU32(inst->featureSpecDiff));
|
||||
RTC_DCHECK_GE(normTmp, 0);
|
||||
tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages)
|
||||
tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
|
||||
if (tmpU32no2 > 0) {
|
||||
// Q(20 - inst->stages)
|
||||
tmpU32no1 /= tmpU32no2;
|
||||
} else {
|
||||
tmpU32no1 = (uint32_t)(0x7fffffff);
|
||||
}
|
||||
}
|
||||
tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
|
||||
tmpU32no2 = tmpU32no1 - tmpU32no3;
|
||||
nShifts = 1;
|
||||
tmpIndFX = 16384; // Q14(1.0)
|
||||
//use larger width in tanh map for pause regions
|
||||
if (tmpU32no2 & 0x80000000) {
|
||||
tmpIndFX = 0;
|
||||
tmpU32no2 = tmpU32no3 - tmpU32no1;
|
||||
//widthPrior = widthPrior * 2.0;
|
||||
nShifts--;
|
||||
}
|
||||
tmpU32no1 = tmpU32no2 >> nShifts;
|
||||
// compute indicator function: sigmoid map
|
||||
/* FLOAT code
|
||||
indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
|
||||
*/
|
||||
if (tmpU32no1 < (16 << 14)) {
|
||||
tableIndex = (int16_t)(tmpU32no1 >> 14);
|
||||
tmp16no2 = kIndicatorTable[tableIndex];
|
||||
tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
|
||||
frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
|
||||
tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
tmp16no1, frac, 14);
|
||||
if (tmpIndFX) {
|
||||
tmpIndFX = 8192 + tmp16no2;
|
||||
} else {
|
||||
tmpIndFX = 8192 - tmp16no2;
|
||||
}
|
||||
}
|
||||
indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14
|
||||
}
|
||||
|
||||
//combine the indicator function with the feature weights
|
||||
// FLOAT code
|
||||
// indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
|
||||
// indicator1 + weightIndPrior2 * indicator2);
|
||||
indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
|
||||
// done with computing indicator function
|
||||
|
||||
//compute the prior probability
|
||||
// FLOAT code
|
||||
// inst->priorNonSpeechProb += PRIOR_UPDATE *
|
||||
// (indPriorNonSpeech - inst->priorNonSpeechProb);
|
||||
tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
|
||||
inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
|
||||
|
||||
//final speech probability: combine prior model with LR factor:
|
||||
|
||||
memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
|
||||
|
||||
if (inst->priorNonSpeechProb > 0) {
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
// FLOAT code
|
||||
// invLrt = exp(inst->logLrtTimeAvg[i]);
|
||||
// invLrt = inst->priorSpeechProb * invLrt;
|
||||
// nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
|
||||
// (1.0 - inst->priorSpeechProb + invLrt);
|
||||
// invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
|
||||
// nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
|
||||
// (inst->priorNonSpeechProb + invLrt);
|
||||
if (inst->logLrtTimeAvgW32[i] < 65300) {
|
||||
tmp32no1 = (inst->logLrtTimeAvgW32[i] * 23637) >> 14; // Q12
|
||||
intPart = (int16_t)(tmp32no1 >> 12);
|
||||
if (intPart < -8) {
|
||||
intPart = -8;
|
||||
}
|
||||
frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12
|
||||
|
||||
// Quadratic approximation of 2^frac
|
||||
tmp32no2 = (frac * frac * 44) >> 19; // Q12.
|
||||
tmp32no2 += (frac * 84) >> 7; // Q12
|
||||
invLrtFX = (1 << (8 + intPart)) +
|
||||
WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8
|
||||
|
||||
normTmp = WebRtcSpl_NormW32(invLrtFX);
|
||||
normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
|
||||
if (normTmp + normTmp2 >= 7) {
|
||||
if (normTmp + normTmp2 < 15) {
|
||||
invLrtFX >>= 15 - normTmp2 - normTmp;
|
||||
// Q(normTmp+normTmp2-7)
|
||||
tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
|
||||
// Q(normTmp+normTmp2+7)
|
||||
invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
|
||||
// Q14
|
||||
} else {
|
||||
tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
|
||||
// Q22
|
||||
invLrtFX = tmp32no1 >> 8; // Q14.
|
||||
}
|
||||
|
||||
tmp32no1 = (int32_t)inst->priorNonSpeechProb << 8; // Q22
|
||||
|
||||
nonSpeechProbFinal[i] = tmp32no1 /
|
||||
(inst->priorNonSpeechProb + invLrtFX); // Q8
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,606 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/legacy_ns/nsx_core.h"
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
// Constants to compensate for shifting signal log(2^shifts).
|
||||
const int16_t WebRtcNsx_kLogTable[9] = {
|
||||
0, 177, 355, 532, 710, 887, 1065, 1242, 1420
|
||||
};
|
||||
|
||||
const int16_t WebRtcNsx_kCounterDiv[201] = {
|
||||
32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
|
||||
2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
|
||||
1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
|
||||
819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
|
||||
596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
|
||||
468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
|
||||
386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
|
||||
328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
|
||||
285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
|
||||
252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
|
||||
226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
|
||||
205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
|
||||
187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
|
||||
172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
|
||||
};
|
||||
|
||||
const int16_t WebRtcNsx_kLogTableFrac[256] = {
|
||||
0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
|
||||
22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
|
||||
44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
|
||||
63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
|
||||
82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
|
||||
100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
|
||||
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
|
||||
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
|
||||
147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
|
||||
161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
|
||||
175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
|
||||
188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
|
||||
201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
|
||||
213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
|
||||
225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
|
||||
237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
|
||||
248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
|
||||
};
|
||||
|
||||
// Update the noise estimation information.
|
||||
static void UpdateNoiseEstimateNeon(NoiseSuppressionFixedC* inst, int offset) {
|
||||
const int16_t kExp2Const = 11819; // Q13
|
||||
int16_t* ptr_noiseEstLogQuantile = NULL;
|
||||
int16_t* ptr_noiseEstQuantile = NULL;
|
||||
int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const);
|
||||
int32x4_t twentyOne32x4 = vdupq_n_s32(21);
|
||||
int32x4_t constA32x4 = vdupq_n_s32(0x1fffff);
|
||||
int32x4_t constB32x4 = vdupq_n_s32(0x200000);
|
||||
|
||||
int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
|
||||
inst->magnLen);
|
||||
|
||||
// Guarantee a Q-domain as high as possible and still fit in int16
|
||||
inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const,
|
||||
tmp16,
|
||||
21);
|
||||
|
||||
int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise);
|
||||
|
||||
for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset],
|
||||
ptr_noiseEstQuantile = &inst->noiseEstQuantile[0];
|
||||
ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3];
|
||||
ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) {
|
||||
|
||||
// tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
|
||||
int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile);
|
||||
int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4);
|
||||
|
||||
// tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
|
||||
int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4);
|
||||
v32x4A = vorrq_s32(v32x4A, constB32x4);
|
||||
|
||||
// tmp16 = (int16_t)(tmp32no2 >> 21);
|
||||
v32x4B = vshrq_n_s32(v32x4B, 21);
|
||||
|
||||
// tmp16 -= 21;// shift 21 to get result in Q0
|
||||
v32x4B = vsubq_s32(v32x4B, twentyOne32x4);
|
||||
|
||||
// tmp16 += (int16_t) inst->qNoise;
|
||||
// shift to get result in Q(qNoise)
|
||||
v32x4B = vaddq_s32(v32x4B, qNoise32x4);
|
||||
|
||||
// if (tmp16 < 0) {
|
||||
// tmp32no1 >>= -tmp16;
|
||||
// } else {
|
||||
// tmp32no1 <<= tmp16;
|
||||
// }
|
||||
v32x4B = vshlq_s32(v32x4A, v32x4B);
|
||||
|
||||
// tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1);
|
||||
v16x4 = vqmovn_s32(v32x4B);
|
||||
|
||||
//inst->noiseEstQuantile[i] = tmp16;
|
||||
vst1_s16(ptr_noiseEstQuantile, v16x4);
|
||||
}
|
||||
|
||||
// Last iteration:
|
||||
|
||||
// inst->quantile[i]=exp(inst->lquantile[offset+i]);
|
||||
// in Q21
|
||||
int32_t tmp32no2 = kExp2Const * *ptr_noiseEstLogQuantile;
|
||||
int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
|
||||
|
||||
tmp16 = (int16_t)(tmp32no2 >> 21);
|
||||
tmp16 -= 21;// shift 21 to get result in Q0
|
||||
tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
|
||||
if (tmp16 < 0) {
|
||||
tmp32no1 >>= -tmp16;
|
||||
} else {
|
||||
tmp32no1 <<= tmp16;
|
||||
}
|
||||
*ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1);
|
||||
}
|
||||
|
||||
// Noise Estimation
|
||||
void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
|
||||
uint16_t* magn,
|
||||
uint32_t* noise,
|
||||
int16_t* q_noise) {
|
||||
int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
|
||||
int16_t countProd, delta, zeros, frac;
|
||||
int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
|
||||
const int16_t log2_const = 22713;
|
||||
const int16_t width_factor = 21845;
|
||||
|
||||
size_t i, s, offset;
|
||||
|
||||
tabind = inst->stages - inst->normData;
|
||||
RTC_DCHECK_LT(tabind, 9);
|
||||
RTC_DCHECK_GT(tabind, -9);
|
||||
if (tabind < 0) {
|
||||
logval = -WebRtcNsx_kLogTable[-tabind];
|
||||
} else {
|
||||
logval = WebRtcNsx_kLogTable[tabind];
|
||||
}
|
||||
|
||||
int16x8_t logval_16x8 = vdupq_n_s16(logval);
|
||||
|
||||
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
|
||||
// magn is in Q(-stages), and the real lmagn values are:
|
||||
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
|
||||
// lmagn in Q8
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
if (magn[i]) {
|
||||
zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
|
||||
frac = (int16_t)((((uint32_t)magn[i] << zeros)
|
||||
& 0x7FFFFFFF) >> 23);
|
||||
RTC_DCHECK_LT(frac, 256);
|
||||
// log2(magn(i))
|
||||
log2 = (int16_t)(((31 - zeros) << 8)
|
||||
+ WebRtcNsx_kLogTableFrac[frac]);
|
||||
// log2(magn(i))*log(2)
|
||||
lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
|
||||
// + log(2^stages)
|
||||
lmagn[i] += logval;
|
||||
} else {
|
||||
lmagn[i] = logval;
|
||||
}
|
||||
}
|
||||
|
||||
int16x4_t Q3_16x4 = vdup_n_s16(3);
|
||||
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
|
||||
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor);
|
||||
|
||||
int16_t factor = FACTOR_Q7;
|
||||
if (inst->blockIndex < END_STARTUP_LONG)
|
||||
factor = FACTOR_Q7_STARTUP;
|
||||
|
||||
// Loop over simultaneous estimates
|
||||
for (s = 0; s < SIMULT; s++) {
|
||||
offset = s * inst->magnLen;
|
||||
|
||||
// Get counter values from state
|
||||
counter = inst->noiseEstCounter[s];
|
||||
RTC_DCHECK_LT(counter, 201);
|
||||
countDiv = WebRtcNsx_kCounterDiv[counter];
|
||||
countProd = (int16_t)(counter * countDiv);
|
||||
|
||||
// quant_est(...)
|
||||
int16_t deltaBuff[8];
|
||||
int16x4_t tmp16x4_0;
|
||||
int16x4_t tmp16x4_1;
|
||||
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
|
||||
int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
|
||||
int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
|
||||
int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
|
||||
int16x8_t tmp16x8_1;
|
||||
int16x8_t tmp16x8_2;
|
||||
int16x8_t tmp16x8_3;
|
||||
uint16x8_t tmp16x8_4;
|
||||
int32x4_t tmp32x4;
|
||||
|
||||
for (i = 0; i + 7 < inst->magnLen; i += 8) {
|
||||
// Compute delta.
|
||||
// Smaller step size during startup. This prevents from using
|
||||
// unrealistic values causing overflow.
|
||||
tmp16x8_0 = vdupq_n_s16(factor);
|
||||
vst1q_s16(deltaBuff, tmp16x8_0);
|
||||
|
||||
int j;
|
||||
for (j = 0; j < 8; j++) {
|
||||
if (inst->noiseEstDensity[offset + i + j] > 512) {
|
||||
// Get values for deltaBuff by shifting intead of dividing.
|
||||
int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]);
|
||||
deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor));
|
||||
}
|
||||
}
|
||||
|
||||
// Update log quantile estimate
|
||||
|
||||
// tmp16 = (int16_t)((delta * countDiv) >> 14);
|
||||
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
|
||||
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
|
||||
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
|
||||
tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
|
||||
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
|
||||
|
||||
// prepare for the "if" branch
|
||||
// tmp16 += 2;
|
||||
// tmp16_1 = (Word16)(tmp16>>2);
|
||||
tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
|
||||
|
||||
// inst->noiseEstLogQuantile[offset+i] + tmp16_1;
|
||||
tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
|
||||
tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
|
||||
|
||||
// Prepare for the "else" branch
|
||||
// tmp16 += 1;
|
||||
// tmp16_1 = (Word16)(tmp16>>1);
|
||||
tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
|
||||
|
||||
// tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
|
||||
tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
|
||||
tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
|
||||
|
||||
// tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
|
||||
tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
|
||||
tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
|
||||
|
||||
// inst->noiseEstLogQuantile[offset + i] - tmp16_2;
|
||||
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
|
||||
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
|
||||
|
||||
// logval is the smallest fixed point representation we can have. Values
|
||||
// below that will correspond to values in the interval [0, 1], which
|
||||
// can't possibly occur.
|
||||
tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
|
||||
|
||||
// Do the if-else branches:
|
||||
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
|
||||
tmp16x8_4 = vcgtq_s16(tmp16x8_3, tmp16x8_2);
|
||||
tmp16x8_2 = vbslq_s16(tmp16x8_4, tmp16x8_1, tmp16x8_0);
|
||||
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
|
||||
|
||||
// Update density estimate
|
||||
// tmp16_1 + tmp16_2
|
||||
tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
|
||||
tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
|
||||
tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
|
||||
|
||||
// lmagn[i] - inst->noiseEstLogQuantile[offset + i]
|
||||
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
|
||||
tmp16x8_3 = vabsq_s16(tmp16x8_3);
|
||||
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
|
||||
tmp16x8_1 = vbslq_s16(tmp16x8_4, tmp16x8_0, tmp16x8_1);
|
||||
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
|
||||
} // End loop over magnitude spectrum
|
||||
|
||||
// Last iteration over magnitude spectrum:
|
||||
// compute delta
|
||||
if (inst->noiseEstDensity[offset + i] > 512) {
|
||||
// Get values for deltaBuff by shifting intead of dividing.
|
||||
int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
|
||||
delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
|
||||
} else {
|
||||
delta = FACTOR_Q7;
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
// Smaller step size during startup. This prevents from using
|
||||
// unrealistic values causing overflow.
|
||||
delta = FACTOR_Q7_STARTUP;
|
||||
}
|
||||
}
|
||||
// update log quantile estimate
|
||||
tmp16 = (int16_t)((delta * countDiv) >> 14);
|
||||
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
|
||||
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
|
||||
// CounterDiv=1/(inst->counter[s]+1) in Q15
|
||||
tmp16 += 2;
|
||||
inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
|
||||
} else {
|
||||
tmp16 += 1;
|
||||
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
|
||||
// TODO(bjornv): investigate why we need to truncate twice.
|
||||
tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
|
||||
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
|
||||
if (inst->noiseEstLogQuantile[offset + i] < logval) {
|
||||
// logval is the smallest fixed point representation we can have.
|
||||
// Values below that will correspond to values in the interval
|
||||
// [0, 1], which can't possibly occur.
|
||||
inst->noiseEstLogQuantile[offset + i] = logval;
|
||||
}
|
||||
}
|
||||
|
||||
// update density estimate
|
||||
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
|
||||
< WIDTH_Q8) {
|
||||
tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
inst->noiseEstDensity[offset + i], countProd, 15);
|
||||
tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||
width_factor, countDiv, 15);
|
||||
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
|
||||
}
|
||||
|
||||
|
||||
if (counter >= END_STARTUP_LONG) {
|
||||
inst->noiseEstCounter[s] = 0;
|
||||
if (inst->blockIndex >= END_STARTUP_LONG) {
|
||||
UpdateNoiseEstimateNeon(inst, offset);
|
||||
}
|
||||
}
|
||||
inst->noiseEstCounter[s]++;
|
||||
|
||||
} // end loop over simultaneous estimates
|
||||
|
||||
// Sequentially update the noise during startup
|
||||
if (inst->blockIndex < END_STARTUP_LONG) {
|
||||
UpdateNoiseEstimateNeon(inst, offset);
|
||||
}
|
||||
|
||||
for (i = 0; i < inst->magnLen; i++) {
|
||||
noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
|
||||
}
|
||||
(*q_noise) = (int16_t)inst->qNoise;
|
||||
}
|
||||
|
||||
// Filter the data in the frequency domain, and create spectrum.
|
||||
void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
|
||||
int16_t* freq_buf) {
|
||||
RTC_DCHECK_EQ(1, inst->magnLen % 8);
|
||||
RTC_DCHECK_EQ(0, inst->anaLen2 % 16);
|
||||
|
||||
// (1) Filtering.
|
||||
|
||||
// Fixed point C code for the next block is as follows:
|
||||
// for (i = 0; i < inst->magnLen; i++) {
|
||||
// inst->real[i] = (int16_t)((inst->real[i] *
|
||||
// (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
|
||||
// inst->imag[i] = (int16_t)((inst->imag[i] *
|
||||
// (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
|
||||
// }
|
||||
|
||||
int16_t* preal = &inst->real[0];
|
||||
int16_t* pimag = &inst->imag[0];
|
||||
int16_t* pns_filter = (int16_t*)&inst->noiseSupFilter[0];
|
||||
int16_t* pimag_end = pimag + inst->magnLen - 4;
|
||||
|
||||
while (pimag < pimag_end) {
|
||||
int16x8_t real = vld1q_s16(preal);
|
||||
int16x8_t imag = vld1q_s16(pimag);
|
||||
int16x8_t ns_filter = vld1q_s16(pns_filter);
|
||||
|
||||
int32x4_t tmp_r_0 = vmull_s16(vget_low_s16(real), vget_low_s16(ns_filter));
|
||||
int32x4_t tmp_i_0 = vmull_s16(vget_low_s16(imag), vget_low_s16(ns_filter));
|
||||
int32x4_t tmp_r_1 = vmull_s16(vget_high_s16(real),
|
||||
vget_high_s16(ns_filter));
|
||||
int32x4_t tmp_i_1 = vmull_s16(vget_high_s16(imag),
|
||||
vget_high_s16(ns_filter));
|
||||
|
||||
int16x4_t result_r_0 = vshrn_n_s32(tmp_r_0, 14);
|
||||
int16x4_t result_i_0 = vshrn_n_s32(tmp_i_0, 14);
|
||||
int16x4_t result_r_1 = vshrn_n_s32(tmp_r_1, 14);
|
||||
int16x4_t result_i_1 = vshrn_n_s32(tmp_i_1, 14);
|
||||
|
||||
vst1q_s16(preal, vcombine_s16(result_r_0, result_r_1));
|
||||
vst1q_s16(pimag, vcombine_s16(result_i_0, result_i_1));
|
||||
preal += 8;
|
||||
pimag += 8;
|
||||
pns_filter += 8;
|
||||
}
|
||||
|
||||
// Filter the last element
|
||||
*preal = (int16_t)((*preal * *pns_filter) >> 14);
|
||||
*pimag = (int16_t)((*pimag * *pns_filter) >> 14);
|
||||
|
||||
// (2) Create spectrum.
|
||||
|
||||
// Fixed point C code for the rest of the function is as follows:
|
||||
// freq_buf[0] = inst->real[0];
|
||||
// freq_buf[1] = -inst->imag[0];
|
||||
// for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
|
||||
// freq_buf[j] = inst->real[i];
|
||||
// freq_buf[j + 1] = -inst->imag[i];
|
||||
// }
|
||||
// freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
|
||||
// freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
|
||||
|
||||
preal = &inst->real[0];
|
||||
pimag = &inst->imag[0];
|
||||
pimag_end = pimag + inst->anaLen2;
|
||||
int16_t * freq_buf_start = freq_buf;
|
||||
while (pimag < pimag_end) {
|
||||
// loop unroll
|
||||
int16x8x2_t real_imag_0;
|
||||
int16x8x2_t real_imag_1;
|
||||
real_imag_0.val[1] = vld1q_s16(pimag);
|
||||
real_imag_0.val[0] = vld1q_s16(preal);
|
||||
preal += 8;
|
||||
pimag += 8;
|
||||
real_imag_1.val[1] = vld1q_s16(pimag);
|
||||
real_imag_1.val[0] = vld1q_s16(preal);
|
||||
preal += 8;
|
||||
pimag += 8;
|
||||
|
||||
real_imag_0.val[1] = vnegq_s16(real_imag_0.val[1]);
|
||||
real_imag_1.val[1] = vnegq_s16(real_imag_1.val[1]);
|
||||
vst2q_s16(freq_buf_start, real_imag_0);
|
||||
freq_buf_start += 16;
|
||||
vst2q_s16(freq_buf_start, real_imag_1);
|
||||
freq_buf_start += 16;
|
||||
}
|
||||
freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
|
||||
freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
|
||||
}
|
||||
|
||||
// Noise suppression synthesis stage (NEON): window and scale the processed
// time-domain data, overlap-add it into the synthesis buffer, copy one fully
// processed 10 ms frame to |out_frame|, then slide the buffer forward and
// zero-fill the vacated tail.
void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
                                   int16_t* out_frame,
                                   int16_t gain_factor) {
  // The main loop consumes 16 samples per pass and the copy loops consume 8.
  RTC_DCHECK_EQ(0, inst->anaLen % 16);
  RTC_DCHECK_EQ(0, inst->blockLen10ms % 16);

  const int16_t* window_ptr = inst->window;
  int16_t* real_ptr = inst->real;
  int16_t* const real_limit = inst->real + inst->anaLen;
  int16_t* synth_ptr = inst->synthesisBuffer;

  // Overlap-add:
  //   synthesisBuffer[i] = sat(synthesisBuffer[i] +
  //       sat(sat(real[i] * window[i] >> 14) * gain_factor >> 13))
  // processed as two 8-lane vectors (16 samples) per iteration.
  for (; real_ptr < real_limit;
       real_ptr += 16, window_ptr += 16, synth_ptr += 16) {
    const int16x8_t win_a = vld1q_s16(window_ptr);
    const int16x8_t re_a = vld1q_s16(real_ptr);
    int16x8_t acc_a = vld1q_s16(synth_ptr);

    const int16x8_t win_b = vld1q_s16(window_ptr + 8);
    const int16x8_t re_b = vld1q_s16(real_ptr + 8);
    int16x8_t acc_b = vld1q_s16(synth_ptr + 8);

    // Apply the Q14 synthesis window: widening multiply, then saturating
    // rounding narrow by 14 bits.
    const int32x4_t win_prod_a_lo =
        vmull_s16(vget_low_s16(re_a), vget_low_s16(win_a));
    const int32x4_t win_prod_a_hi =
        vmull_s16(vget_high_s16(re_a), vget_high_s16(win_a));
    const int32x4_t win_prod_b_lo =
        vmull_s16(vget_low_s16(re_b), vget_low_s16(win_b));
    const int32x4_t win_prod_b_hi =
        vmull_s16(vget_high_s16(re_b), vget_high_s16(win_b));

    const int16x4_t windowed_a_lo = vqrshrn_n_s32(win_prod_a_lo, 14);
    const int16x4_t windowed_a_hi = vqrshrn_n_s32(win_prod_a_hi, 14);
    const int16x4_t windowed_b_lo = vqrshrn_n_s32(win_prod_b_lo, 14);
    const int16x4_t windowed_b_hi = vqrshrn_n_s32(win_prod_b_hi, 14);

    // Apply |gain_factor| (Q13) with the same widen/narrow pattern.
    const int32x4_t gain_prod_a_lo = vmull_n_s16(windowed_a_lo, gain_factor);
    const int32x4_t gain_prod_a_hi = vmull_n_s16(windowed_a_hi, gain_factor);
    const int32x4_t gain_prod_b_lo = vmull_n_s16(windowed_b_lo, gain_factor);
    const int32x4_t gain_prod_b_hi = vmull_n_s16(windowed_b_hi, gain_factor);

    const int16x4_t scaled_a_lo = vqrshrn_n_s32(gain_prod_a_lo, 13);
    const int16x4_t scaled_a_hi = vqrshrn_n_s32(gain_prod_a_hi, 13);
    const int16x4_t scaled_b_lo = vqrshrn_n_s32(gain_prod_b_lo, 13);
    const int16x4_t scaled_b_hi = vqrshrn_n_s32(gain_prod_b_hi, 13);

    // Saturating accumulate into the synthesis buffer and store back.
    acc_a = vqaddq_s16(vcombine_s16(scaled_a_lo, scaled_a_hi), acc_a);
    acc_b = vqaddq_s16(vcombine_s16(scaled_b_lo, scaled_b_hi), acc_b);
    vst1q_s16(synth_ptr, acc_a);
    vst1q_s16(synth_ptr + 8, acc_b);
  }

  // Emit the first blockLen10ms samples as the fully processed output frame.
  const int16_t* read_ptr = inst->synthesisBuffer;
  const int16_t* const read_end = inst->synthesisBuffer + inst->blockLen10ms;
  int16_t* frame_ptr = out_frame;
  for (; read_ptr < read_end; read_ptr += 8, frame_ptr += 8) {
    vst1q_s16(frame_ptr, vld1q_s16(read_ptr));
  }

  // Slide the remaining anaLen - blockLen10ms samples to the front of the
  // buffer. The source always leads the destination by blockLen10ms (a
  // multiple of 16), so a forward 8-wide copy never reads bytes it has
  // already overwritten.
  const int16_t* shift_src = inst->synthesisBuffer + inst->blockLen10ms;
  const int16_t* const shift_end = inst->synthesisBuffer + inst->anaLen;
  int16_t* shift_dst = inst->synthesisBuffer;
  for (; shift_src < shift_end; shift_src += 8, shift_dst += 8) {
    vst1q_s16(shift_dst, vld1q_s16(shift_src));
  }

  // Zero-fill the vacated tail so the next overlap-add starts from silence.
  int16_t* zero_ptr =
      inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms;
  int16_t* const zero_end = zero_ptr + inst->blockLen10ms;
  const int16x8_t zero = vdupq_n_s16(0);
  for (; zero_ptr < zero_end; zero_ptr += 8) {
    vst1q_s16(zero_ptr, zero);
  }
}
|
||||
|
||||
// Noise suppression analysis stage (NEON): updates the lower-band analysis
// buffer with the newest 10 ms of speech and writes the windowed samples
// (ready for the FFT) to |out|.
//
// |out| receives inst->anaLen windowed samples; |new_speech| supplies
// inst->blockLen10ms fresh input samples.
void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
                                  int16_t* out,
                                  int16_t* new_speech) {
  // All loops below process 8 or 16 samples per iteration.
  RTC_DCHECK_EQ(0, inst->blockLen10ms % 16);
  RTC_DCHECK_EQ(0, inst->anaLen % 16);

  // For lower band, shift the analysis buffer left by blockLen10ms.
  // Equivalent scalar code:
  // memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
  //        (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
  // Forward 8-wide copy is safe: the source leads the destination by
  // blockLen10ms, which is a multiple of 16.
  int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms;
  int16_t* p_end_src = inst->analysisBuffer + inst->anaLen;
  int16_t* p_start_dst = inst->analysisBuffer;
  while (p_start_src < p_end_src) {
    int16x8_t frame = vld1q_s16(p_start_src);
    vst1q_s16(p_start_dst, frame);

    p_start_src += 8;
    p_start_dst += 8;
  }

  // Append the new speech at the tail of the buffer. Equivalent scalar code:
  // memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms,
  //        new_speech, inst->blockLen10ms * sizeof(*inst->analysisBuffer));
  p_start_src = new_speech;
  p_end_src = new_speech + inst->blockLen10ms;
  p_start_dst = inst->analysisBuffer + inst->anaLen - inst->blockLen10ms;
  while (p_start_src < p_end_src) {
    int16x8_t frame = vld1q_s16(p_start_src);
    vst1q_s16(p_start_dst, frame);

    p_start_src += 8;
    p_start_dst += 8;
  }

  // Window the buffered data before the FFT:
  //   out[i] = round(analysisBuffer[i] * window[i] >> 14)   (Q14 window).
  int16_t* p_start_window = (int16_t*) inst->window;
  int16_t* p_start_buffer = inst->analysisBuffer;
  int16_t* p_end_buffer = inst->analysisBuffer + inst->anaLen;
  int16_t* p_start_out = out;

  // Software pipelining: load the first 8-sample chunk up front so each loop
  // iteration can issue the next loads while the multiply of the current
  // chunk is in flight (reduces pipeline bubbles).
  int16x8_t window = vld1q_s16(p_start_window);
  int16x8_t buffer = vld1q_s16(p_start_buffer);
  p_start_window += 8;
  p_start_buffer += 8;

  while (p_start_buffer < p_end_buffer) {
    // Multiply the chunk loaded on the previous iteration, then prefetch the
    // next one before narrowing and storing.
    int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer));
    int32x4_t tmp32_high = vmull_s16(vget_high_s16(window),
                                     vget_high_s16(buffer));
    window = vld1q_s16(p_start_window);
    buffer = vld1q_s16(p_start_buffer);

    // Rounding narrow by 14 removes the Q14 window scaling.
    int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14);
    int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14);
    vst1q_s16(p_start_out, vcombine_s16(result_low, result_high));

    p_start_buffer += 8;
    p_start_window += 8;
    p_start_out += 8;
  }
  // Epilogue: the final chunk was loaded inside the loop (or by the
  // pre-loop loads when anaLen == 8) but not yet processed.
  int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer));
  int32x4_t tmp32_high = vmull_s16(vget_high_s16(window),
                                   vget_high_s16(buffer));

  int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14);
  int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14);
  vst1q_s16(p_start_out, vcombine_s16(result_low, result_high));
}
|
@ -1,74 +0,0 @@
|
||||
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_DEFINES_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_DEFINES_H_

/* Compile-time constants for the fixed-point (NSX) legacy noise suppressor.
 * Suffixes such as _Q7/_Q8/_Q10/_Q11/_Q14/_Q16 give the fixed-point format
 * of the constant. */

#define ANAL_BLOCKL_MAX 256 /* Max analysis block length */
#define HALF_ANAL_BLOCKL 129 /* Half max analysis block length + 1 */
#define NUM_HIGH_BANDS_MAX 2 /* Max number of high bands */
#define SIMULT 3
#define END_STARTUP_LONG 200
#define END_STARTUP_SHORT 50
#define FACTOR_Q16 2621440 /* 40 in Q16 */
#define FACTOR_Q7 5120 /* 40 in Q7 */
#define FACTOR_Q7_STARTUP 1024 /* 8 in Q7 */
#define WIDTH_Q8 3 /* 0.01 in Q8 (or 25 ) */

/* PARAMETERS FOR NEW METHOD */
#define DD_PR_SNR_Q11 2007 /* ~= Q11(0.98) DD update of prior SNR */
#define ONE_MINUS_DD_PR_SNR_Q11 41 /* DD update of prior SNR */
#define SPECT_FLAT_TAVG_Q14 \
  4915 /* (0.30) tavg parameter for spectral flatness measure */
#define SPECT_DIFF_TAVG_Q8 \
  77 /* (0.30) tavg parameter for spectral flatness measure */
#define PRIOR_UPDATE_Q14 1638 /* Q14(0.1) Update parameter of prior model */
#define NOISE_UPDATE_Q8 26 /* 26 ~= Q8(0.1) Update parameter for noise */

/* Probability threshold for noise state in speech/noise likelihood. */
#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */
#define HIST_PAR_EST 1000 /* Histogram size for estimation of parameters */

/* FEATURE EXTRACTION CONFIG */
/* Bin size of histogram */
#define BIN_SIZE_LRT 10
/* Scale parameters: multiply dominant peaks of the histograms by scale factor
 * to obtain. */
/* Thresholds for prior model */
#define FACTOR_1_LRT_DIFF \
  6 /* For LRT and spectral difference (5 times bigger) */
/* For spectral_flatness: used when noise is flatter than speech (10 times
 * bigger). */
#define FACTOR_2_FLAT_Q10 922
/* Peak limit for spectral flatness (varies between 0 and 1) */
#define THRES_PEAK_FLAT 24 /* * 2 * BIN_SIZE_FLAT_FX */
/* Limit on spacing of two highest peaks in histogram: spacing determined by bin
 * size. */
#define LIM_PEAK_SPACE_FLAT_DIFF 4 /* * 2 * BIN_SIZE_DIFF_FX */
/* Limit on relevance of second peak */
#define LIM_PEAK_WEIGHT_FLAT_DIFF 2
#define THRES_FLUCT_LRT \
  10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */
/* Limit on the max and min values for the feature thresholds */
#define MAX_FLAT_Q10 38912 /* * 2 * BIN_SIZE_FLAT_FX */
#define MIN_FLAT_Q10 4096 /* * 2 * BIN_SIZE_FLAT_FX */
#define MAX_DIFF 100 /* * 2 * BIN_SIZE_DIFF_FX */
#define MIN_DIFF 16 /* * 2 * BIN_SIZE_DIFF_FX */
/* Criteria of weight of histogram peak to accept/reject feature */
#define THRES_WEIGHT_FLAT_DIFF \
  154 /*(int)(0.3*(inst->modelUpdate)) for flatness and difference */

#define STAT_UPDATES 9 /* Update every 512 = 1 << 9 block */
#define ONE_MINUS_GAMMA_PAUSE_Q8 \
  13 /* ~= Q8(0.05) Update for conservative noise estimate */
#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 \
  3 /* ~= Q8(0.01) Update for transition and noise region */

#endif /* MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_DEFINES_H_ */
|
@ -520,12 +520,6 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
|
||||
*settings_.maximum_internal_processing_rate;
|
||||
}
|
||||
|
||||
const bool use_legacy_ns =
|
||||
settings_.use_legacy_ns && *settings_.use_legacy_ns;
|
||||
if (use_legacy_ns) {
|
||||
apm_config.noise_suppression.use_legacy_ns = use_legacy_ns;
|
||||
}
|
||||
|
||||
if (settings_.use_ns) {
|
||||
apm_config.noise_suppression.enabled = *settings_.use_ns;
|
||||
}
|
||||
|
@ -61,7 +61,6 @@ struct SimulationSettings {
|
||||
absl::optional<bool> use_vad;
|
||||
absl::optional<bool> use_le;
|
||||
absl::optional<bool> use_all;
|
||||
absl::optional<bool> use_legacy_ns;
|
||||
absl::optional<bool> use_analog_agc_agc2_level_estimator;
|
||||
absl::optional<bool> analog_agc_disable_digital_adaptive;
|
||||
absl::optional<int> agc_mode;
|
||||
|
@ -118,10 +118,6 @@ ABSL_FLAG(bool,
|
||||
false,
|
||||
"Activate all of the default components (will be overridden by any "
|
||||
"other settings)");
|
||||
ABSL_FLAG(int,
|
||||
use_legacy_ns,
|
||||
kParameterNotSpecifiedValue,
|
||||
"Activate (1) or deactivate(0) the legacy NS");
|
||||
ABSL_FLAG(int,
|
||||
analog_agc_disable_digital_adaptive,
|
||||
kParameterNotSpecifiedValue,
|
||||
@ -381,8 +377,6 @@ SimulationSettings CreateSettings() {
|
||||
&settings.use_analog_agc);
|
||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_vad), &settings.use_vad);
|
||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_le), &settings.use_le);
|
||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_use_legacy_ns),
|
||||
&settings.use_legacy_ns);
|
||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc_disable_digital_adaptive),
|
||||
&settings.analog_agc_disable_digital_adaptive);
|
||||
SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc_agc2_level_estimator),
|
||||
|
@ -20,9 +20,9 @@
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "common_audio/third_party/fft4g/fft4g.h"
|
||||
#include "modules/audio_processing/legacy_ns/windows_private.h"
|
||||
#include "modules/audio_processing/transient/common.h"
|
||||
#include "modules/audio_processing/transient/transient_detector.h"
|
||||
#include "modules/audio_processing/transient/windows_private.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
|
@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_WINDOWS_PRIVATE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_WINDOWS_PRIVATE_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_
|
||||
|
||||
// Hanning window for 4ms 16kHz
|
||||
static const float kHanning64w128[128] = {
|
||||
@ -550,4 +550,4 @@ static const float kBlocks480w1024[1024] = {
|
||||
0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
|
||||
0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f};
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_WINDOWS_PRIVATE_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_
|
Reference in New Issue
Block a user