Removing deprecated legacy noise suppressor

This CL removes the code for the deprecated legacy noise suppressor.

Bug: webrtc:5298
Change-Id: If287d8967a3079ef96bff4790afa31f37d178823
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/167922
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30434}
This commit is contained in:
Per Åhgren
2020-01-30 07:40:58 +01:00
committed by Commit Bot
parent ec47b57f14
commit 8ad9e74d62
27 changed files with 12 additions and 6097 deletions

View File

@ -137,6 +137,7 @@ rtc_library("audio_processing") {
"transient/transient_detector.h",
"transient/transient_suppressor.cc",
"transient/transient_suppressor.h",
"transient/windows_private.h",
"transient/wpd_node.cc",
"transient/wpd_node.h",
"transient/wpd_tree.cc",
@ -183,8 +184,6 @@ rtc_library("audio_processing") {
"agc2:adaptive_digital",
"agc2:fixed_digital",
"agc2:gain_applier",
"legacy_ns:legacy_ns",
"legacy_ns:legacy_ns_c",
"ns",
"vad",
"//third_party/abseil-cpp/absl/types:optional",
@ -376,7 +375,6 @@ if (rtc_include_tests) {
"agc2:rnn_vad_with_level_unittests",
"agc2:test_utils",
"agc2/rnn_vad:unittests",
"legacy_ns:legacy_ns",
"test/conversational_speech:unittest",
"utility:legacy_delay_estimator_unittest",
"utility:pffft_wrapper_unittest",

View File

@ -70,11 +70,6 @@ bool SampleRateSupportsMultiBand(int sample_rate_hz) {
sample_rate_hz == AudioProcessing::kSampleRate48kHz;
}
// Checks whether the legacy ns functionality should be enforced.
bool DetectLegacyNsEnforcement() {
return field_trial::IsEnabled("WebRTC-NewNoiseSuppressionKillSwitch");
}
// Checks whether the high-pass filter should be done in the full-band.
bool EnforceSplitBandHpf() {
return field_trial::IsEnabled("WebRTC-FullBandHpfKillSwitch");
@ -106,23 +101,6 @@ int SuitableProcessRate(int minimum_rate,
return uppermost_native_rate;
}
NoiseSuppression::Level NsConfigLevelToInterfaceLevel(
AudioProcessing::Config::NoiseSuppression::Level level) {
using NsConfig = AudioProcessing::Config::NoiseSuppression;
switch (level) {
case NsConfig::kLow:
return NoiseSuppression::Level::kLow;
case NsConfig::kModerate:
return NoiseSuppression::Level::kModerate;
case NsConfig::kHigh:
return NoiseSuppression::Level::kHigh;
case NsConfig::kVeryHigh:
return NoiseSuppression::Level::kVeryHigh;
default:
RTC_NOTREACHED();
}
}
GainControl::Mode Agc1ConfigModeToInterfaceMode(
AudioProcessing::Config::GainController1::Mode mode) {
using Agc1Config = AudioProcessing::Config::GainController1;
@ -319,7 +297,6 @@ AudioProcessingImpl::AudioProcessingImpl(
std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
enforced_usage_of_legacy_ns_(DetectLegacyNsEnforcement()),
use_setup_specific_default_aec3_config_(
UseSetupSpecificDefaultAec3Congfig()),
capture_runtime_settings_(kRuntimeSettingQueueSize),
@ -1220,16 +1197,11 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
RETURN_ON_ERR(
submodules_.gain_control->AnalyzeCaptureAudio(*capture_buffer));
}
RTC_DCHECK(
!(submodules_.legacy_noise_suppressor && submodules_.noise_suppressor));
if (!config_.noise_suppression.analyze_linear_aec_output_when_available ||
!linear_aec_buffer || submodules_.echo_control_mobile) {
if (submodules_.noise_suppressor) {
if ((!config_.noise_suppression.analyze_linear_aec_output_when_available ||
!linear_aec_buffer || submodules_.echo_control_mobile) &&
submodules_.noise_suppressor) {
submodules_.noise_suppressor->Analyze(*capture_buffer);
} else if (submodules_.legacy_noise_suppressor) {
submodules_.legacy_noise_suppressor->AnalyzeCaptureAudio(capture_buffer);
}
}
if (submodules_.echo_control_mobile) {
@ -1241,9 +1213,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
if (submodules_.noise_suppressor) {
submodules_.noise_suppressor->Process(capture_buffer);
} else if (submodules_.legacy_noise_suppressor) {
submodules_.echo_control_mobile->CopyLowPassReference(capture_buffer);
submodules_.legacy_noise_suppressor->ProcessCaptureAudio(capture_buffer);
}
RETURN_ON_ERR(submodules_.echo_control_mobile->ProcessCaptureAudio(
@ -1261,19 +1230,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
}
if (config_.noise_suppression.analyze_linear_aec_output_when_available &&
linear_aec_buffer) {
if (submodules_.noise_suppressor) {
linear_aec_buffer && submodules_.noise_suppressor) {
submodules_.noise_suppressor->Analyze(*linear_aec_buffer);
} else if (submodules_.legacy_noise_suppressor) {
submodules_.legacy_noise_suppressor->AnalyzeCaptureAudio(
linear_aec_buffer);
}
}
if (submodules_.noise_suppressor) {
submodules_.noise_suppressor->Process(capture_buffer);
} else if (submodules_.legacy_noise_suppressor) {
submodules_.legacy_noise_suppressor->ProcessCaptureAudio(capture_buffer);
}
}
@ -1682,8 +1644,7 @@ AudioProcessing::Config AudioProcessingImpl::GetConfig() const {
bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
return submodule_states_.Update(
config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile,
config_.residual_echo_detector.enabled,
!!submodules_.legacy_noise_suppressor || !!submodules_.noise_suppressor,
config_.residual_echo_detector.enabled, !!submodules_.noise_suppressor,
!!submodules_.gain_control, !!submodules_.gain_controller2,
config_.pre_amplifier.enabled, capture_nonlocked_.echo_controller_enabled,
config_.voice_detection.enabled, !!submodules_.transient_suppressor);
@ -1896,14 +1857,9 @@ void AudioProcessingImpl::InitializeGainController2() {
}
void AudioProcessingImpl::InitializeNoiseSuppressor() {
submodules_.legacy_noise_suppressor.reset();
submodules_.noise_suppressor.reset();
if (config_.noise_suppression.enabled) {
const bool use_legacy_ns =
config_.noise_suppression.use_legacy_ns || enforced_usage_of_legacy_ns_;
if (!use_legacy_ns) {
auto map_level =
[](AudioProcessing::Config::NoiseSuppression::Level level) {
using NoiseSuppresionConfig =
@ -1926,12 +1882,6 @@ void AudioProcessingImpl::InitializeNoiseSuppressor() {
cfg.target_level = map_level(config_.noise_suppression.level);
submodules_.noise_suppressor = std::make_unique<NoiseSuppressor>(
cfg, proc_sample_rate_hz(), num_proc_channels());
} else {
auto ns_level =
NsConfigLevelToInterfaceLevel(config_.noise_suppression.level);
submodules_.legacy_noise_suppressor = std::make_unique<NoiseSuppression>(
num_proc_channels(), proc_sample_rate_hz(), ns_level);
}
}
}

View File

@ -27,7 +27,6 @@
#include "modules/audio_processing/include/aec_dump.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/include/audio_processing_statistics.h"
#include "modules/audio_processing/legacy_ns/legacy_noise_suppression.h"
#include "modules/audio_processing/level_estimator.h"
#include "modules/audio_processing/ns/noise_suppressor.h"
#include "modules/audio_processing/render_queue_item_verifier.h"
@ -152,7 +151,6 @@ class AudioProcessingImpl : public AudioProcessing {
std::unique_ptr<ApmDataDumper> data_dumper_;
static int instance_count_;
const bool enforced_usage_of_legacy_ns_;
const bool use_setup_specific_default_aec3_config_;
SwapQueue<RuntimeSetting> capture_runtime_settings_;
@ -346,7 +344,6 @@ class AudioProcessingImpl : public AudioProcessing {
rtc::scoped_refptr<EchoDetector> echo_detector;
std::unique_ptr<EchoControl> echo_controller;
std::unique_ptr<EchoControlMobileImpl> echo_control_mobile;
std::unique_ptr<NoiseSuppression> legacy_noise_suppressor;
std::unique_ptr<NoiseSuppressor> noise_suppressor;
std::unique_ptr<TransientSuppressor> transient_suppressor;
std::unique_ptr<CustomProcessing> capture_post_processor;

View File

@ -220,16 +220,6 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio,
return AudioProcessing::kNoError;
}
void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) {
RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size());
reference_copied_ = true;
for (size_t capture = 0; capture < audio->num_channels(); ++capture) {
FloatS16ToS16(audio->split_bands_const(capture)[kBand0To8kHz],
audio->num_frames_per_band(),
low_pass_reference_[capture].data());
}
}
int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
if (MapSetting(mode) == -1) {
return AudioProcessing::kBadParameterError;

View File

@ -54,7 +54,6 @@ class EchoControlMobileImpl {
void ProcessRenderAudio(rtc::ArrayView<const int16_t> packed_render_audio);
int ProcessCaptureAudio(AudioBuffer* audio, int stream_delay_ms);
void CopyLowPassReference(AudioBuffer* audio);
void Initialize(int sample_rate_hz,
size_t num_reverse_channels,

View File

@ -250,8 +250,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
enum Level { kLow, kModerate, kHigh, kVeryHigh };
Level level = kModerate;
bool analyze_linear_aec_output_when_available = false;
// Recommended not to use. Will be removed in the future.
bool use_legacy_ns = false;
} noise_suppression;
// Enables transient suppression.

View File

@ -1,105 +0,0 @@
# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("../../../webrtc.gni")
rtc_library("legacy_ns") {
  # NOTE(review): "*" makes this target visible to ALL targets; the previous
  # comment claimed file-local visibility, which would be [ ":*" ] — confirm
  # the intended scope.
  visibility = [ "*" ]
  sources = [
    "legacy_noise_suppression.cc",
    "legacy_noise_suppression.h",
  ]
  # Select the fixed-point or floating-point flavor of the suppressor.
  if (rtc_prefer_fixed_point) {
    defines = [ "WEBRTC_NS_FIXED" ]
  } else {
    defines = [ "WEBRTC_NS_FLOAT" ]
  }
  deps = [
    ":legacy_ns_c",
    "..:audio_buffer",
    "../../../common_audio",
    "../../../common_audio:common_audio_c",
    "../../../rtc_base:checks",
    "../../../rtc_base:rtc_base_approved",
    "../../../system_wrappers:cpu_features_api",
  ]
}

# C core of the legacy suppressor (float or fixed-point source set).
rtc_library("legacy_ns_c") {
  # NOTE(review): "*" makes this target visible to ALL targets (see above).
  visibility = [ "*" ]
  sources = [ "windows_private.h" ]
  if (rtc_prefer_fixed_point) {
    sources += [
      "noise_suppression_x.c",
      "noise_suppression_x.h",
      "nsx_core.c",
      "nsx_core.h",
      "nsx_core_c.c",
      "nsx_defines.h",
    ]
  } else {
    sources += [
      "defines.h",
      "noise_suppression.c",
      "noise_suppression.h",
      "ns_core.c",
      "ns_core.h",
    ]
  }
  if (rtc_prefer_fixed_point) {
    defines = [ "WEBRTC_NS_FIXED" ]
  } else {
    defines = [ "WEBRTC_NS_FLOAT" ]
  }
  deps = [
    "..:audio_buffer",
    "../../../common_audio",
    "../../../common_audio:common_audio_c",
    "../../../common_audio/third_party/fft4g",
    "../../../rtc_base:checks",
    "../../../rtc_base:rtc_base_approved",
    "../../../system_wrappers:cpu_features_api",
  ]
  if (rtc_build_with_neon) {
    sources += [ "nsx_core_neon.c" ]
    if (current_cpu != "arm64") {
      # Enable compilation for the NEON instruction set.
      suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ]
      cflags = [ "-mfpu=neon" ]
    }
  }
}

if (rtc_include_tests) {
  rtc_source_set("legacy_ns_unittests") {
    testonly = true
    configs += []
    sources = [ "legacy_noise_suppression_unittest.cc" ]
    deps = [
      "..:audio_buffer",
      "..:audioproc_test_utils",
      "../../../api:array_view",
      "../../../test:test_support",
    ]
    defines = []
    if (rtc_enable_protobuf) {
      sources += []
    }
  }
}

View File

@ -1,52 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_DEFINES_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_DEFINES_H_

// Compile-time constants for the legacy noise-suppression core.
// Block lengths below are in samples.
#define BLOCKL_MAX 160 // max processing block length: 160
#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256
#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1
#define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2

// Estimation/tuning constants (values are empirical; semantics defined by
// the ns_core implementation).
#define QUANTILE 0.25f
#define SIMULT 3
#define END_STARTUP_LONG 200
#define END_STARTUP_SHORT 50
#define FACTOR 40.f
#define WIDTH 0.01f

// Length of fft work arrays.
#define IP_LENGTH \
  (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2))
#define W_LENGTH (ANAL_BLOCKL_MAX >> 1)

// PARAMETERS FOR NEW METHOD
#define DD_PR_SNR 0.98f // DD update of prior SNR
#define LRT_TAVG 0.5f // tavg parameter for LRT (previously 0.90)
#define SPECT_FL_TAVG 0.30f // tavg parameter for spectral flatness measure
#define SPECT_DIFF_TAVG 0.30f // tavg parameter for spectral difference measure
#define PRIOR_UPDATE 0.1f // update parameter of prior model
#define NOISE_UPDATE 0.9f // update parameter for noise
#define SPEECH_UPDATE 0.99f // update parameter when likely speech
#define WIDTH_PR_MAP 4.0f // width parameter in sigmoid map for prior model
#define LRT_FEATURE_THR 0.5f // default threshold for LRT feature
#define SF_FEATURE_THR 0.5f // default threshold for Spectral Flatness feature
#define SD_FEATURE_THR \
  0.5f // default threshold for Spectral Difference feature
#define PROB_RANGE \
  0.2f // probability threshold for noise state in
       // speech/noise likelihood
#define HIST_PAR_EST 1000 // histogram size for estimation of parameters
#define GAMMA_PAUSE 0.05f // update for conservative noise estimate
#define B_LIM 0.5f // threshold in final energy gain factor calculation

#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_DEFINES_H_

View File

@ -1,172 +0,0 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/legacy_ns/legacy_noise_suppression.h"
#include "modules/audio_processing/audio_buffer.h"
#include "rtc_base/checks.h"
// Select the float or fixed-point C implementation at compile time and alias
// its lifecycle entry points behind common NS_* macro names so that the rest
// of this file is flavor-agnostic.
#if defined(WEBRTC_NS_FLOAT)
#include "modules/audio_processing/legacy_ns/noise_suppression.h"
#define NS_CREATE WebRtcNs_Create
#define NS_FREE WebRtcNs_Free
#define NS_INIT WebRtcNs_Init
#define NS_SET_POLICY WebRtcNs_set_policy
typedef NsHandle NsState;
#elif defined(WEBRTC_NS_FIXED)
#include "modules/audio_processing/legacy_ns/noise_suppression_x.h"
#define NS_CREATE WebRtcNsx_Create
#define NS_FREE WebRtcNsx_Free
#define NS_INIT WebRtcNsx_Init
#define NS_SET_POLICY WebRtcNsx_set_policy
typedef NsxHandle NsState;
#endif
namespace webrtc {
namespace {
// Maps the public suppression |level| to the integer policy value expected by
// the underlying WebRtcNs(x)_set_policy C API (0 = mildest, 3 = most
// aggressive).
int NoiseSuppressionLevelToPolicy(NoiseSuppression::Level level) {
  switch (level) {
    case NoiseSuppression::Level::kLow:
      return 0;
    case NoiseSuppression::Level::kModerate:
      return 1;
    case NoiseSuppression::Level::kHigh:
      return 2;
    case NoiseSuppression::Level::kVeryHigh:
      return 3;
    default:
      RTC_NOTREACHED();
  }
  // Fallback for an unknown enumerator (only reachable when RTC_NOTREACHED
  // is compiled out): behave as kModerate.
  return 1;
}
} // namespace
// RAII owner of one per-channel instance of the C suppression state
// (NsHandle or NsxHandle, depending on the build flavor). Non-copyable.
class NoiseSuppression::Suppressor {
 public:
  explicit Suppressor(int sample_rate_hz) {
    state_ = NS_CREATE();
    // Allocation failure is fatal here.
    RTC_CHECK(state_);
    int error = NS_INIT(state_, sample_rate_hz);
    RTC_DCHECK_EQ(0, error);
  }
  ~Suppressor() { NS_FREE(state_); }
  Suppressor(Suppressor&) = delete;
  Suppressor& operator=(Suppressor&) = delete;
  // Non-owning access to the underlying C state (owned by this object).
  NsState* state() { return state_; }

 private:
  NsState* state_ = nullptr;
};
// Creates one suppressor per channel, each configured with the policy that
// corresponds to |level|.
NoiseSuppression::NoiseSuppression(size_t channels,
                                   int sample_rate_hz,
                                   Level level) {
  const int policy = NoiseSuppressionLevelToPolicy(level);
  for (size_t i = 0; i < channels; ++i) {
    suppressors_.push_back(std::make_unique<Suppressor>(sample_rate_hz));
    int error = NS_SET_POLICY(suppressors_[i]->state(), policy);
    RTC_DCHECK_EQ(0, error);
  }
}

// Out-of-line so that Suppressor's definition is visible for unique_ptr.
NoiseSuppression::~NoiseSuppression() {}
// Feeds the lowest split band of |audio| to each per-channel suppressor's
// analysis stage. Only the float implementation has a separate analysis
// step; in the fixed-point build this is a no-op.
void NoiseSuppression::AnalyzeCaptureAudio(AudioBuffer* audio) {
  RTC_DCHECK(audio);
#if defined(WEBRTC_NS_FLOAT)
  RTC_DCHECK_GE(160, audio->num_frames_per_band());
  RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels());
  for (size_t i = 0; i < suppressors_.size(); i++) {
    WebRtcNs_Analyze(suppressors_[i]->state(),
                     audio->split_bands_const(i)[kBand0To8kHz]);
  }
#endif
}
// Applies suppression to |audio| in place, channel by channel. The
// fixed-point build round-trips the split-band data through int16_t scratch
// buffers because WebRtcNsx_Process operates on integer samples.
void NoiseSuppression::ProcessCaptureAudio(AudioBuffer* audio) {
  RTC_DCHECK(audio);
  RTC_DCHECK_GE(160, audio->num_frames_per_band());
  RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels());
  for (size_t i = 0; i < suppressors_.size(); i++) {
#if defined(WEBRTC_NS_FLOAT)
    WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const(i),
                     audio->num_bands(), audio->split_bands(i));
#elif defined(WEBRTC_NS_FIXED)
    int16_t split_band_data[AudioBuffer::kMaxNumBands]
                           [AudioBuffer::kMaxSplitFrameLength];
    // Assumes kMaxNumBands == 3 for this initializer — holds for the array
    // indices used below.
    int16_t* split_bands[AudioBuffer::kMaxNumBands] = {
        split_band_data[0], split_band_data[1], split_band_data[2]};
    audio->ExportSplitChannelData(i, split_bands);
    WebRtcNsx_Process(suppressors_[i]->state(), split_bands, audio->num_bands(),
                      split_bands);
    audio->ImportSplitChannelData(i, split_bands);
#endif
  }
}
// Returns the prior speech probability of the current frame averaged over
// all channels (float build). The fixed-point build has no such estimate and
// returns kUnsupportedFunctionError — an error code smuggled into a float
// return, as the TODO notes.
float NoiseSuppression::speech_probability() const {
#if defined(WEBRTC_NS_FLOAT)
  float probability_average = 0.0f;
  for (auto& suppressor : suppressors_) {
    probability_average +=
        WebRtcNs_prior_speech_probability(suppressor->state());
  }
  // Guard the division; zero channels yields 0.0f.
  if (!suppressors_.empty()) {
    probability_average /= suppressors_.size();
  }
  return probability_average;
#elif defined(WEBRTC_NS_FIXED)
  // TODO(peah): Returning error code as a float! Remove this.
  // Currently not available for the fixed point implementation.
  return AudioProcessing::kUnsupportedFunctionError;
#endif
}
// Returns the noise estimate per frequency bin averaged over all channels.
// The fixed-point path additionally undoes the per-channel Q-format scaling
// (1 << q_noise) before averaging.
// NOTE(review): both paths divide by suppressors_.size(); with zero channels
// the float path computes 1.f/0 — presumably callers always construct with
// channels >= 1; confirm.
std::vector<float> NoiseSuppression::NoiseEstimate() {
  std::vector<float> noise_estimate;
#if defined(WEBRTC_NS_FLOAT)
  const float kNumChannelsFraction = 1.f / suppressors_.size();
  noise_estimate.assign(WebRtcNs_num_freq(), 0.f);
  for (auto& suppressor : suppressors_) {
    const float* noise = WebRtcNs_noise_estimate(suppressor->state());
    for (size_t i = 0; i < noise_estimate.size(); ++i) {
      noise_estimate[i] += kNumChannelsFraction * noise[i];
    }
  }
#elif defined(WEBRTC_NS_FIXED)
  noise_estimate.assign(WebRtcNsx_num_freq(), 0.f);
  for (auto& suppressor : suppressors_) {
    int q_noise;
    const uint32_t* noise =
        WebRtcNsx_noise_estimate(suppressor->state(), &q_noise);
    const float kNormalizationFactor =
        1.f / ((1 << q_noise) * suppressors_.size());
    for (size_t i = 0; i < noise_estimate.size(); ++i) {
      noise_estimate[i] += kNormalizationFactor * noise[i];
    }
  }
#endif
  return noise_estimate;
}
// Returns the number of frequency bins in the vector that NoiseEstimate()
// produces, for the active build flavor.
size_t NoiseSuppression::num_noise_bins() {
#if defined(WEBRTC_NS_FLOAT)
  return WebRtcNs_num_freq();
#elif defined(WEBRTC_NS_FIXED)
  return WebRtcNsx_num_freq();
#endif
}
} // namespace webrtc

View File

@ -1,57 +0,0 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_LEGACY_NOISE_SUPPRESSION_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_LEGACY_NOISE_SUPPRESSION_H_
#include <memory>
#include <vector>
namespace webrtc {
class AudioBuffer;
// The noise suppression (NS) component attempts to remove noise while
// retaining speech. Recommended to be enabled on the client-side.
// Wraps the C float/fixed-point implementations, one instance per channel.
class NoiseSuppression {
 public:
  // Determines the aggressiveness of the suppression. Increasing the level
  // will reduce the noise level at the expense of a higher speech distortion.
  enum class Level { kLow, kModerate, kHigh, kVeryHigh };

  // Creates one underlying suppressor per channel, configured for |level|.
  NoiseSuppression(size_t channels, int sample_rate_hz, Level level);
  ~NoiseSuppression();

  NoiseSuppression(NoiseSuppression&) = delete;
  NoiseSuppression& operator=(NoiseSuppression&) = delete;

  // Feeds a capture frame to the analysis stage (float build only).
  void AnalyzeCaptureAudio(AudioBuffer* audio);
  // Applies suppression to a capture frame in place.
  void ProcessCaptureAudio(AudioBuffer* audio);

  // LEGACY: Returns the internally computed prior speech probability of
  // current frame averaged over output channels. This is not supported in
  // fixed point, for which |kUnsupportedFunctionError| is returned.
  float speech_probability() const;

  // LEGACY: Returns the size of the noise vector returned by NoiseEstimate().
  static size_t num_noise_bins();

  // LEGACY: Returns the noise estimate per frequency bin averaged over all
  // channels.
  std::vector<float> NoiseEstimate();

 private:
  class Suppressor;
  // One suppressor per audio channel.
  std::vector<std::unique_ptr<Suppressor>> suppressors_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_LEGACY_NOISE_SUPPRESSION_H_

View File

@ -1,279 +0,0 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/legacy_noise_suppression.h"
#include "modules/audio_processing/test/audio_buffer_tools.h"
#include "modules/audio_processing/test/bitexactness_tools.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
// Number of 10 ms frames to process before the output is checked.
const int kNumFramesToProcess = 1000;

// Process one frame of data and produce the output. The suppressor operates
// on split bands, so the buffer is split/merged around it for rates above
// 16 kHz.
void ProcessOneFrame(int sample_rate_hz,
                     AudioBuffer* capture_buffer,
                     NoiseSuppression* noise_suppressor) {
  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
    capture_buffer->SplitIntoFrequencyBands();
  }
  noise_suppressor->AnalyzeCaptureAudio(capture_buffer);
  noise_suppressor->ProcessCaptureAudio(capture_buffer);
  if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
    capture_buffer->MergeFrequencyBands();
  }
}
// Processes kNumFramesToProcess frames of the APM capture test vector
// through a freshly constructed suppressor, then verifies the speech
// probability, the noise estimate, and the first output samples of the last
// frame against the given references.
void RunBitexactnessTest(int sample_rate_hz,
                         size_t num_channels,
                         NoiseSuppression::Level level,
                         float speech_probability_reference,
                         rtc::ArrayView<const float> noise_estimate_reference,
                         rtc::ArrayView<const float> output_reference) {
  NoiseSuppression noise_suppressor(num_channels, sample_rate_hz, level);
  // 10 ms frames.
  int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
  const StreamConfig capture_config(sample_rate_hz, num_channels, false);
  AudioBuffer capture_buffer(
      capture_config.sample_rate_hz(), capture_config.num_channels(),
      capture_config.sample_rate_hz(), capture_config.num_channels(),
      capture_config.sample_rate_hz(), capture_config.num_channels());
  test::InputAudioFile capture_file(
      test::GetApmCaptureTestVectorFileName(sample_rate_hz));
  std::vector<float> capture_input(samples_per_channel * num_channels);
  for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
    ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
                                   &capture_file, capture_input);
    test::CopyVectorToAudioBuffer(capture_config, capture_input,
                                  &capture_buffer);
    ProcessOneFrame(sample_rate_hz, &capture_buffer, &noise_suppressor);
  }
  // Extract test results.
  std::vector<float> capture_output;
  test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
                                     &capture_output);
  float speech_probability = noise_suppressor.speech_probability();
  std::vector<float> noise_estimate = noise_suppressor.NoiseEstimate();
  // Tolerance of one 16-bit LSB on the float-normalized samples.
  const float kVectorElementErrorBound = 1.0f / 32768.0f;
  EXPECT_FLOAT_EQ(speech_probability_reference, speech_probability);
  EXPECT_TRUE(test::VerifyArray(noise_estimate_reference, noise_estimate,
                                kVectorElementErrorBound));
  // Compare the output with the reference. Only the first values of the output
  // from last frame processed are compared in order not having to specify all
  // preceding frames as testvectors. As the algorithm being tested has a
  // memory, testing only the last frame implicitly also tests the preceding
  // frames.
  EXPECT_TRUE(test::VerifyDeinterleavedArray(
      capture_config.num_frames(), capture_config.num_channels(),
      output_reference, capture_output, kVectorElementErrorBound));
}
} // namespace
// Bit-exactness cases over rates, channel counts and levels. Reference
// values differ per architecture; on ARM/ARM64 the expected speech
// probability is -4.0f — presumably these builds use the fixed-point
// implementation, whose speech_probability() returns
// kUnsupportedFunctionError (confirm against build flags).
TEST(LegacyNoiseSuppresionBitExactnessTest, Mono8kHzLow) {
#if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {1432.341431f, 3321.919922f,
                                           7677.521973f};
  const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f};
#elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {1432.341431f, 3321.919922f,
                                           7677.521973f};
  const float kOutputReference[] = {0.003510f, 0.004517f, 0.004669f};
#else
  const float kSpeechProbabilityReference = 0.73650402f;
  const float kNoiseEstimateReference[] = {1176.856812f, 3287.490967f,
                                           7525.964844f};
  const float kOutputReference[] = {0.003306f, 0.004442f, 0.004574f};
#endif
  RunBitexactnessTest(8000, 1, NoiseSuppression::Level::kLow,
                      kSpeechProbabilityReference, kNoiseEstimateReference,
                      kOutputReference);
}

TEST(LegacyNoiseSuppresionBitExactnessTest, Mono16kHzLow) {
#if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2534.461914f, 6277.638672f,
                                           14367.499023f};
  const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f};
#elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2534.461914f, 6277.638672f,
                                           14367.499023f};
  const float kOutputReference[] = {0.003449f, 0.004334f, 0.004303f};
#else
  const float kSpeechProbabilityReference = 0.71743423f;
  const float kNoiseEstimateReference[] = {2179.853027f, 6507.995117f,
                                           15652.758789f};
  const float kOutputReference[] = {0.003574f, 0.004494f, 0.004499f};
#endif
  RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kLow,
                      kSpeechProbabilityReference, kNoiseEstimateReference,
                      kOutputReference);
}

TEST(LegacyNoiseSuppresionBitExactnessTest, Mono32kHzLow) {
#if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2540.059082f, 6317.822754f,
                                           14440.845703f};
  const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f};
#elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2540.059082f, 6317.822754f,
                                           14440.845703f};
  const float kOutputReference[] = {0.001679f, 0.002411f, 0.002594f};
#else
  const float kSpeechProbabilityReference = 0.67999554f;
  const float kNoiseEstimateReference[] = {2149.780518f, 7076.936035f,
                                           14939.945312f};
  const float kOutputReference[] = {0.001221f, 0.001984f, 0.002228f};
#endif
  RunBitexactnessTest(32000, 1, NoiseSuppression::Level::kLow,
                      kSpeechProbabilityReference, kNoiseEstimateReference,
                      kOutputReference);
}

TEST(LegacyNoiseSuppresionBitExactnessTest, Mono48kHzLow) {
#if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f,
                                           14647.632812f};
  const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f};
#elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2135.292480f, 6692.695801f,
                                           14647.632812f};
  const float kOutputReference[] = {-0.012738f, -0.012312f, -0.011576f};
#else
  const float kSpeechProbabilityReference = 0.70737761f;
  const float kNoiseEstimateReference[] = {2187.394043f, 6913.306641f,
                                           13182.945312f};
  const float kOutputReference[] = {-0.013062f, -0.012657f, -0.011934f};
#endif
  RunBitexactnessTest(48000, 1, NoiseSuppression::Level::kLow,
                      kSpeechProbabilityReference, kNoiseEstimateReference,
                      kOutputReference);
}

TEST(LegacyNoiseSuppresionBitExactnessTest, Stereo16kHzLow) {
#if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {9992.127930f, 12689.569336f,
                                           11589.296875f};
  const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f,
                                    -0.002441f, 0.000855f,  -0.003204f};
#elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {10321.353516f, 12133.852539f,
                                           10923.060547f};
  const float kOutputReference[] = {-0.011108f, -0.007904f, -0.012390f,
                                    -0.002472f, 0.000916f,  -0.003235f};
#else
  const float kSpeechProbabilityReference = 0.67285913f;
  const float kNoiseEstimateReference[] = {9753.257812f, 11515.603516f,
                                           10503.309570f};
  const float kOutputReference[] = {-0.011459f, -0.008110f, -0.012728f,
                                    -0.002399f, 0.001018f,  -0.003189f};
#endif
  RunBitexactnessTest(16000, 2, NoiseSuppression::Level::kLow,
                      kSpeechProbabilityReference, kNoiseEstimateReference,
                      kOutputReference);
}

TEST(LegacyNoiseSuppresionBitExactnessTest, Mono16kHzModerate) {
#if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2057.085938f, 7601.055176f,
                                           19666.187500f};
  const float kOutputReference[] = {0.004669f, 0.005524f, 0.005432f};
#elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2244.497803f, 6864.164062f,
                                           16726.523438f};
  const float kOutputReference[] = {0.004669f, 0.005615f, 0.005585f};
#else
  const float kSpeechProbabilityReference = 0.70916927f;
  const float kNoiseEstimateReference[] = {2172.830566f, 6552.661133f,
                                           15624.025391f};
  const float kOutputReference[] = {0.004513f, 0.005590f, 0.005614f};
#endif
  RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kModerate,
                      kSpeechProbabilityReference, kNoiseEstimateReference,
                      kOutputReference);
}

TEST(LegacyNoiseSuppresionBitExactnessTest, Mono16kHzHigh) {
#if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2095.148193f, 7698.553711f,
                                           19689.533203f};
  const float kOutputReference[] = {0.004639f, 0.005402f, 0.005310f};
#elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2282.515625f, 6984.408203f,
                                           16920.960938f};
  const float kOutputReference[] = {0.004547f, 0.005432f, 0.005402f};
#else
  const float kSpeechProbabilityReference = 0.70104003f;
  const float kNoiseEstimateReference[] = {2225.081055f, 6711.529785f,
                                           15785.949219};
  const float kOutputReference[] = {0.004394f, 0.005406f, 0.005416f};
#endif
  RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kHigh,
                      kSpeechProbabilityReference, kNoiseEstimateReference,
                      kOutputReference);
}

TEST(LegacyNoiseSuppresionBitExactnessTest, Mono16kHzVeryHigh) {
#if defined(WEBRTC_ARCH_ARM64)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2677.733398f, 6186.987305f,
                                           14365.744141f};
  const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f};
#elif defined(WEBRTC_ARCH_ARM)
  const float kSpeechProbabilityReference = -4.0f;
  const float kNoiseEstimateReference[] = {2677.733398f, 6186.987305f,
                                           14365.744141f};
  const float kOutputReference[] = {0.004273f, 0.005127f, 0.005188f};
#else
  const float kSpeechProbabilityReference = 0.70290041f;
  const float kNoiseEstimateReference[] = {2254.921875f, 6723.172852f,
                                           15770.559570f};
  const float kOutputReference[] = {0.004321f, 0.005247f, 0.005263f};
#endif
  RunBitexactnessTest(16000, 1, NoiseSuppression::Level::kVeryHigh,
                      kSpeechProbabilityReference, kNoiseEstimateReference,
                      kOutputReference);
}
} // namespace webrtc

View File

@ -1,71 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/legacy_ns/noise_suppression.h"
#include <stdlib.h>
#include <string.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_processing/legacy_ns/defines.h"
#include "modules/audio_processing/legacy_ns/ns_core.h"
NsHandle* WebRtcNs_Create() {
  // Allocates a floating-point noise-suppression instance. Returns NULL if
  // the allocation fails (the original dereferenced the malloc result
  // unconditionally and would crash on OOM). The handle is not usable until
  // WebRtcNs_Init() has been called on it.
  NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC));
  if (self != NULL) {
    // Mark as uninitialized so accessors such as
    // WebRtcNs_prior_speech_probability() reject the handle until Init runs.
    self->initFlag = 0;
  }
  return (NsHandle*)self;
}
void WebRtcNs_Free(NsHandle* NS_inst) {
  // Releases an instance created by WebRtcNs_Create(). free() tolerates
  // NULL, so a NULL handle is a no-op.
  NoiseSuppressionC* self = (NoiseSuppressionC*)NS_inst;
  free(self);
}
int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) {
  // Thin wrapper over the core initializer.
  // Returns 0 on success, -1 on error.
  NoiseSuppressionC* self = (NoiseSuppressionC*)NS_inst;
  return WebRtcNs_InitCore(self, fs);
}
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
  // Sets the suppression aggressiveness (0: mild, 1: medium, 2: aggressive).
  // Returns 0 on success, -1 on error.
  NoiseSuppressionC* self = (NoiseSuppressionC*)NS_inst;
  return WebRtcNs_set_policy_core(self, mode);
}
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
  // Feeds one 10 ms lower-band frame to the core background-noise estimator.
  NoiseSuppressionC* self = (NoiseSuppressionC*)NS_inst;
  WebRtcNs_AnalyzeCore(self, spframe);
}
void WebRtcNs_Process(NsHandle* NS_inst,
                      const float* const* spframe,
                      size_t num_bands,
                      float* const* outframe) {
  // Runs suppression on one 10 ms frame per band via the core routine;
  // suppressed output is written to outframe, one pointer per band.
  NoiseSuppressionC* self = (NoiseSuppressionC*)NS_inst;
  WebRtcNs_ProcessCore(self, spframe, num_bands, outframe);
}
float WebRtcNs_prior_speech_probability(NsHandle* handle) {
  // Returns the frame-level prior speech probability in [0.0, 1.0], or -1
  // for a NULL or uninitialized instance (same contract as the original,
  // with the two guard checks merged into one short-circuit condition).
  NoiseSuppressionC* self = (NoiseSuppressionC*)handle;
  if (handle == NULL || self->initFlag == 0) {
    return -1;
  }
  return self->priorSpeechProb;
}
const float* WebRtcNs_noise_estimate(const NsHandle* handle) {
  // Returns the per-bin noise estimate of the current frame, or NULL for a
  // NULL/uninitialized instance. Bin count is WebRtcNs_num_freq().
  const NoiseSuppressionC* self = (const NoiseSuppressionC*)handle;
  if (handle == NULL) {
    return NULL;
  }
  if (self->initFlag == 0) {
    return NULL;
  }
  return self->noise;
}
// Number of frequency bins in the arrays returned by
// WebRtcNs_noise_estimate(): half the analysis block length.
size_t WebRtcNs_num_freq() {
  return HALF_ANAL_BLOCKL;
}

View File

@ -1,134 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_H_
#include <stddef.h>
#include <stdint.h>
typedef struct NsHandleT NsHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function creates an instance of the floating point Noise Suppression.
*/
NsHandle* WebRtcNs_Create(void);
/*
* This function frees the dynamic memory of a specified noise suppression
* instance.
*
* Input:
* - NS_inst : Pointer to NS instance that should be freed
*/
void WebRtcNs_Free(NsHandle* NS_inst);
/*
* This function initializes a NS instance and has to be called before any other
* processing is made.
*
* Input:
* - NS_inst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - NS_inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - NS_inst : Noise suppression instance.
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - NS_inst : Updated instance.
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
/*
 * This function estimates the background noise for the inserted speech frame.
* The input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : Noise suppression instance.
* - spframe : Pointer to speech frame buffer for L band
*
* Output:
* - NS_inst : Updated NS instance
*/
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
/*
 * This function does noise suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : Noise suppression instance.
* - spframe : Pointer to speech frame buffer for each band
* - num_bands : Number of bands
*
* Output:
* - NS_inst : Updated NS instance
* - outframe : Pointer to output frame for each band
*/
void WebRtcNs_Process(NsHandle* NS_inst,
const float* const* spframe,
size_t num_bands,
float* const* outframe);
/* Returns the internally used prior speech probability of the current frame.
* There is a frequency bin based one as well, with which this should not be
* confused.
*
* Input
* - handle : Noise suppression instance.
*
* Return value : Prior speech probability in interval [0.0, 1.0].
* -1 - NULL pointer or uninitialized instance.
*/
float WebRtcNs_prior_speech_probability(NsHandle* handle);
/* Returns a pointer to the noise estimate per frequency bin. The number of
* frequency bins can be provided using WebRtcNs_num_freq().
*
* Input
* - handle : Noise suppression instance.
*
* Return value : Pointer to the noise estimate per frequency bin.
* Returns NULL if the input is a NULL pointer or an
* uninitialized instance.
*/
const float* WebRtcNs_noise_estimate(const NsHandle* handle);
/* Returns the number of frequency bins, which is the length of the noise
* estimate for example.
*
* Return value : Number of frequency bins.
*/
size_t WebRtcNs_num_freq(void);
#ifdef __cplusplus
}
#endif
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_H_

View File

@ -1,60 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/legacy_ns/noise_suppression_x.h"
#include <stdlib.h>
#include "common_audio/signal_processing/include/real_fft.h"
#include "modules/audio_processing/legacy_ns/nsx_core.h"
#include "modules/audio_processing/legacy_ns/nsx_defines.h"
NsxHandle* WebRtcNsx_Create() {
  // Allocates a fixed-point noise-suppression instance. Returns NULL if the
  // allocation fails (the original wrote through the malloc result without
  // checking it and would crash on OOM). The handle is not usable until
  // WebRtcNsx_Init() has been called on it.
  NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC));
  if (self != NULL) {
    // The FFT helper is created lazily by the core code; keep the pointer
    // NULL so WebRtcNsx_Free() can release it unconditionally.
    self->real_fft = NULL;
    self->initFlag = 0;
  }
  return (NsxHandle*)self;
}
void WebRtcNsx_Free(NsxHandle* nsxInst) {
  // Releases the FFT helper and the instance itself. Accepts NULL like
  // free(): the original dereferenced nsxInst unconditionally and crashed on
  // a NULL handle, unlike its floating-point counterpart WebRtcNs_Free().
  NoiseSuppressionFixedC* self = (NoiseSuppressionFixedC*)nsxInst;
  if (self == NULL) {
    return;
  }
  WebRtcSpl_FreeRealFFT(self->real_fft);
  free(self);
}
int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) {
  // Thin wrapper over the fixed-point core initializer.
  // Returns 0 on success, -1 on error.
  NoiseSuppressionFixedC* self = (NoiseSuppressionFixedC*)nsxInst;
  return WebRtcNsx_InitCore(self, fs);
}
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
  // Sets the suppression aggressiveness (0: mild, 1: medium, 2: aggressive).
  // Returns 0 on success, -1 on error.
  NoiseSuppressionFixedC* self = (NoiseSuppressionFixedC*)nsxInst;
  return WebRtcNsx_set_policy_core(self, mode);
}
void WebRtcNsx_Process(NsxHandle* nsxInst,
                       const int16_t* const* speechFrame,
                       int num_bands,
                       int16_t* const* outFrame) {
  // Runs fixed-point suppression on one 10 ms frame per band via the core
  // routine; suppressed output goes to outFrame, one pointer per band.
  NoiseSuppressionFixedC* self = (NoiseSuppressionFixedC*)nsxInst;
  WebRtcNsx_ProcessCore(self, speechFrame, num_bands, outFrame);
}
const uint32_t* WebRtcNsx_noise_estimate(const NsxHandle* nsxInst,
                                         int* q_noise) {
  // Returns the previous frame's per-bin noise estimate, or NULL for a
  // NULL/uninitialized instance. On success |q_noise| receives the Q-value
  // of the estimate: a fixed base of 11 plus the instance's previous noise
  // Q. Unlike the original, |q_noise| is written only on success, so a
  // failed call no longer clobbers the caller's variable with the partial
  // base value.
  const NoiseSuppressionFixedC* self = (const NoiseSuppressionFixedC*)nsxInst;
  if (nsxInst == NULL || self->initFlag == 0) {
    return NULL;
  }
  *q_noise = 11 + self->prevQNoise;
  return self->prevNoiseU32;
}
// Number of frequency bins in the array returned by
// WebRtcNsx_noise_estimate(): half the analysis block length.
size_t WebRtcNsx_num_freq() {
  return HALF_ANAL_BLOCKL;
}

View File

@ -1,112 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_X_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_X_H_
#include <stddef.h>
#include <stdint.h>
typedef struct NsxHandleT NsxHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function creates an instance of the fixed point Noise Suppression.
*/
NsxHandle* WebRtcNsx_Create(void);
/*
* This function frees the dynamic memory of a specified Noise Suppression
* instance.
*
* Input:
* - nsxInst : Pointer to NS instance that should be freed
*/
void WebRtcNsx_Free(NsxHandle* nsxInst);
/*
* This function initializes a NS instance
*
* Input:
* - nsxInst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - nsxInst : Instance that should be initialized
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
/*
 * This function does noise suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - nsxInst : NSx instance. Needs to be initiated before call.
* - speechFrame : Pointer to speech frame buffer for each band
* - num_bands : Number of bands
*
* Output:
* - nsxInst : Updated NSx instance
* - outFrame : Pointer to output frame for each band
*/
void WebRtcNsx_Process(NsxHandle* nsxInst,
const int16_t* const* speechFrame,
int num_bands,
int16_t* const* outFrame);
/* Returns a pointer to the noise estimate per frequency bin. The number of
* frequency bins can be provided using WebRtcNsx_num_freq().
*
* Input
* - nsxInst : NSx instance. Needs to be initiated before call.
* - q_noise : Q value of the noise estimate, which is the number of
* bits that it needs to be right-shifted to be
* normalized.
*
* Return value : Pointer to the noise estimate per frequency bin.
* Returns NULL if the input is a NULL pointer or an
* uninitialized instance.
*/
const uint32_t* WebRtcNsx_noise_estimate(const NsxHandle* nsxInst,
int* q_noise);
/* Returns the number of frequency bins, which is the length of the noise
* estimate for example.
*
* Return value : Number of frequency bins.
*/
size_t WebRtcNsx_num_freq(void);
#ifdef __cplusplus
}
#endif
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_NOISE_SUPPRESSION_X_H_

File diff suppressed because it is too large Load Diff

View File

@ -1,188 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NS_CORE_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NS_CORE_H_
#include "modules/audio_processing/legacy_ns/defines.h"
// Tuning constants used when extracting the signal features (LRT, spectral
// flatness, spectral difference) and when estimating/updating the prior-model
// thresholds from their histograms.
typedef struct NSParaExtract_ {
  // Bin size of histogram.
  float binSizeLrt;
  float binSizeSpecFlat;
  float binSizeSpecDiff;
  // Range of histogram over which LRT threshold is computed.
  float rangeAvgHistLrt;
  // Scale parameters: multiply dominant peaks of the histograms by scale factor
  // to obtain thresholds for prior model.
  float factor1ModelPars;  // For LRT and spectral difference.
  float factor2ModelPars;  // For spectral_flatness: used when noise is flatter
                           // than speech.
  // Peak limit for spectral flatness (varies between 0 and 1).
  float thresPosSpecFlat;
  // Limit on spacing of two highest peaks in histogram: spacing determined by
  // bin size.
  float limitPeakSpacingSpecFlat;
  float limitPeakSpacingSpecDiff;
  // Limit on relevance of second peak.
  float limitPeakWeightsSpecFlat;
  float limitPeakWeightsSpecDiff;
  // Limit on fluctuation of LRT feature.
  float thresFluctLrt;
  // Limit on the max and min values for the feature thresholds.
  float maxLrt;
  float minLrt;
  float maxSpecFlat;
  float minSpecFlat;
  float maxSpecDiff;
  float minSpecDiff;
  // Criteria of weight of histogram peak to accept/reject feature.
  int thresWeightSpecFlat;
  int thresWeightSpecDiff;
} NSParaExtract;
// State of the floating-point noise-suppression core. Allocated by
// WebRtcNs_Create() and populated by WebRtcNs_InitCore().
typedef struct NoiseSuppressionC_ {
  uint32_t fs;  // Sampling frequency, as passed to WebRtcNs_InitCore().
  size_t blockLen;
  size_t windShift;
  size_t anaLen;
  size_t magnLen;
  int aggrMode;  // Presumably the policy set via WebRtcNs_set_policy_core
                 // (0-2) -- confirm in ns_core.c.
  const float* window;
  float analyzeBuf[ANAL_BLOCKL_MAX];
  float dataBuf[ANAL_BLOCKL_MAX];
  float syntBuf[ANAL_BLOCKL_MAX];
  int initFlag;  // 0 until initialized; accessors return errors while 0.
  // Parameters for quantile noise estimation.
  float density[SIMULT * HALF_ANAL_BLOCKL];
  float lquantile[SIMULT * HALF_ANAL_BLOCKL];
  float quantile[HALF_ANAL_BLOCKL];
  int counter[SIMULT];
  int updates;
  // Parameters for Wiener filter.
  float smooth[HALF_ANAL_BLOCKL];
  float overdrive;
  float denoiseBound;
  int gainmap;
  // FFT work arrays.
  size_t ip[IP_LENGTH];
  float wfft[W_LENGTH];
  // Parameters for new method: some not needed, will reduce/cleanup later.
  int32_t blockInd;  // Frame index counter.
  int modelUpdatePars[4];  // Parameters for updating or estimating.
  // Thresholds/weights for prior model.
  float priorModelPars[7];  // Parameters for prior model.
  float noise[HALF_ANAL_BLOCKL];  // Noise spectrum from current frame.
  float noisePrev[HALF_ANAL_BLOCKL];  // Noise spectrum from previous frame.
  // Magnitude spectrum of previous analyze frame.
  float magnPrevAnalyze[HALF_ANAL_BLOCKL];
  // Magnitude spectrum of previous process frame.
  float magnPrevProcess[HALF_ANAL_BLOCKL];
  float logLrtTimeAvg[HALF_ANAL_BLOCKL];  // Log LRT factor with time-smoothing.
  float priorSpeechProb;  // Prior speech/noise probability.
  float featureData[7];
  // Conservative noise spectrum estimate.
  float magnAvgPause[HALF_ANAL_BLOCKL];
  float signalEnergy;  // Energy of |magn|.
  float sumMagn;
  float whiteNoiseLevel;  // Initial noise estimate.
  float initMagnEst[HALF_ANAL_BLOCKL];  // Initial magnitude spectrum estimate.
  float pinkNoiseNumerator;  // Pink noise parameter: numerator.
  float pinkNoiseExp;  // Pink noise parameter: power of frequencies.
  float parametricNoise[HALF_ANAL_BLOCKL];
  // Parameters for feature extraction.
  NSParaExtract featureExtractionParams;
  // Histograms for parameter estimation.
  int histLrt[HIST_PAR_EST];
  int histSpecFlat[HIST_PAR_EST];
  int histSpecDiff[HIST_PAR_EST];
  // Quantities for high band estimate.
  float speechProb[HALF_ANAL_BLOCKL];  // Final speech/noise prob: prior + LRT.
  // Buffering data for HB.
  float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
} NoiseSuppressionC;
#ifdef __cplusplus
extern "C" {
#endif
/****************************************************************************
* WebRtcNs_InitCore(...)
*
* This function initializes a noise suppression instance
*
* Input:
* - self : Instance that should be initialized
* - fs : Sampling frequency
*
* Output:
* - self : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs);
/****************************************************************************
* WebRtcNs_set_policy_core(...)
*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - self : Instance that should be initialized
* - mode : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB)
*
* Output:
* - self : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode);
/****************************************************************************
* WebRtcNs_AnalyzeCore
*
* Estimate the background noise.
*
* Input:
* - self : Instance that should be initialized
* - speechFrame : Input speech frame for lower band
*
* Output:
* - self : Updated instance
*/
void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame);
/****************************************************************************
* WebRtcNs_ProcessCore
*
* Do noise suppression.
*
* Input:
* - self : Instance that should be initialized
* - inFrame : Input speech frame for each band
* - num_bands : Number of bands
*
* Output:
* - self : Updated instance
* - outFrame : Output speech frame for each band
*/
void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
const float* const* inFrame,
size_t num_bands,
float* const* outFrame);
#ifdef __cplusplus
}
#endif
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_NS_CORE_H_

File diff suppressed because it is too large Load Diff

View File

@ -1,261 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_CORE_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_CORE_H_
#ifdef NS_FILEDEBUG
#include <stdio.h>
#endif
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_processing/legacy_ns/nsx_defines.h"
// State of the fixed-point noise-suppression core. Allocated by
// WebRtcNsx_Create() and populated by WebRtcNsx_InitCore(). Q-format of a
// field is noted where the original recorded it.
typedef struct NoiseSuppressionFixedC_ {
  uint32_t fs;  // Sampling frequency, as passed to WebRtcNsx_InitCore().
  const int16_t* window;
  int16_t analysisBuffer[ANAL_BLOCKL_MAX];
  int16_t synthesisBuffer[ANAL_BLOCKL_MAX];
  uint16_t noiseSupFilter[HALF_ANAL_BLOCKL];
  uint16_t overdrive; /* Q8 */
  uint16_t denoiseBound; /* Q14 */
  const int16_t* factor2Table;
  int16_t noiseEstLogQuantile[SIMULT * HALF_ANAL_BLOCKL];
  int16_t noiseEstDensity[SIMULT * HALF_ANAL_BLOCKL];
  int16_t noiseEstCounter[SIMULT];
  int16_t noiseEstQuantile[HALF_ANAL_BLOCKL];
  size_t anaLen;
  size_t anaLen2;
  size_t magnLen;
  int aggrMode;
  int stages;
  int initFlag;  // 0 until initialized; accessors return errors while 0.
  int gainMap;
  int32_t maxLrt;
  int32_t minLrt;
  // Log LRT factor with time-smoothing in Q8.
  int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL];
  int32_t featureLogLrt;
  int32_t thresholdLogLrt;
  int16_t weightLogLrt;
  uint32_t featureSpecDiff;
  uint32_t thresholdSpecDiff;
  int16_t weightSpecDiff;
  uint32_t featureSpecFlat;
  uint32_t thresholdSpecFlat;
  int16_t weightSpecFlat;
  // Conservative estimate of noise spectrum.
  int32_t avgMagnPause[HALF_ANAL_BLOCKL];
  uint32_t magnEnergy;
  uint32_t sumMagn;
  uint32_t curAvgMagnEnergy;
  uint32_t timeAvgMagnEnergy;
  uint32_t timeAvgMagnEnergyTmp;
  uint32_t whiteNoiseLevel;  // Initial noise estimate.
  // Initial magnitude spectrum estimate.
  uint32_t initMagnEst[HALF_ANAL_BLOCKL];
  // Pink noise parameters:
  int32_t pinkNoiseNumerator;  // Numerator.
  int32_t pinkNoiseExp;  // Power of freq.
  int minNorm;  // Smallest normalization factor.
  int zeroInputSignal;  // Zero input signal flag.
  // Noise spectrum from previous frame.
  uint32_t prevNoiseU32[HALF_ANAL_BLOCKL];
  // Magnitude spectrum from previous frame.
  uint16_t prevMagnU16[HALF_ANAL_BLOCKL];
  // Prior speech/noise probability in Q14.
  int16_t priorNonSpeechProb;
  int blockIndex;  // Frame index counter.
  // Parameter for updating or estimating thresholds/weights for prior model.
  int modelUpdate;
  int cntThresUpdate;
  // Histograms for parameter estimation.
  int16_t histLrt[HIST_PAR_EST];
  int16_t histSpecFlat[HIST_PAR_EST];
  int16_t histSpecDiff[HIST_PAR_EST];
  // Quantities for high band estimate.
  int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
  int qNoise;
  int prevQNoise;  // Added to the base Q in WebRtcNsx_noise_estimate().
  int prevQMagn;
  size_t blockLen10ms;
  int16_t real[ANAL_BLOCKL_MAX];
  int16_t imag[ANAL_BLOCKL_MAX];
  int32_t energyIn;
  int scaleEnergyIn;
  int normData;
  struct RealFFT* real_fft;  // Freed by WebRtcNsx_Free(); NULL after Create.
} NoiseSuppressionFixedC;
#ifdef __cplusplus
extern "C" {
#endif
/****************************************************************************
* WebRtcNsx_InitCore(...)
*
* This function initializes a noise suppression instance
*
* Input:
* - inst : Instance that should be initialized
* - fs : Sampling frequency
*
* Output:
* - inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs);
/****************************************************************************
* WebRtcNsx_set_policy_core(...)
*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
*
* Output:
* - inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode);
/****************************************************************************
* WebRtcNsx_ProcessCore
*
* Do noise suppression.
*
* Input:
* - inst : Instance that should be initialized
* - inFrame : Input speech frame for each band
* - num_bands : Number of bands
*
* Output:
* - inst : Updated instance
* - outFrame : Output speech frame for each band
*/
void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
const int16_t* const* inFrame,
int num_bands,
int16_t* const* outFrame);
/****************************************************************************
* Some function pointers, for internal functions shared by ARM NEON and
* generic C code.
*/
// Noise Estimation.
typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise);
extern NoiseEstimation WebRtcNsx_NoiseEstimation;
// Filter the data in the frequency domain, and create spectrum.
typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
extern PrepareSpectrum WebRtcNsx_PrepareSpectrum;
// For the noise suppression process, synthesis, read out fully processed
// segment, and update synthesis buffer.
typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
extern SynthesisUpdate WebRtcNsx_SynthesisUpdate;
// Update analysis buffer for lower band, and window data before FFT.
typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
// Denormalize the real-valued signal |in|, the output from inverse FFT.
typedef void (*Denormalize)(NoiseSuppressionFixedC* inst,
int16_t* in,
int factor);
extern Denormalize WebRtcNsx_Denormalize;
// Normalize the real-valued signal |in|, the input to forward FFT.
typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst,
const int16_t* in,
int16_t* out);
extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
// Compute speech/noise probability.
// Intended to be private.
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
uint16_t* nonSpeechProbFinal,
uint32_t* priorLocSnr,
uint32_t* postLocSnr);
#if defined(WEBRTC_HAS_NEON)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
// are declared below and defined in file nsx_core_neon.c.
void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise);
void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
#endif
#if defined(MIPS32_LE)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for MIPS platforms
// are declared below and defined in file nsx_core_mips.c.
void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
const int16_t* in,
int16_t* out);
#if defined(MIPS_DSP_R1_LE)
void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
int16_t* in,
int factor);
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_CORE_H_

View File

@ -1,259 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rtc_base/checks.h"
#include "modules/audio_processing/legacy_ns/noise_suppression_x.h"
#include "modules/audio_processing/legacy_ns/nsx_core.h"
#include "modules/audio_processing/legacy_ns/nsx_defines.h"
// Lookup table for the sigmoid indicator maps below (Q14). Indexed by the
// integer part of the shifted feature value and linearly interpolated with
// the fractional part; the values appear consistent with 8192 * tanh(i / 4)
// (e.g. 8192 * tanh(0.25) ~= 2006 vs. table entry 2017) -- confirm before
// relying on the exact shape.
static const int16_t kIndicatorTable[17] = {
  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
};
// Compute speech/noise probability.
// The per-bin NON-speech probability is returned in: nonSpeechProbFinal (Q8).
// priorLocSnr is the prior SNR for each frequency (in Q11).
// postLocSnr is the post SNR for each frequency (in Q11).
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
                               uint16_t* nonSpeechProbFinal,
                               uint32_t* priorLocSnr,
                               uint32_t* postLocSnr) {
  uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
  int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
  int32_t frac32, logTmp;
  int32_t logLrtTimeAvgKsumFX;
  int16_t indPriorFX16;
  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
  size_t i;
  int normTmp, normTmp2, nShifts;
  // Compute the feature based on the average likelihood-ratio (LR) factor:
  // the average over all frequency bins of the smoothed log LRT.
  logLrtTimeAvgKsumFX = 0;
  for (i = 0; i < inst->magnLen; i++) {
    besselTmpFX32 = (int32_t)postLocSnr[i];  // Q11
    normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
    num = postLocSnr[i] << normTmp;  // Q(11+normTmp)
    if (normTmp > 10) {
      den = priorLocSnr[i] << (normTmp - 11);  // Q(normTmp)
    } else {
      den = priorLocSnr[i] >> (11 - normTmp);  // Q(normTmp)
    }
    if (den > 0) {
      besselTmpFX32 -= num / den;  // Q11
    } else {
      besselTmpFX32 = 0;
    }
    // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
    //                                       - inst->logLrtTimeAvg[i]);
    // Here, LRT_TAVG = 0.5.
    zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
    frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
    // Quadratic approximation of log2 of the mantissa (fractional part).
    tmp32 = (frac32 * frac32 * -43) >> 19;
    tmp32 += ((int16_t)frac32 * 5412) >> 12;
    frac32 = tmp32 + 37;
    // tmp32 = log2(priorLocSnr[i])
    tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12);  // Q12
    logTmp = (tmp32 * 178) >> 8;  // log2(priorLocSnr[i])*log(2)
    // tmp32no1 = LRT_TAVG * (log(snrLocPrior) + inst->logLrtTimeAvg[i]) in Q12.
    tmp32no1 = (logTmp + inst->logLrtTimeAvgW32[i]) / 2;
    inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1);  // Q12
    logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i];  // Q12
  }
  // Average log-LRT over all bins, scaled to the feature domain.
  inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
      (inst->stages + 11);
  // Done with computation of LR factor.
  //
  // Compute the indicator functions.
  //
  // Average LRT feature.
  // FLOAT code
  // indicator0 = 0.5 * (tanh(widthPrior *
  //                          (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
  tmpIndFX = 16384;  // Q14(1.0)
  tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt;  // Q12
  nShifts = 7 - inst->stages;  // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
  // Use larger width in tanh map for pause regions.
  if (tmp32no1 < 0) {
    tmpIndFX = 0;
    tmp32no1 = -tmp32no1;
    // widthPrior = widthPrior * 2.0;
    nShifts++;
  }
  tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts);  // Q14
  // Compute indicator function: sigmoid map.
  if (tmp32no1 < (16 << 14) && tmp32no1 >= 0) {
    tableIndex = (int16_t)(tmp32no1 >> 14);
    tmp16no2 = kIndicatorTable[tableIndex];
    tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
    frac = (int16_t)(tmp32no1 & 0x00003fff);  // Q14
    tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
    if (tmpIndFX == 0) {
      tmpIndFX = 8192 - tmp16no2;  // Q14
    } else {
      tmpIndFX = 8192 + tmp16no2;  // Q14
    }
  }
  indPriorFX = inst->weightLogLrt * tmpIndFX;  // 6*Q14
  // Spectral flatness feature.
  if (inst->weightSpecFlat) {
    tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400);  // Q10
    tmpIndFX = 16384;  // Q14(1.0)
    // Use larger width in tanh map for pause regions.
    tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1;  // Q10
    nShifts = 4;
    if (inst->thresholdSpecFlat < tmpU32no1) {
      tmpIndFX = 0;
      tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
      // widthPrior = widthPrior * 2.0;
      nShifts++;
    }
    tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25);  // Q14
    // Compute indicator function: sigmoid map.
    // FLOAT code
    // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
    //                          (threshPrior1 - tmpFloat1)) + 1.0);
    if (tmpU32no1 < (16 << 14)) {
      tableIndex = (int16_t)(tmpU32no1 >> 14);
      tmp16no2 = kIndicatorTable[tableIndex];
      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
      frac = (int16_t)(tmpU32no1 & 0x00003fff);  // Q14
      tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
      if (tmpIndFX) {
        tmpIndFX = 8192 + tmp16no2;  // Q14
      } else {
        tmpIndFX = 8192 - tmp16no2;  // Q14
      }
    }
    indPriorFX += inst->weightSpecFlat * tmpIndFX;  // 6*Q14
  }
  // For template spectral-difference.
  if (inst->weightSpecDiff) {
    tmpU32no1 = 0;
    if (inst->featureSpecDiff) {
      normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
                               WebRtcSpl_NormU32(inst->featureSpecDiff));
      RTC_DCHECK_GE(normTmp, 0);
      tmpU32no1 = inst->featureSpecDiff << normTmp;  // Q(normTmp-2*stages)
      tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
      if (tmpU32no2 > 0) {
        // Q(20 - inst->stages)
        tmpU32no1 /= tmpU32no2;
      } else {
        tmpU32no1 = (uint32_t)(0x7fffffff);
      }
    }
    tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
    tmpU32no2 = tmpU32no1 - tmpU32no3;
    nShifts = 1;
    tmpIndFX = 16384;  // Q14(1.0)
    // Use larger width in tanh map for pause regions.
    if (tmpU32no2 & 0x80000000) {
      tmpIndFX = 0;
      tmpU32no2 = tmpU32no3 - tmpU32no1;
      // widthPrior = widthPrior * 2.0;
      nShifts--;
    }
    tmpU32no1 = tmpU32no2 >> nShifts;
    // Compute indicator function: sigmoid map.
    /* FLOAT code
     indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
     */
    if (tmpU32no1 < (16 << 14)) {
      tableIndex = (int16_t)(tmpU32no1 >> 14);
      tmp16no2 = kIndicatorTable[tableIndex];
      tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
      frac = (int16_t)(tmpU32no1 & 0x00003fff);  // Q14
      tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
          tmp16no1, frac, 14);
      if (tmpIndFX) {
        tmpIndFX = 8192 + tmp16no2;
      } else {
        tmpIndFX = 8192 - tmp16no2;
      }
    }
    indPriorFX += inst->weightSpecDiff * tmpIndFX;  // 6*Q14
  }
  // Combine the indicator function with the feature weights.
  // FLOAT code
  // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
  //                 indicator1 + weightIndPrior2 * indicator2);
  indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6);  // Q14
  // Done with computing indicator function.
  // Compute the prior probability.
  // FLOAT code
  // inst->priorNonSpeechProb += PRIOR_UPDATE *
  //                             (indPriorNonSpeech - inst->priorNonSpeechProb);
  tmp16 = indPriorFX16 - inst->priorNonSpeechProb;  // Q14
  inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
  // Final speech probability: combine prior model with LR factor.
  // Bins that are not assigned below stay at 0 (non-speech probability 0).
  memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
  if (inst->priorNonSpeechProb > 0) {
    for (i = 0; i < inst->magnLen; i++) {
      // FLOAT code
      // invLrt = exp(inst->logLrtTimeAvg[i]);
      // invLrt = inst->priorSpeechProb * invLrt;
      // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) /
      //                         (1.0 - inst->priorSpeechProb + invLrt);
      // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt;
      // nonSpeechProbFinal[i] = inst->priorNonSpeechProb /
      //                         (inst->priorNonSpeechProb + invLrt);
      // NOTE(review): the 65300 bound appears to keep the 2^x approximation
      // below from overflowing -- confirm before changing.
      if (inst->logLrtTimeAvgW32[i] < 65300) {
        tmp32no1 = (inst->logLrtTimeAvgW32[i] * 23637) >> 14;  // Q12
        intPart = (int16_t)(tmp32no1 >> 12);
        if (intPart < -8) {
          intPart = -8;
        }
        frac = (int16_t)(tmp32no1 & 0x00000fff);  // Q12
        // Quadratic approximation of 2^frac.
        tmp32no2 = (frac * frac * 44) >> 19;  // Q12.
        tmp32no2 += (frac * 84) >> 7;  // Q12
        invLrtFX = (1 << (8 + intPart)) +
            WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4);  // Q8
        normTmp = WebRtcSpl_NormW32(invLrtFX);
        normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb));
        if (normTmp + normTmp2 >= 7) {
          if (normTmp + normTmp2 < 15) {
            invLrtFX >>= 15 - normTmp2 - normTmp;
            // Q(normTmp+normTmp2-7)
            tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
            // Q(normTmp+normTmp2+7)
            invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2);
            // Q14
          } else {
            tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb);
            // Q22
            invLrtFX = tmp32no1 >> 8;  // Q14.
          }
          tmp32no1 = (int32_t)inst->priorNonSpeechProb << 8;  // Q22
          nonSpeechProbFinal[i] = tmp32no1 /
              (inst->priorNonSpeechProb + invLrtFX);  // Q8
        }
      }
    }
  }
}

View File

@ -1,606 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/legacy_ns/nsx_core.h"
#include <arm_neon.h>
#include "rtc_base/checks.h"
// Constants to compensate for shifting signal log(2^shifts).
// kLogTable[n] ~= log(2^n) in Q8; indexed by |stages - normData| (see the
// |tabind|/|logval| setup in WebRtcNsx_NoiseEstimationNeon).
const int16_t WebRtcNsx_kLogTable[9] = {
0, 177, 355, 532, 710, 887, 1065, 1242, 1420
};
// Reciprocal table: kCounterDiv[n] ~= 1/(n+1) in Q15, used as
// CounterDiv = 1/(counter+1) in the quantile update loop so the division
// can be replaced by a multiply.
const int16_t WebRtcNsx_kCounterDiv[201] = {
32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731,
2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311,
1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840,
819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607,
596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475,
468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390,
386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331,
328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287,
285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254,
252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228,
226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206,
205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188,
187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163
};
// Fractional part of log2 in Q8, indexed by the top 8 mantissa bits of the
// normalized magnitude (see the |frac| extraction in
// WebRtcNsx_NoiseEstimationNeon).
const int16_t WebRtcNsx_kLogTableFrac[256] = {
0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81,
82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99,
100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116,
117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131,
132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146,
147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160,
161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174,
175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187,
188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200,
201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212,
213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224,
225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236,
237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247,
248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255
};
// Update the noise estimation information.
// Converts the log-domain quantile estimates starting at |offset| into
// linear-domain noise magnitudes:
//   noiseEstQuantile[i] = 2^noiseEstLogQuantile[offset + i], in Q(qNoise).
// Processes four bins per NEON iteration, with a single scalar tail element
// (magnLen is odd, see the DCHECK in WebRtcNsx_PrepareSpectrumNeon).
static void UpdateNoiseEstimateNeon(NoiseSuppressionFixedC* inst, int offset) {
const int16_t kExp2Const = 11819; // Q13
int16_t* ptr_noiseEstLogQuantile = NULL;
int16_t* ptr_noiseEstQuantile = NULL;
int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const);
int32x4_t twentyOne32x4 = vdupq_n_s32(21);
int32x4_t constA32x4 = vdupq_n_s32(0x1fffff);
int32x4_t constB32x4 = vdupq_n_s32(0x200000);
// The largest log quantile bounds the result, so it determines the highest
// Q-domain that still fits in an int16.
int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset,
inst->magnLen);
// Guarantee a Q-domain as high as possible and still fit in int16
inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const,
tmp16,
21);
int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise);
// Vectorized loop: four bins at a time.
for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset],
ptr_noiseEstQuantile = &inst->noiseEstQuantile[0];
ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3];
ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) {
// tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i];
int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile);
int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4);
// tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4);
v32x4A = vorrq_s32(v32x4A, constB32x4);
// tmp16 = (int16_t)(tmp32no2 >> 21);
v32x4B = vshrq_n_s32(v32x4B, 21);
// tmp16 -= 21;// shift 21 to get result in Q0
v32x4B = vsubq_s32(v32x4B, twentyOne32x4);
// tmp16 += (int16_t) inst->qNoise;
// shift to get result in Q(qNoise)
v32x4B = vaddq_s32(v32x4B, qNoise32x4);
// if (tmp16 < 0) {
// tmp32no1 >>= -tmp16;
// } else {
// tmp32no1 <<= tmp16;
// }
// vshlq_s32 covers both branches: a negative per-lane count shifts right.
v32x4B = vshlq_s32(v32x4A, v32x4B);
// tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1);
v16x4 = vqmovn_s32(v32x4B);
//inst->noiseEstQuantile[i] = tmp16;
vst1_s16(ptr_noiseEstQuantile, v16x4);
}
// Last iteration:
// inst->quantile[i]=exp(inst->lquantile[offset+i]);
// in Q21
int32_t tmp32no2 = kExp2Const * *ptr_noiseEstLogQuantile;
int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac
tmp16 = (int16_t)(tmp32no2 >> 21);
tmp16 -= 21;// shift 21 to get result in Q0
tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise)
if (tmp16 < 0) {
tmp32no1 >>= -tmp16;
} else {
tmp32no1 <<= tmp16;
}
*ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1);
}
// Noise Estimation.
// Quantile-based noise estimation: maintains SIMULT simultaneous log-domain
// quantile estimates of the magnitude spectrum |magn| and outputs the current
// linear-domain noise estimate in |noise|, with its Q-domain in |*q_noise|.
// The per-bin update loop is vectorized eight bins at a time, with a scalar
// tail for the last (odd) bin.
void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise) {
int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
int16_t countProd, delta, zeros, frac;
int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
const int16_t log2_const = 22713;
const int16_t width_factor = 21845;
size_t i, s, offset;
// Offset term log(2^(stages - normData)) looked up from kLogTable, used to
// compensate for the pre-FFT normalization shift.
tabind = inst->stages - inst->normData;
RTC_DCHECK_LT(tabind, 9);
RTC_DCHECK_GT(tabind, -9);
if (tabind < 0) {
logval = -WebRtcNsx_kLogTable[-tabind];
} else {
logval = WebRtcNsx_kLogTable[tabind];
}
int16x8_t logval_16x8 = vdupq_n_s16(logval);
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
// magn is in Q(-stages), and the real lmagn values are:
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
// lmagn in Q8
for (i = 0; i < inst->magnLen; i++) {
if (magn[i]) {
zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
frac = (int16_t)((((uint32_t)magn[i] << zeros)
& 0x7FFFFFFF) >> 23);
RTC_DCHECK_LT(frac, 256);
// log2(magn(i))
log2 = (int16_t)(((31 - zeros) << 8)
+ WebRtcNsx_kLogTableFrac[frac]);
// log2(magn(i))*log(2)
lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
// + log(2^stages)
lmagn[i] += logval;
} else {
// Zero magnitude: clamp to the smallest representable log value.
lmagn[i] = logval;
}
}
int16x4_t Q3_16x4 = vdup_n_s16(3);
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor);
int16_t factor = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG)
factor = FACTOR_Q7_STARTUP;
// Loop over simultaneous estimates
for (s = 0; s < SIMULT; s++) {
offset = s * inst->magnLen;
// Get counter values from state
counter = inst->noiseEstCounter[s];
RTC_DCHECK_LT(counter, 201);
countDiv = WebRtcNsx_kCounterDiv[counter];
countProd = (int16_t)(counter * countDiv);
// quant_est(...)
int16_t deltaBuff[8];
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
int16x8_t tmp16x8_1;
int16x8_t tmp16x8_2;
int16x8_t tmp16x8_3;
uint16x8_t tmp16x8_4;
int32x4_t tmp32x4;
for (i = 0; i + 7 < inst->magnLen; i += 8) {
// Compute delta.
// Smaller step size during startup. This prevents from using
// unrealistic values causing overflow.
tmp16x8_0 = vdupq_n_s16(factor);
vst1q_s16(deltaBuff, tmp16x8_0);
int j;
for (j = 0; j < 8; j++) {
if (inst->noiseEstDensity[offset + i + j] > 512) {
// Get values for deltaBuff by shifting intead of dividing.
int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]);
deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor));
}
}
// Update log quantile estimate
// tmp16 = (int16_t)((delta * countDiv) >> 14);
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
// prepare for the "if" branch
// tmp16 += 2;
// tmp16_1 = (Word16)(tmp16>>2);
tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
// inst->noiseEstLogQuantile[offset+i] + tmp16_1;
tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
// Prepare for the "else" branch
// tmp16 += 1;
// tmp16_1 = (Word16)(tmp16>>1);
tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
// tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
// tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1);
tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
// inst->noiseEstLogQuantile[offset + i] - tmp16_2;
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
// logval is the smallest fixed point representation we can have. Values
// below that will correspond to values in the interval [0, 1], which
// can't possibly occur.
tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
// Do the if-else branches:
// select the "if" result where lmagn[i] > noiseEstLogQuantile, else the
// "else" result, using the comparison mask.
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
tmp16x8_4 = vcgtq_s16(tmp16x8_3, tmp16x8_2);
tmp16x8_2 = vbslq_s16(tmp16x8_4, tmp16x8_1, tmp16x8_0);
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
// Update density estimate
// tmp16_1 + tmp16_2
tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
// lmagn[i] - inst->noiseEstLogQuantile[offset + i]
// Only update density where |lmagn - logQuantile| < WIDTH_Q8.
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
tmp16x8_3 = vabsq_s16(tmp16x8_3);
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
tmp16x8_1 = vbslq_s16(tmp16x8_4, tmp16x8_0, tmp16x8_1);
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
} // End loop over magnitude spectrum
// Last iteration over magnitude spectrum:
// compute delta
if (inst->noiseEstDensity[offset + i] > 512) {
// Get values for deltaBuff by shifting intead of dividing.
int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
} else {
delta = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG) {
// Smaller step size during startup. This prevents from using
// unrealistic values causing overflow.
delta = FACTOR_Q7_STARTUP;
}
}
// update log quantile estimate
tmp16 = (int16_t)((delta * countDiv) >> 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/(inst->counter[s]+1) in Q15
tmp16 += 2;
inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
} else {
tmp16 += 1;
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
// TODO(bjornv): investigate why we need to truncate twice.
tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
if (inst->noiseEstLogQuantile[offset + i] < logval) {
// logval is the smallest fixed point representation we can have.
// Values below that will correspond to values in the interval
// [0, 1], which can't possibly occur.
inst->noiseEstLogQuantile[offset + i] = logval;
}
}
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8) {
tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
width_factor, countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
if (counter >= END_STARTUP_LONG) {
inst->noiseEstCounter[s] = 0;
if (inst->blockIndex >= END_STARTUP_LONG) {
UpdateNoiseEstimateNeon(inst, offset);
}
}
inst->noiseEstCounter[s]++;
} // end loop over simultaneous estimates
// Sequentially update the noise during startup
// NOTE(review): |offset| here is the value left by the last iteration of the
// loop above, i.e. the last simultaneous estimate.
if (inst->blockIndex < END_STARTUP_LONG) {
UpdateNoiseEstimateNeon(inst, offset);
}
for (i = 0; i < inst->magnLen; i++) {
noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
}
(*q_noise) = (int16_t)inst->qNoise;
}
// Filter the data in the frequency domain, and create spectrum.
// Applies the noise suppression filter (noiseSupFilter, >> 14) to the
// real/imag spectra in place, then packs the result as interleaved
// {real, -imag} pairs into |freq_buf|.
void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
int16_t* freq_buf) {
RTC_DCHECK_EQ(1, inst->magnLen % 8);
RTC_DCHECK_EQ(0, inst->anaLen2 % 16);
// (1) Filtering.
// Fixed point C code for the next block is as follows:
// for (i = 0; i < inst->magnLen; i++) {
// inst->real[i] = (int16_t)((inst->real[i] *
// (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
// inst->imag[i] = (int16_t)((inst->imag[i] *
// (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
// }
int16_t* preal = &inst->real[0];
int16_t* pimag = &inst->imag[0];
int16_t* pns_filter = (int16_t*)&inst->noiseSupFilter[0];
int16_t* pimag_end = pimag + inst->magnLen - 4;
while (pimag < pimag_end) {
// Eight bins per iteration: widen, multiply, narrow with >> 14.
int16x8_t real = vld1q_s16(preal);
int16x8_t imag = vld1q_s16(pimag);
int16x8_t ns_filter = vld1q_s16(pns_filter);
int32x4_t tmp_r_0 = vmull_s16(vget_low_s16(real), vget_low_s16(ns_filter));
int32x4_t tmp_i_0 = vmull_s16(vget_low_s16(imag), vget_low_s16(ns_filter));
int32x4_t tmp_r_1 = vmull_s16(vget_high_s16(real),
vget_high_s16(ns_filter));
int32x4_t tmp_i_1 = vmull_s16(vget_high_s16(imag),
vget_high_s16(ns_filter));
int16x4_t result_r_0 = vshrn_n_s32(tmp_r_0, 14);
int16x4_t result_i_0 = vshrn_n_s32(tmp_i_0, 14);
int16x4_t result_r_1 = vshrn_n_s32(tmp_r_1, 14);
int16x4_t result_i_1 = vshrn_n_s32(tmp_i_1, 14);
vst1q_s16(preal, vcombine_s16(result_r_0, result_r_1));
vst1q_s16(pimag, vcombine_s16(result_i_0, result_i_1));
preal += 8;
pimag += 8;
pns_filter += 8;
}
// Filter the last element
*preal = (int16_t)((*preal * *pns_filter) >> 14);
*pimag = (int16_t)((*pimag * *pns_filter) >> 14);
// (2) Create spectrum.
// Fixed point C code for the rest of the function is as follows:
// freq_buf[0] = inst->real[0];
// freq_buf[1] = -inst->imag[0];
// for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
// freq_buf[j] = inst->real[i];
// freq_buf[j + 1] = -inst->imag[i];
// }
// freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
// freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
preal = &inst->real[0];
pimag = &inst->imag[0];
pimag_end = pimag + inst->anaLen2;
int16_t * freq_buf_start = freq_buf;
while (pimag < pimag_end) {
// loop unroll
// Negate imag and interleave {real, -imag} 16 values at a time via vst2q.
int16x8x2_t real_imag_0;
int16x8x2_t real_imag_1;
real_imag_0.val[1] = vld1q_s16(pimag);
real_imag_0.val[0] = vld1q_s16(preal);
preal += 8;
pimag += 8;
real_imag_1.val[1] = vld1q_s16(pimag);
real_imag_1.val[0] = vld1q_s16(preal);
preal += 8;
pimag += 8;
real_imag_0.val[1] = vnegq_s16(real_imag_0.val[1]);
real_imag_1.val[1] = vnegq_s16(real_imag_1.val[1]);
vst2q_s16(freq_buf_start, real_imag_0);
freq_buf_start += 16;
vst2q_s16(freq_buf_start, real_imag_1);
freq_buf_start += 16;
}
// Final (Nyquist) pair, handled outside the vector loop.
freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
}
// For the noise supress process, synthesis, read out fully processed segment,
// and update synthesis buffer.
// Windows |inst->real| (product rounded >> 14), applies |gain_factor|
// (saturating, rounded >> 13) and overlap-adds into the synthesis buffer.
// The first blockLen10ms samples are copied to |out_frame|; the buffer is
// then shifted left by blockLen10ms and the tail zero-filled.
void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor) {
RTC_DCHECK_EQ(0, inst->anaLen % 16);
RTC_DCHECK_EQ(0, inst->blockLen10ms % 16);
int16_t* preal_start = inst->real;
const int16_t* pwindow = inst->window;
int16_t* preal_end = preal_start + inst->anaLen;
int16_t* psynthesis_buffer = inst->synthesisBuffer;
while (preal_start < preal_end) {
// Loop unroll.
// Two 8-lane groups (16 samples) per iteration.
int16x8_t window_0 = vld1q_s16(pwindow);
int16x8_t real_0 = vld1q_s16(preal_start);
int16x8_t synthesis_buffer_0 = vld1q_s16(psynthesis_buffer);
int16x8_t window_1 = vld1q_s16(pwindow + 8);
int16x8_t real_1 = vld1q_s16(preal_start + 8);
int16x8_t synthesis_buffer_1 = vld1q_s16(psynthesis_buffer + 8);
// real * window, rounded >> 14.
int32x4_t tmp32a_0_low = vmull_s16(vget_low_s16(real_0),
vget_low_s16(window_0));
int32x4_t tmp32a_0_high = vmull_s16(vget_high_s16(real_0),
vget_high_s16(window_0));
int32x4_t tmp32a_1_low = vmull_s16(vget_low_s16(real_1),
vget_low_s16(window_1));
int32x4_t tmp32a_1_high = vmull_s16(vget_high_s16(real_1),
vget_high_s16(window_1));
int16x4_t tmp16a_0_low = vqrshrn_n_s32(tmp32a_0_low, 14);
int16x4_t tmp16a_0_high = vqrshrn_n_s32(tmp32a_0_high, 14);
int16x4_t tmp16a_1_low = vqrshrn_n_s32(tmp32a_1_low, 14);
int16x4_t tmp16a_1_high = vqrshrn_n_s32(tmp32a_1_high, 14);
// Apply gain_factor, rounded >> 13, then saturating overlap-add.
int32x4_t tmp32b_0_low = vmull_n_s16(tmp16a_0_low, gain_factor);
int32x4_t tmp32b_0_high = vmull_n_s16(tmp16a_0_high, gain_factor);
int32x4_t tmp32b_1_low = vmull_n_s16(tmp16a_1_low, gain_factor);
int32x4_t tmp32b_1_high = vmull_n_s16(tmp16a_1_high, gain_factor);
int16x4_t tmp16b_0_low = vqrshrn_n_s32(tmp32b_0_low, 13);
int16x4_t tmp16b_0_high = vqrshrn_n_s32(tmp32b_0_high, 13);
int16x4_t tmp16b_1_low = vqrshrn_n_s32(tmp32b_1_low, 13);
int16x4_t tmp16b_1_high = vqrshrn_n_s32(tmp32b_1_high, 13);
synthesis_buffer_0 = vqaddq_s16(vcombine_s16(tmp16b_0_low, tmp16b_0_high),
synthesis_buffer_0);
synthesis_buffer_1 = vqaddq_s16(vcombine_s16(tmp16b_1_low, tmp16b_1_high),
synthesis_buffer_1);
vst1q_s16(psynthesis_buffer, synthesis_buffer_0);
vst1q_s16(psynthesis_buffer + 8, synthesis_buffer_1);
pwindow += 16;
preal_start += 16;
psynthesis_buffer += 16;
}
// Read out fully processed segment.
int16_t * p_start = inst->synthesisBuffer;
int16_t * p_end = inst->synthesisBuffer + inst->blockLen10ms;
int16_t * p_frame = out_frame;
while (p_start < p_end) {
int16x8_t frame_0 = vld1q_s16(p_start);
vst1q_s16(p_frame, frame_0);
p_start += 8;
p_frame += 8;
}
// Update synthesis buffer.
// Shift the remaining anaLen - blockLen10ms samples to the front.
int16_t* p_start_src = inst->synthesisBuffer + inst->blockLen10ms;
int16_t* p_end_src = inst->synthesisBuffer + inst->anaLen;
int16_t* p_start_dst = inst->synthesisBuffer;
while (p_start_src < p_end_src) {
int16x8_t frame = vld1q_s16(p_start_src);
vst1q_s16(p_start_dst, frame);
p_start_src += 8;
p_start_dst += 8;
}
// Zero-fill the freed tail of the buffer.
p_start = inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms;
p_end = p_start + inst->blockLen10ms;
int16x8_t zero = vdupq_n_s16(0);
for (;p_start < p_end; p_start += 8) {
vst1q_s16(p_start, zero);
}
}
// Update analysis buffer for lower band, and window data before FFT.
// Shifts the analysis buffer left by blockLen10ms, appends |new_speech|,
// then writes the windowed buffer ((window * buffer) rounded >> 14) to |out|.
void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech) {
RTC_DCHECK_EQ(0, inst->blockLen10ms % 16);
RTC_DCHECK_EQ(0, inst->anaLen % 16);
// For lower band update analysis buffer.
// memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
// (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms;
int16_t* p_end_src = inst->analysisBuffer + inst->anaLen;
int16_t* p_start_dst = inst->analysisBuffer;
while (p_start_src < p_end_src) {
int16x8_t frame = vld1q_s16(p_start_src);
vst1q_s16(p_start_dst, frame);
p_start_src += 8;
p_start_dst += 8;
}
// memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms,
// new_speech, inst->blockLen10ms * sizeof(*inst->analysisBuffer));
p_start_src = new_speech;
p_end_src = new_speech + inst->blockLen10ms;
p_start_dst = inst->analysisBuffer + inst->anaLen - inst->blockLen10ms;
while (p_start_src < p_end_src) {
int16x8_t frame = vld1q_s16(p_start_src);
vst1q_s16(p_start_dst, frame);
p_start_src += 8;
p_start_dst += 8;
}
// Window data before FFT.
int16_t* p_start_window = (int16_t*) inst->window;
int16_t* p_start_buffer = inst->analysisBuffer;
int16_t* p_end_buffer = inst->analysisBuffer + inst->anaLen;
int16_t* p_start_out = out;
// Load the first element to reduce pipeline bubble.
int16x8_t window = vld1q_s16(p_start_window);
int16x8_t buffer = vld1q_s16(p_start_buffer);
p_start_window += 8;
p_start_buffer += 8;
while (p_start_buffer < p_end_buffer) {
// Unroll loop.
// Software-pipelined: multiply the previously loaded vectors while
// prefetching the next pair.
int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer));
int32x4_t tmp32_high = vmull_s16(vget_high_s16(window),
vget_high_s16(buffer));
window = vld1q_s16(p_start_window);
buffer = vld1q_s16(p_start_buffer);
int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14);
int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14);
vst1q_s16(p_start_out, vcombine_s16(result_low, result_high));
p_start_buffer += 8;
p_start_window += 8;
p_start_out += 8;
}
// Epilogue: the final pair loaded inside the loop is multiplied here.
int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer));
int32x4_t tmp32_high = vmull_s16(vget_high_s16(window),
vget_high_s16(buffer));
int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14);
int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14);
vst1q_s16(p_start_out, vcombine_s16(result_low, result_high));
}

View File

@ -1,74 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_DEFINES_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_DEFINES_H_
/* Compile-time constants for the legacy fixed-point (NsX) noise suppressor.
 * Q-domain suffixes (e.g. _Q7, _Q14) indicate the fixed-point scaling. */
#define ANAL_BLOCKL_MAX 256 /* Max analysis block length */
#define HALF_ANAL_BLOCKL 129 /* Half max analysis block length + 1 */
#define NUM_HIGH_BANDS_MAX 2 /* Max number of high bands */
#define SIMULT 3 /* Number of simultaneous noise estimates */
#define END_STARTUP_LONG 200
#define END_STARTUP_SHORT 50
#define FACTOR_Q16 2621440 /* 40 in Q16 */
#define FACTOR_Q7 5120 /* 40 in Q7 */
#define FACTOR_Q7_STARTUP 1024 /* 8 in Q7 */
#define WIDTH_Q8 3 /* 0.01 in Q8 (or 25 ) */
/* PARAMETERS FOR NEW METHOD */
#define DD_PR_SNR_Q11 2007 /* ~= Q11(0.98) DD update of prior SNR */
#define ONE_MINUS_DD_PR_SNR_Q11 41 /* DD update of prior SNR */
#define SPECT_FLAT_TAVG_Q14 \
4915 /* (0.30) tavg parameter for spectral flatness measure */
#define SPECT_DIFF_TAVG_Q8 \
77 /* (0.30) tavg parameter for spectral flatness measure */
#define PRIOR_UPDATE_Q14 1638 /* Q14(0.1) Update parameter of prior model */
#define NOISE_UPDATE_Q8 26 /* 26 ~= Q8(0.1) Update parameter for noise */
/* Probability threshold for noise state in speech/noise likelihood. */
#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */
#define HIST_PAR_EST 1000 /* Histogram size for estimation of parameters */
/* FEATURE EXTRACTION CONFIG */
/* Bin size of histogram */
#define BIN_SIZE_LRT 10
/* Scale parameters: multiply dominant peaks of the histograms by scale factor
 * to obtain. */
/* Thresholds for prior model */
#define FACTOR_1_LRT_DIFF \
6 /* For LRT and spectral difference (5 times bigger) */
/* For spectral_flatness: used when noise is flatter than speech (10 times
 * bigger). */
#define FACTOR_2_FLAT_Q10 922
/* Peak limit for spectral flatness (varies between 0 and 1) */
#define THRES_PEAK_FLAT 24 /* * 2 * BIN_SIZE_FLAT_FX */
/* Limit on spacing of two highest peaks in histogram: spacing determined by bin
 * size. */
#define LIM_PEAK_SPACE_FLAT_DIFF 4 /* * 2 * BIN_SIZE_DIFF_FX */
/* Limit on relevance of second peak */
#define LIM_PEAK_WEIGHT_FLAT_DIFF 2
#define THRES_FLUCT_LRT \
10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */
/* Limit on the max and min values for the feature thresholds */
#define MAX_FLAT_Q10 38912 /* * 2 * BIN_SIZE_FLAT_FX */
#define MIN_FLAT_Q10 4096 /* * 2 * BIN_SIZE_FLAT_FX */
#define MAX_DIFF 100 /* * 2 * BIN_SIZE_DIFF_FX */
#define MIN_DIFF 16 /* * 2 * BIN_SIZE_DIFF_FX */
/* Criteria of weight of histogram peak to accept/reject feature */
#define THRES_WEIGHT_FLAT_DIFF \
154 /*(int)(0.3*(inst->modelUpdate)) for flatness and difference */
#define STAT_UPDATES 9 /* Update every 512 = 1 << 9 block */
#define ONE_MINUS_GAMMA_PAUSE_Q8 \
13 /* ~= Q8(0.05) Update for conservative noise estimate */
#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 \
3 /* ~= Q8(0.01) Update for transition and noise region */
#endif /* MODULES_AUDIO_PROCESSING_LEGACY_NS_NSX_DEFINES_H_ */

View File

@ -520,12 +520,6 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
*settings_.maximum_internal_processing_rate;
}
const bool use_legacy_ns =
settings_.use_legacy_ns && *settings_.use_legacy_ns;
if (use_legacy_ns) {
apm_config.noise_suppression.use_legacy_ns = use_legacy_ns;
}
if (settings_.use_ns) {
apm_config.noise_suppression.enabled = *settings_.use_ns;
}

View File

@ -61,7 +61,6 @@ struct SimulationSettings {
absl::optional<bool> use_vad;
absl::optional<bool> use_le;
absl::optional<bool> use_all;
absl::optional<bool> use_legacy_ns;
absl::optional<bool> use_analog_agc_agc2_level_estimator;
absl::optional<bool> analog_agc_disable_digital_adaptive;
absl::optional<int> agc_mode;

View File

@ -118,10 +118,6 @@ ABSL_FLAG(bool,
false,
"Activate all of the default components (will be overridden by any "
"other settings)");
ABSL_FLAG(int,
use_legacy_ns,
kParameterNotSpecifiedValue,
"Activate (1) or deactivate(0) the legacy NS");
ABSL_FLAG(int,
analog_agc_disable_digital_adaptive,
kParameterNotSpecifiedValue,
@ -381,8 +377,6 @@ SimulationSettings CreateSettings() {
&settings.use_analog_agc);
SetSettingIfFlagSet(absl::GetFlag(FLAGS_vad), &settings.use_vad);
SetSettingIfFlagSet(absl::GetFlag(FLAGS_le), &settings.use_le);
SetSettingIfFlagSet(absl::GetFlag(FLAGS_use_legacy_ns),
&settings.use_legacy_ns);
SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc_disable_digital_adaptive),
&settings.analog_agc_disable_digital_adaptive);
SetSettingIfFlagSet(absl::GetFlag(FLAGS_analog_agc_agc2_level_estimator),

View File

@ -20,9 +20,9 @@
#include "common_audio/include/audio_util.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/third_party/fft4g/fft4g.h"
#include "modules/audio_processing/legacy_ns/windows_private.h"
#include "modules/audio_processing/transient/common.h"
#include "modules/audio_processing/transient/transient_detector.h"
#include "modules/audio_processing/transient/windows_private.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"

View File

@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_LEGACY_NS_WINDOWS_PRIVATE_H_
#define MODULES_AUDIO_PROCESSING_LEGACY_NS_WINDOWS_PRIVATE_H_
#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_
#define MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_
// Hanning window for 4ms 16kHz
static const float kHanning64w128[128] = {
@ -550,4 +550,4 @@ static const float kBlocks480w1024[1024] = {
0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f};
#endif // MODULES_AUDIO_PROCESSING_LEGACY_NS_WINDOWS_PRIVATE_H_
#endif // MODULES_AUDIO_PROCESSING_TRANSIENT_WINDOWS_PRIVATE_H_