Clang-format ns_core

BUG=webrtc:3811
R=bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/29539004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7257 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
aluebs@webrtc.org
2014-09-22 10:59:46 +00:00
parent 759982d357
commit bdfdc96b22
2 changed files with 401 additions and 334 deletions

View File

@ -10,8 +10,8 @@
#include <math.h>
#include <string.h>
//#include <stdio.h>
#include <stdlib.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
#include "webrtc/modules/audio_processing/ns/ns_core.h"
@ -28,16 +28,19 @@ void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
// range of histogram over which lrt threshold is computed
inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
// thresholds for prior model
inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff
inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness:
// scale parameters: multiply dominant peaks of the histograms by scale factor
// to obtain thresholds for prior model
inst->featureExtractionParams.factor1ModelPars =
(float)1.20; // for lrt and spectral diff
inst->featureExtractionParams.factor2ModelPars =
(float)0.9; // for spectral_flatness:
// used when noise is flatter than speech
// peak limit for spectral flatness (varies between 0 and 1)
inst->featureExtractionParams.thresPosSpecFlat = (float)0.6;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
// limit on spacing of two highest peaks in histogram: spacing determined by
// bin size
inst->featureExtractionParams.limitPeakSpacingSpecFlat =
2 * inst->featureExtractionParams.binSizeSpecFlat;
inst->featureExtractionParams.limitPeakSpacingSpecDiff =
@ -61,10 +64,10 @@ void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
inst->featureExtractionParams.minSpecDiff = (float)0.16;
// criteria of weight of histogram peak to accept/reject feature
inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3
* (inst->modelUpdatePars[1])); //for spectral flatness
inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3
* (inst->modelUpdatePars[1])); //for spectral difference
inst->featureExtractionParams.thresWeightSpecFlat =
(int)(0.3 * (inst->modelUpdatePars[1])); // for spectral flatness
inst->featureExtractionParams.thresWeightSpecDiff =
(int)(0.3 * (inst->modelUpdatePars[1])); // for spectral difference
}
// Initialize state
@ -128,7 +131,8 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
}
for (i = 0; i < SIMULT; i++) {
inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
inst->counter[i] =
(int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
}
inst->updates = 0;
@ -146,20 +150,27 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
inst->magnPrev[i] = (float)0.0; // previous mag spectrum
inst->noisePrev[i] = (float)0.0; // previous noise-spectrum
inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold)
inst->logLrtTimeAvg[i] =
LRT_FEATURE_THR; // smooth LR ratio (same as threshold)
inst->magnAvgPause[i] = (float)0.0; // conservative noise spectrum estimate
inst->speechProb[i] = (float)0.0; // for estimation of HB in second pass
inst->initMagnEst[i] = (float)0.0; // initial average mag spectrum
}
// feature quantities
inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold)
inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version
inst->featureData[2] = (float)0.0; //spectral variance: not used in this version
inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold)
inst->featureData[4] = SF_FEATURE_THR; //spectral template diff (start on threshold)
inst->featureData[0] =
SF_FEATURE_THR; // spectral flatness (start on threshold)
inst->featureData[1] =
(float)0.0; // spectral entropy: not used in this version
inst->featureData[2] =
(float)0.0; // spectral variance: not used in this version
inst->featureData[3] =
LRT_FEATURE_THR; // average lrt factor (start on threshold)
inst->featureData[4] =
SF_FEATURE_THR; // spectral template diff (start on threshold)
inst->featureData[5] = (float)0.0; // normalization for spectral-diff
inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum
inst->featureData[6] =
(float)0.0; // window time-average of input magnitude spectrum
// histogram quantities: used to estimate/update thresholds for features
for (i = 0; i < HIST_PAR_EST; i++) {
@ -169,14 +180,17 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
}
inst->blockInd = -1; // frame counter
inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature
inst->priorModelPars[0] =
LRT_FEATURE_THR; // default threshold for lrt feature
inst->priorModelPars[1] = (float)0.5; // threshold for spectral flatness:
// determined on-line
inst->priorModelPars[2] = (float)1.0; // sgn_map par for spectral measure:
// 1 for flatness measure
inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature:
inst->priorModelPars[3] =
(float)0.5; // threshold for template-difference feature:
// determined on-line
inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature
inst->priorModelPars[4] =
(float)1.0; // default weighting parameter for lrt feature
inst->priorModelPars[5] = (float)0.0; // default weighting parameter for
// spectral flatness feature
inst->priorModelPars[6] = (float)0.0; // default weighting parameter for
@ -185,7 +199,8 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
inst->modelUpdatePars[0] = 2; // update flag for parameters:
// 0 no update, 1=update once, 2=update every window
inst->modelUpdatePars[1] = 500; // window for update
inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum
inst->modelUpdatePars[2] =
0; // counter for update of conservative noise spectrum
// counter if the feature thresholds are updated during the sequence
inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
@ -195,12 +210,11 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
inst->pinkNoiseNumerator = 0.0;
inst->pinkNoiseExp = 0.0;
WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration
WebRtcNs_set_feature_extraction_parameters(inst);
// default mode
WebRtcNs_set_policy_core(inst, 0);
memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX);
inst->initFlag = 1;
@ -265,17 +279,19 @@ void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
// update log quantile estimate
if (lmagn[i] > inst->lquantile[offset + i]) {
inst->lquantile[offset + i] += QUANTILE * delta
/ (float)(inst->counter[s] + 1);
inst->lquantile[offset + i] +=
QUANTILE * delta / (float)(inst->counter[s] + 1);
} else {
inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta
/ (float)(inst->counter[s] + 1);
inst->lquantile[offset + i] -=
((float)1.0 - QUANTILE) * delta / (float)(inst->counter[s] + 1);
}
// update density estimate
if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) {
inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset
+ i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1);
inst->density[offset + i] =
((float)inst->counter[s] * inst->density[offset + i] +
(float)1.0 / ((float)2.0 * WIDTH)) /
(float)(inst->counter[s] + 1);
}
} // end loop over magnitude spectrum
@ -305,14 +321,17 @@ void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
}
// Extract thresholds for feature parameters
// histograms are computed over some window_size (given by inst->modelUpdatePars[1])
// histograms are computed over some window_size (given by
// inst->modelUpdatePars[1])
// thresholds and weights are extracted every window
// flag 0 means update histogram only, flag 1 means compute the thresholds/weights
// flag 0 means update histogram only, flag 1 means compute the
// thresholds/weights
// threshold and weights are returned in: inst->priorModelPars
void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
int maxPeak1, maxPeak2;
int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff;
int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff,
weightPeak2SpecDiff;
float binMid, featureSum;
float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
@ -326,36 +345,42 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
// update histograms
if (flag == 0) {
// LRT
if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt)
&& (inst->featureData[3] >= 0.0)) {
i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt);
if ((inst->featureData[3] <
HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt) &&
(inst->featureData[3] >= 0.0)) {
i = (int)(inst->featureData[3] /
inst->featureExtractionParams.binSizeLrt);
inst->histLrt[i]++;
}
// Spectral flatness
if ((inst->featureData[0] < HIST_PAR_EST
* inst->featureExtractionParams.binSizeSpecFlat)
&& (inst->featureData[0] >= 0.0)) {
i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat);
if ((inst->featureData[0] <
HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecFlat) &&
(inst->featureData[0] >= 0.0)) {
i = (int)(inst->featureData[0] /
inst->featureExtractionParams.binSizeSpecFlat);
inst->histSpecFlat[i]++;
}
// Spectral difference
if ((inst->featureData[4] < HIST_PAR_EST
* inst->featureExtractionParams.binSizeSpecDiff)
&& (inst->featureData[4] >= 0.0)) {
i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff);
if ((inst->featureData[4] <
HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecDiff) &&
(inst->featureData[4] >= 0.0)) {
i = (int)(inst->featureData[4] /
inst->featureExtractionParams.binSizeSpecDiff);
inst->histSpecDiff[i]++;
}
}
// extract parameters for speech/noise probability
if (flag == 1) {
//lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt
// lrt feature: compute the average over
// inst->featureExtractionParams.rangeAvgHistLrt
avgHistLrt = 0.0;
avgHistLrtCompl = 0.0;
avgSquareHistLrt = 0.0;
numHistLrt = 0;
for (i = 0; i < HIST_PAR_EST; i++) {
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
binMid =
((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) {
avgHistLrt += inst->histLrt[i] * binMid;
numHistLrt += inst->histLrt[i];
@ -374,8 +399,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
// very low fluct, so likely noise
inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
} else {
inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars
* avgHistLrt;
inst->priorModelPars[0] =
inst->featureExtractionParams.factor1ModelPars * avgHistLrt;
// check if value is within min/max range
if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) {
inst->priorModelPars[0] = inst->featureExtractionParams.minLrt;
@ -386,8 +411,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
}
// done with lrt feature
//
// for spectral flatness and spectral difference: compute the main peaks of histogram
// for spectral flatness and spectral difference: compute the main peaks of
// histogram
maxPeak1 = 0;
maxPeak2 = 0;
posPeak1SpecFlat = 0.0;
@ -397,7 +422,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
// peaks for flatness
for (i = 0; i < HIST_PAR_EST; i++) {
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat;
binMid = ((float)i + (float)0.5) *
inst->featureExtractionParams.binSizeSpecFlat;
if (inst->histSpecFlat[i] > maxPeak1) {
// Found new "first" peak
maxPeak2 = maxPeak1;
@ -424,7 +450,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
weightPeak2SpecDiff = 0;
// peaks for spectral difference
for (i = 0; i < HIST_PAR_EST; i++) {
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff;
binMid = ((float)i + (float)0.5) *
inst->featureExtractionParams.binSizeSpecDiff;
if (inst->histSpecDiff[i] > maxPeak1) {
// Found new "first" peak
maxPeak2 = maxPeak1;
@ -445,24 +472,25 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
// for spectrum flatness feature
useFeatureSpecFlat = 1;
// merge the two peaks if they are close
if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat)
< inst->featureExtractionParams.limitPeakSpacingSpecFlat)
&& (weightPeak2SpecFlat
> inst->featureExtractionParams.limitPeakWeightsSpecFlat
* weightPeak1SpecFlat)) {
if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) <
inst->featureExtractionParams.limitPeakSpacingSpecFlat) &&
(weightPeak2SpecFlat >
inst->featureExtractionParams.limitPeakWeightsSpecFlat *
weightPeak1SpecFlat)) {
weightPeak1SpecFlat += weightPeak2SpecFlat;
posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat);
}
// reject if weight of peaks is not large enough, or peak value too small
if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat
|| posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
if (weightPeak1SpecFlat <
inst->featureExtractionParams.thresWeightSpecFlat ||
posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
useFeatureSpecFlat = 0;
}
// if selected, get the threshold
if (useFeatureSpecFlat == 1) {
// compute the threshold
inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars
* posPeak1SpecFlat;
inst->priorModelPars[1] =
inst->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
// check if value is within min/max range
if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) {
inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat;
@ -476,19 +504,20 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
// for template feature
useFeatureSpecDiff = 1;
// merge the two peaks if they are close
if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff)
< inst->featureExtractionParams.limitPeakSpacingSpecDiff)
&& (weightPeak2SpecDiff
> inst->featureExtractionParams.limitPeakWeightsSpecDiff
* weightPeak1SpecDiff)) {
if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) <
inst->featureExtractionParams.limitPeakSpacingSpecDiff) &&
(weightPeak2SpecDiff >
inst->featureExtractionParams.limitPeakWeightsSpecDiff *
weightPeak1SpecDiff)) {
weightPeak1SpecDiff += weightPeak2SpecDiff;
posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff);
}
// get the threshold value
inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars
* posPeak1SpecDiff;
inst->priorModelPars[3] =
inst->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
// reject if weight of peaks is not large enough
if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) {
if (weightPeak1SpecDiff <
inst->featureExtractionParams.thresWeightSpecDiff) {
useFeatureSpecDiff = 0;
}
// check if value is within min/max range
@ -541,7 +570,8 @@ void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
for (i = 0; i < shiftLP; i++) {
avgSpectralFlatnessDen -= magnIn[i];
}
// compute log of ratio of the geometric to arithmetic mean: check for log(0) case
// compute log of ratio of the geometric to arithmetic mean: check for log(0)
// case
for (i = shiftLP; i < inst->magnLen; i++) {
if (magnIn[i] > 0.0) {
avgSpectralFlatnessNum += (float)log(magnIn[i]);
@ -562,12 +592,14 @@ void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
// done with flatness feature
}
// Compute the difference measure between input spectrum and a template/learned noise spectrum
// Compute the difference measure between input spectrum and a template/learned
// noise spectrum
// magnIn is the input spectrum
// the reference/template spectrum is inst->magnAvgPause[i]
// returns (normalized) spectral difference in inst->featureData[4]
void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
// avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
// avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 /
// var(magnAvgPause)
int i;
float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
@ -587,7 +619,8 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
// compute variance and covariance quantities
for (i = 0; i < inst->magnLen; i++) {
covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause);
varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
varPause +=
(inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
}
covMagnPause = covMagnPause / ((float)inst->magnLen);
@ -596,10 +629,13 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
// update of average magnitude spectrum
inst->featureData[6] += inst->signalEnergy;
avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
avgDiffNormMagn =
varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
// normalize and compute time-avg update of difference feature
avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
avgDiffNormMagn =
(float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
inst->featureData[4] +=
SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
}
// Compute speech/noise probability
@ -608,7 +644,9 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
// noise is the noise spectrum
// snrLocPrior is the prior snr for each freq.
// snrLocPost is the post snr for each freq.
void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior,
void WebRtcNs_SpeechNoiseProb(NSinst_t* inst,
float* probSpeechFinal,
float* snrLocPrior,
float* snrLocPost) {
int i, sgnMap;
float invLrt, gainPrior, indPrior;
@ -644,8 +682,8 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i];
tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001);
besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2;
inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1)
- inst->logLrtTimeAvg[i]);
inst->logLrtTimeAvg[i] +=
LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - inst->logLrtTimeAvg[i]);
logLrtTimeAvgKsum += inst->logLrtTimeAvg[i];
}
logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen);
@ -663,8 +701,9 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
widthPrior = widthPrior1;
}
// compute indicator function: sigmoid map
indicator0 = (float)0.5 * ((float)tanh(widthPrior *
(logLrtTimeAvgKsum - threshPrior0)) + (float)1.0);
indicator0 = (float)0.5 *
((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) +
(float)1.0);
// spectral flatness feature
tmpFloat1 = inst->featureData[0];
@ -677,8 +716,10 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
widthPrior = widthPrior1;
}
// compute indicator function: sigmoid map
indicator1 = (float)0.5 * ((float)tanh((float)sgnMap *
widthPrior * (threshPrior1 - tmpFloat1)) + (float)1.0);
indicator1 =
(float)0.5 *
((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) +
(float)1.0);
// for template spectrum-difference
tmpFloat1 = inst->featureData[4];
@ -688,12 +729,13 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
widthPrior = widthPrior2;
}
// compute indicator function: sigmoid map
indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2))
+ (float)1.0);
indicator2 =
(float)0.5 *
((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + (float)1.0);
// combine the indicator function with the feature weights
indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2
* indicator2;
indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 +
weightIndPrior2 * indicator2;
// done with computing indicator function
// compute the prior probability
@ -707,7 +749,8 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
}
// final speech probability: combine prior model with LR factor:
gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001);
gainPrior = ((float)1.0 - inst->priorSpeechProb) /
(inst->priorSpeechProb + (float)0.0001);
for (i = 0; i < inst->magnLen; i++) {
invLrt = (float)exp(-inst->logLrtTimeAvg[i]);
invLrt = (float)gainPrior * invLrt;
@ -749,9 +792,11 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
//
// update analysis buffer for L band
memcpy(inst->analyzeBuf, inst->analyzeBuf + inst->blockLen10ms,
memcpy(inst->analyzeBuf,
inst->analyzeBuf + inst->blockLen10ms,
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
memcpy(inst->analyzeBuf + inst->anaLen - inst->blockLen10ms, speechFrame,
memcpy(inst->analyzeBuf + inst->anaLen - inst->blockLen10ms,
speechFrame,
sizeof(float) * inst->blockLen10ms);
// check if processing needed
@ -764,12 +809,13 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
}
if (energy == 0.0) {
// we want to avoid updating statistics in this case:
// Updating feature statistics when we have zeros only will cause thresholds to
// move towards zero signal situations. This in turn has the effect that once the
// signal is "turned on" (non-zero values) everything will be treated as speech
// and there is no noise suppression effect. Depending on the duration of the
// inactive signal it takes a considerable amount of time for the system to learn
// what is noise and what is speech.
// Updating feature statistics when we have zeros only will cause
// thresholds to move towards zero signal situations. This in turn has the
// effect that once the signal is "turned on" (non-zero values) everything
// will be treated as speech and there is no noise suppression effect.
// Depending on the duration of the inactive signal it takes a
// considerable amount of time for the system to learn what is noise and
// what is speech.
return 0;
}
@ -829,11 +875,13 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
// compute simplified noise model during startup
if (inst->blockInd < END_STARTUP_SHORT) {
// Estimate White noise
inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive;
inst->whiteNoiseLevel +=
sumMagn / ((float)inst->magnLen) * inst->overdrive;
// Estimate Pink noise parameters
tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand));
tmpFloat1 -= (sum_log_i * sum_log_i);
tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
tmpFloat2 =
(sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
tmpFloat3 = tmpFloat2 / tmpFloat1;
// Constrain the estimated spectrum to be positive
if (tmpFloat3 < 0.0f) {
@ -858,18 +906,22 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
parametric_noise = inst->whiteNoiseLevel;
} else {
// Use pink noise estimate
parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
parametric_num =
exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
parametric_num *= (float)(inst->blockInd + 1);
parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp);
parametric_noise =
parametric_num / pow((float)kStartBand, parametric_exp);
}
for (i = 0; i < inst->magnLen; i++) {
// Estimate the background noise using the white and pink noise parameters
// Estimate the background noise using the white and pink noise
// parameters
if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) {
// Use pink noise estimate
parametric_noise = parametric_num / pow((float)i, parametric_exp);
}
theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise);
theFilterTmp[i] =
(inst->initMagnEst[i] - inst->overdrive * parametric_noise);
theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001);
// Weight quantile noise with modeled noise
noise[i] *= (inst->blockInd);
@ -887,10 +939,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
}
// start processing at frames == converged+1
//
// STEP 1: compute prior and post snr based on quantile noise est
//
// compute DD estimate of prior SNR: needed for new method
for (i = 0; i < inst->magnLen; i++) {
// post snr
@ -900,23 +949,25 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
}
// previous post snr
// previous estimate: based on previous frame with gain filter
previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001)
* (inst->smooth[i]);
previousEstimateStsa[i] = inst->magnPrev[i] /
(inst->noisePrev[i] + (float)0.0001) *
(inst->smooth[i]);
// DD estimate is sum of two terms: current estimate and previous estimate
// directed decision update of snrPrior
snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
* snrLocPost[i];
snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] +
((float)1.0 - DD_PR_SNR) * snrLocPost[i];
// post and prior snr needed for step 2
} // end of loop over freqs
// done with step 1: dd computation of prior and post snr
//
// STEP 2: compute speech/noise likelihood
//
// compute difference of input spectrum with learned/estimated noise spectrum
// compute difference of input spectrum with learned/estimated noise
// spectrum
WebRtcNs_ComputeSpectralDifference(inst, magn);
// compute histograms for parameter decisions (thresholds and weights for features)
// parameters are extracted once every window time (=inst->modelUpdatePars[1])
// compute histograms for parameter decisions (thresholds and weights for
// features)
// parameters are extracted once every window time
// (=inst->modelUpdatePars[1])
if (updateParsFlag >= 1) {
// counter update
inst->modelUpdatePars[3]--;
@ -934,10 +985,10 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
} else {
// update every window:
// get normalization for spectral difference for next window estimate
inst->featureData[6] = inst->featureData[6]
/ ((float)inst->modelUpdatePars[1]);
inst->featureData[5] = (float)0.5 * (inst->featureData[6]
+ inst->featureData[5]);
inst->featureData[6] =
inst->featureData[6] / ((float)inst->modelUpdatePars[1]);
inst->featureData[5] =
(float)0.5 * (inst->featureData[6] + inst->featureData[5]);
inst->featureData[6] = (float)0.0;
}
}
@ -951,8 +1002,10 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
probNonSpeech = (float)1.0 - probSpeech;
// temporary noise update:
// use it for speech frames if update value is less than previous
noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
* (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
noiseUpdateTmp =
gammaNoiseTmp * inst->noisePrev[i] +
((float)1.0 - gammaNoiseTmp) *
(probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
//
// time-constant based on speech/noise state
gammaNoiseOld = gammaNoiseTmp;
@ -963,16 +1016,20 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
}
// conservative noise update
if (probSpeech < PROB_RANGE) {
inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
inst->magnAvgPause[i] +=
GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
}
// noise update
if (gammaNoiseTmp == gammaNoiseOld) {
noise[i] = noiseUpdateTmp;
} else {
noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
* (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
noise[i] =
gammaNoiseTmp * inst->noisePrev[i] +
((float)1.0 - gammaNoiseTmp) *
(probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
// allow for noise update downwards:
// if noise update decreases the noise, it is safe, so allow it to happen
// if noise update decreases the noise, it is safe, so allow it to
// happen
if (noiseUpdateTmp < noise[i]) {
noise[i] = noiseUpdateTmp;
}
@ -980,19 +1037,19 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
} // end of freq loop
// done with step 2: noise update
//
// STEP 3: compute dd update of prior snr and post snr based on new noise estimate
//
// STEP 3: compute dd update of prior snr and post snr based on new noise
// estimate
for (i = 0; i < inst->magnLen; i++) {
// post and prior snr
currentEstimateStsa = (float)0.0;
if (magn[i] > noise[i]) {
currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
}
// DD estimate is sum of two terms: current estimate and previous estimate
// DD estimate is sum of two terms: current estimate and previous
// estimate
// directed decision update of snrPrior
snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
* currentEstimateStsa;
snrPrior = DD_PR_SNR * previousEstimateStsa[i] +
((float)1.0 - DD_PR_SNR) * currentEstimateStsa;
// gain filter
tmpFloat1 = inst->overdrive + snrPrior;
tmpFloat2 = (float)snrPrior / tmpFloat1;
@ -1075,16 +1132,20 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
}
// update analysis buffer for L band
memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms,
memcpy(inst->dataBuf,
inst->dataBuf + inst->blockLen10ms,
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, speechFrame,
memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms,
speechFrame,
sizeof(float) * inst->blockLen10ms);
if (flagHB == 1) {
// update analysis buffer for H band
memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms,
memcpy(inst->dataBufHB,
inst->dataBufHB + inst->blockLen10ms,
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, speechFrameHB,
memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms,
speechFrameHB,
sizeof(float) * inst->blockLen10ms);
}
@ -1103,9 +1164,11 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
fout[i - inst->windShift] = inst->syntBuf[i];
}
// update synthesis buffer
memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
memcpy(inst->syntBuf,
inst->syntBuf + inst->blockLen,
sizeof(float) * (inst->anaLen - inst->blockLen));
memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
memset(inst->syntBuf + inst->anaLen - inst->blockLen,
0,
sizeof(float) * inst->blockLen);
// out buffer
@ -1186,8 +1249,8 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
}
// combine both scales with speech/noise prob:
// note prior (priorSpeechProb) is not frequency dependent
factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb)
* factor2;
factor = inst->priorSpeechProb * factor1 +
((float)1.0 - inst->priorSpeechProb) * factor2;
} // out of inst->gainmap==1
// synthesis
@ -1199,9 +1262,11 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
fout[i - inst->windShift] = inst->syntBuf[i];
}
// update synthesis buffer
memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
memcpy(inst->syntBuf,
inst->syntBuf + inst->blockLen,
sizeof(float) * (inst->anaLen - inst->blockLen));
memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
memset(inst->syntBuf + inst->anaLen - inst->blockLen,
0,
sizeof(float) * inst->blockLen);
// out buffer
@ -1216,16 +1281,18 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
for (i = 0; i < inst->blockLen10ms; i++) {
fout[i] = inst->outBuf[i];
}
memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms,
memcpy(inst->outBuf,
inst->outBuf + inst->blockLen10ms,
sizeof(float) * (inst->outLen - inst->blockLen10ms));
memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0,
memset(inst->outBuf + inst->outLen - inst->blockLen10ms,
0,
sizeof(float) * inst->blockLen10ms);
inst->outLen -= inst->blockLen10ms;
}
for (i = 0; i < inst->blockLen10ms; ++i)
outFrame[i] = WEBRTC_SPL_SAT(
WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
outFrame[i] =
WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
// for time-domain gain of HB
if (flagHB == 1) {
@ -1245,11 +1312,13 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0;
// gain based on speech prob:
gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
gainModHB = (float)0.5 *
((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
// combine gain with low band gain
gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB;
if (avgProbSpeechHB >= (float)0.5) {
gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
gainTimeDomainHB =
(float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
}
gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
// make sure gain is within flooring range
@ -1264,8 +1333,8 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
// apply gain
for (i = 0; i < inst->blockLen10ms; i++) {
float o = gainTimeDomainHB * inst->dataBufHB[i];
outFrameHB[i] = WEBRTC_SPL_SAT(
WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
outFrameHB[i] =
WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
}
} // end of H band gain computation
//

View File

@ -8,26 +8,27 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
#include "webrtc/modules/audio_processing/ns/defines.h"
typedef struct NSParaExtract_t_ {
// bin size of histogram
float binSizeLrt;
float binSizeSpecFlat;
float binSizeSpecDiff;
// range of histogram over which lrt threshold is computed
float rangeAvgHistLrt;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
//thresholds for prior model
// scale parameters: multiply dominant peaks of the histograms by scale factor
// to obtain thresholds for prior model
float factor1ModelPars; // for lrt and spectral difference
float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
float factor2ModelPars; // for spectral_flatness: used when noise is flatter
// than speech
// peak limit for spectral flatness (varies between 0 and 1)
float thresPosSpecFlat;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
// limit on spacing of two highest peaks in histogram: spacing determined by
// bin size
float limitPeakSpacingSpecFlat;
float limitPeakSpacingSpecDiff;
// limit on relevance of second peak:
@ -49,7 +50,6 @@ typedef struct NSParaExtract_t_ {
} NSParaExtract_t;
typedef struct NSinst_t_ {
uint32_t fs;
int blockLen;
int blockLen10ms;
@ -108,7 +108,6 @@ typedef struct NSinst_t_ {
} NSinst_t;
#ifdef __cplusplus
extern "C" {
#endif
@ -188,8 +187,7 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
float* outFrameLow,
float* outFrameHigh);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_