Clang-format ns_core
BUG=webrtc:3811 R=bjornv@webrtc.org, kwiberg@webrtc.org Review URL: https://webrtc-codereview.appspot.com/29539004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7257 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@ -10,8 +10,8 @@
|
|||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
//#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||||
#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
|
#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
|
||||||
#include "webrtc/modules/audio_processing/ns/ns_core.h"
|
#include "webrtc/modules/audio_processing/ns/ns_core.h"
|
||||||
@ -28,16 +28,19 @@ void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
|
|||||||
// range of histogram over which lrt threshold is computed
|
// range of histogram over which lrt threshold is computed
|
||||||
inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0;
|
inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0;
|
||||||
|
|
||||||
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
|
// scale parameters: multiply dominant peaks of the histograms by scale factor
|
||||||
// thresholds for prior model
|
// to obtain thresholds for prior model
|
||||||
inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff
|
inst->featureExtractionParams.factor1ModelPars =
|
||||||
inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness:
|
(float)1.20; // for lrt and spectral diff
|
||||||
|
inst->featureExtractionParams.factor2ModelPars =
|
||||||
|
(float)0.9; // for spectral_flatness:
|
||||||
// used when noise is flatter than speech
|
// used when noise is flatter than speech
|
||||||
|
|
||||||
// peak limit for spectral flatness (varies between 0 and 1)
|
// peak limit for spectral flatness (varies between 0 and 1)
|
||||||
inst->featureExtractionParams.thresPosSpecFlat = (float)0.6;
|
inst->featureExtractionParams.thresPosSpecFlat = (float)0.6;
|
||||||
|
|
||||||
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
|
// limit on spacing of two highest peaks in histogram: spacing determined by
|
||||||
|
// bin size
|
||||||
inst->featureExtractionParams.limitPeakSpacingSpecFlat =
|
inst->featureExtractionParams.limitPeakSpacingSpecFlat =
|
||||||
2 * inst->featureExtractionParams.binSizeSpecFlat;
|
2 * inst->featureExtractionParams.binSizeSpecFlat;
|
||||||
inst->featureExtractionParams.limitPeakSpacingSpecDiff =
|
inst->featureExtractionParams.limitPeakSpacingSpecDiff =
|
||||||
@ -61,10 +64,10 @@ void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
|
|||||||
inst->featureExtractionParams.minSpecDiff = (float)0.16;
|
inst->featureExtractionParams.minSpecDiff = (float)0.16;
|
||||||
|
|
||||||
// criteria of weight of histogram peak to accept/reject feature
|
// criteria of weight of histogram peak to accept/reject feature
|
||||||
inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3
|
inst->featureExtractionParams.thresWeightSpecFlat =
|
||||||
* (inst->modelUpdatePars[1])); //for spectral flatness
|
(int)(0.3 * (inst->modelUpdatePars[1])); // for spectral flatness
|
||||||
inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3
|
inst->featureExtractionParams.thresWeightSpecDiff =
|
||||||
* (inst->modelUpdatePars[1])); //for spectral difference
|
(int)(0.3 * (inst->modelUpdatePars[1])); // for spectral difference
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize state
|
// Initialize state
|
||||||
@ -128,7 +131,8 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < SIMULT; i++) {
|
for (i = 0; i < SIMULT; i++) {
|
||||||
inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
|
inst->counter[i] =
|
||||||
|
(int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
inst->updates = 0;
|
inst->updates = 0;
|
||||||
@ -146,20 +150,27 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
|
|||||||
for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
|
for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
|
||||||
inst->magnPrev[i] = (float)0.0; // previous mag spectrum
|
inst->magnPrev[i] = (float)0.0; // previous mag spectrum
|
||||||
inst->noisePrev[i] = (float)0.0; // previous noise-spectrum
|
inst->noisePrev[i] = (float)0.0; // previous noise-spectrum
|
||||||
inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold)
|
inst->logLrtTimeAvg[i] =
|
||||||
|
LRT_FEATURE_THR; // smooth LR ratio (same as threshold)
|
||||||
inst->magnAvgPause[i] = (float)0.0; // conservative noise spectrum estimate
|
inst->magnAvgPause[i] = (float)0.0; // conservative noise spectrum estimate
|
||||||
inst->speechProb[i] = (float)0.0; // for estimation of HB in second pass
|
inst->speechProb[i] = (float)0.0; // for estimation of HB in second pass
|
||||||
inst->initMagnEst[i] = (float)0.0; // initial average mag spectrum
|
inst->initMagnEst[i] = (float)0.0; // initial average mag spectrum
|
||||||
}
|
}
|
||||||
|
|
||||||
// feature quantities
|
// feature quantities
|
||||||
inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold)
|
inst->featureData[0] =
|
||||||
inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version
|
SF_FEATURE_THR; // spectral flatness (start on threshold)
|
||||||
inst->featureData[2] = (float)0.0; //spectral variance: not used in this version
|
inst->featureData[1] =
|
||||||
inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold)
|
(float)0.0; // spectral entropy: not used in this version
|
||||||
inst->featureData[4] = SF_FEATURE_THR; //spectral template diff (start on threshold)
|
inst->featureData[2] =
|
||||||
|
(float)0.0; // spectral variance: not used in this version
|
||||||
|
inst->featureData[3] =
|
||||||
|
LRT_FEATURE_THR; // average lrt factor (start on threshold)
|
||||||
|
inst->featureData[4] =
|
||||||
|
SF_FEATURE_THR; // spectral template diff (start on threshold)
|
||||||
inst->featureData[5] = (float)0.0; // normalization for spectral-diff
|
inst->featureData[5] = (float)0.0; // normalization for spectral-diff
|
||||||
inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum
|
inst->featureData[6] =
|
||||||
|
(float)0.0; // window time-average of input magnitude spectrum
|
||||||
|
|
||||||
// histogram quantities: used to estimate/update thresholds for features
|
// histogram quantities: used to estimate/update thresholds for features
|
||||||
for (i = 0; i < HIST_PAR_EST; i++) {
|
for (i = 0; i < HIST_PAR_EST; i++) {
|
||||||
@ -169,14 +180,17 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inst->blockInd = -1; // frame counter
|
inst->blockInd = -1; // frame counter
|
||||||
inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature
|
inst->priorModelPars[0] =
|
||||||
|
LRT_FEATURE_THR; // default threshold for lrt feature
|
||||||
inst->priorModelPars[1] = (float)0.5; // threshold for spectral flatness:
|
inst->priorModelPars[1] = (float)0.5; // threshold for spectral flatness:
|
||||||
// determined on-line
|
// determined on-line
|
||||||
inst->priorModelPars[2] = (float)1.0; // sgn_map par for spectral measure:
|
inst->priorModelPars[2] = (float)1.0; // sgn_map par for spectral measure:
|
||||||
// 1 for flatness measure
|
// 1 for flatness measure
|
||||||
inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature:
|
inst->priorModelPars[3] =
|
||||||
|
(float)0.5; // threshold for template-difference feature:
|
||||||
// determined on-line
|
// determined on-line
|
||||||
inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature
|
inst->priorModelPars[4] =
|
||||||
|
(float)1.0; // default weighting parameter for lrt feature
|
||||||
inst->priorModelPars[5] = (float)0.0; // default weighting parameter for
|
inst->priorModelPars[5] = (float)0.0; // default weighting parameter for
|
||||||
// spectral flatness feature
|
// spectral flatness feature
|
||||||
inst->priorModelPars[6] = (float)0.0; // default weighting parameter for
|
inst->priorModelPars[6] = (float)0.0; // default weighting parameter for
|
||||||
@ -185,7 +199,8 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
|
|||||||
inst->modelUpdatePars[0] = 2; // update flag for parameters:
|
inst->modelUpdatePars[0] = 2; // update flag for parameters:
|
||||||
// 0 no update, 1=update once, 2=update every window
|
// 0 no update, 1=update once, 2=update every window
|
||||||
inst->modelUpdatePars[1] = 500; // window for update
|
inst->modelUpdatePars[1] = 500; // window for update
|
||||||
inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum
|
inst->modelUpdatePars[2] =
|
||||||
|
0; // counter for update of conservative noise spectrum
|
||||||
// counter if the feature thresholds are updated during the sequence
|
// counter if the feature thresholds are updated during the sequence
|
||||||
inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
|
inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
|
||||||
|
|
||||||
@ -195,12 +210,11 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
|
|||||||
inst->pinkNoiseNumerator = 0.0;
|
inst->pinkNoiseNumerator = 0.0;
|
||||||
inst->pinkNoiseExp = 0.0;
|
inst->pinkNoiseExp = 0.0;
|
||||||
|
|
||||||
WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration
|
WebRtcNs_set_feature_extraction_parameters(inst);
|
||||||
|
|
||||||
// default mode
|
// default mode
|
||||||
WebRtcNs_set_policy_core(inst, 0);
|
WebRtcNs_set_policy_core(inst, 0);
|
||||||
|
|
||||||
|
|
||||||
memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX);
|
memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX);
|
||||||
|
|
||||||
inst->initFlag = 1;
|
inst->initFlag = 1;
|
||||||
@ -265,17 +279,19 @@ void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
|
|||||||
|
|
||||||
// update log quantile estimate
|
// update log quantile estimate
|
||||||
if (lmagn[i] > inst->lquantile[offset + i]) {
|
if (lmagn[i] > inst->lquantile[offset + i]) {
|
||||||
inst->lquantile[offset + i] += QUANTILE * delta
|
inst->lquantile[offset + i] +=
|
||||||
/ (float)(inst->counter[s] + 1);
|
QUANTILE * delta / (float)(inst->counter[s] + 1);
|
||||||
} else {
|
} else {
|
||||||
inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta
|
inst->lquantile[offset + i] -=
|
||||||
/ (float)(inst->counter[s] + 1);
|
((float)1.0 - QUANTILE) * delta / (float)(inst->counter[s] + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// update density estimate
|
// update density estimate
|
||||||
if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) {
|
if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) {
|
||||||
inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset
|
inst->density[offset + i] =
|
||||||
+ i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1);
|
((float)inst->counter[s] * inst->density[offset + i] +
|
||||||
|
(float)1.0 / ((float)2.0 * WIDTH)) /
|
||||||
|
(float)(inst->counter[s] + 1);
|
||||||
}
|
}
|
||||||
} // end loop over magnitude spectrum
|
} // end loop over magnitude spectrum
|
||||||
|
|
||||||
@ -305,14 +321,17 @@ void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Extract thresholds for feature parameters
|
// Extract thresholds for feature parameters
|
||||||
// histograms are computed over some window_size (given by inst->modelUpdatePars[1])
|
// histograms are computed over some window_size (given by
|
||||||
|
// inst->modelUpdatePars[1])
|
||||||
// thresholds and weights are extracted every window
|
// thresholds and weights are extracted every window
|
||||||
// flag 0 means update histogram only, flag 1 means compute the thresholds/weights
|
// flag 0 means update histogram only, flag 1 means compute the
|
||||||
|
// thresholds/weights
|
||||||
// threshold and weights are returned in: inst->priorModelPars
|
// threshold and weights are returned in: inst->priorModelPars
|
||||||
void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
||||||
int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
|
int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
|
||||||
int maxPeak1, maxPeak2;
|
int maxPeak1, maxPeak2;
|
||||||
int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff;
|
int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff,
|
||||||
|
weightPeak2SpecDiff;
|
||||||
|
|
||||||
float binMid, featureSum;
|
float binMid, featureSum;
|
||||||
float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
|
float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
|
||||||
@ -326,36 +345,42 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
|||||||
// update histograms
|
// update histograms
|
||||||
if (flag == 0) {
|
if (flag == 0) {
|
||||||
// LRT
|
// LRT
|
||||||
if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt)
|
if ((inst->featureData[3] <
|
||||||
&& (inst->featureData[3] >= 0.0)) {
|
HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt) &&
|
||||||
i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt);
|
(inst->featureData[3] >= 0.0)) {
|
||||||
|
i = (int)(inst->featureData[3] /
|
||||||
|
inst->featureExtractionParams.binSizeLrt);
|
||||||
inst->histLrt[i]++;
|
inst->histLrt[i]++;
|
||||||
}
|
}
|
||||||
// Spectral flatness
|
// Spectral flatness
|
||||||
if ((inst->featureData[0] < HIST_PAR_EST
|
if ((inst->featureData[0] <
|
||||||
* inst->featureExtractionParams.binSizeSpecFlat)
|
HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecFlat) &&
|
||||||
&& (inst->featureData[0] >= 0.0)) {
|
(inst->featureData[0] >= 0.0)) {
|
||||||
i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat);
|
i = (int)(inst->featureData[0] /
|
||||||
|
inst->featureExtractionParams.binSizeSpecFlat);
|
||||||
inst->histSpecFlat[i]++;
|
inst->histSpecFlat[i]++;
|
||||||
}
|
}
|
||||||
// Spectral difference
|
// Spectral difference
|
||||||
if ((inst->featureData[4] < HIST_PAR_EST
|
if ((inst->featureData[4] <
|
||||||
* inst->featureExtractionParams.binSizeSpecDiff)
|
HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecDiff) &&
|
||||||
&& (inst->featureData[4] >= 0.0)) {
|
(inst->featureData[4] >= 0.0)) {
|
||||||
i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff);
|
i = (int)(inst->featureData[4] /
|
||||||
|
inst->featureExtractionParams.binSizeSpecDiff);
|
||||||
inst->histSpecDiff[i]++;
|
inst->histSpecDiff[i]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// extract parameters for speech/noise probability
|
// extract parameters for speech/noise probability
|
||||||
if (flag == 1) {
|
if (flag == 1) {
|
||||||
//lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt
|
// lrt feature: compute the average over
|
||||||
|
// inst->featureExtractionParams.rangeAvgHistLrt
|
||||||
avgHistLrt = 0.0;
|
avgHistLrt = 0.0;
|
||||||
avgHistLrtCompl = 0.0;
|
avgHistLrtCompl = 0.0;
|
||||||
avgSquareHistLrt = 0.0;
|
avgSquareHistLrt = 0.0;
|
||||||
numHistLrt = 0;
|
numHistLrt = 0;
|
||||||
for (i = 0; i < HIST_PAR_EST; i++) {
|
for (i = 0; i < HIST_PAR_EST; i++) {
|
||||||
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
|
binMid =
|
||||||
|
((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
|
||||||
if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) {
|
if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) {
|
||||||
avgHistLrt += inst->histLrt[i] * binMid;
|
avgHistLrt += inst->histLrt[i] * binMid;
|
||||||
numHistLrt += inst->histLrt[i];
|
numHistLrt += inst->histLrt[i];
|
||||||
@ -374,8 +399,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
|||||||
// very low fluct, so likely noise
|
// very low fluct, so likely noise
|
||||||
inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
|
inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
|
||||||
} else {
|
} else {
|
||||||
inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars
|
inst->priorModelPars[0] =
|
||||||
* avgHistLrt;
|
inst->featureExtractionParams.factor1ModelPars * avgHistLrt;
|
||||||
// check if value is within min/max range
|
// check if value is within min/max range
|
||||||
if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) {
|
if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) {
|
||||||
inst->priorModelPars[0] = inst->featureExtractionParams.minLrt;
|
inst->priorModelPars[0] = inst->featureExtractionParams.minLrt;
|
||||||
@ -386,8 +411,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
|||||||
}
|
}
|
||||||
// done with lrt feature
|
// done with lrt feature
|
||||||
|
|
||||||
//
|
// for spectral flatness and spectral difference: compute the main peaks of
|
||||||
// for spectral flatness and spectral difference: compute the main peaks of histogram
|
// histogram
|
||||||
maxPeak1 = 0;
|
maxPeak1 = 0;
|
||||||
maxPeak2 = 0;
|
maxPeak2 = 0;
|
||||||
posPeak1SpecFlat = 0.0;
|
posPeak1SpecFlat = 0.0;
|
||||||
@ -397,7 +422,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
|||||||
|
|
||||||
// peaks for flatness
|
// peaks for flatness
|
||||||
for (i = 0; i < HIST_PAR_EST; i++) {
|
for (i = 0; i < HIST_PAR_EST; i++) {
|
||||||
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat;
|
binMid = ((float)i + (float)0.5) *
|
||||||
|
inst->featureExtractionParams.binSizeSpecFlat;
|
||||||
if (inst->histSpecFlat[i] > maxPeak1) {
|
if (inst->histSpecFlat[i] > maxPeak1) {
|
||||||
// Found new "first" peak
|
// Found new "first" peak
|
||||||
maxPeak2 = maxPeak1;
|
maxPeak2 = maxPeak1;
|
||||||
@ -424,7 +450,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
|||||||
weightPeak2SpecDiff = 0;
|
weightPeak2SpecDiff = 0;
|
||||||
// peaks for spectral difference
|
// peaks for spectral difference
|
||||||
for (i = 0; i < HIST_PAR_EST; i++) {
|
for (i = 0; i < HIST_PAR_EST; i++) {
|
||||||
binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff;
|
binMid = ((float)i + (float)0.5) *
|
||||||
|
inst->featureExtractionParams.binSizeSpecDiff;
|
||||||
if (inst->histSpecDiff[i] > maxPeak1) {
|
if (inst->histSpecDiff[i] > maxPeak1) {
|
||||||
// Found new "first" peak
|
// Found new "first" peak
|
||||||
maxPeak2 = maxPeak1;
|
maxPeak2 = maxPeak1;
|
||||||
@ -445,24 +472,25 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
|||||||
// for spectrum flatness feature
|
// for spectrum flatness feature
|
||||||
useFeatureSpecFlat = 1;
|
useFeatureSpecFlat = 1;
|
||||||
// merge the two peaks if they are close
|
// merge the two peaks if they are close
|
||||||
if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat)
|
if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) <
|
||||||
< inst->featureExtractionParams.limitPeakSpacingSpecFlat)
|
inst->featureExtractionParams.limitPeakSpacingSpecFlat) &&
|
||||||
&& (weightPeak2SpecFlat
|
(weightPeak2SpecFlat >
|
||||||
> inst->featureExtractionParams.limitPeakWeightsSpecFlat
|
inst->featureExtractionParams.limitPeakWeightsSpecFlat *
|
||||||
* weightPeak1SpecFlat)) {
|
weightPeak1SpecFlat)) {
|
||||||
weightPeak1SpecFlat += weightPeak2SpecFlat;
|
weightPeak1SpecFlat += weightPeak2SpecFlat;
|
||||||
posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat);
|
posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat);
|
||||||
}
|
}
|
||||||
// reject if weight of peaks is not large enough, or peak value too small
|
// reject if weight of peaks is not large enough, or peak value too small
|
||||||
if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat
|
if (weightPeak1SpecFlat <
|
||||||
|| posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
|
inst->featureExtractionParams.thresWeightSpecFlat ||
|
||||||
|
posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
|
||||||
useFeatureSpecFlat = 0;
|
useFeatureSpecFlat = 0;
|
||||||
}
|
}
|
||||||
// if selected, get the threshold
|
// if selected, get the threshold
|
||||||
if (useFeatureSpecFlat == 1) {
|
if (useFeatureSpecFlat == 1) {
|
||||||
// compute the threshold
|
// compute the threshold
|
||||||
inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars
|
inst->priorModelPars[1] =
|
||||||
* posPeak1SpecFlat;
|
inst->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
|
||||||
// check if value is within min/max range
|
// check if value is within min/max range
|
||||||
if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) {
|
if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) {
|
||||||
inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat;
|
inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat;
|
||||||
@ -476,19 +504,20 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
|
|||||||
// for template feature
|
// for template feature
|
||||||
useFeatureSpecDiff = 1;
|
useFeatureSpecDiff = 1;
|
||||||
// merge the two peaks if they are close
|
// merge the two peaks if they are close
|
||||||
if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff)
|
if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) <
|
||||||
< inst->featureExtractionParams.limitPeakSpacingSpecDiff)
|
inst->featureExtractionParams.limitPeakSpacingSpecDiff) &&
|
||||||
&& (weightPeak2SpecDiff
|
(weightPeak2SpecDiff >
|
||||||
> inst->featureExtractionParams.limitPeakWeightsSpecDiff
|
inst->featureExtractionParams.limitPeakWeightsSpecDiff *
|
||||||
* weightPeak1SpecDiff)) {
|
weightPeak1SpecDiff)) {
|
||||||
weightPeak1SpecDiff += weightPeak2SpecDiff;
|
weightPeak1SpecDiff += weightPeak2SpecDiff;
|
||||||
posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff);
|
posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff);
|
||||||
}
|
}
|
||||||
// get the threshold value
|
// get the threshold value
|
||||||
inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars
|
inst->priorModelPars[3] =
|
||||||
* posPeak1SpecDiff;
|
inst->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
|
||||||
// reject if weight of peaks is not large enough
|
// reject if weight of peaks is not large enough
|
||||||
if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) {
|
if (weightPeak1SpecDiff <
|
||||||
|
inst->featureExtractionParams.thresWeightSpecDiff) {
|
||||||
useFeatureSpecDiff = 0;
|
useFeatureSpecDiff = 0;
|
||||||
}
|
}
|
||||||
// check if value is within min/max range
|
// check if value is within min/max range
|
||||||
@ -541,7 +570,8 @@ void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
|
|||||||
for (i = 0; i < shiftLP; i++) {
|
for (i = 0; i < shiftLP; i++) {
|
||||||
avgSpectralFlatnessDen -= magnIn[i];
|
avgSpectralFlatnessDen -= magnIn[i];
|
||||||
}
|
}
|
||||||
// compute log of ratio of the geometric to arithmetic mean: check for log(0) case
|
// compute log of ratio of the geometric to arithmetic mean: check for log(0)
|
||||||
|
// case
|
||||||
for (i = shiftLP; i < inst->magnLen; i++) {
|
for (i = shiftLP; i < inst->magnLen; i++) {
|
||||||
if (magnIn[i] > 0.0) {
|
if (magnIn[i] > 0.0) {
|
||||||
avgSpectralFlatnessNum += (float)log(magnIn[i]);
|
avgSpectralFlatnessNum += (float)log(magnIn[i]);
|
||||||
@ -562,12 +592,14 @@ void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
|
|||||||
// done with flatness feature
|
// done with flatness feature
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute the difference measure between input spectrum and a template/learned noise spectrum
|
// Compute the difference measure between input spectrum and a template/learned
|
||||||
|
// noise spectrum
|
||||||
// magnIn is the input spectrum
|
// magnIn is the input spectrum
|
||||||
// the reference/template spectrum is inst->magnAvgPause[i]
|
// the reference/template spectrum is inst->magnAvgPause[i]
|
||||||
// returns (normalized) spectral difference in inst->featureData[4]
|
// returns (normalized) spectral difference in inst->featureData[4]
|
||||||
void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
|
void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
|
||||||
// avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
|
// avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 /
|
||||||
|
// var(magnAvgPause)
|
||||||
int i;
|
int i;
|
||||||
float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
|
float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;
|
||||||
|
|
||||||
@ -587,7 +619,8 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
|
|||||||
// compute variance and covariance quantities
|
// compute variance and covariance quantities
|
||||||
for (i = 0; i < inst->magnLen; i++) {
|
for (i = 0; i < inst->magnLen; i++) {
|
||||||
covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause);
|
covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause);
|
||||||
varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
|
varPause +=
|
||||||
|
(inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
|
||||||
varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
|
varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
|
||||||
}
|
}
|
||||||
covMagnPause = covMagnPause / ((float)inst->magnLen);
|
covMagnPause = covMagnPause / ((float)inst->magnLen);
|
||||||
@ -596,10 +629,13 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
|
|||||||
// update of average magnitude spectrum
|
// update of average magnitude spectrum
|
||||||
inst->featureData[6] += inst->signalEnergy;
|
inst->featureData[6] += inst->signalEnergy;
|
||||||
|
|
||||||
avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
|
avgDiffNormMagn =
|
||||||
|
varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
|
||||||
// normalize and compute time-avg update of difference feature
|
// normalize and compute time-avg update of difference feature
|
||||||
avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
|
avgDiffNormMagn =
|
||||||
inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
|
(float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
|
||||||
|
inst->featureData[4] +=
|
||||||
|
SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute speech/noise probability
|
// Compute speech/noise probability
|
||||||
@ -608,7 +644,9 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
|
|||||||
// noise is the noise spectrum
|
// noise is the noise spectrum
|
||||||
// snrLocPrior is the prior snr for each freq.
|
// snrLocPrior is the prior snr for each freq.
|
||||||
// snr loc_post is the post snr for each freq.
|
// snr loc_post is the post snr for each freq.
|
||||||
void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior,
|
void WebRtcNs_SpeechNoiseProb(NSinst_t* inst,
|
||||||
|
float* probSpeechFinal,
|
||||||
|
float* snrLocPrior,
|
||||||
float* snrLocPost) {
|
float* snrLocPost) {
|
||||||
int i, sgnMap;
|
int i, sgnMap;
|
||||||
float invLrt, gainPrior, indPrior;
|
float invLrt, gainPrior, indPrior;
|
||||||
@ -644,8 +682,8 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
|
|||||||
tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i];
|
tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i];
|
||||||
tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001);
|
tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001);
|
||||||
besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2;
|
besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2;
|
||||||
inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1)
|
inst->logLrtTimeAvg[i] +=
|
||||||
- inst->logLrtTimeAvg[i]);
|
LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - inst->logLrtTimeAvg[i]);
|
||||||
logLrtTimeAvgKsum += inst->logLrtTimeAvg[i];
|
logLrtTimeAvgKsum += inst->logLrtTimeAvg[i];
|
||||||
}
|
}
|
||||||
logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen);
|
logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen);
|
||||||
@ -663,8 +701,9 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
|
|||||||
widthPrior = widthPrior1;
|
widthPrior = widthPrior1;
|
||||||
}
|
}
|
||||||
// compute indicator function: sigmoid map
|
// compute indicator function: sigmoid map
|
||||||
indicator0 = (float)0.5 * ((float)tanh(widthPrior *
|
indicator0 = (float)0.5 *
|
||||||
(logLrtTimeAvgKsum - threshPrior0)) + (float)1.0);
|
((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) +
|
||||||
|
(float)1.0);
|
||||||
|
|
||||||
// spectral flatness feature
|
// spectral flatness feature
|
||||||
tmpFloat1 = inst->featureData[0];
|
tmpFloat1 = inst->featureData[0];
|
||||||
@ -677,8 +716,10 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
|
|||||||
widthPrior = widthPrior1;
|
widthPrior = widthPrior1;
|
||||||
}
|
}
|
||||||
// compute indicator function: sigmoid map
|
// compute indicator function: sigmoid map
|
||||||
indicator1 = (float)0.5 * ((float)tanh((float)sgnMap *
|
indicator1 =
|
||||||
widthPrior * (threshPrior1 - tmpFloat1)) + (float)1.0);
|
(float)0.5 *
|
||||||
|
((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) +
|
||||||
|
(float)1.0);
|
||||||
|
|
||||||
// for template spectrum-difference
|
// for template spectrum-difference
|
||||||
tmpFloat1 = inst->featureData[4];
|
tmpFloat1 = inst->featureData[4];
|
||||||
@ -688,12 +729,13 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
|
|||||||
widthPrior = widthPrior2;
|
widthPrior = widthPrior2;
|
||||||
}
|
}
|
||||||
// compute indicator function: sigmoid map
|
// compute indicator function: sigmoid map
|
||||||
indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2))
|
indicator2 =
|
||||||
+ (float)1.0);
|
(float)0.5 *
|
||||||
|
((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + (float)1.0);
|
||||||
|
|
||||||
// combine the indicator function with the feature weights
|
// combine the indicator function with the feature weights
|
||||||
indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2
|
indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 +
|
||||||
* indicator2;
|
weightIndPrior2 * indicator2;
|
||||||
// done with computing indicator function
|
// done with computing indicator function
|
||||||
|
|
||||||
// compute the prior probability
|
// compute the prior probability
|
||||||
@ -707,7 +749,8 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
|
|||||||
}
|
}
|
||||||
|
|
||||||
// final speech probability: combine prior model with LR factor:
|
// final speech probability: combine prior model with LR factor:
|
||||||
gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001);
|
gainPrior = ((float)1.0 - inst->priorSpeechProb) /
|
||||||
|
(inst->priorSpeechProb + (float)0.0001);
|
||||||
for (i = 0; i < inst->magnLen; i++) {
|
for (i = 0; i < inst->magnLen; i++) {
|
||||||
invLrt = (float)exp(-inst->logLrtTimeAvg[i]);
|
invLrt = (float)exp(-inst->logLrtTimeAvg[i]);
|
||||||
invLrt = (float)gainPrior * invLrt;
|
invLrt = (float)gainPrior * invLrt;
|
||||||
@ -749,9 +792,11 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
//
|
//
|
||||||
|
|
||||||
// update analysis buffer for L band
|
// update analysis buffer for L band
|
||||||
memcpy(inst->analyzeBuf, inst->analyzeBuf + inst->blockLen10ms,
|
memcpy(inst->analyzeBuf,
|
||||||
|
inst->analyzeBuf + inst->blockLen10ms,
|
||||||
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
|
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
|
||||||
memcpy(inst->analyzeBuf + inst->anaLen - inst->blockLen10ms, speechFrame,
|
memcpy(inst->analyzeBuf + inst->anaLen - inst->blockLen10ms,
|
||||||
|
speechFrame,
|
||||||
sizeof(float) * inst->blockLen10ms);
|
sizeof(float) * inst->blockLen10ms);
|
||||||
|
|
||||||
// check if processing needed
|
// check if processing needed
|
||||||
@ -764,12 +809,13 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
}
|
}
|
||||||
if (energy == 0.0) {
|
if (energy == 0.0) {
|
||||||
// we want to avoid updating statistics in this case:
|
// we want to avoid updating statistics in this case:
|
||||||
// Updating feature statistics when we have zeros only will cause thresholds to
|
// Updating feature statistics when we have zeros only will cause
|
||||||
// move towards zero signal situations. This in turn has the effect that once the
|
// thresholds to move towards zero signal situations. This in turn has the
|
||||||
// signal is "turned on" (non-zero values) everything will be treated as speech
|
// effect that once the signal is "turned on" (non-zero values) everything
|
||||||
// and there is no noise suppression effect. Depending on the duration of the
|
// will be treated as speech and there is no noise suppression effect.
|
||||||
// inactive signal it takes a considerable amount of time for the system to learn
|
// Depending on the duration of the inactive signal it takes a
|
||||||
// what is noise and what is speech.
|
// considerable amount of time for the system to learn what is noise and
|
||||||
|
// what is speech.
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -829,11 +875,13 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
// compute simplified noise model during startup
|
// compute simplified noise model during startup
|
||||||
if (inst->blockInd < END_STARTUP_SHORT) {
|
if (inst->blockInd < END_STARTUP_SHORT) {
|
||||||
// Estimate White noise
|
// Estimate White noise
|
||||||
inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive;
|
inst->whiteNoiseLevel +=
|
||||||
|
sumMagn / ((float)inst->magnLen) * inst->overdrive;
|
||||||
// Estimate Pink noise parameters
|
// Estimate Pink noise parameters
|
||||||
tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand));
|
tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand));
|
||||||
tmpFloat1 -= (sum_log_i * sum_log_i);
|
tmpFloat1 -= (sum_log_i * sum_log_i);
|
||||||
tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
|
tmpFloat2 =
|
||||||
|
(sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
|
||||||
tmpFloat3 = tmpFloat2 / tmpFloat1;
|
tmpFloat3 = tmpFloat2 / tmpFloat1;
|
||||||
// Constrain the estimated spectrum to be positive
|
// Constrain the estimated spectrum to be positive
|
||||||
if (tmpFloat3 < 0.0f) {
|
if (tmpFloat3 < 0.0f) {
|
||||||
@ -858,18 +906,22 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
parametric_noise = inst->whiteNoiseLevel;
|
parametric_noise = inst->whiteNoiseLevel;
|
||||||
} else {
|
} else {
|
||||||
// Use pink noise estimate
|
// Use pink noise estimate
|
||||||
parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
|
parametric_num =
|
||||||
|
exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
|
||||||
parametric_num *= (float)(inst->blockInd + 1);
|
parametric_num *= (float)(inst->blockInd + 1);
|
||||||
parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
|
parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
|
||||||
parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp);
|
parametric_noise =
|
||||||
|
parametric_num / pow((float)kStartBand, parametric_exp);
|
||||||
}
|
}
|
||||||
for (i = 0; i < inst->magnLen; i++) {
|
for (i = 0; i < inst->magnLen; i++) {
|
||||||
// Estimate the background noise using the white and pink noise parameters
|
// Estimate the background noise using the white and pink noise
|
||||||
|
// parameters
|
||||||
if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) {
|
if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) {
|
||||||
// Use pink noise estimate
|
// Use pink noise estimate
|
||||||
parametric_noise = parametric_num / pow((float)i, parametric_exp);
|
parametric_noise = parametric_num / pow((float)i, parametric_exp);
|
||||||
}
|
}
|
||||||
theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise);
|
theFilterTmp[i] =
|
||||||
|
(inst->initMagnEst[i] - inst->overdrive * parametric_noise);
|
||||||
theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001);
|
theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001);
|
||||||
// Weight quantile noise with modeled noise
|
// Weight quantile noise with modeled noise
|
||||||
noise[i] *= (inst->blockInd);
|
noise[i] *= (inst->blockInd);
|
||||||
@ -887,10 +939,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// start processing at frames == converged+1
|
// start processing at frames == converged+1
|
||||||
//
|
|
||||||
// STEP 1: compute prior and post snr based on quantile noise est
|
// STEP 1: compute prior and post snr based on quantile noise est
|
||||||
//
|
|
||||||
|
|
||||||
// compute DD estimate of prior SNR: needed for new method
|
// compute DD estimate of prior SNR: needed for new method
|
||||||
for (i = 0; i < inst->magnLen; i++) {
|
for (i = 0; i < inst->magnLen; i++) {
|
||||||
// post snr
|
// post snr
|
||||||
@ -900,23 +949,25 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
}
|
}
|
||||||
// previous post snr
|
// previous post snr
|
||||||
// previous estimate: based on previous frame with gain filter
|
// previous estimate: based on previous frame with gain filter
|
||||||
previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001)
|
previousEstimateStsa[i] = inst->magnPrev[i] /
|
||||||
* (inst->smooth[i]);
|
(inst->noisePrev[i] + (float)0.0001) *
|
||||||
|
(inst->smooth[i]);
|
||||||
// DD estimate is sum of two terms: current estimate and previous estimate
|
// DD estimate is sum of two terms: current estimate and previous estimate
|
||||||
// directed decision update of snrPrior
|
// directed decision update of snrPrior
|
||||||
snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
|
snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] +
|
||||||
* snrLocPost[i];
|
((float)1.0 - DD_PR_SNR) * snrLocPost[i];
|
||||||
// post and prior snr needed for step 2
|
// post and prior snr needed for step 2
|
||||||
} // end of loop over freqs
|
} // end of loop over freqs
|
||||||
// done with step 1: dd computation of prior and post snr
|
// done with step 1: dd computation of prior and post snr
|
||||||
|
|
||||||
//
|
|
||||||
// STEP 2: compute speech/noise likelihood
|
// STEP 2: compute speech/noise likelihood
|
||||||
//
|
// compute difference of input spectrum with learned/estimated noise
|
||||||
// compute difference of input spectrum with learned/estimated noise spectrum
|
// spectrum
|
||||||
WebRtcNs_ComputeSpectralDifference(inst, magn);
|
WebRtcNs_ComputeSpectralDifference(inst, magn);
|
||||||
// compute histograms for parameter decisions (thresholds and weights for features)
|
// compute histograms for parameter decisions (thresholds and weights for
|
||||||
// parameters are extracted once every window time (=inst->modelUpdatePars[1])
|
// features)
|
||||||
|
// parameters are extracted once every window time
|
||||||
|
// (=inst->modelUpdatePars[1])
|
||||||
if (updateParsFlag >= 1) {
|
if (updateParsFlag >= 1) {
|
||||||
// counter update
|
// counter update
|
||||||
inst->modelUpdatePars[3]--;
|
inst->modelUpdatePars[3]--;
|
||||||
@ -934,10 +985,10 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
} else {
|
} else {
|
||||||
// update every window:
|
// update every window:
|
||||||
// get normalization for spectral difference for next window estimate
|
// get normalization for spectral difference for next window estimate
|
||||||
inst->featureData[6] = inst->featureData[6]
|
inst->featureData[6] =
|
||||||
/ ((float)inst->modelUpdatePars[1]);
|
inst->featureData[6] / ((float)inst->modelUpdatePars[1]);
|
||||||
inst->featureData[5] = (float)0.5 * (inst->featureData[6]
|
inst->featureData[5] =
|
||||||
+ inst->featureData[5]);
|
(float)0.5 * (inst->featureData[6] + inst->featureData[5]);
|
||||||
inst->featureData[6] = (float)0.0;
|
inst->featureData[6] = (float)0.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -951,8 +1002,10 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
probNonSpeech = (float)1.0 - probSpeech;
|
probNonSpeech = (float)1.0 - probSpeech;
|
||||||
// temporary noise update:
|
// temporary noise update:
|
||||||
// use it for speech frames if update value is less than previous
|
// use it for speech frames if update value is less than previous
|
||||||
noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
|
noiseUpdateTmp =
|
||||||
* (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
|
gammaNoiseTmp * inst->noisePrev[i] +
|
||||||
|
((float)1.0 - gammaNoiseTmp) *
|
||||||
|
(probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
|
||||||
//
|
//
|
||||||
// time-constant based on speech/noise state
|
// time-constant based on speech/noise state
|
||||||
gammaNoiseOld = gammaNoiseTmp;
|
gammaNoiseOld = gammaNoiseTmp;
|
||||||
@ -963,16 +1016,20 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
}
|
}
|
||||||
// conservative noise update
|
// conservative noise update
|
||||||
if (probSpeech < PROB_RANGE) {
|
if (probSpeech < PROB_RANGE) {
|
||||||
inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
|
inst->magnAvgPause[i] +=
|
||||||
|
GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
|
||||||
}
|
}
|
||||||
// noise update
|
// noise update
|
||||||
if (gammaNoiseTmp == gammaNoiseOld) {
|
if (gammaNoiseTmp == gammaNoiseOld) {
|
||||||
noise[i] = noiseUpdateTmp;
|
noise[i] = noiseUpdateTmp;
|
||||||
} else {
|
} else {
|
||||||
noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
|
noise[i] =
|
||||||
* (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
|
gammaNoiseTmp * inst->noisePrev[i] +
|
||||||
|
((float)1.0 - gammaNoiseTmp) *
|
||||||
|
(probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
|
||||||
// allow for noise update downwards:
|
// allow for noise update downwards:
|
||||||
// if noise update decreases the noise, it is safe, so allow it to happen
|
// if noise update decreases the noise, it is safe, so allow it to
|
||||||
|
// happen
|
||||||
if (noiseUpdateTmp < noise[i]) {
|
if (noiseUpdateTmp < noise[i]) {
|
||||||
noise[i] = noiseUpdateTmp;
|
noise[i] = noiseUpdateTmp;
|
||||||
}
|
}
|
||||||
@ -980,19 +1037,19 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
|
|||||||
} // end of freq loop
|
} // end of freq loop
|
||||||
// done with step 2: noise update
|
// done with step 2: noise update
|
||||||
|
|
||||||
//
|
// STEP 3: compute dd update of prior snr and post snr based on new noise
|
||||||
// STEP 3: compute dd update of prior snr and post snr based on new noise estimate
|
// estimate
|
||||||
//
|
|
||||||
for (i = 0; i < inst->magnLen; i++) {
|
for (i = 0; i < inst->magnLen; i++) {
|
||||||
// post and prior snr
|
// post and prior snr
|
||||||
currentEstimateStsa = (float)0.0;
|
currentEstimateStsa = (float)0.0;
|
||||||
if (magn[i] > noise[i]) {
|
if (magn[i] > noise[i]) {
|
||||||
currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
|
currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
|
||||||
}
|
}
|
||||||
// DD estimate is sum of two terms: current estimate and previous estimate
|
// DD estimate is sum of two terms: current estimate and previous
|
||||||
|
// estimate
|
||||||
// directed decision update of snrPrior
|
// directed decision update of snrPrior
|
||||||
snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
|
snrPrior = DD_PR_SNR * previousEstimateStsa[i] +
|
||||||
* currentEstimateStsa;
|
((float)1.0 - DD_PR_SNR) * currentEstimateStsa;
|
||||||
// gain filter
|
// gain filter
|
||||||
tmpFloat1 = inst->overdrive + snrPrior;
|
tmpFloat1 = inst->overdrive + snrPrior;
|
||||||
tmpFloat2 = (float)snrPrior / tmpFloat1;
|
tmpFloat2 = (float)snrPrior / tmpFloat1;
|
||||||
@ -1075,16 +1132,20 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// update analysis buffer for L band
|
// update analysis buffer for L band
|
||||||
memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms,
|
memcpy(inst->dataBuf,
|
||||||
|
inst->dataBuf + inst->blockLen10ms,
|
||||||
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
|
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
|
||||||
memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, speechFrame,
|
memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms,
|
||||||
|
speechFrame,
|
||||||
sizeof(float) * inst->blockLen10ms);
|
sizeof(float) * inst->blockLen10ms);
|
||||||
|
|
||||||
if (flagHB == 1) {
|
if (flagHB == 1) {
|
||||||
// update analysis buffer for H band
|
// update analysis buffer for H band
|
||||||
memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms,
|
memcpy(inst->dataBufHB,
|
||||||
|
inst->dataBufHB + inst->blockLen10ms,
|
||||||
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
|
sizeof(float) * (inst->anaLen - inst->blockLen10ms));
|
||||||
memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, speechFrameHB,
|
memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms,
|
||||||
|
speechFrameHB,
|
||||||
sizeof(float) * inst->blockLen10ms);
|
sizeof(float) * inst->blockLen10ms);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1103,9 +1164,11 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
|
|||||||
fout[i - inst->windShift] = inst->syntBuf[i];
|
fout[i - inst->windShift] = inst->syntBuf[i];
|
||||||
}
|
}
|
||||||
// update synthesis buffer
|
// update synthesis buffer
|
||||||
memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
|
memcpy(inst->syntBuf,
|
||||||
|
inst->syntBuf + inst->blockLen,
|
||||||
sizeof(float) * (inst->anaLen - inst->blockLen));
|
sizeof(float) * (inst->anaLen - inst->blockLen));
|
||||||
memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
|
memset(inst->syntBuf + inst->anaLen - inst->blockLen,
|
||||||
|
0,
|
||||||
sizeof(float) * inst->blockLen);
|
sizeof(float) * inst->blockLen);
|
||||||
|
|
||||||
// out buffer
|
// out buffer
|
||||||
@ -1186,8 +1249,8 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
|
|||||||
}
|
}
|
||||||
// combine both scales with speech/noise prob:
|
// combine both scales with speech/noise prob:
|
||||||
// note prior (priorSpeechProb) is not frequency dependent
|
// note prior (priorSpeechProb) is not frequency dependent
|
||||||
factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb)
|
factor = inst->priorSpeechProb * factor1 +
|
||||||
* factor2;
|
((float)1.0 - inst->priorSpeechProb) * factor2;
|
||||||
} // out of inst->gainmap==1
|
} // out of inst->gainmap==1
|
||||||
|
|
||||||
// synthesis
|
// synthesis
|
||||||
@ -1199,9 +1262,11 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
|
|||||||
fout[i - inst->windShift] = inst->syntBuf[i];
|
fout[i - inst->windShift] = inst->syntBuf[i];
|
||||||
}
|
}
|
||||||
// update synthesis buffer
|
// update synthesis buffer
|
||||||
memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
|
memcpy(inst->syntBuf,
|
||||||
|
inst->syntBuf + inst->blockLen,
|
||||||
sizeof(float) * (inst->anaLen - inst->blockLen));
|
sizeof(float) * (inst->anaLen - inst->blockLen));
|
||||||
memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
|
memset(inst->syntBuf + inst->anaLen - inst->blockLen,
|
||||||
|
0,
|
||||||
sizeof(float) * inst->blockLen);
|
sizeof(float) * inst->blockLen);
|
||||||
|
|
||||||
// out buffer
|
// out buffer
|
||||||
@ -1216,16 +1281,18 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
|
|||||||
for (i = 0; i < inst->blockLen10ms; i++) {
|
for (i = 0; i < inst->blockLen10ms; i++) {
|
||||||
fout[i] = inst->outBuf[i];
|
fout[i] = inst->outBuf[i];
|
||||||
}
|
}
|
||||||
memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms,
|
memcpy(inst->outBuf,
|
||||||
|
inst->outBuf + inst->blockLen10ms,
|
||||||
sizeof(float) * (inst->outLen - inst->blockLen10ms));
|
sizeof(float) * (inst->outLen - inst->blockLen10ms));
|
||||||
memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0,
|
memset(inst->outBuf + inst->outLen - inst->blockLen10ms,
|
||||||
|
0,
|
||||||
sizeof(float) * inst->blockLen10ms);
|
sizeof(float) * inst->blockLen10ms);
|
||||||
inst->outLen -= inst->blockLen10ms;
|
inst->outLen -= inst->blockLen10ms;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < inst->blockLen10ms; ++i)
|
for (i = 0; i < inst->blockLen10ms; ++i)
|
||||||
outFrame[i] = WEBRTC_SPL_SAT(
|
outFrame[i] =
|
||||||
WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
|
WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
|
||||||
|
|
||||||
// for time-domain gain of HB
|
// for time-domain gain of HB
|
||||||
if (flagHB == 1) {
|
if (flagHB == 1) {
|
||||||
@ -1245,11 +1312,13 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
|
|||||||
avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
|
avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
|
||||||
avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0;
|
avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0;
|
||||||
// gain based on speech prob:
|
// gain based on speech prob:
|
||||||
gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
|
gainModHB = (float)0.5 *
|
||||||
|
((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
|
||||||
// combine gain with low band gain
|
// combine gain with low band gain
|
||||||
gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB;
|
gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB;
|
||||||
if (avgProbSpeechHB >= (float)0.5) {
|
if (avgProbSpeechHB >= (float)0.5) {
|
||||||
gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
|
gainTimeDomainHB =
|
||||||
|
(float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
|
||||||
}
|
}
|
||||||
gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
|
gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
|
||||||
// make sure gain is within flooring range
|
// make sure gain is within flooring range
|
||||||
@ -1264,8 +1333,8 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
|
|||||||
// apply gain
|
// apply gain
|
||||||
for (i = 0; i < inst->blockLen10ms; i++) {
|
for (i = 0; i < inst->blockLen10ms; i++) {
|
||||||
float o = gainTimeDomainHB * inst->dataBufHB[i];
|
float o = gainTimeDomainHB * inst->dataBufHB[i];
|
||||||
outFrameHB[i] = WEBRTC_SPL_SAT(
|
outFrameHB[i] =
|
||||||
WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
|
WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
|
||||||
}
|
}
|
||||||
} // end of H band gain computation
|
} // end of H band gain computation
|
||||||
//
|
//
|
||||||
|
@ -8,26 +8,27 @@
|
|||||||
* be found in the AUTHORS file in the root of the source tree.
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
|
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
|
||||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
|
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
|
||||||
|
|
||||||
#include "webrtc/modules/audio_processing/ns/defines.h"
|
#include "webrtc/modules/audio_processing/ns/defines.h"
|
||||||
|
|
||||||
typedef struct NSParaExtract_t_ {
|
typedef struct NSParaExtract_t_ {
|
||||||
|
|
||||||
// bin size of histogram
|
// bin size of histogram
|
||||||
float binSizeLrt;
|
float binSizeLrt;
|
||||||
float binSizeSpecFlat;
|
float binSizeSpecFlat;
|
||||||
float binSizeSpecDiff;
|
float binSizeSpecDiff;
|
||||||
// range of histogram over which lrt threshold is computed
|
// range of histogram over which lrt threshold is computed
|
||||||
float rangeAvgHistLrt;
|
float rangeAvgHistLrt;
|
||||||
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
|
// scale parameters: multiply dominant peaks of the histograms by scale factor
|
||||||
//thresholds for prior model
|
// to obtain thresholds for prior model
|
||||||
float factor1ModelPars; // for lrt and spectral difference
|
float factor1ModelPars; // for lrt and spectral difference
|
||||||
float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
|
float factor2ModelPars; // for spectral_flatness: used when noise is flatter
|
||||||
|
// than speech
|
||||||
// peak limit for spectral flatness (varies between 0 and 1)
|
// peak limit for spectral flatness (varies between 0 and 1)
|
||||||
float thresPosSpecFlat;
|
float thresPosSpecFlat;
|
||||||
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
|
// limit on spacing of two highest peaks in histogram: spacing determined by
|
||||||
|
// bin size
|
||||||
float limitPeakSpacingSpecFlat;
|
float limitPeakSpacingSpecFlat;
|
||||||
float limitPeakSpacingSpecDiff;
|
float limitPeakSpacingSpecDiff;
|
||||||
// limit on relevance of second peak:
|
// limit on relevance of second peak:
|
||||||
@ -49,7 +50,6 @@ typedef struct NSParaExtract_t_ {
|
|||||||
} NSParaExtract_t;
|
} NSParaExtract_t;
|
||||||
|
|
||||||
typedef struct NSinst_t_ {
|
typedef struct NSinst_t_ {
|
||||||
|
|
||||||
uint32_t fs;
|
uint32_t fs;
|
||||||
int blockLen;
|
int blockLen;
|
||||||
int blockLen10ms;
|
int blockLen10ms;
|
||||||
@ -108,7 +108,6 @@ typedef struct NSinst_t_ {
|
|||||||
|
|
||||||
} NSinst_t;
|
} NSinst_t;
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
@ -188,8 +187,7 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
|
|||||||
float* outFrameLow,
|
float* outFrameLow,
|
||||||
float* outFrameHigh);
|
float* outFrameHigh);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
|
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
|
||||||
|
Reference in New Issue
Block a user