From bdfdc96b22bd3ae19dd4d18e17148c69aa1c7285 Mon Sep 17 00:00:00 2001
From: "aluebs@webrtc.org"
Date: Mon, 22 Sep 2014 10:59:46 +0000
Subject: [PATCH] Clang-format ns_core

BUG=webrtc:3811
R=bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/29539004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7257 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 webrtc/modules/audio_processing/ns/ns_core.c | 601 +++++++++++--------
 webrtc/modules/audio_processing/ns/ns_core.h | 134 ++---
 2 files changed, 401 insertions(+), 334 deletions(-)

diff --git a/webrtc/modules/audio_processing/ns/ns_core.c b/webrtc/modules/audio_processing/ns/ns_core.c
index 35606820c4..5aa978f89d 100644
--- a/webrtc/modules/audio_processing/ns/ns_core.c
+++ b/webrtc/modules/audio_processing/ns/ns_core.c
@@ -10,8 +10,8 @@
 #include
 #include
-//#include
 #include
+
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 #include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
 #include "webrtc/modules/audio_processing/ns/ns_core.h"
@@ -20,37 +20,40 @@
 // Set Feature Extraction Parameters
 void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
-  //bin size of histogram
-  inst->featureExtractionParams.binSizeLrt = (float)0.1;
+  // bin size of histogram
+  inst->featureExtractionParams.binSizeLrt = (float)0.1;
   inst->featureExtractionParams.binSizeSpecFlat = (float)0.05;
   inst->featureExtractionParams.binSizeSpecDiff = (float)0.1;
-  //range of histogram over which lrt threshold is computed
+  // range of histogram over which lrt threshold is computed
   inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0;
-  //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
-  // thresholds for prior model
-  inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff
-  inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness:
+  // scale parameters: multiply dominant peaks of the histograms by scale factor
+  // to obtain thresholds for prior model
+  inst->featureExtractionParams.factor1ModelPars =
+      (float)1.20;  // for lrt and spectral diff
+  inst->featureExtractionParams.factor2ModelPars =
+      (float)0.9;  // for spectral_flatness:
   // used when noise is flatter than speech
-  //peak limit for spectral flatness (varies between 0 and 1)
+  // peak limit for spectral flatness (varies between 0 and 1)
   inst->featureExtractionParams.thresPosSpecFlat = (float)0.6;
-  //limit on spacing of two highest peaks in histogram: spacing determined by bin size
+  // limit on spacing of two highest peaks in histogram: spacing determined by
+  // bin size
   inst->featureExtractionParams.limitPeakSpacingSpecFlat =
       2 * inst->featureExtractionParams.binSizeSpecFlat;
   inst->featureExtractionParams.limitPeakSpacingSpecDiff =
       2 * inst->featureExtractionParams.binSizeSpecDiff;
-  //limit on relevance of second peak:
+  // limit on relevance of second peak:
   inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5;
   inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5;
   // fluctuation limit of lrt feature
   inst->featureExtractionParams.thresFluctLrt = (float)0.05;
-  //limit on the max and min values for the feature thresholds
+  // limit on the max and min values for the feature thresholds
   inst->featureExtractionParams.maxLrt = (float)1.0;
   inst->featureExtractionParams.minLrt = (float)0.20;
@@ -60,19 +63,19 @@ void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
   inst->featureExtractionParams.maxSpecDiff = (float)1.0;
   inst->featureExtractionParams.minSpecDiff = (float)0.16;
-  //criteria of weight of histogram peak to accept/reject feature
-  inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3
-      * (inst->modelUpdatePars[1])); //for spectral flatness
-  inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3
-      * (inst->modelUpdatePars[1])); //for spectral difference
+  // criteria of weight of histogram peak to accept/reject feature
+  inst->featureExtractionParams.thresWeightSpecFlat =
+      (int)(0.3 * (inst->modelUpdatePars[1]));  // for spectral flatness
+  inst->featureExtractionParams.thresWeightSpecDiff =
+      (int)(0.3 * (inst->modelUpdatePars[1]));  // for spectral difference
 }

 // Initialize state
 int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
   int i;
-  //We only support 10ms frames
+  // We only support 10ms frames
-  //check for valid pointer
+  // check for valid pointer
   if (inst == NULL) {
     return -1;
   }
@@ -106,10 +109,10 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
     inst->window = kBlocks160w256;
     inst->outLen = 0;
   }
-  inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins
+  inst->magnLen = inst->anaLen / 2 + 1;  // Number of frequency bins

   // Initialize fft work arrays.
-  inst->ip[0] = 0; // Setting this triggers initialization.
+  inst->ip[0] = 0;  // Setting this triggers initialization.
   memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
   WebRtc_rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft);
@@ -117,10 +120,10 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
   memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
   memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
-  //for HB processing
+  // for HB processing
   memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX);
-  //for quantile noise estimation
+  // for quantile noise estimation
   memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
   for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
     inst->lquantile[i] = (float)8.0;
@@ -128,7 +131,8 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
   }

   for (i = 0; i < SIMULT; i++) {
-    inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
+    inst->counter[i] =
+        (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
   }

   inst->updates = 0;
@@ -141,52 +145,63 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
   // Set the aggressiveness: default
   inst->aggrMode = 0;
-  //initialize variables for new method
-  inst->priorSpeechProb = (float)0.5; //prior prob for speech/noise
+  // initialize variables for new method
+  inst->priorSpeechProb = (float)0.5;  // prior prob for speech/noise
   for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
-    inst->magnPrev[i] = (float)0.0; //previous mag spectrum
-    inst->noisePrev[i] = (float)0.0; //previous noise-spectrum
-    inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold)
-    inst->magnAvgPause[i] = (float)0.0; //conservative noise spectrum estimate
-    inst->speechProb[i] = (float)0.0; //for estimation of HB in second pass
-    inst->initMagnEst[i] = (float)0.0; //initial average mag spectrum
+    inst->magnPrev[i] = (float)0.0;       // previous mag spectrum
+    inst->noisePrev[i] = (float)0.0;      // previous noise-spectrum
+    inst->logLrtTimeAvg[i] =
+        LRT_FEATURE_THR;                  // smooth LR ratio (same as threshold)
+    inst->magnAvgPause[i] = (float)0.0;   // conservative noise spectrum estimate
+    inst->speechProb[i] = (float)0.0;     // for estimation of HB in second pass
+    inst->initMagnEst[i] = (float)0.0;    // initial average mag spectrum
   }
-  //feature quantities
-  inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold)
-  inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version
-  inst->featureData[2] = (float)0.0; //spectral variance: not used in this version
-  inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold)
-  inst->featureData[4] = SF_FEATURE_THR; //spectral template diff (start on threshold)
-  inst->featureData[5] = (float)0.0; //normalization for spectral-diff
-  inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum
+  // feature quantities
+  inst->featureData[0] =
+      SF_FEATURE_THR;   // spectral flatness (start on threshold)
+  inst->featureData[1] =
+      (float)0.0;       // spectral entropy: not used in this version
+  inst->featureData[2] =
+      (float)0.0;       // spectral variance: not used in this version
+  inst->featureData[3] =
+      LRT_FEATURE_THR;  // average lrt factor (start on threshold)
+  inst->featureData[4] =
+      SF_FEATURE_THR;   // spectral template diff (start on threshold)
+  inst->featureData[5] = (float)0.0;  // normalization for spectral-diff
+  inst->featureData[6] =
+      (float)0.0;       // window time-average of input magnitude spectrum
-  //histogram quantities: used to estimate/update thresholds for features
+  // histogram quantities: used to estimate/update thresholds for features
   for (i = 0; i < HIST_PAR_EST; i++) {
     inst->histLrt[i] = 0;
     inst->histSpecFlat[i] = 0;
     inst->histSpecDiff[i] = 0;
   }

-  inst->blockInd = -1; //frame counter
-  inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature
-  inst->priorModelPars[1] = (float)0.5; //threshold for spectral flatness:
+  inst->blockInd = -1;  // frame counter
+  inst->priorModelPars[0] =
+      LRT_FEATURE_THR;  // default threshold for lrt feature
+  inst->priorModelPars[1] = (float)0.5;  // threshold for spectral flatness:
   // determined on-line
-  inst->priorModelPars[2] = (float)1.0; //sgn_map par for spectral measure:
+  inst->priorModelPars[2] = (float)1.0;  // sgn_map par for spectral measure:
   // 1 for flatness measure
-  inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature:
+  inst->priorModelPars[3] =
+      (float)0.5;  // threshold for template-difference feature:
   // determined on-line
-  inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature
-  inst->priorModelPars[5] = (float)0.0; //default weighting parameter for
+  inst->priorModelPars[4] =
+      (float)1.0;  // default weighting parameter for lrt feature
+  inst->priorModelPars[5] = (float)0.0;  // default weighting parameter for
   // spectral flatness feature
-  inst->priorModelPars[6] = (float)0.0; //default weighting parameter for
+  inst->priorModelPars[6] = (float)0.0;  // default weighting parameter for
   // spectral difference feature
-  inst->modelUpdatePars[0] = 2; //update flag for parameters:
+  inst->modelUpdatePars[0] = 2;  // update flag for parameters:
   // 0 no update, 1=update once, 2=update every window
-  inst->modelUpdatePars[1] = 500; //window for update
-  inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum
-  //counter if the feature thresholds are updated during the sequence
+  inst->modelUpdatePars[1] = 500;  // window for update
+  inst->modelUpdatePars[2] =
+      0;  // counter for update of conservative noise spectrum
+  // counter if the feature thresholds are updated during the sequence
   inst->modelUpdatePars[3] = inst->modelUpdatePars[1];

   inst->signalEnergy = 0.0;
@@ -195,12 +210,11 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs) {
   inst->pinkNoiseNumerator = 0.0;
   inst->pinkNoiseExp = 0.0;

-  WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration
+  WebRtcNs_set_feature_extraction_parameters(inst);

-  //default mode
+  // default mode
   WebRtcNs_set_policy_core(inst, 0);

-
   memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX);

   inst->initFlag = 1;
@@ -219,17 +233,17 @@ int WebRtcNs_set_policy_core(NSinst_t* inst, int mode) {
     inst->denoiseBound = (float)0.5;
     inst->gainmap = 0;
   } else if (mode == 1) {
-    //inst->overdrive = (float)1.25;
+    // inst->overdrive = (float)1.25;
     inst->overdrive = (float)1.0;
     inst->denoiseBound = (float)0.25;
     inst->gainmap = 1;
   } else if (mode == 2) {
-    //inst->overdrive = (float)1.25;
+    // inst->overdrive = (float)1.25;
     inst->overdrive = (float)1.1;
     inst->denoiseBound = (float)0.125;
     inst->gainmap = 1;
   } else if (mode == 3) {
-    //inst->overdrive = (float)1.30;
+    // inst->overdrive = (float)1.30;
     inst->overdrive = (float)1.25;
     inst->denoiseBound = (float)0.09;
     inst->gainmap = 1;
@@ -265,17 +279,19 @@ void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {

       // update log quantile estimate
       if (lmagn[i] > inst->lquantile[offset + i]) {
-        inst->lquantile[offset + i] += QUANTILE * delta
-            / (float)(inst->counter[s] + 1);
+        inst->lquantile[offset + i] +=
+            QUANTILE * delta / (float)(inst->counter[s] + 1);
       } else {
-        inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta
-            / (float)(inst->counter[s] + 1);
+        inst->lquantile[offset + i] -=
+            ((float)1.0 - QUANTILE) * delta / (float)(inst->counter[s] + 1);
       }

       // update density estimate
       if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) {
-        inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset
-            + i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1);
+        inst->density[offset + i] =
+            ((float)inst->counter[s] * inst->density[offset + i] +
+             (float)1.0 / ((float)2.0 * WIDTH)) /
+            (float)(inst->counter[s] + 1);
       }
     }  // end loop over magnitude spectrum
@@ -305,57 +321,66 @@ void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
 }

 // Extract thresholds for feature parameters
-// histograms are computed over some window_size (given by inst->modelUpdatePars[1])
+// histograms are computed over some window_size (given by
+// inst->modelUpdatePars[1])
 // thresholds and weights are extracted every window
-// flag 0 means update histogram only, flag 1 means compute the thresholds/weights
+// flag 0 means update histogram only, flag 1 means compute the
+// thresholds/weights
 // threshold and weights are returned in: inst->priorModelPars
 void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
   int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt;
   int maxPeak1, maxPeak2;
-  int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff;
+  int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff,
+      weightPeak2SpecDiff;

   float binMid, featureSum;
   float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff;
   float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl;

-  //3 features: lrt, flatness, difference
-  //lrt_feature = inst->featureData[3];
-  //flat_feature = inst->featureData[0];
-  //diff_feature = inst->featureData[4];
+  // 3 features: lrt, flatness, difference
+  // lrt_feature = inst->featureData[3];
+  // flat_feature = inst->featureData[0];
+  // diff_feature = inst->featureData[4];

-  //update histograms
+  // update histograms
   if (flag == 0) {
     // LRT
-    if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt)
-        && (inst->featureData[3] >= 0.0)) {
-      i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt);
+    if ((inst->featureData[3] <
+         HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt) &&
+        (inst->featureData[3] >= 0.0)) {
+      i = (int)(inst->featureData[3] /
+                inst->featureExtractionParams.binSizeLrt);
       inst->histLrt[i]++;
     }
     // Spectral flatness
-    if ((inst->featureData[0] < HIST_PAR_EST
-         * inst->featureExtractionParams.binSizeSpecFlat)
-        && (inst->featureData[0] >= 0.0)) {
-      i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat);
+    if ((inst->featureData[0] <
+         HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecFlat) &&
+        (inst->featureData[0] >= 0.0)) {
+      i = (int)(inst->featureData[0] /
+                inst->featureExtractionParams.binSizeSpecFlat);
       inst->histSpecFlat[i]++;
     }
     // Spectral difference
-    if ((inst->featureData[4] < HIST_PAR_EST
-         * inst->featureExtractionParams.binSizeSpecDiff)
-        && (inst->featureData[4] >= 0.0)) {
-      i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff);
+    if ((inst->featureData[4] <
+         HIST_PAR_EST * inst->featureExtractionParams.binSizeSpecDiff) &&
+        (inst->featureData[4] >= 0.0)) {
+      i = (int)(inst->featureData[4] /
+                inst->featureExtractionParams.binSizeSpecDiff);
       inst->histSpecDiff[i]++;
     }
   }

   // extract parameters for speech/noise probability
   if (flag == 1) {
-    //lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt
+    // lrt feature: compute the average over
+    // inst->featureExtractionParams.rangeAvgHistLrt
     avgHistLrt = 0.0;
     avgHistLrtCompl = 0.0;
     avgSquareHistLrt = 0.0;
     numHistLrt = 0;
     for (i = 0; i < HIST_PAR_EST; i++) {
-      binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
+      binMid =
+          ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt;
       if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) {
         avgHistLrt += inst->histLrt[i] * binMid;
         numHistLrt += inst->histLrt[i];
@@ -371,11 +396,11 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
     fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl;
     // get threshold for lrt feature:
     if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) {
-      //very low fluct, so likely noise
+      // very low fluct, so likely noise
       inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt;
     } else {
-      inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars
-          * avgHistLrt;
+      inst->priorModelPars[0] =
+          inst->featureExtractionParams.factor1ModelPars * avgHistLrt;
       // check if value is within min/max range
       if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) {
         inst->priorModelPars[0] = inst->featureExtractionParams.minLrt;
@@ -386,8 +411,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
     }
     // done with lrt feature
-    //
-    // for spectral flatness and spectral difference: compute the main peaks of histogram
+    // for spectral flatness and spectral difference: compute the main peaks of
+    // histogram
     maxPeak1 = 0;
     maxPeak2 = 0;
     posPeak1SpecFlat = 0.0;
@@ -397,7 +422,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {

     // peaks for flatness
     for (i = 0; i < HIST_PAR_EST; i++) {
-      binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat;
+      binMid = ((float)i + (float)0.5) *
+               inst->featureExtractionParams.binSizeSpecFlat;
       if (inst->histSpecFlat[i] > maxPeak1) {
         // Found new "first" peak
         maxPeak2 = maxPeak1;
@@ -415,7 +441,7 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
       }
     }

-    //compute two peaks for spectral difference
+    // compute two peaks for spectral difference
     maxPeak1 = 0;
     maxPeak2 = 0;
     posPeak1SpecDiff = 0.0;
@@ -424,7 +450,8 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
     weightPeak2SpecDiff = 0;
     // peaks for spectral difference
     for (i = 0; i < HIST_PAR_EST; i++) {
-      binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff;
+      binMid = ((float)i + (float)0.5) *
+               inst->featureExtractionParams.binSizeSpecDiff;
       if (inst->histSpecDiff[i] > maxPeak1) {
         // Found new "first" peak
         maxPeak2 = maxPeak1;
@@ -445,25 +472,26 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
     // for spectrum flatness feature
     useFeatureSpecFlat = 1;
     // merge the two peaks if they are close
-    if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat)
-         < inst->featureExtractionParams.limitPeakSpacingSpecFlat)
-        && (weightPeak2SpecFlat
-            > inst->featureExtractionParams.limitPeakWeightsSpecFlat
-            * weightPeak1SpecFlat)) {
+    if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) <
+         inst->featureExtractionParams.limitPeakSpacingSpecFlat) &&
+        (weightPeak2SpecFlat >
+         inst->featureExtractionParams.limitPeakWeightsSpecFlat *
+             weightPeak1SpecFlat)) {
      weightPeak1SpecFlat += weightPeak2SpecFlat;
      posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat);
    }
-    //reject if weight of peaks is not large enough, or peak value too small
-    if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat
-        || posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
+    // reject if weight of peaks is not large enough, or peak value too small
+    if (weightPeak1SpecFlat <
+            inst->featureExtractionParams.thresWeightSpecFlat ||
+        posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) {
       useFeatureSpecFlat = 0;
     }
     // if selected, get the threshold
     if (useFeatureSpecFlat == 1) {
       // compute the threshold
-      inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars
-          * posPeak1SpecFlat;
-      //check if value is within min/max range
+      inst->priorModelPars[1] =
+          inst->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat;
+      // check if value is within min/max range
       if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) {
         inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat;
       }
@@ -476,22 +504,23 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
     // for template feature
     useFeatureSpecDiff = 1;
     // merge the two peaks if they are close
-    if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff)
-         < inst->featureExtractionParams.limitPeakSpacingSpecDiff)
-        && (weightPeak2SpecDiff
-            > inst->featureExtractionParams.limitPeakWeightsSpecDiff
-            * weightPeak1SpecDiff)) {
+    if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) <
+         inst->featureExtractionParams.limitPeakSpacingSpecDiff) &&
+        (weightPeak2SpecDiff >
+         inst->featureExtractionParams.limitPeakWeightsSpecDiff *
+             weightPeak1SpecDiff)) {
       weightPeak1SpecDiff += weightPeak2SpecDiff;
       posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff);
     }
     // get the threshold value
-    inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars
-        * posPeak1SpecDiff;
-    //reject if weight of peaks is not large enough
-    if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) {
+    inst->priorModelPars[3] =
+        inst->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff;
+    // reject if weight of peaks is not large enough
+    if (weightPeak1SpecDiff <
+        inst->featureExtractionParams.thresWeightSpecDiff) {
       useFeatureSpecDiff = 0;
     }
-    //check if value is within min/max range
+    // check if value is within min/max range
     if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) {
       inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff;
     }
@@ -531,7 +560,7 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) {
 // spectral flatness is returned in inst->featureData[0]
 void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
   int i;
-  int shiftLP = 1; //option to remove first bin(s) from spectral measures
+  int shiftLP = 1;  // option to remove first bin(s) from spectral measures
   float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp;

   // comute spectral measures
@@ -541,7 +570,8 @@ void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
   for (i = 0; i < shiftLP; i++) {
     avgSpectralFlatnessDen -= magnIn[i];
   }
-  // compute log of ratio of the geometric to arithmetic mean: check for log(0) case
+  // compute log of ratio of the geometric to arithmetic mean: check for log(0)
+  // case
   for (i = shiftLP; i < inst->magnLen; i++) {
     if (magnIn[i] > 0.0) {
       avgSpectralFlatnessNum += (float)log(magnIn[i]);
@@ -550,24 +580,26 @@ void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) {
       return;
     }
   }
-  //normalize
+  // normalize
   avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen;
   avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen;

-  //ratio and inverse log: check for case of log(0)
+  // ratio and inverse log: check for case of log(0)
   spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen;

-  //time-avg update of spectral flatness feature
+  // time-avg update of spectral flatness feature
   inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]);
   // done with flatness feature
 }

-// Compute the difference measure between input spectrum and a template/learned noise spectrum
+// Compute the difference measure between input spectrum and a template/learned
+// noise spectrum
 // magnIn is the input spectrum
 // the reference/template spectrum is inst->magnAvgPause[i]
 // returns (normalized) spectral difference in inst->featureData[4]
 void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
-  // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause)
+  // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 /
+  // var(magnAvgPause)
   int i;
   float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn;

@@ -575,7 +607,7 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
   avgMagn = inst->sumMagn;
   // compute average quantities
   for (i = 0; i < inst->magnLen; i++) {
-    //conservative smooth noise spectrum from pause frames
+    // conservative smooth noise spectrum from pause frames
     avgPause += inst->magnAvgPause[i];
   }
   avgPause = avgPause / ((float)inst->magnLen);
@@ -587,7 +619,8 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
   // compute variance and covariance quantities
   for (i = 0; i < inst->magnLen; i++) {
     covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause);
-    varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
+    varPause +=
+        (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
     varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
   }
   covMagnPause = covMagnPause / ((float)inst->magnLen);
@@ -596,19 +629,24 @@ void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) {
   // update of average magnitude spectrum
   inst->featureData[6] += inst->signalEnergy;

-  avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
+  avgDiffNormMagn =
+      varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
   // normalize and compute time-avg update of difference feature
-  avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
-  inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
+  avgDiffNormMagn =
+      (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
+  inst->featureData[4] +=
+      SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
 }

 // Compute speech/noise probability
 // speech/noise probability is returned in: probSpeechFinal
-//magn is the input magnitude spectrum
-//noise is the noise spectrum
-//snrLocPrior is the prior snr for each freq.
-//snr loc_post is the post snr for each freq.
-void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior,
+// magn is the input magnitude spectrum
+// noise is the noise spectrum
+// snrLocPrior is the prior snr for each freq.
+// snr loc_post is the post snr for each freq.
+void WebRtcNs_SpeechNoiseProb(NSinst_t* inst,
+                              float* probSpeechFinal,
+                              float* snrLocPrior,
                               float* snrLocPost) {
   int i, sgnMap;
   float invLrt, gainPrior, indPrior;
@@ -620,19 +658,19 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
   float widthPrior, widthPrior0, widthPrior1, widthPrior2;

   widthPrior0 = WIDTH_PR_MAP;
-  widthPrior1 = (float)2.0 * WIDTH_PR_MAP; //width for pause region:
+  widthPrior1 = (float)2.0 * WIDTH_PR_MAP;  // width for pause region:
   // lower range, so increase width in tanh map
-  widthPrior2 = (float)2.0 * WIDTH_PR_MAP; //for spectral-difference measure
+  widthPrior2 = (float)2.0 * WIDTH_PR_MAP;  // for spectral-difference measure

-  //threshold parameters for features
+  // threshold parameters for features
   threshPrior0 = inst->priorModelPars[0];
   threshPrior1 = inst->priorModelPars[1];
   threshPrior2 = inst->priorModelPars[3];

-  //sign for flatness feature
+  // sign for flatness feature
   sgnMap = (int)(inst->priorModelPars[2]);

-  //weight parameters for features
+  // weight parameters for features
   weightIndPrior0 = inst->priorModelPars[4];
   weightIndPrior1 = inst->priorModelPars[5];
   weightIndPrior2 = inst->priorModelPars[6];
@@ -644,8 +682,8 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
     tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i];
     tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001);
     besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2;
-    inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1)
-                                          - inst->logLrtTimeAvg[i]);
+    inst->logLrtTimeAvg[i] +=
+        LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - inst->logLrtTimeAvg[i]);
     logLrtTimeAvgKsum += inst->logLrtTimeAvg[i];
   }
   logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen);
@@ -653,23 +691,24 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
   // done with computation of LR factor

   //
-  //compute the indicator functions
+  // compute the indicator functions
   //

   // average lrt feature
   widthPrior = widthPrior0;
-  //use larger width in tanh map for pause regions
+  // use larger width in tanh map for pause regions
   if (logLrtTimeAvgKsum < threshPrior0) {
     widthPrior = widthPrior1;
   }
   // compute indicator function: sigmoid map
-  indicator0 = (float)0.5 * ((float)tanh(widthPrior *
-      (logLrtTimeAvgKsum - threshPrior0)) + (float)1.0);
+  indicator0 = (float)0.5 *
+               ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) +
+                (float)1.0);

-  //spectral flatness feature
+  // spectral flatness feature
   tmpFloat1 = inst->featureData[0];
   widthPrior = widthPrior0;
-  //use larger width in tanh map for pause regions
+  // use larger width in tanh map for pause regions
   if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) {
     widthPrior = widthPrior1;
   }
@@ -677,26 +716,29 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
     widthPrior = widthPrior1;
   }
   // compute indicator function: sigmoid map
-  indicator1 = (float)0.5 * ((float)tanh((float)sgnMap *
-      widthPrior * (threshPrior1 - tmpFloat1)) + (float)1.0);
+  indicator1 =
+      (float)0.5 *
+      ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) +
+       (float)1.0);

-  //for template spectrum-difference
+  // for template spectrum-difference
   tmpFloat1 = inst->featureData[4];
   widthPrior = widthPrior0;
-  //use larger width in tanh map for pause regions
+  // use larger width in tanh map for pause regions
   if (tmpFloat1 < threshPrior2) {
     widthPrior = widthPrior2;
   }
   // compute indicator function: sigmoid map
-  indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2))
-                             + (float)1.0);
+  indicator2 =
+      (float)0.5 *
+      ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + (float)1.0);

-  //combine the indicator function with the feature weights
-  indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2
-      * indicator2;
+  // combine the indicator function with the feature weights
+  indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 +
+             weightIndPrior2 * indicator2;
   // done with computing indicator function

-  //compute the prior probability
+  // compute the prior probability
   inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb);
   // make sure probabilities are within range: keep floor to 0.01
   if (inst->priorSpeechProb > 1.0) {
@@ -706,8 +748,9 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
     inst->priorSpeechProb = (float)0.01;
   }

-  //final speech probability: combine prior model with LR factor:
-  gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001);
+  // final speech probability: combine prior model with LR factor:
+  gainPrior = ((float)1.0 - inst->priorSpeechProb) /
+              (inst->priorSpeechProb + (float)0.0001);
   for (i = 0; i < inst->magnLen; i++) {
     invLrt = (float)exp(-inst->logLrtTimeAvg[i]);
     invLrt = (float)gainPrior * invLrt;
@@ -716,29 +759,29 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
 }

 int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
-  int i;
-  const int kStartBand = 5; // Skip first frequency bins during estimation.
-  int updateParsFlag;
-  float energy;
-  float signalEnergy, sumMagn;
-  float snrPrior, currentEstimateStsa;
-  float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech;
-  float gammaNoiseTmp, gammaNoiseOld;
-  float noiseUpdateTmp, fTmp;
-  float winData[ANAL_BLOCKL_MAX];
-  float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
-  float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
-  float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
-  float previousEstimateStsa[HALF_ANAL_BLOCKL];
-  float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
+  int i;
+  const int kStartBand = 5;  // Skip first frequency bins during estimation.
+  int updateParsFlag;
+  float energy;
+  float signalEnergy, sumMagn;
+  float snrPrior, currentEstimateStsa;
+  float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech;
+  float gammaNoiseTmp, gammaNoiseOld;
+  float noiseUpdateTmp, fTmp;
+  float winData[ANAL_BLOCKL_MAX];
+  float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
+  float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
+  float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
+  float previousEstimateStsa[HALF_ANAL_BLOCKL];
+  float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
   // Variables during startup
-  float sum_log_i = 0.0;
-  float sum_log_i_square = 0.0;
-  float sum_log_magn = 0.0;
-  float sum_log_i_log_magn = 0.0;
-  float parametric_noise = 0.0;
-  float parametric_exp = 0.0;
-  float parametric_num = 0.0;
+  float sum_log_i = 0.0;
+  float sum_log_i_square = 0.0;
+  float sum_log_magn = 0.0;
+  float sum_log_i_log_magn = 0.0;
+  float parametric_noise = 0.0;
+  float parametric_exp = 0.0;
+  float parametric_num = 0.0;

   // Check that initiation has been done
   if (inst->initFlag != 1) {
@@ -749,9 +792,11 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
   //
   // update analysis buffer for L band
-  memcpy(inst->analyzeBuf, inst->analyzeBuf + inst->blockLen10ms,
+  memcpy(inst->analyzeBuf,
+         inst->analyzeBuf + inst->blockLen10ms,
          sizeof(float) * (inst->anaLen - inst->blockLen10ms));
-  memcpy(inst->analyzeBuf + inst->anaLen - inst->blockLen10ms, speechFrame,
+  memcpy(inst->analyzeBuf + inst->anaLen - inst->blockLen10ms,
+         speechFrame,
          sizeof(float) * inst->blockLen10ms);

   // check if processing needed
@@ -764,17 +809,18 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
     }
     if (energy == 0.0) {
       // we want to avoid updating statistics in this case:
-      // Updating feature statistics when we have zeros only will cause thresholds to
-      // move towards zero signal situations. This in turn has the effect that once the
-      // signal is "turned on" (non-zero values) everything will be treated as speech
-      // and there is no noise suppression effect. Depending on the duration of the
-      // inactive signal it takes a considerable amount of time for the system to learn
-      // what is noise and what is speech.
+      // Updating feature statistics when we have zeros only will cause
+      // thresholds to move towards zero signal situations. This in turn has the
+      // effect that once the signal is "turned on" (non-zero values) everything
+      // will be treated as speech and there is no noise suppression effect.
+      // Depending on the duration of the inactive signal it takes a
+      // considerable amount of time for the system to learn what is noise and
+      // what is speech.
       return 0;
     }

     //
-    inst->blockInd++; // Update the block index only when we process a block.
+    inst->blockInd++;  // Update the block index only when we process a block.
     // FFT
     WebRtc_rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft);
@@ -822,18 +868,20 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
     inst->signalEnergy = signalEnergy;
     inst->sumMagn = sumMagn;

-    //compute spectral flatness on input spectrum
+    // compute spectral flatness on input spectrum
     WebRtcNs_ComputeSpectralFlatness(inst, magn);
     // quantile noise estimate
     WebRtcNs_NoiseEstimation(inst, magn, noise);
-    //compute simplified noise model during startup
+    // compute simplified noise model during startup
     if (inst->blockInd < END_STARTUP_SHORT) {
       // Estimate White noise
-      inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive;
+      inst->whiteNoiseLevel +=
+          sumMagn / ((float)inst->magnLen) * inst->overdrive;
       // Estimate Pink noise parameters
       tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand));
       tmpFloat1 -= (sum_log_i * sum_log_i);
-      tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
+      tmpFloat2 =
+          (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
       tmpFloat3 = tmpFloat2 / tmpFloat1;
       // Constrain the estimated spectrum to be positive
       if (tmpFloat3 < 0.0f) {
@@ -858,18 +906,22 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
         parametric_noise = inst->whiteNoiseLevel;
       } else {
         // Use pink noise estimate
-        parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
+        parametric_num =
+            exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
         parametric_num *= (float)(inst->blockInd + 1);
         parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
-        parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp);
+        parametric_noise =
+            parametric_num / pow((float)kStartBand, parametric_exp);
       }
       for (i = 0; i < inst->magnLen; i++) {
-        // Estimate the background noise using the white and pink noise parameters
+        // Estimate the background noise using the white and pink noise
+        // parameters
         if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) {
           // Use pink noise estimate
           parametric_noise = parametric_num / pow((float)i, parametric_exp);
         }
-        theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise);
+        theFilterTmp[i] =
+            (inst->initMagnEst[i] - inst->overdrive * parametric_noise);
         theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001);
         // Weight quantile noise with modeled noise
         noise[i] *= (inst->blockInd);
@@ -878,7 +930,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
         noise[i] /= END_STARTUP_SHORT;
       }
     }
-    //compute average signal during END_STARTUP_LONG time:
+    // compute average signal during END_STARTUP_LONG time:
     // used to normalize spectral difference measure
     if (inst->blockInd < END_STARTUP_LONG) {
       inst->featureData[5] *= inst->blockInd;
@@ -886,11 +938,8 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
       inst->featureData[5] /= (inst->blockInd + 1);
     }

-    //start processing at frames == converged+1
-    //
+    // start processing at frames == converged+1
     // STEP 1: compute prior and post snr based on quantile noise est
-    //
-    //
     // compute DD estimate of prior SNR: needed for new method
     for (i = 0; i < inst->magnLen; i++) {
       // post snr
@@ -900,23 +949,25 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
       }
       // previous post snr
       // previous estimate: based on previous frame with gain filter
-      previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001)
-          * (inst->smooth[i]);
+      previousEstimateStsa[i] = inst->magnPrev[i] /
+                                (inst->noisePrev[i] + (float)0.0001) *
+                                (inst->smooth[i]);
       // DD estimate is sum of two terms: current estimate and previous estimate
       // directed decision update of snrPrior
-      snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
-          * snrLocPost[i];
+      snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] +
+                       ((float)1.0 - DD_PR_SNR) * snrLocPost[i];
       // post and prior snr needed for step 2
     }  // end of loop over freqs
-    // done with step 1: dd computation of prior and post snr
+    // done with step 1: dd computation of prior and post snr

-    //
-    //STEP 2: compute speech/noise likelihood
-    //
-    // compute difference of input spectrum with learned/estimated noise spectrum
+    // STEP 2: compute speech/noise likelihood
+    // compute difference of input spectrum with learned/estimated noise
+    // spectrum
     WebRtcNs_ComputeSpectralDifference(inst, magn);
-    // compute histograms for parameter decisions (thresholds and weights for features)
-    // parameters are extracted once every window time (=inst->modelUpdatePars[1])
+    // compute histograms for parameter decisions (thresholds and weights for
+    // features)
+    // parameters are extracted once every window time
+    // (=inst->modelUpdatePars[1])
     if (updateParsFlag >= 1) {
       // counter update
       inst->modelUpdatePars[3]--;
@@ -934,10 +985,10 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
         } else {
           // update every window:
           // get normalization for spectral difference for next window estimate
-          inst->featureData[6] = inst->featureData[6]
-              / ((float)inst->modelUpdatePars[1]);
-          inst->featureData[5] = (float)0.5 * (inst->featureData[6]
-              + inst->featureData[5]);
+          inst->featureData[6] =
+              inst->featureData[6] / ((float)inst->modelUpdatePars[1]);
+          inst->featureData[5] =
+              (float)0.5 * (inst->featureData[6] + inst->featureData[5]);
           inst->featureData[6] = (float)0.0;
         }
       }
@@ -951,8 +1002,10 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
       probNonSpeech = (float)1.0 - probSpeech;
       // temporary noise update:
       // use it for speech frames if update value is less than previous
-      noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
-          * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
+      noiseUpdateTmp =
+          gammaNoiseTmp * inst->noisePrev[i] +
+          ((float)1.0 - gammaNoiseTmp) *
+              (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
       //
       // time-constant based on speech/noise state
       gammaNoiseOld = gammaNoiseTmp;
@@ -963,16 +1016,20 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
       }
       // conservative noise update
       if (probSpeech < PROB_RANGE) {
-        inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
+        inst->magnAvgPause[i] +=
+            GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]);
       }
       // noise update
       if (gammaNoiseTmp == gammaNoiseOld) {
         noise[i] = noiseUpdateTmp;
       } else {
-        noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp)
-            * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
+        noise[i] =
+            gammaNoiseTmp * inst->noisePrev[i] +
+            ((float)1.0 - gammaNoiseTmp) *
+                (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]);
         // allow for noise update downwards:
-        // if noise update decreases the noise, it is safe, so allow it to happen
+        // if noise update decreases the noise, it is safe, so allow it to
+        // happen
         if (noiseUpdateTmp < noise[i]) {
           noise[i] = noiseUpdateTmp;
         }
@@ -980,19 +1037,19 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
     }  // end of freq loop
     // done with step 2: noise update

-    //
-    // STEP 3: compute dd update of prior snr and post snr based on new noise estimate
-    //
+    // STEP 3: compute dd update of prior snr and post snr based on new noise
+    // estimate
     for (i = 0; i < inst->magnLen; i++) {
       // post and prior snr
       currentEstimateStsa = (float)0.0;
       if (magn[i] > noise[i]) {
         currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0;
       }
-      // DD estimate is sume of two terms: current estimate and previous estimate
+      // DD estimate is sume of two terms: current estimate and previous
+      // estimate
       // directed decision update of snrPrior
-      snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR)
-          * currentEstimateStsa;
+      snrPrior = DD_PR_SNR * previousEstimateStsa[i] +
+                 ((float)1.0 - DD_PR_SNR) * currentEstimateStsa;
       // gain filter
       tmpFloat1 = inst->overdrive + snrPrior;
       tmpFloat2 = (float)snrPrior / tmpFloat1;
@@ -1032,7 +1089,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* inst, float* speechFrame) {
       inst->noisePrev[i] = noise[i];
       inst->magnPrev[i] = magn[i];
     }
-  } // end of if inst->outLen == 0
+  }  // end of if inst->outLen == 0

   return 0;
 }
@@ -1043,21 +1100,21 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
                          float* outFrame,
                          float* outFrameHB) {
   // main routine for noise reduction
-  int flagHB = 0;
-  int i;
+  int flagHB = 0;
+  int i;

-  float energy1, energy2, gain, factor, factor1, factor2;
-  float fout[BLOCKL_MAX];
-  float winData[ANAL_BLOCKL_MAX];
-  float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
+  float energy1, energy2, gain, factor, factor1, factor2;
+  float fout[BLOCKL_MAX];
+  float winData[ANAL_BLOCKL_MAX];
+  float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
   // SWB variables
-  int deltaBweHB = 1;
-  int deltaGainHB = 1;
-  float decayBweHB = 1.0;
-  float gainMapParHB = 1.0;
-  float gainTimeDomainHB = 1.0;
-  float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;
+  int deltaBweHB = 1;
+  int deltaGainHB = 1;
+  float decayBweHB = 1.0;
+  float gainMapParHB = 1.0;
+  float gainTimeDomainHB = 1.0;
+  float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB;

   // Check that initiation has been done
   if (inst->initFlag != 1) {
@@ -1075,16 +1132,20 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
   }

   // update analysis buffer for L band
-  memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms,
+  memcpy(inst->dataBuf,
+         inst->dataBuf + inst->blockLen10ms,
          sizeof(float) * (inst->anaLen - inst->blockLen10ms));
-  memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, speechFrame,
+  memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms,
+         speechFrame,
          sizeof(float) * inst->blockLen10ms);

   if (flagHB == 1) {
     // update analysis buffer for H band
-    memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms,
+    memcpy(inst->dataBufHB,
+           inst->dataBufHB + inst->blockLen10ms,
            sizeof(float) * (inst->anaLen - inst->blockLen10ms));
-    memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, speechFrameHB,
+    memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms,
+           speechFrameHB,
            sizeof(float) * inst->blockLen10ms);
   }
@@ -1103,9 +1164,11 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
       fout[i - inst->windShift] = inst->syntBuf[i];
     }
     // update synthesis buffer
-    memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
+    memcpy(inst->syntBuf,
+           inst->syntBuf + inst->blockLen,
            sizeof(float) * (inst->anaLen - inst->blockLen));
-    memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
+    memset(inst->syntBuf + inst->anaLen - inst->blockLen,
+           0,
            sizeof(float) * inst->blockLen);

     // out buffer
@@ -1154,10 +1217,10 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
   WebRtc_rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft);

   for (i = 0; i < inst->anaLen; i++) {
-    real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling
+    real[i] = 2.0f * winData[i] / inst->anaLen;  // fft scaling
   }

-  //scale factor: only do it after END_STARTUP_LONG time
+  // scale factor: only do it after END_STARTUP_LONG time
   factor = (float)1.0;
   if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) {
     factor1 = (float)1.0;
@@ -1177,17 +1240,17 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
       }
     }
     if (gain < B_LIM) {
-      //don't reduce scale too much for pause regions:
+      // don't reduce scale too much for pause regions:
       // attenuation here should be controlled by flooring
       if (gain <= inst->denoiseBound) {
         gain = inst->denoiseBound;
       }
       factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain);
     }
-    //combine both scales with speech/noise prob:
+    // combine both scales with speech/noise prob:
     // note prior (priorSpeechProb) is not frequency dependent
-    factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb)
-        * factor2;
+    factor = inst->priorSpeechProb * factor1 +
+             ((float)1.0 - inst->priorSpeechProb) * factor2;
   }  // out of inst->gainmap==1

   // synthesis
@@ -1199,9 +1262,11 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
     fout[i - inst->windShift] = inst->syntBuf[i];
   }
   // update synthesis buffer
-  memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
+  memcpy(inst->syntBuf,
+         inst->syntBuf + inst->blockLen,
          sizeof(float) * (inst->anaLen - inst->blockLen));
-  memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
+  memset(inst->syntBuf + inst->anaLen - inst->blockLen,
+         0,
          sizeof(float) * inst->blockLen);

   // out buffer
@@ -1216,16 +1281,18 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
     for (i = 0; i < inst->blockLen10ms; i++) {
       fout[i] = inst->outBuf[i];
     }
-    memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms,
+    memcpy(inst->outBuf,
+           inst->outBuf + inst->blockLen10ms,
            sizeof(float) * (inst->outLen - inst->blockLen10ms));
-    memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0,
+    memset(inst->outBuf + inst->outLen - inst->blockLen10ms,
+           0,
            sizeof(float) * inst->blockLen10ms);
     inst->outLen -= inst->blockLen10ms;
   }

   for (i = 0; i < inst->blockLen10ms; ++i)
-    outFrame[i] = WEBRTC_SPL_SAT(
-        WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
+    outFrame[i] =
+        WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);

   // for time-domain gain of HB
   if (flagHB == 1) {
@@ -1245,14 +1312,16 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
     avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
     avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0;
     // gain based on speech prob:
-    gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
-    //combine gain with low band gain
+    gainModHB = (float)0.5 *
+                ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
+    // combine gain with low band gain
     gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB;
     if (avgProbSpeechHB >= (float)0.5) {
-      gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
+      gainTimeDomainHB =
+          (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB;
     }
     gainTimeDomainHB = gainTimeDomainHB * decayBweHB;
-    //make sure gain is within flooring range
+    // make sure gain is within flooring range
     // flooring bottom
     if (gainTimeDomainHB < inst->denoiseBound) {
       gainTimeDomainHB = inst->denoiseBound;
@@ -1261,11 +1330,11 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
     if (gainTimeDomainHB > (float)1.0) {
       gainTimeDomainHB = 1.0;
     }
-    //apply gain
+    // apply gain
     for (i = 0; i < inst->blockLen10ms; i++) {
       float o = gainTimeDomainHB * inst->dataBufHB[i];
-      outFrameHB[i] = WEBRTC_SPL_SAT(
-          WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
+      outFrameHB[i] =
+          WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
     }
   }  // end of H band gain computation
   //
diff --git a/webrtc/modules/audio_processing/ns/ns_core.h b/webrtc/modules/audio_processing/ns/ns_core.h
index 98de728a0a..2395eb29af 100644
--- a/webrtc/modules/audio_processing/ns/ns_core.h
+++ b/webrtc/modules/audio_processing/ns/ns_core.h
@@ -8,107 +8,106 @@
  * be found in the AUTHORS file in the root of the source tree.
  */

-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_

 #include "webrtc/modules/audio_processing/ns/defines.h"

 typedef struct NSParaExtract_t_ {
-
-  //bin size of histogram
+  // bin size of histogram
   float binSizeLrt;
   float binSizeSpecFlat;
   float binSizeSpecDiff;
-  //range of histogram over which lrt threshold is computed
+  // range of histogram over which lrt threshold is computed
   float rangeAvgHistLrt;
-  //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
-  //thresholds for prior model
-  float factor1ModelPars; //for lrt and spectral difference
-  float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
-  //peak limit for spectral flatness (varies between 0 and 1)
+  // scale parameters: multiply dominant peaks of the histograms by scale factor
+  // to obtain thresholds for prior model
+  float factor1ModelPars;  // for lrt and spectral difference
+  float factor2ModelPars;  // for spectral_flatness: used when noise is flatter
+                           // than speech
+  // peak limit for spectral flatness (varies between 0 and 1)
   float thresPosSpecFlat;
-  //limit on spacing of two highest peaks in histogram: spacing determined by bin size
+  // limit on spacing of two highest peaks in histogram: spacing determined by
+  // bin size
   float limitPeakSpacingSpecFlat;
   float limitPeakSpacingSpecDiff;
-  //limit on relevance of second peak:
+  // limit on relevance of second peak:
   float limitPeakWeightsSpecFlat;
   float limitPeakWeightsSpecDiff;
-  //limit on fluctuation of lrt feature
+  // limit on fluctuation of lrt feature
   float thresFluctLrt;
-  //limit on the max and min values for the feature thresholds
+  // limit on the max and min values for the feature thresholds
   float maxLrt;
   float minLrt;
   float maxSpecFlat;
   float minSpecFlat;
   float maxSpecDiff;
   float minSpecDiff;
-  //criteria of weight of histogram peak to accept/reject feature
+  // criteria of weight of histogram peak to accept/reject feature
   int thresWeightSpecFlat;
   int thresWeightSpecDiff;

 } NSParaExtract_t;

 typedef struct NSinst_t_ {
+  uint32_t fs;
+  int blockLen;
+  int blockLen10ms;
+  int windShift;
+  int outLen;
+  int anaLen;
+  int magnLen;
+  int aggrMode;
+  const float* window;
+  float analyzeBuf[ANAL_BLOCKL_MAX];
+  float dataBuf[ANAL_BLOCKL_MAX];
+  float syntBuf[ANAL_BLOCKL_MAX];
+  float outBuf[3 * BLOCKL_MAX];
-  uint32_t fs;
-  int blockLen;
-  int blockLen10ms;
-  int windShift;
-  int outLen;
-  int anaLen;
-  int magnLen;
-  int aggrMode;
-  const float* window;
-  float analyzeBuf[ANAL_BLOCKL_MAX];
-  float dataBuf[ANAL_BLOCKL_MAX];
-  float syntBuf[ANAL_BLOCKL_MAX];
-  float outBuf[3 * BLOCKL_MAX];
-
-  int initFlag;
+  int initFlag;
   // parameters for quantile noise estimation
-  float density[SIMULT* HALF_ANAL_BLOCKL];
-  float lquantile[SIMULT* HALF_ANAL_BLOCKL];
-  float quantile[HALF_ANAL_BLOCKL];
-  int counter[SIMULT];
-  int updates;
+  float density[SIMULT * HALF_ANAL_BLOCKL];
+  float lquantile[SIMULT * HALF_ANAL_BLOCKL];
+  float quantile[HALF_ANAL_BLOCKL];
+  int counter[SIMULT];
+  int updates;
   // parameters for Wiener filter
-  float smooth[HALF_ANAL_BLOCKL];
-  float overdrive;
-  float denoiseBound;
-  int gainmap;
+  float smooth[HALF_ANAL_BLOCKL];
+  float overdrive;
+  float denoiseBound;
+  int gainmap;
   // fft work arrays.
-  int ip[IP_LENGTH];
-  float wfft[W_LENGTH];
+  int ip[IP_LENGTH];
+  float wfft[W_LENGTH];

   // parameters for new method: some not needed, will reduce/cleanup later
-  int32_t blockInd; //frame index counter
-  int modelUpdatePars[4]; //parameters for updating or estimating
+  int32_t blockInd;        // frame index counter
+  int modelUpdatePars[4];  // parameters for updating or estimating
   // thresholds/weights for prior model
-  float priorModelPars[7]; //parameters for prior model
-  float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
-  float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame
-  float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing
-  float priorSpeechProb; //prior speech/noise probability
-  float featureData[7]; //data for features
-  float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate
-  float signalEnergy; //energy of magn
-  float sumMagn; //sum of magn
-  float whiteNoiseLevel; //initial noise estimate
-  float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate
-  float pinkNoiseNumerator; //pink noise parameter: numerator
-  float pinkNoiseExp; //pink noise parameter: power of freq
-  NSParaExtract_t featureExtractionParams; //parameters for feature extraction
-  //histograms for parameter estimation
-  int histLrt[HIST_PAR_EST];
-  int histSpecFlat[HIST_PAR_EST];
-  int histSpecDiff[HIST_PAR_EST];
-  //quantities for high band estimate
-  float speechProb[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT
-  float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB
+  float priorModelPars[7];  // parameters for prior model
+  float noisePrev[HALF_ANAL_BLOCKL];  // noise spectrum from previous frame
+  float magnPrev[HALF_ANAL_BLOCKL];   // magnitude spectrum of previous frame
+  float logLrtTimeAvg[HALF_ANAL_BLOCKL];  // log lrt factor with time-smoothing
+  float priorSpeechProb;  // prior speech/noise probability
+  float featureData[7];   // data for features
+  float magnAvgPause[HALF_ANAL_BLOCKL];  // conservative noise spectrum estimate
+  float signalEnergy;     // energy of magn
+  float sumMagn;          // sum of magn
+  float whiteNoiseLevel;  // initial noise estimate
+  float initMagnEst[HALF_ANAL_BLOCKL];  // initial magnitude spectrum estimate
+  float pinkNoiseNumerator;  // pink noise parameter: numerator
+  float pinkNoiseExp;        // pink noise parameter: power of freq
+  NSParaExtract_t featureExtractionParams;  // parameters for feature extraction
+  // histograms for parameter estimation
+  int histLrt[HIST_PAR_EST];
+  int histSpecFlat[HIST_PAR_EST];
+  int histSpecDiff[HIST_PAR_EST];
+  // quantities for high band estimate
+  float speechProb[HALF_ANAL_BLOCKL];  // final speech/noise prob: prior + LRT
+  float dataBufHB[ANAL_BLOCKL_MAX];    // buffering data for HB
 } NSinst_t;

-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -137,7 +136,7 @@ int WebRtcNs_InitCore(NSinst_t* inst, uint32_t fs);
  *
  * Input:
  *      - inst          : Instance that should be initialized
- *      - mode          : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
+ *      - mode          : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB)
  *
  * Output:
  *      - NS_inst       : Initialized instance
@@ -188,8 +187,7 @@ int WebRtcNs_ProcessCore(NSinst_t* inst,
                          float* outFrameLow,
                          float* outFrameHigh);

-
 #ifdef __cplusplus
 }
 #endif

-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
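
Editor's note (not part of the patch): for readers unfamiliar with ns_core, the entry points reformatted above are normally driven as init -> policy -> per-frame analyze/process. A minimal sketch follows, assuming a 16 kHz mono stream; it uses only the calls whose full signatures appear in this diff, and the per-frame Process step is left as a comment because its complete parameter list is not shown here.

/* Illustrative sketch only -- not from the WebRTC tree. */
#include "webrtc/modules/audio_processing/ns/ns_core.h"

void example_ns_setup(void) {
  static NSinst_t inst;  /* core state; InitCore fills it in */
  if (WebRtcNs_InitCore(&inst, 16000) != 0) {
    return;  /* NULL instance or unsupported sample rate */
  }
  /* 2 = "Aggressive (15 dB)" per the header comment above. */
  WebRtcNs_set_policy_core(&inst, 2);
  /* Then, for every 10 ms frame of input:
   *   WebRtcNs_AnalyzeCore(&inst, frame);   // update noise/feature estimates
   *   WebRtcNs_ProcessCore(...);            // produce the denoised frame; see
   *                                         // the declaration in ns_core.h.
   */
}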