From ac2ea030fc06669bdec22e93437753e5e933d9f1 Mon Sep 17 00:00:00 2001
From: "bjornv@webrtc.org"
Date: Thu, 29 Mar 2012 12:09:44 +0000
Subject: [PATCH] VAD Refactoring: Replaced WebRtc_ types with stdint

BUG=
TEST=vad_unittests,audioproc_unittest

Review URL: https://webrtc-codereview.appspot.com/460009
git-svn-id: http://webrtc.googlecode.com/svn/trunk@1954 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 src/common_audio/vad/include/webrtc_vad.h |   6 +-
 src/common_audio/vad/vad_core.c           | 150 +++++++++++-----
 src/common_audio/vad/vad_core.h           |  50 ++++----
 src/common_audio/vad/webrtc_vad.c         |   8 +-
 4 files changed, 105 insertions(+), 109 deletions(-)

diff --git a/src/common_audio/vad/include/webrtc_vad.h b/src/common_audio/vad/include/webrtc_vad.h
index e0387f515c..07e5cdda4a 100644
--- a/src/common_audio/vad/include/webrtc_vad.h
+++ b/src/common_audio/vad/include/webrtc_vad.h
@@ -96,10 +96,8 @@ int WebRtcVad_set_mode(VadInst* handle, int mode);
  * 0 - Non-active Voice
  * -1 - Error
  */
-WebRtc_Word16 WebRtcVad_Process(VadInst *vad_inst,
-                                WebRtc_Word16 fs,
-                                WebRtc_Word16 *speech_frame,
-                                WebRtc_Word16 frame_length);
+int16_t WebRtcVad_Process(VadInst* vad_inst, int16_t fs, int16_t* speech_frame,
+                          int16_t frame_length);
 
 #ifdef __cplusplus
 }
diff --git a/src/common_audio/vad/vad_core.c b/src/common_audio/vad/vad_core.c
index 8ad0a95e03..ef66658856 100644
--- a/src/common_audio/vad/vad_core.c
+++ b/src/common_audio/vad/vad_core.c
@@ -17,40 +17,40 @@
 #include "vad_sp.h"
 
 // Spectrum Weighting
-static const WebRtc_Word16 kSpectrumWeight[6] = { 6, 8, 10, 12, 14, 16 };
-static const WebRtc_Word16 kNoiseUpdateConst = 655; // Q15
-static const WebRtc_Word16 kSpeechUpdateConst = 6554; // Q15
-static const WebRtc_Word16 kBackEta = 154; // Q8
+static const int16_t kSpectrumWeight[6] = { 6, 8, 10, 12, 14, 16 };
+static const int16_t kNoiseUpdateConst = 655; // Q15
+static const int16_t kSpeechUpdateConst = 6554; // Q15
+static const int16_t kBackEta = 154; // Q8
 // Minimum difference between the two models, Q5
-static const WebRtc_Word16 kMinimumDifference[6] = {
+static const int16_t kMinimumDifference[6] = {
     544, 544, 576, 576, 576, 576 };
 // Upper limit of mean value for speech model, Q7
-static const WebRtc_Word16 kMaximumSpeech[6] = {
+static const int16_t kMaximumSpeech[6] = {
     11392, 11392, 11520, 11520, 11520, 11520 };
 // Minimum value for mean value
-static const WebRtc_Word16 kMinimumMean[2] = { 640, 768 };
+static const int16_t kMinimumMean[2] = { 640, 768 };
 // Upper limit of mean value for noise model, Q7
-static const WebRtc_Word16 kMaximumNoise[6] = {
+static const int16_t kMaximumNoise[6] = {
     9216, 9088, 8960, 8832, 8704, 8576 };
 // Start values for the Gaussian models, Q7
 // Weights for the two Gaussians for the six channels (noise)
-static const WebRtc_Word16 kNoiseDataWeights[12] = {
+static const int16_t kNoiseDataWeights[12] = {
     34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 };
 // Weights for the two Gaussians for the six channels (speech)
-static const WebRtc_Word16 kSpeechDataWeights[12] = {
+static const int16_t kSpeechDataWeights[12] = {
     48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 };
 // Means for the two Gaussians for the six channels (noise)
-static const WebRtc_Word16 kNoiseDataMeans[12] = {
+static const int16_t kNoiseDataMeans[12] = {
     6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 };
 // Means for the two Gaussians for the six channels (speech)
-static const WebRtc_Word16 kSpeechDataMeans[12] = {
+static const int16_t kSpeechDataMeans[12] = {
     8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483 };
 // Stds for the two Gaussians for the six channels (noise)
-static const WebRtc_Word16 kNoiseDataStds[12] = {
+static const int16_t kNoiseDataStds[12] = {
     378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 };
 // Stds for the two Gaussians for the six channels (speech)
-static const WebRtc_Word16 kSpeechDataStds[12] = {
+static const int16_t kSpeechDataStds[12] = {
     555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 };
 
 // Constants used in GmmProbability().
@@ -100,30 +100,30 @@ static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 };
 // - frame_length [i] : Number of input samples
 //
 // - returns : the VAD decision (0 - noise, 1 - speech).
-static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
-                              WebRtc_Word16 total_power, int frame_length)
+static int16_t GmmProbability(VadInstT *inst, int16_t *feature_vector,
+                              int16_t total_power, int frame_length)
 {
     int n, k;
-    WebRtc_Word16 backval;
-    WebRtc_Word16 h0, h1;
-    WebRtc_Word16 ratvec, xval;
-    WebRtc_Word16 vadflag;
-    WebRtc_Word16 shifts0, shifts1;
-    WebRtc_Word16 tmp16, tmp16_1, tmp16_2;
-    WebRtc_Word16 diff, nr, pos;
-    WebRtc_Word16 nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
-    WebRtc_Word16 delt, ndelt;
-    WebRtc_Word16 maxspe, maxmu;
-    WebRtc_Word16 deltaN[kTableSize], deltaS[kTableSize];
-    WebRtc_Word16 ngprvec[kTableSize], sgprvec[kTableSize];
-    WebRtc_Word32 h0test, h1test;
-    WebRtc_Word32 tmp32_1, tmp32_2;
-    WebRtc_Word32 dotVal;
-    WebRtc_Word32 nmid, smid;
-    WebRtc_Word32 probn[kNumGaussians], probs[kNumGaussians];
-    WebRtc_Word16 *nmean1ptr, *nmean2ptr, *smean1ptr, *smean2ptr, *nstd1ptr, *nstd2ptr,
+    int16_t backval;
+    int16_t h0, h1;
+    int16_t ratvec, xval;
+    int16_t vadflag;
+    int16_t shifts0, shifts1;
+    int16_t tmp16, tmp16_1, tmp16_2;
+    int16_t diff, nr, pos;
+    int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
+    int16_t delt, ndelt;
+    int16_t maxspe, maxmu;
+    int16_t deltaN[kTableSize], deltaS[kTableSize];
+    int16_t ngprvec[kTableSize], sgprvec[kTableSize];
+    int32_t h0test, h1test;
+    int32_t tmp32_1, tmp32_2;
+    int32_t dotVal;
+    int32_t nmid, smid;
+    int32_t probn[kNumGaussians], probs[kNumGaussians];
+    int16_t *nmean1ptr, *nmean2ptr, *smean1ptr, *smean2ptr, *nstd1ptr, *nstd2ptr,
         *sstd1ptr, *sstd2ptr;
-    WebRtc_Word16 overhead1, overhead2, individualTest, totalTest;
+    int16_t overhead1, overhead2, individualTest, totalTest;
 
     // Set the thresholds to different values based on frame length
     if (frame_length == 80)
@@ -173,22 +173,22 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
             // Probability for Noise, Q7 * Q20 = Q27
             tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean1ptr++, *nstd1ptr++, &deltaN[pos]);
-            probn[0] = (WebRtc_Word32)(kNoiseDataWeights[n] * tmp32_1);
+            probn[0] = (int32_t)(kNoiseDataWeights[n] * tmp32_1);
             tmp32_1 = WebRtcVad_GaussianProbability(xval, *nmean2ptr++, *nstd2ptr++, &deltaN[pos + 1]);
-            probn[1] = (WebRtc_Word32)(kNoiseDataWeights[n + kNumChannels] * tmp32_1);
+            probn[1] = (int32_t)(kNoiseDataWeights[n + kNumChannels] * tmp32_1);
             h0test = probn[0] + probn[1]; // Q27
-            h0 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h0test, 12); // Q15
+            h0 = (int16_t)WEBRTC_SPL_RSHIFT_W32(h0test, 12); // Q15
 
             // Probability for Speech
             tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean1ptr++, *sstd1ptr++, &deltaS[pos]);
-            probs[0] = (WebRtc_Word32)(kSpeechDataWeights[n] * tmp32_1);
+            probs[0] = (int32_t)(kSpeechDataWeights[n] * tmp32_1);
             tmp32_1 = WebRtcVad_GaussianProbability(xval, *smean2ptr++, *sstd2ptr++, &deltaS[pos + 1]);
-            probs[1] = (WebRtc_Word32)(kSpeechDataWeights[n + kNumChannels] * tmp32_1);
+            probs[1] = (int32_t)(kSpeechDataWeights[n + kNumChannels] * tmp32_1);
             h1test = probs[0] + probs[1]; // Q27
-            h1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(h1test, 12); // Q15
+            h1 = (int16_t)WEBRTC_SPL_RSHIFT_W32(h1test, 12); // Q15
 
             // Get likelihood ratio. Approximate log2(H1/H0) with shifts0 - shifts1
             shifts0 = WebRtcSpl_NormW32(h0test);
@@ -222,7 +222,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
             {
                 tmp32_1 = probn[0] & 0xFFFFF000; // Q27
                 tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2); // Q29
-                ngprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h0);
+                ngprvec[pos] = (int16_t)WebRtcSpl_DivW32W16(tmp32_2, h0);
                 ngprvec[pos + 1] = 16384 - ngprvec[pos];
             } else
             {
@@ -235,7 +235,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
             {
                 tmp32_1 = probs[0] & 0xFFFFF000;
                 tmp32_2 = WEBRTC_SPL_LSHIFT_W32(tmp32_1, 2);
-                sgprvec[pos] = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, h1);
+                sgprvec[pos] = (int16_t)WebRtcSpl_DivW32W16(tmp32_2, h1);
                 sgprvec[pos + 1] = 16384 - sgprvec[pos];
             } else
             {
@@ -271,7 +271,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
         nmid = WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n], *nmean1ptr); // Q7 * Q7
         nmid += WEBRTC_SPL_MUL_16_16(kNoiseDataWeights[n+kNumChannels],
                                      *(nmean1ptr+kNumChannels));
-        tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 6); // Q8
+        tmp16_1 = (int16_t)WEBRTC_SPL_RSHIFT_W32(nmid, 6); // Q8
 
         for (k = 0; k < kNumGaussians; k++)
         {
@@ -294,9 +294,9 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
                 // deltaN = (x-mu)/sigma^2
                 // ngprvec[k] = probn[k]/(probn[0] + probn[1])
-                delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[nr],
+                delt = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ngprvec[nr],
                                                                 deltaN[nr], 11); // Q14*Q11
-                nmk2 = nmk + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt,
+                nmk2 = nmk + (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delt,
                                                                 kNoiseUpdateConst, 22); // Q7+(Q14*Q15>>22)
             }
@@ -304,7 +304,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
             // Long term correction of the noise mean
             ndelt = WEBRTC_SPL_LSHIFT_W16(backval, 4);
             ndelt -= tmp16_1; // Q8 - Q8
-            nmk3 = nmk2 + (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(ndelt,
+            nmk3 = nmk2 + (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ndelt,
                                                                 kBackEta, 9); // Q7+(Q8*Q8)>>9
@@ -323,10 +323,10 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
                 // deltaS = (x-mu)/sigma^2
                 // sgprvec[k] = probn[k]/(probn[0] + probn[1])
-                delt = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[nr],
+                delt = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sgprvec[nr],
                                                                 deltaS[nr], 11); // (Q14*Q11)>>11=Q14
 
-                tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delt,
+                tmp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(delt,
                                                                 kSpeechUpdateConst, 21) + 1;
                 smk2 = smk + (tmp16 >> 1); // Q7 + (Q14 * Q15 >> 22)
@@ -345,18 +345,18 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
                     tmp16 = feature_vector[n] - tmp16; // Q4
                     tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaS[nr], tmp16, 3);
-                    tmp32_2 = tmp32_1 - (WebRtc_Word32)4096; // Q12
+                    tmp32_2 = tmp32_1 - (int32_t)4096; // Q12
                     tmp16 = WEBRTC_SPL_RSHIFT_W16((sgprvec[nr]), 2);
-                    tmp32_1 = (WebRtc_Word32)(tmp16 * tmp32_2);// (Q15>>3)*(Q14>>2)=Q12*Q12=Q24
+                    tmp32_1 = (int32_t)(tmp16 * tmp32_2);// (Q15>>3)*(Q14>>2)=Q12*Q12=Q24
 
                     tmp32_2 = WEBRTC_SPL_RSHIFT_W32(tmp32_1, 4); // Q20
 
                     // 0.1 * Q20 / Q7 = Q13
                    if (tmp32_2 > 0)
-                        tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_2, ssk * 10);
+                        tmp16 = (int16_t)WebRtcSpl_DivW32W16(tmp32_2, ssk * 10);
                     else
                     {
-                        tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_2, ssk * 10);
+                        tmp16 = (int16_t)WebRtcSpl_DivW32W16(-tmp32_2, ssk * 10);
                         tmp16 = -tmp16;
                     }
                     // divide by 4 giving an update factor of 0.025
@@ -375,17 +375,17 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
                 // (Q15>>3) * (Q14>>2) = Q12 * Q12 = Q24
                 tmp32_1 = WEBRTC_SPL_MUL_16_16_RSFT(deltaN[nr], tmp16, 3) - 4096;
                 tmp16 = WEBRTC_SPL_RSHIFT_W16((ngprvec[nr]+2), 2);
-                tmp32_2 = (WebRtc_Word32)(tmp16 * tmp32_1);
+                tmp32_2 = (int32_t)(tmp16 * tmp32_1);
                 tmp32_1 = WEBRTC_SPL_RSHIFT_W32(tmp32_2, 14);
 
                 // Q20 * approx 0.001 (2^-10=0.0009766)
                 // Q20 / Q7 = Q13
-                tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_1, nsk);
                 if (tmp32_1 > 0)
-                    tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32_1, nsk);
+                    tmp16 = (int16_t)WebRtcSpl_DivW32W16(tmp32_1, nsk);
                 else
                 {
-                    tmp16 = (WebRtc_Word16)WebRtcSpl_DivW32W16(-tmp32_1, nsk);
+                    tmp16 = (int16_t)WebRtcSpl_DivW32W16(-tmp32_1, nsk);
                     tmp16 = -tmp16;
                 }
                 tmp16 += 32; // Rounding
@@ -407,8 +407,8 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
         smid += WEBRTC_SPL_MUL_16_16(kSpeechDataWeights[n+kNumChannels], *smean2ptr);
 
         // diff = "global" speech mean - "global" noise mean
-        diff = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 9);
-        tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 9);
+        diff = (int16_t)WEBRTC_SPL_RSHIFT_W32(smid, 9);
+        tmp16 = (int16_t)WEBRTC_SPL_RSHIFT_W32(nmid, 9);
         diff -= tmp16;
 
         if (diff < kMinimumDifference[n])
@@ -418,8 +418,8 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
             // tmp16_1 = ~0.8 * (kMinimumDifference - diff) in Q7
             // tmp16_2 = ~0.2 * (kMinimumDifference - diff) in Q7
-            tmp16_1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(13, tmp16, 2);
-            tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(3, tmp16, 2);
+            tmp16_1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(13, tmp16, 2);
+            tmp16_2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(3, tmp16, 2);
 
             // First Gauss, speech model
             tmp16 = tmp16_1 + *smean1ptr;
@@ -445,7 +445,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
         // Control that the speech & noise means do not drift to much
         maxspe = kMaximumSpeech[n];
-        tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(smid, 7);
+        tmp16_2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(smid, 7);
         if (tmp16_2 > maxspe)
         { // Upper limit of speech model
             tmp16_2 -= maxspe;
             *smean1ptr -= tmp16_2;
             *smean2ptr -= tmp16_2;
         }
@@ -454,7 +454,7 @@ static int16_t GmmProbability(VadInstT *inst, WebRtc_Word16 *feature_vector,
-        tmp16_2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(nmid, 7);
+        tmp16_2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(nmid, 7);
         if (tmp16_2 > kMaximumNoise[n])
         {
             tmp16_2 -= kMaximumNoise[n];
@@ -610,12 +610,12 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
 
 // Calculate VAD decision by first extracting feature values and then calculate
 // probability for both speech and background noise.
-WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
-                                     int frame_length)
+int16_t WebRtcVad_CalcVad32khz(VadInstT* inst, int16_t* speech_frame,
+                               int frame_length)
 {
-    WebRtc_Word16 len, vad;
-    WebRtc_Word16 speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB)
-    WebRtc_Word16 speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
+    int16_t len, vad;
+    int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB)
+    int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
 
     // Downsample signal 32->16->8 before doing VAD
@@ -632,11 +632,11 @@ WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
     return vad;
 }
 
-WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
-                                     int frame_length)
+int16_t WebRtcVad_CalcVad16khz(VadInstT* inst, int16_t* speech_frame,
+                               int frame_length)
 {
-    WebRtc_Word16 len, vad;
-    WebRtc_Word16 speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
+    int16_t len, vad;
+    int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
 
     // Wideband: Downsample signal before doing VAD
     WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states,
@@ -648,10 +648,10 @@ WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
     return vad;
 }
 
-WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT *inst, WebRtc_Word16 *speech_frame,
-                                    int frame_length)
+int16_t WebRtcVad_CalcVad8khz(VadInstT* inst, int16_t* speech_frame,
+                              int frame_length)
 {
-    WebRtc_Word16 feature_vector[kNumChannels], total_power;
+    int16_t feature_vector[kNumChannels], total_power;
 
     // Get power in the bands
     total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length,
diff --git a/src/common_audio/vad/vad_core.h b/src/common_audio/vad/vad_core.h
index 932aeca0e2..a5c420c42c 100644
--- a/src/common_audio/vad/vad_core.h
+++ b/src/common_audio/vad/vad_core.h
@@ -26,28 +26,28 @@ enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal.
 
 typedef struct VadInstT_
 {
-    WebRtc_Word16 vad;
-    WebRtc_Word32 downsampling_filter_states[4];
-    WebRtc_Word16 noise_means[kTableSize];
-    WebRtc_Word16 speech_means[kTableSize];
-    WebRtc_Word16 noise_stds[kTableSize];
-    WebRtc_Word16 speech_stds[kTableSize];
+    int16_t vad;
+    int32_t downsampling_filter_states[4];
+    int16_t noise_means[kTableSize];
+    int16_t speech_means[kTableSize];
+    int16_t noise_stds[kTableSize];
+    int16_t speech_stds[kTableSize];
     // TODO(bjornv): Change to |frame_count|.
-    WebRtc_Word32 frame_counter;
-    WebRtc_Word16 over_hang; // Over Hang
-    WebRtc_Word16 num_of_speech;
+    int32_t frame_counter;
+    int16_t over_hang; // Over Hang
+    int16_t num_of_speech;
     // TODO(bjornv): Change to |age_vector|.
-    WebRtc_Word16 index_vector[16 * kNumChannels];
-    WebRtc_Word16 low_value_vector[16 * kNumChannels];
+    int16_t index_vector[16 * kNumChannels];
+    int16_t low_value_vector[16 * kNumChannels];
     // TODO(bjornv): Change to |median|.
-    WebRtc_Word16 mean_value[kNumChannels];
-    WebRtc_Word16 upper_state[5];
-    WebRtc_Word16 lower_state[5];
-    WebRtc_Word16 hp_filter_state[4];
-    WebRtc_Word16 over_hang_max_1[3];
-    WebRtc_Word16 over_hang_max_2[3];
-    WebRtc_Word16 individual[3];
-    WebRtc_Word16 total[3];
+    int16_t mean_value[kNumChannels];
+    int16_t upper_state[5];
+    int16_t lower_state[5];
+    int16_t hp_filter_state[4];
+    int16_t over_hang_max_1[3];
+    int16_t over_hang_max_2[3];
+    int16_t individual[3];
+    int16_t total[3];
 
     int init_flag;
@@ -100,11 +100,11 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode);
  * 0 - No active speech
  * 1-6 - Active speech
  */
-WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
-                                     int frame_length);
-WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
-                                     int frame_length);
-WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
-                                    int frame_length);
+int16_t WebRtcVad_CalcVad32khz(VadInstT* inst, int16_t* speech_frame,
+                               int frame_length);
+int16_t WebRtcVad_CalcVad16khz(VadInstT* inst, int16_t* speech_frame,
+                               int frame_length);
+int16_t WebRtcVad_CalcVad8khz(VadInstT* inst, int16_t* speech_frame,
+                              int frame_length);
 
 #endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
diff --git a/src/common_audio/vad/webrtc_vad.c b/src/common_audio/vad/webrtc_vad.c
index b0cd292930..40ada95e1d 100644
--- a/src/common_audio/vad/webrtc_vad.c
+++ b/src/common_audio/vad/webrtc_vad.c
@@ -79,12 +79,10 @@ int WebRtcVad_set_mode(VadInst* handle, int mode) {
     return WebRtcVad_set_mode_core(self, mode);
 }
 
-WebRtc_Word16 WebRtcVad_Process(VadInst *vad_inst,
-                                WebRtc_Word16 fs,
-                                WebRtc_Word16 *speech_frame,
-                                WebRtc_Word16 frame_length)
+int16_t WebRtcVad_Process(VadInst* vad_inst, int16_t fs, int16_t* speech_frame,
+                          int16_t frame_length)
 {
-    WebRtc_Word16 vad;
+    int16_t vad;
     VadInstT* vad_ptr;
 
     if (vad_inst == NULL)
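
The rename above is purely mechanical: in the WebRTC tree of this era the legacy names were plain aliases of the C99 fixed-width types, so the substitution changes neither storage sizes nor ABI, only spelling, while making the 16-bit wrap-around that the Q7/Q15 fixed-point arithmetic relies on explicit at every call site. A minimal caller of the retyped WebRtcVad_Process() is sketched below; the typedef mapping is quoted from memory of typedefs.h and the WebRtcVad_Create/Init/Free signatures are assumed from the API of this revision, so treat the sketch as illustrative rather than authoritative.

    /* Sketch only. Assumes typedefs.h of this era defined
     *   typedef int16_t WebRtc_Word16;
     *   typedef int32_t WebRtc_Word32;
     * (which is why this patch is behavior-neutral), and assumes the
     * Create/Init/Free entry points declared next to WebRtcVad_Process(). */
    #include <stdint.h>
    #include "webrtc_vad.h"

    int main(void) {
      VadInst* handle = NULL;
      int16_t frame[80] = { 0 };  /* 10 ms at 8 kHz; frame_length == 80 is one
                                   * of the sizes GmmProbability() accepts. */

      if (WebRtcVad_Create(&handle) != 0) return 1;  /* assumed signature */
      if (WebRtcVad_Init(handle) != 0) return 1;     /* assumed signature */

      /* Retyped call: every former WebRtc_Word16 is now int16_t. */
      int16_t vad = WebRtcVad_Process(handle, 8000, frame, 80);
      /* Returns 1 for active voice, 0 for non-active, -1 on error. */

      WebRtcVad_Free(handle);  /* assumed signature */
      return (vad < 0) ? 1 : 0;
    }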