From b1c3276f5a64741a09fd28f063a5ca34ed9473cf Mon Sep 17 00:00:00 2001 From: "bjornv@webrtc.org" Date: Tue, 12 Jun 2012 08:19:24 +0000 Subject: [PATCH] VAD Refactoring: WebRtcVad_Process() Code style: Indentation, braces Tested with trybot, vad_unittests, audioproc_unittest BUG=None TEST=None Review URL: https://webrtc-codereview.appspot.com/579012 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2396 4adac7df-926f-26a2-2b94-8c16560cd09d --- src/common_audio/vad/include/webrtc_vad.h | 49 ++++----- src/common_audio/vad/vad_unittest.cc | 21 +++- src/common_audio/vad/vad_unittest.h | 7 +- src/common_audio/vad/webrtc_vad.c | 118 +++++++++++----------- 4 files changed, 107 insertions(+), 88 deletions(-) diff --git a/src/common_audio/vad/include/webrtc_vad.h b/src/common_audio/vad/include/webrtc_vad.h index 860c7364d1..4bad79faf7 100644 --- a/src/common_audio/vad/include/webrtc_vad.h +++ b/src/common_audio/vad/include/webrtc_vad.h @@ -13,12 +13,10 @@ * This header file includes the VAD API calls. Specific function calls are given below. */ -#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ +#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT #define WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ -#include - -#include "typedefs.h" +#include "typedefs.h" // NOLINT typedef struct WebRtcVadInst VadInst; @@ -61,29 +59,32 @@ int WebRtcVad_Init(VadInst* handle); // has not been initialized). int WebRtcVad_set_mode(VadInst* handle, int mode); -/**************************************************************************** - * WebRtcVad_Process(...) - * - * This functions does a VAD for the inserted speech frame - * - * Input - * - vad_inst : VAD Instance. Needs to be initiated before call. 
- * - fs : sampling frequency (Hz): 8000, 16000, or 32000 - * - speech_frame : Pointer to speech frame buffer - * - frame_length : Length of speech frame buffer in number of samples - * - * Output: - * - vad_inst : Updated VAD instance - * - * Return value : 1 - Active Voice - * 0 - Non-active Voice - * -1 - Error - */ -int16_t WebRtcVad_Process(VadInst* vad_inst, int16_t fs, int16_t* speech_frame, +// Calculates a VAD decision for the |audio_frame|. For valid sampling rates +// and frame lengths, see the description of WebRtcVad_ValidRateAndFrameLength(). +// +// - handle [i/o] : VAD Instance. Needs to be initialized by +// WebRtcVad_Init() before call. +// - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 +// - audio_frame [i] : Audio frame buffer. +// - frame_length [i] : Length of audio frame buffer in number of samples. +// +// returns : 1 - (Active Voice), +// 0 - (Non-active Voice), +// -1 - (Error) +int16_t WebRtcVad_Process(VadInst* handle, int16_t fs, int16_t* audio_frame, int16_t frame_length); +// Checks for valid combinations of |rate| and |frame_length|. We support 10, +// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. +// +// - rate [i] : Sampling frequency (Hz). +// - frame_length [i] : Speech frame buffer length in number of samples.
+// +// returns : 0 - (valid combination), -1 - (invalid combination) +int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length); + #ifdef __cplusplus } #endif -#endif // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ +#endif // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT diff --git a/src/common_audio/vad/vad_unittest.cc b/src/common_audio/vad/vad_unittest.cc index ffd24277ee..a06544770d 100644 --- a/src/common_audio/vad/vad_unittest.cc +++ b/src/common_audio/vad/vad_unittest.cc @@ -12,10 +12,11 @@ #include -#include "common_audio/signal_processing/include/signal_processing_library.h" #include "gtest/gtest.h" + +#include "common_audio/signal_processing/include/signal_processing_library.h" +#include "common_audio/vad/include/webrtc_vad.h" #include "typedefs.h" -#include "webrtc_vad.h" VadTest::VadTest() {} @@ -119,6 +120,22 @@ TEST_F(VadTest, ApiTest) { EXPECT_EQ(0, WebRtcVad_Free(handle)); } +TEST_F(VadTest, ValidRatesFrameLengths) { + // This test verifies valid and invalid rate/frame_length combinations. We + // loop through sampling rates and frame lengths from negative values to + // values larger than possible. + for (int16_t rate = -1; rate <= kRates[kRatesSize - 1] + 1; rate++) { + for (int16_t frame_length = -1; frame_length <= kMaxFrameLength + 1; + frame_length++) { + if (ValidRatesAndFrameLengths(rate, frame_length)) { + EXPECT_EQ(0, WebRtcVad_ValidRateAndFrameLength(rate, frame_length)); + } else { + EXPECT_EQ(-1, WebRtcVad_ValidRateAndFrameLength(rate, frame_length)); + } + } + } +} + // TODO(bjornv): Add a process test, run on file. } // namespace diff --git a/src/common_audio/vad/vad_unittest.h b/src/common_audio/vad/vad_unittest.h index f68427c6ef..bb08843bed 100644 --- a/src/common_audio/vad/vad_unittest.h +++ b/src/common_audio/vad/vad_unittest.h @@ -8,12 +8,13 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H -#define WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H +#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_UNITTEST_H +#define WEBRTC_COMMON_AUDIO_VAD_VAD_UNITTEST_H #include <stddef.h> // size_t #include "gtest/gtest.h" + #include "typedefs.h" namespace { @@ -44,4 +45,4 @@ class VadTest : public ::testing::Test { bool ValidRatesAndFrameLengths(int16_t rate, int16_t frame_length); }; -#endif // WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H +#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_UNITTEST_H diff --git a/src/common_audio/vad/webrtc_vad.c b/src/common_audio/vad/webrtc_vad.c index 0873418929..6cbb88c9f1 100644 --- a/src/common_audio/vad/webrtc_vad.c +++ b/src/common_audio/vad/webrtc_vad.c @@ -8,13 +8,18 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "common_audio/vad/include/webrtc_vad.h" + #include #include -#include "webrtc_vad.h" -#include "vad_core.h" +#include "common_audio/vad/vad_core.h" +#include "typedefs.h" static const int kInitCheck = 42; +static const int kValidRates[] = { 8000, 16000, 32000 }; +static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates); +static const int kMaxFrameLengthMs = 30; int WebRtcVad_Create(VadInst** handle) { VadInstT* self = NULL; @@ -66,65 +71,60 @@ int WebRtcVad_set_mode(VadInst* handle, int mode) { return WebRtcVad_set_mode_core(self, mode); } -int16_t WebRtcVad_Process(VadInst* vad_inst, int16_t fs, int16_t* speech_frame, - int16_t frame_length) -{ - int16_t vad; - VadInstT* vad_ptr; +int16_t WebRtcVad_Process(VadInst* handle, int16_t fs, int16_t* audio_frame, + int16_t frame_length) { + int16_t vad = -1; + VadInstT* self = (VadInstT*) handle; - if (vad_inst == NULL) - { - return -1; - } + if (handle == NULL) { + return -1; + } - vad_ptr = (VadInstT*)vad_inst; - if (vad_ptr->init_flag != kInitCheck) - { - return -1; - } + if (self->init_flag != kInitCheck) { + return -1; + } + if (audio_frame == NULL) { + return -1; + } + if
(WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) { + return -1; + } - if (speech_frame == NULL) - { - return -1; - } + if (fs == 32000) { + vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length); + } else if (fs == 16000) { + vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length); + } else if (fs == 8000) { + vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length); + } - if (fs == 32000) - { - if ((frame_length != 320) && (frame_length != 640) && (frame_length != 960)) - { - return -1; - } - vad = WebRtcVad_CalcVad32khz((VadInstT*)vad_inst, speech_frame, frame_length); - - } else if (fs == 16000) - { - if ((frame_length != 160) && (frame_length != 320) && (frame_length != 480)) - { - return -1; - } - vad = WebRtcVad_CalcVad16khz((VadInstT*)vad_inst, speech_frame, frame_length); - - } else if (fs == 8000) - { - if ((frame_length != 80) && (frame_length != 160) && (frame_length != 240)) - { - return -1; - } - vad = WebRtcVad_CalcVad8khz((VadInstT*)vad_inst, speech_frame, frame_length); - - } else - { - return -1; // Not a supported sampling frequency - } - - if (vad > 0) - { - return 1; - } else if (vad == 0) - { - return 0; - } else - { - return -1; - } + if (vad > 0) { + vad = 1; + } + return vad; +} + +int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length) { + int return_value = -1; + size_t i; + int valid_length_ms; + int valid_length; + + // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and + // see if we have a matching pair. + for (i = 0; i < kRatesSize; i++) { + if (kValidRates[i] == rate) { + for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs; + valid_length_ms += 10) { + valid_length = (kValidRates[i] / 1000 * valid_length_ms); + if (frame_length == valid_length) { + return_value = 0; + break; + } + } + break; + } + } + + return return_value; }