AGC2 RNN VAD: initial build targets
rnn_vad_tool is an executable that reads a wav file of any sample rate compatible with 10 ms frames, resamples each frame and, once the VAD is fully landed, will process the resampled frames to compute the VAD probability. To avoid mac, win and ios trybot failures, to_be_removed.h/.cc have been added and will be removed as soon as the :lib target includes code that leads to a non-empty static lib file on those platforms. Bug: webrtc:9076 Change-Id: I810c08acfa1adf2029e3baac2adda3045ae5214a Reviewed-on: https://webrtc-review.googlesource.com/70202 Reviewed-by: Alex Loiko <aleloi@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/master@{#22898}
This commit is contained in:
committed by
Commit Bot
parent
8aba6b4114
commit
8628f5bb7c
@ -43,6 +43,7 @@ rtc_source_set("adaptive_digital") {
|
||||
"../../../rtc_base:safe_minmax",
|
||||
"../vad",
|
||||
"../vad:vad_with_level",
|
||||
"rnn_vad",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
41
modules/audio_processing/agc2/rnn_vad/BUILD.gn
Normal file
41
modules/audio_processing/agc2/rnn_vad/BUILD.gn
Normal file
@ -0,0 +1,41 @@
|
||||
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
import("../../../../webrtc.gni")
|
||||
|
||||
# Umbrella target for this directory: depending on it pulls in ":lib".
group("rnn_vad") {
  deps = [
    ":lib",
  ]
}
|
||||
|
||||
# Library target; at this stage its only source is the shared constants
# header "common.h".
source_set("lib") {
  sources = [
    "common.h",
  ]
  deps = [
    "../../../../api:array_view",
    "../../../../rtc_base:checks",
  ]
}
|
||||
|
||||
# The command line tool is test-only and is defined only when tests are
# part of the build.
if (rtc_include_tests) {
  rtc_executable("rnn_vad_tool") {
    testonly = true
    sources = [
      "rnn_vad_tool.cc",
    ]
    deps = [
      ":lib",
      "../../../../api:array_view",
      "../../../../common_audio:common_audio",
      "../../../../rtc_base:rtc_base_approved",
      "../../../../test:test_support",
    ]
  }
}
|
||||
23
modules/audio_processing/agc2/rnn_vad/common.h
Normal file
23
modules/audio_processing/agc2/rnn_vad/common.h
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_

// Needed for size_t; keeps this header self-contained instead of relying on
// whatever the including file happened to include first.
#include <stddef.h>

namespace webrtc {
namespace rnn_vad {

// 24 kHz sample rate, expressed in Hz.
constexpr size_t kSampleRate24kHz = 24000;

// Number of samples in a 10 ms frame at 24 kHz. Derived from the sample rate
// (100 frames of 10 ms per second) so the two constants cannot drift apart.
constexpr size_t kFrameSize10ms24kHz = kSampleRate24kHz / 100;
static_assert(kFrameSize10ms24kHz * 100 == kSampleRate24kHz,
              "The sample rate must be a whole number of 10 ms frames.");

}  // namespace rnn_vad
}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_
|
||||
120
modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc
Normal file
120
modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "common_audio/resampler/push_sinc_resampler.h"
|
||||
#include "common_audio/wav_file.h"
|
||||
#include "modules/audio_processing/agc2/rnn_vad/common.h"
|
||||
#include "rtc_base/flags.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace test {
|
||||
namespace {
|
||||
|
||||
using rnn_vad::kFrameSize10ms24kHz;
|
||||
|
||||
DEFINE_string(i, "", "Path to the input wav file");
|
||||
DEFINE_string(f, "", "Path to the output features file");
|
||||
DEFINE_string(o, "", "Path to the output VAD probabilities file");
|
||||
DEFINE_bool(help, false, "Prints this message");
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
rtc::LogMessage::LogToDebug(rtc::LS_INFO);
|
||||
rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true);
|
||||
if (FLAG_help) {
|
||||
rtc::FlagList::Print(nullptr, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Open wav input file and check properties.
|
||||
WavReader wav_reader(FLAG_i);
|
||||
if (wav_reader.num_channels() != 1) {
|
||||
RTC_LOG(LS_ERROR) << "Only mono wav files are supported";
|
||||
return 1;
|
||||
}
|
||||
if (wav_reader.sample_rate() % 100 != 0) {
|
||||
RTC_LOG(LS_ERROR) << "The sample rate rate must allow 10 ms frames.";
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Init output files.
|
||||
FILE* vad_probs_file = fopen(FLAG_o, "wb");
|
||||
FILE* features_file = nullptr;
|
||||
if (std::string::empty(FLAG_f)) {
|
||||
features_file = fopen(FLAG_f, "wb");
|
||||
}
|
||||
|
||||
// Init resampling.
|
||||
const size_t frame_size_10ms =
|
||||
rtc::CheckedDivExact(wav_reader.sample_rate(), 100);
|
||||
std::vector<float> samples_10ms;
|
||||
samples_10ms.resize(frame_size_10ms);
|
||||
std::array<float, kFrameSize10ms24kHz> samples_10ms_24kHz;
|
||||
PushSincResampler resampler(frame_size_10ms, kFrameSize10ms24kHz);
|
||||
|
||||
// TODO(alessiob): Init feature extractor and RNN-based VAD.
|
||||
|
||||
// Compute VAD probabilities.
|
||||
while (true) {
|
||||
// Read frame at the input sample rate.
|
||||
const auto read_samples =
|
||||
wav_reader.ReadSamples(frame_size_10ms, samples_10ms.data());
|
||||
if (read_samples < frame_size_10ms) {
|
||||
break; // EOF.
|
||||
}
|
||||
// Resample input.
|
||||
resampler.Resample(samples_10ms.data(), samples_10ms.size(),
|
||||
samples_10ms_24kHz.data(), samples_10ms_24kHz.size());
|
||||
|
||||
// TODO(alessiob): Extract features.
|
||||
float vad_probability;
|
||||
bool is_silence = true;
|
||||
|
||||
// Write features.
|
||||
if (features_file) {
|
||||
const float float_is_silence = is_silence ? 1.f : 0.f;
|
||||
fwrite(&float_is_silence, sizeof(float), 1, features_file);
|
||||
// TODO(alessiob): Write feature vector.
|
||||
}
|
||||
|
||||
// Compute VAD probability.
|
||||
if (is_silence) {
|
||||
vad_probability = 0.f;
|
||||
// TODO(alessiob): Reset VAD.
|
||||
} else {
|
||||
// TODO(alessiob): Compute VAD probability.
|
||||
}
|
||||
RTC_DCHECK_GE(vad_probability, 0.f);
|
||||
RTC_DCHECK_GE(1.f, vad_probability);
|
||||
fwrite(&vad_probability, sizeof(float), 1, vad_probs_file);
|
||||
}
|
||||
// Close output file(s).
|
||||
fclose(vad_probs_file);
|
||||
RTC_LOG(LS_INFO) << "VAD probabilities written to " << FLAG_o;
|
||||
if (features_file) {
|
||||
fclose(features_file);
|
||||
RTC_LOG(LS_INFO) << "features written to " << FLAG_f;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace webrtc
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
return webrtc::test::main(argc, argv);
|
||||
}
|
||||
Reference in New Issue
Block a user