From a44b91de3e558d9d08c768a9d39df2d6993b525d Mon Sep 17 00:00:00 2001
From: Alessio Bazzica
Date: Tue, 17 Apr 2018 15:58:40 +0200
Subject: [PATCH] Reland "Reland "AGC2 RNN VAD: initial build targets""

This reverts commit 772d43d4c0acbbc09c3dc280c0ffd17e9569700b.

Reason for revert: fix issues and reland revert

Original change's description:
> Revert "Reland "AGC2 RNN VAD: initial build targets""
>
> This reverts commit e0031500bafa4b114adfb2d29444592f1e7b3795.
>
> Reason for revert: reland automatically landed by mistake
>
> Original change's description:
> > Reland "AGC2 RNN VAD: initial build targets"
> >
> > This reverts commit a153c00bceb832e697365f52e6a7697fdf210efa.
> >
> > Reason for revert: fix issues and reland revert
> >
> > Original change's description:
> > > Revert "AGC2 RNN VAD: initial build targets"
> > >
> > > This reverts commit 8628f5bb7c7f5bd0373567095af08cebe8bb7f8d.
> > >
> > > Reason for revert: iOS buildbot failing
> > >
> > > Original change's description:
> > > > AGC2 RNN VAD: initial build targets
> > > >
> > > > rnn_vad_tool is an executable that reads a wav file of any sample rate
> > > > compatible with 10 ms frames that are resampled and, when the VAD is
> > > > fully landed, will process the resampled frames to compute the VAD
> > > > probability.
> > > >
> > > > To avoid mac, win and ios trybot failures, to_be_removed.h/.cc have
> > > > been added and will be removed as soon as the :lib target includes
> > > > code that leads to a non-empty static lib file on those platforms.
> > > >
> > > > Bug: webrtc:9076
> > > > Change-Id: I810c08acfa1adf2029e3baac2adda3045ae5214a
> > > > Reviewed-on: https://webrtc-review.googlesource.com/70202
> > > > Reviewed-by: Alex Loiko
> > > > Commit-Queue: Alessio Bazzica
> > > > Cr-Commit-Position: refs/heads/master@{#22898}
> > >
> > > TBR=alessiob@webrtc.org,aleloi@webrtc.org
> > >
> > > Change-Id: Ic6014dde78b0ef371804c52608145ba8acdd9c97
> > > No-Presubmit: true
> > > No-Tree-Checks: true
> > > No-Try: true
> > > Bug: webrtc:9076
> > > Reviewed-on: https://webrtc-review.googlesource.com/70144
> > > Reviewed-by: Alessio Bazzica
> > > Commit-Queue: Alessio Bazzica
> > > Cr-Commit-Position: refs/heads/master@{#22899}
> >
> > TBR=alessiob@webrtc.org,aleloi@webrtc.org
> >
> > Change-Id: I55e5a77274684b4cff3c950ca3514cc769d5dc26
> > No-Presubmit: true
> > No-Tree-Checks: true
> > No-Try: true
> > Bug: webrtc:9076
> > Reviewed-on: https://webrtc-review.googlesource.com/70145
> > Reviewed-by: Alessio Bazzica
> > Commit-Queue: Alessio Bazzica
> > Cr-Commit-Position: refs/heads/master@{#22901}
>
> TBR=alessiob@webrtc.org,aleloi@webrtc.org
>
> Change-Id: Ia6a837f79ac3f12aa4b0659938454141c69fee61
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Bug: webrtc:9076
> Reviewed-on: https://webrtc-review.googlesource.com/70520
> Reviewed-by: Alessio Bazzica
> Commit-Queue: Alessio Bazzica
> Cr-Commit-Position: refs/heads/master@{#22902}

TBR=alessiob@webrtc.org,aleloi@webrtc.org

Change-Id: If0884ab59d66ac3ba6460dbfe14a083f20493c10
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: webrtc:9076
Reviewed-on: https://webrtc-review.googlesource.com/70521
Reviewed-by: Minyue Li
Reviewed-by: Alessio Bazzica
Commit-Queue: Alessio Bazzica
Cr-Commit-Position: refs/heads/master@{#22904}
---
 modules/audio_processing/agc2/BUILD.gn        |   1 +
 .../audio_processing/agc2/rnn_vad/BUILD.gn    |  41 +++++
 .../audio_processing/agc2/rnn_vad/common.h    |  26 +++
 .../agc2/rnn_vad/rnn_vad_tool.cc              | 152 ++++++++++++++++++
 4 files changed, 220 insertions(+)
 create mode 100644 modules/audio_processing/agc2/rnn_vad/BUILD.gn
 create mode 100644 modules/audio_processing/agc2/rnn_vad/common.h
 create mode 100644 modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc

diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index cc145fb43f..6f92f848cc 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -43,6 +43,7 @@ rtc_source_set("adaptive_digital") {
     "../../../rtc_base:safe_minmax",
     "../vad",
     "../vad:vad_with_level",
+    "rnn_vad",
   ]
 }
 
diff --git a/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/modules/audio_processing/agc2/rnn_vad/BUILD.gn
new file mode 100644
index 0000000000..1941eff679
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/BUILD.gn
@@ -0,0 +1,41 @@
+# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../../../webrtc.gni")
+
+group("rnn_vad") {
+  deps = [
+    ":lib",
+  ]
+}
+
+source_set("lib") {
+  sources = [
+    "common.h",
+  ]
+  deps = [
+    "../../../../api:array_view",
+    "../../../../rtc_base:checks",
+  ]
+}
+
+if (rtc_include_tests) {
+  rtc_executable("rnn_vad_tool") {
+    testonly = true
+    sources = [
+      "rnn_vad_tool.cc",
+    ]
+    deps = [
+      ":lib",
+      "../../../../api:array_view",
+      "../../../../common_audio:common_audio",
+      "../../../../rtc_base:rtc_base_approved",
+      "../../../../test:test_support",
+    ]
+  }
+}
diff --git a/modules/audio_processing/agc2/rnn_vad/common.h b/modules/audio_processing/agc2/rnn_vad/common.h
new file mode 100644
index 0000000000..93569ff1e9
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/common.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_
+
+#include <stddef.h>
+
+namespace webrtc {
+namespace rnn_vad {
+
+// 24 kHz sample rate and the corresponding 10 ms frame size in samples.
+constexpr size_t kSampleRate24kHz = 24000;
+constexpr size_t kFrameSize10ms24kHz = 240;
+
+}  // namespace rnn_vad
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_COMMON_H_
diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc b/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc
new file mode 100644
index 0000000000..af8905285c
--- /dev/null
+++ b/modules/audio_processing/agc2/rnn_vad/rnn_vad_tool.cc
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <array>
+#include <cstdio>
+#include <string>
+#include <vector>
+
+#include "common_audio/resampler/push_sinc_resampler.h"
+#include "common_audio/wav_file.h"
+#include "modules/audio_processing/agc2/rnn_vad/common.h"
+#include "rtc_base/flags.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+using rnn_vad::kFrameSize10ms24kHz;
+
+DEFINE_string(i, "", "Path to the input wav file");
+std::string InputWavFile() {
+  return static_cast<std::string>(FLAG_i);
+}
+
+DEFINE_string(f, "", "Path to the output features file");
+std::string OutputFeaturesFile() {
+  return static_cast<std::string>(FLAG_f);
+}
+
+DEFINE_string(o, "", "Path to the output VAD probabilities file");
+std::string OutputVadProbsFile() {
+  return static_cast<std::string>(FLAG_o);
+}
+
+DEFINE_bool(help, false, "Prints this message");
+
+}  // namespace
+
+// Reads the mono wav file given by flag `i`, resamples every 10 ms frame to
+// 24 kHz and writes one float VAD probability per frame to the file given by
+// flag `o` and, if flag `f` is set, one float silence flag per frame to the
+// features file. Until the feature extractor and the RNN are added, every
+// frame is flagged as silence and its probability is 0. Returns 0 on success
+// and 1 on failure.
+int main(int argc, char* argv[]) {
+  rtc::LogMessage::LogToDebug(rtc::LS_INFO);
+  rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true);
+  if (FLAG_help) {
+    rtc::FlagList::Print(nullptr, false);
+    return 0;
+  }
+
+  // Open wav input file and check properties.
+  WavReader wav_reader(InputWavFile());
+  if (wav_reader.num_channels() != 1) {
+    RTC_LOG(LS_ERROR) << "Only mono wav files are supported";
+    return 1;
+  }
+  if (wav_reader.sample_rate() % 100 != 0) {
+    RTC_LOG(LS_ERROR) << "The sample rate must allow 10 ms frames.";
+    return 1;
+  }
+  RTC_LOG(LS_INFO) << "Input sample rate: " << wav_reader.sample_rate();
+
+  // Init output files. fopen() returns nullptr on failure (e.g., when the
+  // path is empty), so bail out instead of handing a null stream to fwrite().
+  FILE* vad_probs_file = fopen(OutputVadProbsFile().c_str(), "wb");
+  if (!vad_probs_file) {
+    RTC_LOG(LS_ERROR) << "Cannot open the output VAD probabilities file";
+    return 1;
+  }
+  FILE* features_file = nullptr;
+  const std::string output_feature_file = OutputFeaturesFile();
+  if (!output_feature_file.empty()) {
+    features_file = fopen(output_feature_file.c_str(), "wb");
+    if (!features_file) {
+      RTC_LOG(LS_ERROR) << "Cannot open the output features file";
+      fclose(vad_probs_file);
+      return 1;
+    }
+  }
+
+  // Init resampling.
+  const size_t frame_size_10ms =
+      rtc::CheckedDivExact(wav_reader.sample_rate(), 100);
+  std::vector<float> samples_10ms;
+  samples_10ms.resize(frame_size_10ms);
+  std::array<float, kFrameSize10ms24kHz> samples_10ms_24kHz;
+  PushSincResampler resampler(frame_size_10ms, kFrameSize10ms24kHz);
+
+  // TODO(alessiob): Init feature extractor and RNN-based VAD.
+
+  // Compute VAD probabilities.
+  while (true) {
+    // Read frame at the input sample rate.
+    const auto read_samples =
+        wav_reader.ReadSamples(frame_size_10ms, samples_10ms.data());
+    if (read_samples < frame_size_10ms) {
+      break;  // EOF.
+    }
+    // Resample input.
+    resampler.Resample(samples_10ms.data(), samples_10ms.size(),
+                       samples_10ms_24kHz.data(), samples_10ms_24kHz.size());
+
+    // TODO(alessiob): Extract features.
+    bool is_silence = true;
+    // Zero-initialized: the non-silence branch below is still a stub and must
+    // not leave an indeterminate value for the checks and the write.
+    float vad_probability = 0.f;
+
+    // Write features.
+    if (features_file) {
+      const float float_is_silence = is_silence ? 1.f : 0.f;
+      fwrite(&float_is_silence, sizeof(float), 1, features_file);
+      // TODO(alessiob): Write feature vector.
+    }
+
+    // Compute VAD probability.
+    if (is_silence) {
+      vad_probability = 0.f;
+      // TODO(alessiob): Reset VAD.
+    } else {
+      // TODO(alessiob): Compute VAD probability.
+    }
+    RTC_DCHECK_GE(vad_probability, 0.f);
+    RTC_DCHECK_GE(1.f, vad_probability);
+    fwrite(&vad_probability, sizeof(float), 1, vad_probs_file);
+  }
+  // Close output file(s).
+  fclose(vad_probs_file);
+  RTC_LOG(LS_INFO) << "VAD probabilities written to " << FLAG_o;
+  if (features_file) {
+    fclose(features_file);
+    RTC_LOG(LS_INFO) << "features written to " << FLAG_f;
+  }
+
+  return 0;
+}
+
+}  // namespace test
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  return webrtc::test::main(argc, argv);
+}