Conversational Speech generator, main script with shell arguments
BUG=webrtc:7218 NOTRY=True Review-Url: https://codereview.webrtc.org/2733863002 Cr-Commit-Position: refs/heads/master@{#17117}
This commit is contained in:
@ -26,12 +26,16 @@ IMPORTANT: **the whole code has not been landed yet.**
|
||||
For each end, there is a set of audio tracks, e.g., a1, a2 and a3 (speaker A)
|
||||
and b1, b2 (speaker B).
|
||||
The text file with the timing information may look like this:
|
||||
``` A a1 0
|
||||
B b1 0
|
||||
A a2 100
|
||||
B b2 -200
|
||||
A a3 0
|
||||
A a4 0```
|
||||
|
||||
```
|
||||
A a1 0
|
||||
B b1 0
|
||||
A a2 100
|
||||
B b2 -200
|
||||
A a3 0
|
||||
A a4 0
|
||||
```
|
||||
|
||||
The first column indicates the speaker name, the second contains the audio track
|
||||
file names, and the third the offsets (in milliseconds) used to concatenate the
|
||||
chunks.
|
||||
@ -39,26 +43,32 @@ chunks.
|
||||
Assume that all the audio tracks in the example above are 1000 ms long.
|
||||
The tool will then generate two tracks (A and B) that look like this:
|
||||
|
||||
```Track A:
|
||||
**Track A**
|
||||
```
|
||||
a1 (1000 ms)
|
||||
silence (1100 ms)
|
||||
a2 (1000 ms)
|
||||
silence (800 ms)
|
||||
a3 (1000 ms)
|
||||
a4 (1000 ms)```
|
||||
a4 (1000 ms)
|
||||
```
|
||||
|
||||
```Track B:
|
||||
**Track B**
|
||||
```
|
||||
silence (1000 ms)
|
||||
b1 (1000 ms)
|
||||
silence (900 ms)
|
||||
b2 (1000 ms)
|
||||
silence (2000 ms)```
|
||||
silence (2000 ms)
|
||||
```
|
||||
|
||||
The two tracks can also be visualized as follows (one character represents
|
||||
100 ms, "." is silence and "*" is speech).
|
||||
|
||||
```t: 0 1 2 3 4 5 6 (s)
|
||||
```
|
||||
t: 0 1 2 3 4 5 6 (s)
|
||||
A: **********...........**********........********************
|
||||
B: ..........**********.........**********....................
|
||||
^ 200 ms cross-talk
|
||||
100 ms silence ^```
|
||||
100 ms silence ^
|
||||
```
|
||||
|
||||
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
"""Generate multiple-end audio tracks to simulate conversational
|
||||
speech with two or more participants.
|
||||
|
||||
Usage: generate_conversational_tracks.py
|
||||
-i path/to/source/audiotracks
|
||||
-t path/to/timing_file.txt
|
||||
-o output/path
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
|
||||
def _InstanceArgumentsParser():
|
||||
parser = argparse.ArgumentParser(description=(
|
||||
'Generate multiple-end audio tracks to simulate conversational speech '
|
||||
'with two or more participants.'))
|
||||
|
||||
parser.add_argument('-i', '--input_tracks_path', required=True,
|
||||
help='directory containing the speech turn wav files')
|
||||
|
||||
parser.add_argument('-t', '--timing_file', required=True,
|
||||
help='path to the timing text file')
|
||||
|
||||
parser.add_argument('-o', '--output_dir', required=False,
|
||||
help=('base path to the output directory in which the '
|
||||
'output wav files are saved'),
|
||||
default='output')
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
def main(argv=None):
  """Parses the command line arguments, logs them and terminates the script.

  Args:
    argv: optional list of command line arguments (program name excluded).
      When None, argparse reads the arguments from sys.argv.

  Raises:
    SystemExit: always. argparse raises it when parsing fails or help is
      requested; otherwise it is raised with code 0 at the end of the
      function.
  """
  # TODO(alessiob): level = logging.INFO once debugged.
  logging.basicConfig(level=logging.DEBUG)

  parser = _InstanceArgumentsParser()
  args = parser.parse_args(argv)

  # TODO(alessiob): pass the arguments to the app controller.

  # TODO(alessiob): remove when comment above addressed.
  logging.debug(args)

  sys.exit(0)
|
||||
|
||||
|
||||
# Run the generator only when the file is executed as a script, not on import.
if __name__ == '__main__':
  main()
|
||||
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
import unittest
|
||||
|
||||
import generate_conversational_tracks
|
||||
|
||||
class TestGenerationScript(unittest.TestCase):
  """Unit tests for the generate_conversational_tracks script."""

  def testMain(self):
    """Checks that main() exits with an error code without arguments.

    The method name starts with the lowercase 'test' prefix so that the
    default unittest loader discovers it.
    """
    # Imported locally since only this test needs it.
    import sys

    # main() parses sys.argv; drop the test runner arguments so that the
    # parser really sees an empty command line.
    original_argv = sys.argv
    sys.argv = original_argv[:1]
    try:
      # Exit with error code if no arguments are passed.
      with self.assertRaises(SystemExit) as cm:
        generate_conversational_tracks.main()
    finally:
      sys.argv = original_argv
    self.assertGreater(cm.exception.code, 0)
|
||||
Reference in New Issue
Block a user