Optionally copy clean speech input files under _cache with APM-QA.

TBR=

Bug: webrtc:7494
Change-Id: I41c5cfc6fd57aefaf246816c0ba4094947b9e767
Reviewed-on: https://webrtc-review.googlesource.com/13123
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20343}
This commit is contained in:
Alessio Bazzica
2017-10-18 15:24:57 +02:00
committed by Commit Bot
parent bd83b914c3
commit 849030dab8
6 changed files with 96 additions and 9 deletions

View File

@ -108,6 +108,11 @@ def _InstanceArgumentsParser():
AudioProcWrapper. \
DEFAULT_APM_SIMULATOR_BIN_PATH)
# Without an explicit type argparse stores the raw command line string, and any
# non-empty string (including 'False' or '0') is truthy when the flag is later
# tested. Parse the value explicitly so that only affirmative spellings enable
# the copy. The converter is not applied to the non-string default.
parser.add_argument('--copy_with_identity_generator', required=False,
                    type=lambda value: value.strip().lower() in (
                        'true', '1', 'yes'),
                    help=('If true, the identity test data generator makes a '
                          'copy of the clean speech input file.'),
                    default=False)
return parser
@ -135,7 +140,8 @@ def main():
test_data_generator_factory=(
test_data_generation_factory.TestDataGeneratorFactory(
aechen_ir_database_path=args.air_db_path,
noise_tracks_path=args.additive_noise_tracks_path)),
noise_tracks_path=args.additive_noise_tracks_path,
copy_with_identity=args.copy_with_identity_generator)),
evaluation_score_factory=eval_scores_factory.EvaluationScoreWorkerFactory(
polqa_tool_bin_path=os.path.join(args.polqa_path, _POLQA_BIN_NAME)),
ap_wrapper=audioproc_wrapper.AudioProcWrapper(args.apm_sim_path),

View File

@ -41,7 +41,8 @@ class TestEchoPathSimulators(unittest.TestCase):
test_data_generator_factory=(
test_data_generation_factory.TestDataGeneratorFactory(
aechen_ir_database_path='',
noise_tracks_path='')),
noise_tracks_path='',
copy_with_identity=False)),
evaluation_score_factory=(
eval_scores_factory.EvaluationScoreWorkerFactory(
polqa_tool_bin_path=os.path.join(

View File

@ -64,7 +64,8 @@ class TestApmModuleSimulator(unittest.TestCase):
test_data_generator_factory = (
test_data_generation_factory.TestDataGeneratorFactory(
aechen_ir_database_path='',
noise_tracks_path=''))
noise_tracks_path='',
copy_with_identity=False))
evaluation_score_factory = eval_scores_factory.EvaluationScoreWorkerFactory(
polqa_tool_bin_path=os.path.join(
os.path.dirname(__file__), 'fake_polqa'))
@ -108,7 +109,8 @@ class TestApmModuleSimulator(unittest.TestCase):
test_data_generator_factory=(
test_data_generation_factory.TestDataGeneratorFactory(
aechen_ir_database_path='',
noise_tracks_path='')),
noise_tracks_path='',
copy_with_identity=False)),
evaluation_score_factory=(
eval_scores_factory.EvaluationScoreWorkerFactory(
polqa_tool_bin_path=os.path.join(
@ -143,7 +145,8 @@ class TestApmModuleSimulator(unittest.TestCase):
test_data_generator_factory=(
test_data_generation_factory.TestDataGeneratorFactory(
aechen_ir_database_path='',
noise_tracks_path='')),
noise_tracks_path='',
copy_with_identity=False)),
evaluation_score_factory=(
eval_scores_factory.EvaluationScoreWorkerFactory(
polqa_tool_bin_path=os.path.join(

View File

@ -23,6 +23,7 @@ obtained by convolving the input signal with an impulse response.
import logging
import os
import shutil
import sys
try:
@ -182,13 +183,27 @@ class IdentityTestDataGenerator(TestDataGenerator):
NAME = 'identity'
def __init__(self, output_directory_prefix):
# Ctor. Forwards |output_directory_prefix| to the TestDataGenerator initializer
# and stores |copy_with_identity|, the flag that _Generate() tests to decide
# whether the input signal file is copied into the test data cache path before
# being registered as the noisy signal.
def __init__(self, output_directory_prefix, copy_with_identity):
TestDataGenerator.__init__(self, output_directory_prefix)
self._copy_with_identity = copy_with_identity
@property
def copy_with_identity(self):
  """Read-only view of the flag given to the constructor.

  Returns:
    The |copy_with_identity| value stored at construction time; when it is
    truthy, _Generate() copies the input signal file before using it.
  """
  flag = self._copy_with_identity
  return flag
def _Generate(
self, input_signal_filepath, test_data_cache_path, base_output_path):
config_name = 'default'
output_path = self._MakeDir(base_output_path, config_name)
if self._copy_with_identity:
input_signal_filepath_new = os.path.join(
test_data_cache_path, os.path.split(input_signal_filepath)[1])
logging.info('copying ' + input_signal_filepath + ' to ' + (
input_signal_filepath_new))
shutil.copy(input_signal_filepath, input_signal_filepath_new)
input_signal_filepath = input_signal_filepath_new
self._AddNoiseReferenceFilesPair(
config_name=config_name,
noisy_signal_filepath=input_signal_filepath,

View File

@ -22,10 +22,20 @@ class TestDataGeneratorFactory(object):
generators will be produced.
"""
def __init__(self, aechen_ir_database_path, noise_tracks_path):
def __init__(self, aechen_ir_database_path, noise_tracks_path,
copy_with_identity):
"""Ctor.
Stores the settings that are later handed to the generators created by this
factory. The output directory prefix starts as None and is assigned through
SetOutputDirectoryPrefix().
Args:
aechen_ir_database_path: Path to the Aechen Impulse Response database;
passed to the reverberation test data generator.
noise_tracks_path: Path to the noise tracks to add.
copy_with_identity: Flag indicating whether the identity generator has to
make copies of the clean speech input files; passed to the identity test
data generator.
"""
self._output_directory_prefix = None
self._aechen_ir_database_path = aechen_ir_database_path
self._noise_tracks_path = noise_tracks_path
self._copy_with_identity = copy_with_identity
def SetOutputDirectoryPrefix(self, prefix):
  """Sets the output directory prefix given to the created generators.

  Args:
    prefix: Value stored as the output directory prefix; it replaces any
      previously stored one.
  """
  self._output_directory_prefix = prefix
@ -46,6 +56,10 @@ class TestDataGeneratorFactory(object):
logging.debug('factory producing %s', test_data_generators_class)
if test_data_generators_class == (
test_data_generation.IdentityTestDataGenerator):
return test_data_generation.IdentityTestDataGenerator(
self._output_directory_prefix, self._copy_with_identity)
elif test_data_generators_class == (
test_data_generation.ReverberationTestDataGenerator):
return test_data_generation.ReverberationTestDataGenerator(
self._output_directory_prefix, self._aechen_ir_database_path)

View File

@ -68,10 +68,11 @@ class TestTestDataGenerators(unittest.TestCase):
aechen_ir_database_path=self._fake_air_db_path,
noise_tracks_path=test_data_generation. \
AdditiveNoiseTestDataGenerator. \
DEFAULT_NOISE_TRACKS_PATH)
DEFAULT_NOISE_TRACKS_PATH,
copy_with_identity=False)
generators_factory.SetOutputDirectoryPrefix('datagen-')
# Use a sample input file as clean input signal.
# Use a simple input file as clean input signal.
input_signal_filepath = os.path.join(
os.getcwd(), 'probing_signals', 'tone-880.wav')
self.assertTrue(os.path.exists(input_signal_filepath))
@ -97,6 +98,53 @@ class TestTestDataGenerators(unittest.TestCase):
self._CheckGeneratedPairsSignalDurations(generator, input_signal)
self._CheckGeneratedPairsOutputPaths(generator)
def testTestidentityDataGenerator(self):
  """Checks that the identity generator honors |copy_with_identity|.

  The generator is created through the factory once with the flag disabled
  and once with it enabled. With the flag disabled the noisy and reference
  paths must be the input file itself; with the flag enabled they must point
  to a different file.
  """
  # Preliminary check.
  self.assertTrue(os.path.exists(self._base_output_path))
  self.assertTrue(os.path.exists(self._test_data_cache_path))

  # Use a simple input file as clean input signal.
  input_signal_filepath = os.path.join(
      os.getcwd(), 'probing_signals', 'tone-880.wav')
  self.assertTrue(os.path.exists(input_signal_filepath))

  def GetNoiseReferenceFilePaths(identity_generator):
    """Returns the single (noisy, reference) file path pair generated."""
    noisy_signal_filepaths = identity_generator.noisy_signal_filepaths
    reference_signal_filepaths = identity_generator.reference_signal_filepaths
    # Use unittest assertions: bare assert statements are stripped when
    # Python runs with -O and give no diagnostic on failure.
    self.assertEqual(sorted(noisy_signal_filepaths),
                     sorted(reference_signal_filepaths))
    self.assertEqual(1, len(noisy_signal_filepaths))
    # keys() cannot be indexed on Python 3; take the only key via an iterator,
    # which works on both Python 2 and Python 3.
    key = next(iter(noisy_signal_filepaths))
    return noisy_signal_filepaths[key], reference_signal_filepaths[key]

  # Test the |copy_with_identity| flag.
  for copy_with_identity in [False, True]:
    # Instance the generator through the factory.
    factory = test_data_generation_factory.TestDataGeneratorFactory(
        aechen_ir_database_path='', noise_tracks_path='',
        copy_with_identity=copy_with_identity)
    factory.SetOutputDirectoryPrefix('datagen-')
    generator = factory.GetInstance(
        test_data_generation.IdentityTestDataGenerator)
    # Check |copy_with_identity| is set correctly.
    self.assertEqual(copy_with_identity, generator.copy_with_identity)

    # Generate test data and extract the paths to the noise and the reference
    # files.
    generator.Generate(
        input_signal_filepath=input_signal_filepath,
        test_data_cache_path=self._test_data_cache_path,
        base_output_path=self._base_output_path)
    noisy_signal_filepath, reference_signal_filepath = (
        GetNoiseReferenceFilePaths(generator))

    # Check that a copy is made if and only if |copy_with_identity| is True.
    if copy_with_identity:
      self.assertNotEqual(noisy_signal_filepath, input_signal_filepath)
      self.assertNotEqual(reference_signal_filepath, input_signal_filepath)
      # A different path alone does not prove that a copy was written.
      self.assertTrue(os.path.exists(noisy_signal_filepath))
    else:
      self.assertEqual(noisy_signal_filepath, input_signal_filepath)
      self.assertEqual(reference_signal_filepath, input_signal_filepath)
def _CheckGeneratedPairsListSizes(self, generator):
config_names = generator.config_names
number_of_pairs = len(config_names)