diff --git a/webrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn b/webrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn
index 5b75153ffe..cf39d72b4a 100644
--- a/webrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn
+++ b/webrtc/modules/audio_processing/test/py_quality_assessment/BUILD.gn
@@ -24,6 +24,7 @@ copy("scripts") {
     "apm_quality_assessment.sh",
     "apm_quality_assessment_export.py",
     "apm_quality_assessment_gencfgs.py",
+    "apm_quality_assessment_optimize.py",
   ]
   outputs = [
     "$root_build_dir/py_quality_assessment/{{source_file_part}}",
@@ -53,6 +54,7 @@ copy("lib") {
   sources = [
     "quality_assessment/__init__.py",
     "quality_assessment/audioproc_wrapper.py",
+    "quality_assessment/collect_data.py",
     "quality_assessment/data_access.py",
     "quality_assessment/echo_path_simulation.py",
     "quality_assessment/echo_path_simulation_factory.py",
diff --git a/webrtc/modules/audio_processing/test/py_quality_assessment/README.md b/webrtc/modules/audio_processing/test/py_quality_assessment/README.md
index 981f31555a..e19a780236 100644
--- a/webrtc/modules/audio_processing/test/py_quality_assessment/README.md
+++ b/webrtc/modules/audio_processing/test/py_quality_assessment/README.md
@@ -81,7 +81,7 @@ export separate reports. In this case, you can use the
 For instance:
 
 ```
-$ ./apm_quality_assessment-export.py \
+$ ./apm_quality_assessment_export.py \
   -o output/ \
   -c "(^default$)|(.*AE.*)" \
   -t \(white_noise\) \
diff --git a/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py b/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py
index 29618dcb6e..2d5b7cbe7c 100755
--- a/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py
+++ b/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_export.py
@@ -11,142 +11,13 @@ HTML file.
 """
 
-import argparse
 import logging
-import glob
 import os
-import re
 import sys
 
-try:
-  import pandas as pd
-except ImportError:
-  logging.critical('Cannot import the third-party Python package pandas')
-  sys.exit(1)
-
-import quality_assessment.data_access as data_access
+import quality_assessment.collect_data as collect_data
 import quality_assessment.export as export
-import quality_assessment.simulation as sim
-
-# Compiled regular expressions used to extract score descriptors.
-RE_CONFIG_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixApmConfig() + r'(.+)')
-RE_CAPTURE_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixCapture() + r'(.+)')
-RE_RENDER_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)')
-RE_ECHO_SIM_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixEchoSimulator() + r'(.+)')
-RE_TEST_DATA_GEN_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)')
-RE_TEST_DATA_GEN_PARAMS = re.compile(
-    sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)')
-RE_SCORE_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixScore() + r'(.+)(\..+)')
-
-
-def _InstanceArgumentsParser():
-  """Arguments parser factory.
- """ - parser = argparse.ArgumentParser(description=( - 'Exports pre-computed APM module quality assessment results into HTML ' - 'tables.')) - - parser.add_argument('-o', '--output_dir', required=True, - help=('the same base path used with the ' - 'apm_quality_assessment tool')) - - parser.add_argument('-f', '--filename_suffix', - help=('suffix of the exported file')) - - parser.add_argument('-c', '--config_names', type=re.compile, - help=('regular expression to filter the APM configuration' - ' names')) - - parser.add_argument('-i', '--capture_names', type=re.compile, - help=('regular expression to filter the capture signal ' - 'names')) - - parser.add_argument('-r', '--render_names', type=re.compile, - help=('regular expression to filter the render signal ' - 'names')) - - parser.add_argument('-e', '--echo_simulator_names', type=re.compile, - help=('regular expression to filter the echo simulator ' - 'names')) - - parser.add_argument('-t', '--test_data_generators', type=re.compile, - help=('regular expression to filter the test data ' - 'generator names')) - - parser.add_argument('-s', '--eval_scores', type=re.compile, - help=('regular expression to filter the evaluation score ' - 'names')) - - return parser - - -def _GetScoreDescriptors(score_filepath): - """Extracts a score descriptor from the given score file path. - - Args: - score_filepath: path to the score file. - - Returns: - A tuple of strings (APM configuration name, capture audio track name, - render audio track name, echo simulator name, test data generator name, - test data generator parameters as string, evaluation score name). - """ - fields = score_filepath.split(os.sep)[-7:] - extract_name = lambda index, reg_expr: ( - reg_expr.match(fields[index]).groups(0)[0]) - return ( - extract_name(0, RE_CONFIG_NAME), - extract_name(1, RE_CAPTURE_NAME), - extract_name(2, RE_RENDER_NAME), - extract_name(3, RE_ECHO_SIM_NAME), - extract_name(4, RE_TEST_DATA_GEN_NAME), - extract_name(5, RE_TEST_DATA_GEN_PARAMS), - extract_name(6, RE_SCORE_NAME), - ) - - -def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name, - test_data_gen_name, score_name, args): - """Decides whether excluding a score. - - A set of optional regular expressions in args is used to determine if the - score should be excluded (depending on its |*_name| descriptors). - - Args: - config_name: APM configuration name. - capture_name: capture audio track name. - render_name: render audio track name. - echo_simulator_name: echo simulator name. - test_data_gen_name: test data generator name. - score_name: evaluation score name. - args: parsed arguments. - - Returns: - A boolean. - """ - value_regexpr_pairs = [ - (config_name, args.config_names), - (capture_name, args.capture_names), - (render_name, args.render_names), - (echo_simulator_name, args.echo_simulator_names), - (test_data_gen_name, args.test_data_generators), - (score_name, args.eval_scores), - ] - - # Score accepted if each value matches the corresponding regular expression. - for value, regexpr in value_regexpr_pairs: - if regexpr is None: - continue - if not regexpr.match(value): - return True - - return False def _BuildOutputFilename(filename_suffix): @@ -162,111 +33,18 @@ def _BuildOutputFilename(filename_suffix): return 'results.html' return 'results-{}.html'.format(filename_suffix) - -def _FindScores(src_path, args): - """Given a search path, find scores and return a DataFrame object. - - Args: - src_path: Search path pattern. - args: parsed arguments. 
- - Returns: - A DataFrame object. - """ - # Get scores. - scores = [] - for score_filepath in glob.iglob(src_path): - # Extract score descriptor fields from the path. - (config_name, - capture_name, - render_name, - echo_simulator_name, - test_data_gen_name, - test_data_gen_params, - score_name) = _GetScoreDescriptors(score_filepath) - - # Ignore the score if required. - if _ExcludeScore( - config_name, - capture_name, - render_name, - echo_simulator_name, - test_data_gen_name, - score_name, - args): - logging.info( - 'ignored score: %s %s %s %s %s %s', - config_name, - capture_name, - render_name, - echo_simulator_name, - test_data_gen_name, - score_name) - continue - - # Read metadata and score. - metadata = data_access.Metadata.LoadAudioTestDataPaths( - os.path.split(score_filepath)[0]) - score = data_access.ScoreFile.Load(score_filepath) - - # Add a score with its descriptor fields. - scores.append(( - metadata['clean_capture_input_filepath'], - metadata['echo_free_capture_filepath'], - metadata['echo_filepath'], - metadata['render_filepath'], - metadata['capture_filepath'], - metadata['apm_output_filepath'], - metadata['apm_reference_filepath'], - config_name, - capture_name, - render_name, - echo_simulator_name, - test_data_gen_name, - test_data_gen_params, - score_name, - score, - )) - - return pd.DataFrame( - data=scores, - columns=( - 'clean_capture_input_filepath', - 'echo_free_capture_filepath', - 'echo_filepath', - 'render_filepath', - 'capture_filepath', - 'apm_output_filepath', - 'apm_reference_filepath', - 'apm_config', - 'capture', - 'render', - 'echo_simulator', - 'test_data_gen', - 'test_data_gen_params', - 'eval_score_name', - 'score', - )) - - def main(): # Init. logging.basicConfig(level=logging.DEBUG) # TODO(alessio): INFO once debugged. - parser = _InstanceArgumentsParser() + parser = collect_data.InstanceArgumentsParser() + parser.description = ('Exports pre-computed APM module quality assessment ' + 'results into HTML tables') args = parser.parse_args() # Get the scores. - src_path = os.path.join( - args.output_dir, - sim.ApmModuleSimulator.GetPrefixApmConfig() + '*', - sim.ApmModuleSimulator.GetPrefixCapture() + '*', - sim.ApmModuleSimulator.GetPrefixRender() + '*', - sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*', - sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*', - sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*', - sim.ApmModuleSimulator.GetPrefixScore() + '*') + src_path = collect_data.ConstructSrcPath(args) logging.debug(src_path) - scores_data_frame = _FindScores(src_path, args) + scores_data_frame = collect_data.FindScores(src_path, args) # Export. output_filepath = os.path.join(args.output_dir, _BuildOutputFilename( diff --git a/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py b/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py new file mode 100644 index 0000000000..7946fe2aeb --- /dev/null +++ b/webrtc/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_optimize.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
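+
+# Example invocation (the flags below come from the parser defined in this
+# file; the parameter names are purely illustrative and must match keys in
+# the JSON files under --config_dir):
+#
+#   ./apm_quality_assessment_optimize.py -o output/ -p agc ns -z ns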
+
+"""Finds the APM configuration that maximizes a provided metric by
+parsing the output generated by apm_quality_assessment.py.
+"""

+
+from __future__ import division
+
+import collections
+import logging
+import os
+
+import quality_assessment.data_access as data_access
+import quality_assessment.collect_data as collect_data
+
+def _InstanceArgumentsParser():
+  """Arguments parser factory. Extends the arguments of 'collect_data'
+  with a few extra ones for selecting which parameters to optimize for.
+  """
+  parser = collect_data.InstanceArgumentsParser()
+  parser.description = (
+      'Rudimentary optimization of a function over different parameter '
+      'combinations.')
+
+  parser.add_argument('-n', '--config_dir', required=False,
+                      help=('path to the folder with the configuration files'),
+                      default='apm_configs')
+
+  parser.add_argument('-p', '--params', required=True, nargs='+',
+                      help=('parameters to parse from the config files in '
+                            'config_dir'))
+
+  parser.add_argument('-z', '--params_not_to_optimize', required=False,
+                      nargs='+', default=[],
+                      help=('parameters from `params` not to be optimized for'))
+
+  return parser
+
+
+def _ConfigurationAndScores(data_frame, params,
+                            params_not_to_optimize, config_dir):
+  """Returns the scores of all the configurations, grouped by the values of
+  the parameters to optimize for.
+
+  Args:
+    data_frame: A pandas data frame with the scores and config names
+                returned by collect_data.FindScores.
+    params: The parameter names to parse from the config files in the
+            config directory.
+    params_not_to_optimize: The parameter names which shouldn't affect
+                            the optimal parameter selection, e.g., fixed
+                            settings and non-tunable parameters.
+    config_dir: Path to the folder with the config files.
+
+  Returns:
+    Dictionary of the form
+    {param_combination: [{params: {param1: value1, ...},
+                          scores: {score1: value1, ...}}]}.
+
+    The key `param_combination` runs over all combinations of the
+    parameters in `params` that are not in `params_not_to_optimize`.
+    A corresponding value is a list of all param combinations for params
+    in `params_not_to_optimize` and their scores.
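+
+    For instance, with params = ['p1', 'p2'] and
+    params_not_to_optimize = ['p2'], one entry could look like
+    {ParamCombination(p1=1.0): [{'params': {'p2': 0.5},
+                                 'scores': {'polqa': 0.9}}]}
+    (the parameter and score names above are purely illustrative).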
+  """
+  results = collections.defaultdict(list)
+  config_names = data_frame['apm_config'].drop_duplicates().values.tolist()
+  score_names = data_frame['eval_score_name'].drop_duplicates().values.tolist()
+
+  # Normalize the scores.
+  normalization_constants = {}
+  for score_name in score_names:
+    scores = data_frame[data_frame.eval_score_name == score_name].score
+    normalization_constants[score_name] = max(scores)
+
+  params_to_optimize = [p for p in params if p not in params_not_to_optimize]
+  param_combination = collections.namedtuple("ParamCombination",
+                                             params_to_optimize)
+
+  for config_name in config_names:
+    config_json = data_access.AudioProcConfigFile.Load(
+        os.path.join(config_dir, config_name + ".json"))
+    scores = {}
+    data_cell = data_frame[data_frame.apm_config == config_name]
+    for score_name in score_names:
+      data_cell_scores = data_cell[data_cell.eval_score_name ==
+                                   score_name].score
+      scores[score_name] = sum(data_cell_scores) / len(data_cell_scores)
+      scores[score_name] /= normalization_constants[score_name]
+
+    result = {'scores': scores, 'params': {}}
+    config_optimize_params = {}
+    for param in params:
+      if param in params_to_optimize:
+        config_optimize_params[param] = config_json['-' + param]
+      else:
+        result['params'][param] = config_json['-' + param]
+
+    current_param_combination = param_combination(  # pylint: disable=star-args
+        **config_optimize_params)
+    results[current_param_combination].append(result)
+  return results
+
+
+def _FindOptimalParameter(configs_and_scores, score_weighting):
+  """Finds the config producing the maximal score.
+
+  Args:
+    configs_and_scores: structure of the form returned by
+                        _ConfigurationAndScores.
+    score_weighting: a function that weights together all score values of
+                     the form [{params: {param1: value1, ...},
+                     scores: {score1: value1, ...}}] into a numeric value.
+
+  Returns:
+    The config that yields the largest value of |score_weighting| applied
+    to its scores.
+  """
+  best_score = float('-inf')
+  best_params = None
+  for config in configs_and_scores:
+    scores_and_params = configs_and_scores[config]
+    current_score = score_weighting(scores_and_params)
+    if current_score > best_score:
+      best_score = current_score
+      best_params = config
+      logging.debug("Score: %f", current_score)
+      logging.debug("Config: %s", str(config))
+  return best_params
+
+
+def _ExampleWeighting(scores_and_configs):
+  """Example argument to `_FindOptimalParameter`.
+
+  Args:
+    scores_and_configs: a list of configs and scores, in the form
+                        described in _FindOptimalParameter.
+
+  Returns:
+    Numeric value, the sum of all scores.
+  """
+  res = 0
+  for score_config in scores_and_configs:
+    res += sum(score_config['scores'].values())
+  return res
+
+
+def main():
+  # Init.
+  # TODO(alessiob): INFO once debugged.
+  logging.basicConfig(level=logging.DEBUG)
+  parser = _InstanceArgumentsParser()
+  args = parser.parse_args()
+
+  # Get the scores.
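+  # ConstructSrcPath builds a glob pattern matching every score file under
+  # args.output_dir; FindScores then loads the matching scores into a
+  # pandas data frame.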
+  src_path = collect_data.ConstructSrcPath(args)
+  logging.debug('Src path <%s>', src_path)
+  scores_data_frame = collect_data.FindScores(src_path, args)
+  all_scores = _ConfigurationAndScores(scores_data_frame,
+                                       args.params,
+                                       args.params_not_to_optimize,
+                                       args.config_dir)
+
+  opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting)
+
+  logging.info('Optimal parameter combination: <%s>', opt_param)
+  logging.info('Its score values: <%s>', all_scores[opt_param])
+
+if __name__ == "__main__":
+  main()
diff --git a/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py b/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py
new file mode 100644
index 0000000000..bcad7a4209
--- /dev/null
+++ b/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/collect_data.py
@@ -0,0 +1,244 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Imports a filtered subset of the scores and configurations computed
+by apm_quality_assessment.py into a pandas data frame.
+"""
+
+import argparse
+import glob
+import logging
+import os
+import re
+import sys
+
+try:
+  import pandas as pd
+except ImportError:
+  logging.critical('Cannot import the third-party Python package pandas')
+  sys.exit(1)
+
+from . import data_access as data_access
+from . import simulation as sim
+
+# Compiled regular expressions used to extract score descriptors.
+RE_CONFIG_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixApmConfig() + r'(.+)')
+RE_CAPTURE_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixCapture() + r'(.+)')
+RE_RENDER_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)')
+RE_ECHO_SIM_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixEchoSimulator() + r'(.+)')
+RE_TEST_DATA_GEN_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)')
+RE_TEST_DATA_GEN_PARAMS = re.compile(
+    sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)')
+RE_SCORE_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixScore() + r'(.+)(\..+)')
+
+def InstanceArgumentsParser():
+  """Arguments parser factory.
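+
+  The returned parser defines only the filtering options shared by the
+  apm_quality_assessment_* user scripts; callers are expected to override
+  its `description` (and possibly add further arguments) before parsing.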
+  """
+  parser = argparse.ArgumentParser(description=(
+      'Override this description in a user script by changing'
+      ' `parser.description` of the returned parser.'))
+
+  parser.add_argument('-o', '--output_dir', required=True,
+                      help=('the same base path used with the '
+                            'apm_quality_assessment tool'))
+
+  parser.add_argument('-f', '--filename_suffix',
+                      help=('suffix of the exported file'))
+
+  parser.add_argument('-c', '--config_names', type=re.compile,
+                      help=('regular expression to filter the APM configuration'
+                            ' names'))
+
+  parser.add_argument('-i', '--capture_names', type=re.compile,
+                      help=('regular expression to filter the capture signal '
+                            'names'))
+
+  parser.add_argument('-r', '--render_names', type=re.compile,
+                      help=('regular expression to filter the render signal '
+                            'names'))
+
+  parser.add_argument('-e', '--echo_simulator_names', type=re.compile,
+                      help=('regular expression to filter the echo simulator '
+                            'names'))
+
+  parser.add_argument('-t', '--test_data_generators', type=re.compile,
+                      help=('regular expression to filter the test data '
+                            'generator names'))
+
+  parser.add_argument('-s', '--eval_scores', type=re.compile,
+                      help=('regular expression to filter the evaluation score '
+                            'names'))
+
+  return parser
+
+
+def _GetScoreDescriptors(score_filepath):
+  """Extracts a score descriptor from the given score file path.
+
+  Args:
+    score_filepath: path to the score file.
+
+  Returns:
+    A tuple of strings (APM configuration name, capture audio track name,
+    render audio track name, echo simulator name, test data generator name,
+    test data generator parameters as string, evaluation score name).
+  """
+  fields = score_filepath.split(os.sep)[-7:]
+  extract_name = lambda index, reg_expr: (
+      reg_expr.match(fields[index]).groups(0)[0])
+  return (
+      extract_name(0, RE_CONFIG_NAME),
+      extract_name(1, RE_CAPTURE_NAME),
+      extract_name(2, RE_RENDER_NAME),
+      extract_name(3, RE_ECHO_SIM_NAME),
+      extract_name(4, RE_TEST_DATA_GEN_NAME),
+      extract_name(5, RE_TEST_DATA_GEN_PARAMS),
+      extract_name(6, RE_SCORE_NAME),
+  )
+
+
+def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name,
+                  test_data_gen_name, score_name, args):
+  """Decides whether to exclude a score.
+
+  A set of optional regular expressions in args is used to determine if the
+  score should be excluded (depending on its |*_name| descriptors).
+
+  Args:
+    config_name: APM configuration name.
+    capture_name: capture audio track name.
+    render_name: render audio track name.
+    echo_simulator_name: echo simulator name.
+    test_data_gen_name: test data generator name.
+    score_name: evaluation score name.
+    args: parsed arguments.
+
+  Returns:
+    A boolean.
+  """
+  value_regexpr_pairs = [
+      (config_name, args.config_names),
+      (capture_name, args.capture_names),
+      (render_name, args.render_names),
+      (echo_simulator_name, args.echo_simulator_names),
+      (test_data_gen_name, args.test_data_generators),
+      (score_name, args.eval_scores),
+  ]
+
+  # Score accepted if each value matches the corresponding regular expression.
+  for value, regexpr in value_regexpr_pairs:
+    if regexpr is None:
+      continue
+    if not regexpr.match(value):
+      return True
+
+  return False
+
+
+def FindScores(src_path, args):
+  """Given a search path, find scores and return a DataFrame object.
+
+  Args:
+    src_path: Search path pattern.
+    args: parsed arguments.
+
+  Returns:
+    A DataFrame object.
+  """
+  # Get scores.
+  scores = []
+  for score_filepath in glob.iglob(src_path):
+    # Extract score descriptor fields from the path.
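+    # The last seven path components encode, in order: APM config, capture,
+    # render, echo simulator, test data generator, generator parameters and
+    # score name (see _GetScoreDescriptors).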
+ (config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + test_data_gen_params, + score_name) = _GetScoreDescriptors(score_filepath) + + # Ignore the score if required. + if _ExcludeScore( + config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + score_name, + args): + logging.info( + 'ignored score: %s %s %s %s %s %s', + config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + score_name) + continue + + # Read metadata and score. + metadata = data_access.Metadata.LoadAudioTestDataPaths( + os.path.split(score_filepath)[0]) + score = data_access.ScoreFile.Load(score_filepath) + + # Add a score with its descriptor fields. + scores.append(( + metadata['clean_capture_input_filepath'], + metadata['echo_free_capture_filepath'], + metadata['echo_filepath'], + metadata['render_filepath'], + metadata['capture_filepath'], + metadata['apm_output_filepath'], + metadata['apm_reference_filepath'], + config_name, + capture_name, + render_name, + echo_simulator_name, + test_data_gen_name, + test_data_gen_params, + score_name, + score, + )) + + return pd.DataFrame( + data=scores, + columns=( + 'clean_capture_input_filepath', + 'echo_free_capture_filepath', + 'echo_filepath', + 'render_filepath', + 'capture_filepath', + 'apm_output_filepath', + 'apm_reference_filepath', + 'apm_config', + 'capture', + 'render', + 'echo_simulator', + 'test_data_gen', + 'test_data_gen_params', + 'eval_score_name', + 'score', + )) + + +def ConstructSrcPath(args): + return os.path.join( + args.output_dir, + sim.ApmModuleSimulator.GetPrefixApmConfig() + '*', + sim.ApmModuleSimulator.GetPrefixCapture() + '*', + sim.ApmModuleSimulator.GetPrefixRender() + '*', + sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*', + sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*', + sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*', + sim.ApmModuleSimulator.GetPrefixScore() + '*') diff --git a/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py b/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py index 720cb9b4cc..7bd226bd39 100644 --- a/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py +++ b/webrtc/modules/audio_processing/test/py_quality_assessment/quality_assessment/export.py @@ -6,6 +6,7 @@ # in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. +import functools import hashlib import os import re @@ -79,7 +80,8 @@ class HtmlExport(object): def _BuildBody(self): """Builds the content of the
<body> section."""
-    score_names = self._scores_data_frame.eval_score_name.unique().tolist()
+    score_names = self._scores_data_frame['eval_score_name'].drop_duplicates(
+        ).values.tolist()
     html = [
       ('