Rudimentary optimization with APM/QA.

Added the script 'apm_quality_assessment_optimize.py' for finding the
parameters that minimize a custom function of the scores generated by
APM-QA. The script reuses the existing functionality for filtering the
data by configs, scores and outputs.

To achieve that, some modularization has been done: the part of
apm_quality_assessment_export that reads the data into a
pandas.DataFrame has been moved into quality_assessment.collect_data.

TESTED=through extensive manual tests. Unit tests for the user
scripts and 'collect_data' are missing, because we don't have a test
framework for loading/exporting fake data.
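
For illustration, a custom weighting passed to _FindOptimalParameter could
look like the sketch below; the function name and the per-score weights are
hypothetical, while the input follows the [{'params': ..., 'scores': ...}]
structure built by _ConfigurationAndScores in the new script:
```
# Hedged sketch of a custom weighting for _FindOptimalParameter. The
# per-score weights and the score name 'polqa' are illustrative only.
def WeightedScoreSum(scores_and_params):
  weights = {'polqa': 2.0}  # Hypothetical weights; default weight is 1.0.
  total = 0.0
  for entry in scores_and_params:
    for score_name, value in entry['scores'].items():
      total += weights.get(score_name, 1.0) * value
  return total
```
Since _FindOptimalParameter keeps the combination with the smallest weighted
value, negate the sum if larger scores are better.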

BUG=webrtc:7218

Change-Id: I5521b952970243da05fc4db1b9feef87a2e5ccad
Reviewed-on: https://chromium-review.googlesource.com/643292
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#19780}
Alex Loiko
2017-09-11 17:56:20 +02:00
committed by Commit Bot
parent 3b3c9c4eb0
commit 357429dd1e
6 changed files with 441 additions and 235 deletions


@@ -24,6 +24,7 @@ copy("scripts") {
"apm_quality_assessment.sh",
"apm_quality_assessment_export.py",
"apm_quality_assessment_gencfgs.py",
"apm_quality_assessment_optimize.py",
]
outputs = [
"$root_build_dir/py_quality_assessment/{{source_file_part}}",
@@ -53,6 +54,7 @@ copy("lib") {
sources = [
"quality_assessment/__init__.py",
"quality_assessment/audioproc_wrapper.py",
"quality_assessment/collect_data.py",
"quality_assessment/data_access.py",
"quality_assessment/echo_path_simulation.py",
"quality_assessment/echo_path_simulation_factory.py",


@@ -81,7 +81,7 @@ export separate reports. In this case, you can use the
For instance:
```
$ ./apm_quality_assessment-export.py \
$ ./apm_quality_assessment_export.py \
-o output/ \
-c "(^default$)|(.*AE.*)" \
-t \(white_noise\) \


@@ -11,142 +11,13 @@
HTML file.
"""
import argparse
import logging
import glob
import os
import re
import sys
try:
import pandas as pd
except ImportError:
logging.critical('Cannot import the third-party Python package pandas')
sys.exit(1)
import quality_assessment.data_access as data_access
import quality_assessment.collect_data as collect_data
import quality_assessment.export as export
import quality_assessment.simulation as sim
# Compiled regular expressions used to extract score descriptors.
RE_CONFIG_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixApmConfig() + r'(.+)')
RE_CAPTURE_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixCapture() + r'(.+)')
RE_RENDER_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)')
RE_ECHO_SIM_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixEchoSimulator() + r'(.+)')
RE_TEST_DATA_GEN_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)')
RE_TEST_DATA_GEN_PARAMS = re.compile(
sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)')
RE_SCORE_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixScore() + r'(.+)(\..+)')
def _InstanceArgumentsParser():
"""Arguments parser factory.
"""
parser = argparse.ArgumentParser(description=(
'Exports pre-computed APM module quality assessment results into HTML '
'tables.'))
parser.add_argument('-o', '--output_dir', required=True,
help=('the same base path used with the '
'apm_quality_assessment tool'))
parser.add_argument('-f', '--filename_suffix',
help=('suffix of the exported file'))
parser.add_argument('-c', '--config_names', type=re.compile,
help=('regular expression to filter the APM configuration'
' names'))
parser.add_argument('-i', '--capture_names', type=re.compile,
help=('regular expression to filter the capture signal '
'names'))
parser.add_argument('-r', '--render_names', type=re.compile,
help=('regular expression to filter the render signal '
'names'))
parser.add_argument('-e', '--echo_simulator_names', type=re.compile,
help=('regular expression to filter the echo simulator '
'names'))
parser.add_argument('-t', '--test_data_generators', type=re.compile,
help=('regular expression to filter the test data '
'generator names'))
parser.add_argument('-s', '--eval_scores', type=re.compile,
help=('regular expression to filter the evaluation score '
'names'))
return parser
def _GetScoreDescriptors(score_filepath):
"""Extracts a score descriptor from the given score file path.
Args:
score_filepath: path to the score file.
Returns:
A tuple of strings (APM configuration name, capture audio track name,
render audio track name, echo simulator name, test data generator name,
test data generator parameters as string, evaluation score name).
"""
fields = score_filepath.split(os.sep)[-7:]
extract_name = lambda index, reg_expr: (
reg_expr.match(fields[index]).groups(0)[0])
return (
extract_name(0, RE_CONFIG_NAME),
extract_name(1, RE_CAPTURE_NAME),
extract_name(2, RE_RENDER_NAME),
extract_name(3, RE_ECHO_SIM_NAME),
extract_name(4, RE_TEST_DATA_GEN_NAME),
extract_name(5, RE_TEST_DATA_GEN_PARAMS),
extract_name(6, RE_SCORE_NAME),
)
def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name,
test_data_gen_name, score_name, args):
"""Decides whether excluding a score.
A set of optional regular expressions in args is used to determine if the
score should be excluded (depending on its |*_name| descriptors).
Args:
config_name: APM configuration name.
capture_name: capture audio track name.
render_name: render audio track name.
echo_simulator_name: echo simulator name.
test_data_gen_name: test data generator name.
score_name: evaluation score name.
args: parsed arguments.
Returns:
A boolean.
"""
value_regexpr_pairs = [
(config_name, args.config_names),
(capture_name, args.capture_names),
(render_name, args.render_names),
(echo_simulator_name, args.echo_simulator_names),
(test_data_gen_name, args.test_data_generators),
(score_name, args.eval_scores),
]
# Score accepted if each value matches the corresponding regular expression.
for value, regexpr in value_regexpr_pairs:
if regexpr is None:
continue
if not regexpr.match(value):
return True
return False
def _BuildOutputFilename(filename_suffix):
@@ -162,111 +33,18 @@ def _BuildOutputFilename(filename_suffix):
return 'results.html'
return 'results-{}.html'.format(filename_suffix)
def _FindScores(src_path, args):
"""Given a search path, find scores and return a DataFrame object.
Args:
src_path: Search path pattern.
args: parsed arguments.
Returns:
A DataFrame object.
"""
# Get scores.
scores = []
for score_filepath in glob.iglob(src_path):
# Extract score descriptor fields from the path.
(config_name,
capture_name,
render_name,
echo_simulator_name,
test_data_gen_name,
test_data_gen_params,
score_name) = _GetScoreDescriptors(score_filepath)
# Ignore the score if required.
if _ExcludeScore(
config_name,
capture_name,
render_name,
echo_simulator_name,
test_data_gen_name,
score_name,
args):
logging.info(
'ignored score: %s %s %s %s %s %s',
config_name,
capture_name,
render_name,
echo_simulator_name,
test_data_gen_name,
score_name)
continue
# Read metadata and score.
metadata = data_access.Metadata.LoadAudioTestDataPaths(
os.path.split(score_filepath)[0])
score = data_access.ScoreFile.Load(score_filepath)
# Add a score with its descriptor fields.
scores.append((
metadata['clean_capture_input_filepath'],
metadata['echo_free_capture_filepath'],
metadata['echo_filepath'],
metadata['render_filepath'],
metadata['capture_filepath'],
metadata['apm_output_filepath'],
metadata['apm_reference_filepath'],
config_name,
capture_name,
render_name,
echo_simulator_name,
test_data_gen_name,
test_data_gen_params,
score_name,
score,
))
return pd.DataFrame(
data=scores,
columns=(
'clean_capture_input_filepath',
'echo_free_capture_filepath',
'echo_filepath',
'render_filepath',
'capture_filepath',
'apm_output_filepath',
'apm_reference_filepath',
'apm_config',
'capture',
'render',
'echo_simulator',
'test_data_gen',
'test_data_gen_params',
'eval_score_name',
'score',
))
def main():
# Init.
logging.basicConfig(level=logging.DEBUG) # TODO(alessio): INFO once debugged.
parser = _InstanceArgumentsParser()
parser = collect_data.InstanceArgumentsParser()
parser.description = ('Exports pre-computed APM module quality assessment '
'results into HTML tables')
args = parser.parse_args()
# Get the scores.
src_path = os.path.join(
args.output_dir,
sim.ApmModuleSimulator.GetPrefixApmConfig() + '*',
sim.ApmModuleSimulator.GetPrefixCapture() + '*',
sim.ApmModuleSimulator.GetPrefixRender() + '*',
sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*',
sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*',
sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*',
sim.ApmModuleSimulator.GetPrefixScore() + '*')
src_path = collect_data.ConstructSrcPath(args)
logging.debug(src_path)
scores_data_frame = _FindScores(src_path, args)
scores_data_frame = collect_data.FindScores(src_path, args)
# Export.
output_filepath = os.path.join(args.output_dir, _BuildOutputFilename(


@@ -0,0 +1,179 @@
#!/usr/bin/env python
# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
"""Finds the APM configuration that maximizes a provided metric by
parsing the output generated apm_quality_assessment.py.
"""
from __future__ import division
import collections
import logging
import os
import quality_assessment.data_access as data_access
import quality_assessment.collect_data as collect_data
def _InstanceArgumentsParser():
"""Arguments parser factory. Extends the arguments from 'collect_data'
with a few extra for selecting what parameters to optimize for.
"""
parser = collect_data.InstanceArgumentsParser()
parser.description = (
'Rudimentary optimization of a function over different parameter '
'combinations.')
parser.add_argument('-n', '--config_dir', required=False,
help=('path to the folder with the configuration files'),
default='apm_configs')
parser.add_argument('-p', '--params', required=True, nargs='+',
help=('parameters to parse from the config files in '
'config_dir'))
parser.add_argument('-z', '--params_not_to_optimize', required=False,
nargs='+', default=[],
help=('parameters from `params` not to be optimized for'))
return parser
def _ConfigurationAndScores(data_frame, params,
params_not_to_optimize, config_dir):
"""Returns a list of all configurations and scores.
Args:
data_frame: A pandas data frame with the scores and config name
returned by _FindScores.
params: The parameter names to parse from configs the config
directory
params_not_to_optimize: The parameter names which shouldn't affect
the optimal parameter
selection. E.g., fixed settings and not
tunable parameters.
config_dir: Path to folder with config files.
Returns:
Dictionary of the form
{param_combination: [{params: {param1: value1, ...},
scores: {score1: value1, ...}}]}.
The key `param_combination` runs over all parameter combinations
of the parameters in `params` and not in
`params_not_to_optimize`. A corresponding value is a list of all
param combinations for params in `params_not_to_optimize` and
their scores.
"""
results = collections.defaultdict(list)
config_names = data_frame['apm_config'].drop_duplicates().values.tolist()
score_names = data_frame['eval_score_name'].drop_duplicates().values.tolist()
# Normalize the scores
normalization_constants = {}
for score_name in score_names:
scores = data_frame[data_frame.eval_score_name == score_name].score
normalization_constants[score_name] = max(scores)
params_to_optimize = [p for p in params if p not in params_not_to_optimize]
param_combination = collections.namedtuple("ParamCombination",
params_to_optimize)
for config_name in config_names:
config_json = data_access.AudioProcConfigFile.Load(
os.path.join(config_dir, config_name + ".json"))
scores = {}
data_cell = data_frame[data_frame.apm_config == config_name]
for score_name in score_names:
data_cell_scores = data_cell[data_cell.eval_score_name ==
score_name].score
scores[score_name] = sum(data_cell_scores) / len(data_cell_scores)
scores[score_name] /= normalization_constants[score_name]
result = {'scores': scores, 'params': {}}
config_optimize_params = {}
for param in params:
if param in params_to_optimize:
config_optimize_params[param] = config_json['-' + param]
else:
result['params'][param] = config_json['-' + param]
current_param_combination = param_combination( # pylint: disable=star-args
**config_optimize_params)
results[current_param_combination].append(result)
return results
def _FindOptimalParameter(configs_and_scores, score_weighting):
"""Finds the config producing the maximal score.
Args:
configs_and_scores: structure of the form returned by
_ConfigurationAndScores
score_weighting: a function to weight together all score values of
the form [{params: {param1: value1, ...}, scores:
{score1: value1, ...}}] into a numeric
value
Returns:
the config that has the smallest value of |score_weighting| applied
to its scores.
"""
min_score = float('+inf')
best_params = None
for config in configs_and_scores:
scores_and_params = configs_and_scores[config]
current_score = score_weighting(scores_and_params)
if current_score < min_score:
min_score = current_score
best_params = config
logging.debug("Score: %f", current_score)
logging.debug("Config: %s", str(config))
return best_params
def _ExampleWeighting(scores_and_configs):
"""Example argument to `_FindOptimalParameter`
Args:
scores_and_configs: a list of configs and scores, in the form
described in _FindOptimalParameter
Returns:
numeric value, the sum of all scores
"""
res = 0
for score_config in scores_and_configs:
res += sum(score_config['scores'].values())
return res
def main():
# Init.
# TODO(alessiob): INFO once debugged.
logging.basicConfig(level=logging.DEBUG)
parser = _InstanceArgumentsParser()
args = parser.parse_args()
# Get the scores.
src_path = collect_data.ConstructSrcPath(args)
logging.debug('Src path <%s>', src_path)
scores_data_frame = collect_data.FindScores(src_path, args)
all_scores = _ConfigurationAndScores(scores_data_frame,
args.params,
args.params_not_to_optimize,
args.config_dir)
opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting)
logging.info('Optimal parameter combination: <%s>', opt_param)
logging.info('Its score values: <%s>', all_scores[opt_param])
if __name__ == "__main__":
main()
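
For reference, a self-contained sketch of the data shapes the script works
with; the parameter names, score names and values below are made up, and the
inline weighting mirrors (rather than replaces) _ExampleWeighting and the
minimum selection done by _FindOptimalParameter:
```
# Illustrative only: mimics the dict returned by _ConfigurationAndScores.
# Parameter names, score names and values are hypothetical.
import collections

ParamCombination = collections.namedtuple('ParamCombination',
                                          ['agc2_level', 'ns_level'])

configs_and_scores = {
    ParamCombination(agc2_level=5, ns_level=2): [
        {'params': {'sample_rate': 48000},  # Parameter not optimized for.
         'scores': {'polqa': 0.93, 'audio_level': 0.87}},
    ],
    ParamCombination(agc2_level=9, ns_level=1): [
        {'params': {'sample_rate': 48000},
         'scores': {'polqa': 0.71, 'audio_level': 0.95}},
    ],
}

# Sum all score values per combination and keep the smallest sum, like
# _FindOptimalParameter(configs_and_scores, _ExampleWeighting) does.
def ExampleWeighting(scores_and_params):
  return sum(sum(e['scores'].values()) for e in scores_and_params)

best = min(configs_and_scores,
           key=lambda c: ExampleWeighting(configs_and_scores[c]))
print(best)  # ParamCombination(agc2_level=9, ns_level=1) for these values.
```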


@@ -0,0 +1,244 @@
# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
"""Imports a filtered subset of the scores and configurations computed
by apm_quality_assessment.py into a pandas data frame.
"""
import argparse
import glob
import logging
import os
import re
import sys
try:
import pandas as pd
except ImportError:
logging.critical('Cannot import the third-party Python package pandas')
sys.exit(1)
from . import data_access as data_access
from . import simulation as sim
# Compiled regular expressions used to extract score descriptors.
RE_CONFIG_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixApmConfig() + r'(.+)')
RE_CAPTURE_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixCapture() + r'(.+)')
RE_RENDER_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)')
RE_ECHO_SIM_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixEchoSimulator() + r'(.+)')
RE_TEST_DATA_GEN_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)')
RE_TEST_DATA_GEN_PARAMS = re.compile(
sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)')
RE_SCORE_NAME = re.compile(
sim.ApmModuleSimulator.GetPrefixScore() + r'(.+)(\..+)')
def InstanceArgumentsParser():
"""Arguments parser factory.
"""
parser = argparse.ArgumentParser(description=(
'Override this description in a user script by changing'
' `parser.description` of the returned parser.'))
parser.add_argument('-o', '--output_dir', required=True,
help=('the same base path used with the '
'apm_quality_assessment tool'))
parser.add_argument('-f', '--filename_suffix',
help=('suffix of the exported file'))
parser.add_argument('-c', '--config_names', type=re.compile,
help=('regular expression to filter the APM configuration'
' names'))
parser.add_argument('-i', '--capture_names', type=re.compile,
help=('regular expression to filter the capture signal '
'names'))
parser.add_argument('-r', '--render_names', type=re.compile,
help=('regular expression to filter the render signal '
'names'))
parser.add_argument('-e', '--echo_simulator_names', type=re.compile,
help=('regular expression to filter the echo simulator '
'names'))
parser.add_argument('-t', '--test_data_generators', type=re.compile,
help=('regular expression to filter the test data '
'generator names'))
parser.add_argument('-s', '--eval_scores', type=re.compile,
help=('regular expression to filter the evaluation score '
'names'))
return parser
def _GetScoreDescriptors(score_filepath):
"""Extracts a score descriptor from the given score file path.
Args:
score_filepath: path to the score file.
Returns:
A tuple of strings (APM configuration name, capture audio track name,
render audio track name, echo simulator name, test data generator name,
test data generator parameters as string, evaluation score name).
"""
fields = score_filepath.split(os.sep)[-7:]
extract_name = lambda index, reg_expr: (
reg_expr.match(fields[index]).groups(0)[0])
return (
extract_name(0, RE_CONFIG_NAME),
extract_name(1, RE_CAPTURE_NAME),
extract_name(2, RE_RENDER_NAME),
extract_name(3, RE_ECHO_SIM_NAME),
extract_name(4, RE_TEST_DATA_GEN_NAME),
extract_name(5, RE_TEST_DATA_GEN_PARAMS),
extract_name(6, RE_SCORE_NAME),
)
def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name,
test_data_gen_name, score_name, args):
"""Decides whether excluding a score.
A set of optional regular expressions in args is used to determine if the
score should be excluded (depending on its |*_name| descriptors).
Args:
config_name: APM configuration name.
capture_name: capture audio track name.
render_name: render audio track name.
echo_simulator_name: echo simulator name.
test_data_gen_name: test data generator name.
score_name: evaluation score name.
args: parsed arguments.
Returns:
A boolean.
"""
value_regexpr_pairs = [
(config_name, args.config_names),
(capture_name, args.capture_names),
(render_name, args.render_names),
(echo_simulator_name, args.echo_simulator_names),
(test_data_gen_name, args.test_data_generators),
(score_name, args.eval_scores),
]
# Score accepted if each value matches the corresponding regular expression.
for value, regexpr in value_regexpr_pairs:
if regexpr is None:
continue
if not regexpr.match(value):
return True
return False
def FindScores(src_path, args):
"""Given a search path, find scores and return a DataFrame object.
Args:
src_path: Search path pattern.
args: parsed arguments.
Returns:
A DataFrame object.
"""
# Get scores.
scores = []
for score_filepath in glob.iglob(src_path):
# Extract score descriptor fields from the path.
(config_name,
capture_name,
render_name,
echo_simulator_name,
test_data_gen_name,
test_data_gen_params,
score_name) = _GetScoreDescriptors(score_filepath)
# Ignore the score if required.
if _ExcludeScore(
config_name,
capture_name,
render_name,
echo_simulator_name,
test_data_gen_name,
score_name,
args):
logging.info(
'ignored score: %s %s %s %s %s %s',
config_name,
capture_name,
render_name,
echo_simulator_name,
test_data_gen_name,
score_name)
continue
# Read metadata and score.
metadata = data_access.Metadata.LoadAudioTestDataPaths(
os.path.split(score_filepath)[0])
score = data_access.ScoreFile.Load(score_filepath)
# Add a score with its descriptor fields.
scores.append((
metadata['clean_capture_input_filepath'],
metadata['echo_free_capture_filepath'],
metadata['echo_filepath'],
metadata['render_filepath'],
metadata['capture_filepath'],
metadata['apm_output_filepath'],
metadata['apm_reference_filepath'],
config_name,
capture_name,
render_name,
echo_simulator_name,
test_data_gen_name,
test_data_gen_params,
score_name,
score,
))
return pd.DataFrame(
data=scores,
columns=(
'clean_capture_input_filepath',
'echo_free_capture_filepath',
'echo_filepath',
'render_filepath',
'capture_filepath',
'apm_output_filepath',
'apm_reference_filepath',
'apm_config',
'capture',
'render',
'echo_simulator',
'test_data_gen',
'test_data_gen_params',
'eval_score_name',
'score',
))
def ConstructSrcPath(args):
return os.path.join(
args.output_dir,
sim.ApmModuleSimulator.GetPrefixApmConfig() + '*',
sim.ApmModuleSimulator.GetPrefixCapture() + '*',
sim.ApmModuleSimulator.GetPrefixRender() + '*',
sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*',
sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*',
sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*',
sim.ApmModuleSimulator.GetPrefixScore() + '*')
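
For reference, a hedged sketch of how a user script can consume this module
end to end; the output directory and the config-name filter below are
placeholders, and results must already have been generated by
apm_quality_assessment.py:
```
# Hedged sketch of a collect_data consumer; paths and filters are examples.
import quality_assessment.collect_data as collect_data

parser = collect_data.InstanceArgumentsParser()
parser.description = 'Example consumer of the collected scores.'
args = parser.parse_args(['-o', 'output/', '-c', '(^default$)|(.*AE.*)'])

src_path = collect_data.ConstructSrcPath(args)
scores_data_frame = collect_data.FindScores(src_path, args)

# One row per score file; average each evaluation score per APM config.
print(scores_data_frame.groupby(
    ['apm_config', 'eval_score_name'])['score'].mean())
```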


@@ -6,6 +6,7 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import functools
import hashlib
import os
import re
@@ -79,7 +80,8 @@ class HtmlExport(object):
def _BuildBody(self):
"""Builds the content of the <body> section."""
score_names = self._scores_data_frame.eval_score_name.unique().tolist()
score_names = self._scores_data_frame['eval_score_name'].drop_duplicates(
).values.tolist()
html = [
('<div class="mdl-layout mdl-js-layout mdl-layout--fixed-header '
@@ -178,7 +180,7 @@ class HtmlExport(object):
score_name + test_data_gen + test_data_gen_params + apm_config)
if stats['count'] == 1:
# Show the only available score.
item_id = hashlib.md5(items_id_prefix).hexdigest()
item_id = hashlib.md5(items_id_prefix.encode('utf-8')).hexdigest()
html.append('<div id="single-value-{0}">{1:f}</div>'.format(
item_id, scores['score'].mean()))
html.append('<div class="mdl-tooltip" data-mdl-for="single-value-{}">{}'
@@ -186,7 +188,8 @@ class HtmlExport(object):
else:
# Show stats.
for stat_name in ['min', 'max', 'mean', 'std dev']:
item_id = hashlib.md5(items_id_prefix + stat_name).hexdigest()
item_id = hashlib.md5(
(items_id_prefix + stat_name).encode('utf-8')).hexdigest()
html.append('<div id="stats-{0}">{1:f}</div>'.format(
item_id, stats[stat_name]))
html.append('<div class="mdl-tooltip" data-mdl-for="stats-{}">{}'
@@ -289,7 +292,7 @@ class HtmlExport(object):
masks.append(self._scores_data_frame.test_data_gen == test_data_gen)
masks.append(
self._scores_data_frame.test_data_gen_params == test_data_gen_params)
mask = reduce((lambda i1, i2: i1 & i2), masks)
mask = functools.reduce((lambda i1, i2: i1 & i2), masks)
del masks
return self._scores_data_frame[mask]
@@ -302,7 +305,7 @@ class HtmlExport(object):
masks.append(scores.capture == capture)
masks.append(scores.render == render)
masks.append(scores.echo_simulator == echo_simulator)
mask = reduce((lambda i1, i2: i1 & i2), masks)
mask = functools.reduce((lambda i1, i2: i1 & i2), masks)
del masks
sliced_data = scores[mask]
@@ -333,7 +336,7 @@ class HtmlExport(object):
return 'score-stats-dialog-' + hashlib.md5(
'score-stats-inspector-{}-{}-{}-{}'.format(
score_name, apm_config, test_data_gen,
test_data_gen_params)).hexdigest()
test_data_gen_params).encode('utf-8')).hexdigest()
@classmethod
def _Save(cls, output_filepath, html):
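
The export.py hunks above are Python 3 compatibility fixes. A minimal
standalone sketch of the two behaviors they account for (not part of the
change itself):
```
# Minimal sketch of the two Python 3 differences addressed above.
import functools
import hashlib

# hashlib.md5() only accepts bytes under Python 3, hence .encode('utf-8').
item_id = hashlib.md5('score-stats-inspector'.encode('utf-8')).hexdigest()

# reduce() is no longer a builtin in Python 3; functools.reduce works in
# both Python 2 and 3, e.g. to AND together a list of boolean masks.
mask = functools.reduce(lambda m1, m2: m1 & m2, [True, True, False])
print(item_id)
print(mask)
```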