Rudimentary optimization with APM/QA.

Added the script 'apm_quality_assessment_optimize' for finding parameters that
minimize a custom function of the scores generated by APM-QA. The script reuses
the existing functionality for filtering the data on configs/scores/outputs. To
achieve that, some modularization has been done: the part of
apm_quality_assessment_export that reads data into a pandas.DataFrame has been
moved into quality_assessment.collect_data.

TESTED=through extensive manual tests. Unit tests for the user scripts and
'collect_data' are missing, because we don't have a test framework for
loading/exporting fake data.

BUG=webrtc:7218
Change-Id: I5521b952970243da05fc4db1b9feef87a2e5ccad
Reviewed-on: https://chromium-review.googlesource.com/643292
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#19780}
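The new script keeps the parameter combination whose weighted score is smallest; `_ExampleWeighting` in the diff below simply sums all normalized scores, and any other weighting can be passed to `_FindOptimalParameter`. A minimal sketch of such a custom weighting follows; the score names and weights are placeholders, not names defined by this change.

```
def WeightedScore(scores_and_params):
  """Sketch of a custom weighting for _FindOptimalParameter().

  Each entry has the form {'params': {...}, 'scores': {score_name: value}},
  as built by _ConfigurationAndScores() in the new script below.
  """
  weights = {'polqa': 0.8, 'audio_level': 0.2}  # Placeholder score names.
  total = 0.0
  for entry in scores_and_params:
    total += sum(weights.get(name, 0.0) * value
                 for name, value in entry['scores'].items())
  return total / len(scores_and_params)

# It would replace _ExampleWeighting in main():
#   opt_param = _FindOptimalParameter(all_scores, WeightedScore)
```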
@@ -24,6 +24,7 @@ copy("scripts") {
     "apm_quality_assessment.sh",
     "apm_quality_assessment_export.py",
     "apm_quality_assessment_gencfgs.py",
+    "apm_quality_assessment_optimize.py",
   ]
   outputs = [
     "$root_build_dir/py_quality_assessment/{{source_file_part}}",
@@ -53,6 +54,7 @@ copy("lib") {
   sources = [
     "quality_assessment/__init__.py",
     "quality_assessment/audioproc_wrapper.py",
+    "quality_assessment/collect_data.py",
     "quality_assessment/data_access.py",
     "quality_assessment/echo_path_simulation.py",
     "quality_assessment/echo_path_simulation_factory.py",
@@ -81,7 +81,7 @@ export separate reports. In this case, you can use the
 For instance:
 
 ```
-$ ./apm_quality_assessment-export.py \
+$ ./apm_quality_assessment_export.py \
   -o output/ \
   -c "(^default$)|(.*AE.*)" \
   -t \(white_noise\) \
@@ -11,142 +11,13 @@
    HTML file.
 """
 
-import argparse
 import logging
-import glob
 import os
-import re
 import sys
 
-try:
-  import pandas as pd
-except ImportError:
-  logging.critical('Cannot import the third-party Python package pandas')
-  sys.exit(1)
-
-import quality_assessment.data_access as data_access
+import apm_quality_assessment_collect_data as collect_data
 import quality_assessment.export as export
-import quality_assessment.simulation as sim
-
-# Compiled regular expressions used to extract score descriptors.
-RE_CONFIG_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixApmConfig() + r'(.+)')
-RE_CAPTURE_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixCapture() + r'(.+)')
-RE_RENDER_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)')
-RE_ECHO_SIM_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixEchoSimulator() + r'(.+)')
-RE_TEST_DATA_GEN_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)')
-RE_TEST_DATA_GEN_PARAMS = re.compile(
-    sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)')
-RE_SCORE_NAME = re.compile(
-    sim.ApmModuleSimulator.GetPrefixScore() + r'(.+)(\..+)')
-
-
-def _InstanceArgumentsParser():
-  """Arguments parser factory.
-  """
-  parser = argparse.ArgumentParser(description=(
-      'Exports pre-computed APM module quality assessment results into HTML '
-      'tables.'))
-
-  parser.add_argument('-o', '--output_dir', required=True,
-                      help=('the same base path used with the '
-                            'apm_quality_assessment tool'))
-
-  parser.add_argument('-f', '--filename_suffix',
-                      help=('suffix of the exported file'))
-
-  parser.add_argument('-c', '--config_names', type=re.compile,
-                      help=('regular expression to filter the APM configuration'
-                            ' names'))
-
-  parser.add_argument('-i', '--capture_names', type=re.compile,
-                      help=('regular expression to filter the capture signal '
-                            'names'))
-
-  parser.add_argument('-r', '--render_names', type=re.compile,
-                      help=('regular expression to filter the render signal '
-                            'names'))
-
-  parser.add_argument('-e', '--echo_simulator_names', type=re.compile,
-                      help=('regular expression to filter the echo simulator '
-                            'names'))
-
-  parser.add_argument('-t', '--test_data_generators', type=re.compile,
-                      help=('regular expression to filter the test data '
-                            'generator names'))
-
-  parser.add_argument('-s', '--eval_scores', type=re.compile,
-                      help=('regular expression to filter the evaluation score '
-                            'names'))
-
-  return parser
-
-
-def _GetScoreDescriptors(score_filepath):
-  """Extracts a score descriptor from the given score file path.
-
-  Args:
-    score_filepath: path to the score file.
-
-  Returns:
-    A tuple of strings (APM configuration name, capture audio track name,
-    render audio track name, echo simulator name, test data generator name,
-    test data generator parameters as string, evaluation score name).
-  """
-  fields = score_filepath.split(os.sep)[-7:]
-  extract_name = lambda index, reg_expr: (
-      reg_expr.match(fields[index]).groups(0)[0])
-  return (
-      extract_name(0, RE_CONFIG_NAME),
-      extract_name(1, RE_CAPTURE_NAME),
-      extract_name(2, RE_RENDER_NAME),
-      extract_name(3, RE_ECHO_SIM_NAME),
-      extract_name(4, RE_TEST_DATA_GEN_NAME),
-      extract_name(5, RE_TEST_DATA_GEN_PARAMS),
-      extract_name(6, RE_SCORE_NAME),
-  )
-
-
-def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name,
-                  test_data_gen_name, score_name, args):
-  """Decides whether excluding a score.
-
-  A set of optional regular expressions in args is used to determine if the
-  score should be excluded (depending on its |*_name| descriptors).
-
-  Args:
-    config_name: APM configuration name.
-    capture_name: capture audio track name.
-    render_name: render audio track name.
-    echo_simulator_name: echo simulator name.
-    test_data_gen_name: test data generator name.
-    score_name: evaluation score name.
-    args: parsed arguments.
-
-  Returns:
-    A boolean.
-  """
-  value_regexpr_pairs = [
-      (config_name, args.config_names),
-      (capture_name, args.capture_names),
-      (render_name, args.render_names),
-      (echo_simulator_name, args.echo_simulator_names),
-      (test_data_gen_name, args.test_data_generators),
-      (score_name, args.eval_scores),
-  ]
-
-  # Score accepted if each value matches the corresponding regular expression.
-  for value, regexpr in value_regexpr_pairs:
-    if regexpr is None:
-      continue
-    if not regexpr.match(value):
-      return True
-
-  return False
-
-
 def _BuildOutputFilename(filename_suffix):
@@ -162,111 +33,18 @@ def _BuildOutputFilename(filename_suffix):
     return 'results.html'
   return 'results-{}.html'.format(filename_suffix)
-
-
-def _FindScores(src_path, args):
-  """Given a search path, find scores and return a DataFrame object.
-
-  Args:
-    src_path: Search path pattern.
-    args: parsed arguments.
-
-  Returns:
-    A DataFrame object.
-  """
-  # Get scores.
-  scores = []
-  for score_filepath in glob.iglob(src_path):
-    # Extract score descriptor fields from the path.
-    (config_name,
-     capture_name,
-     render_name,
-     echo_simulator_name,
-     test_data_gen_name,
-     test_data_gen_params,
-     score_name) = _GetScoreDescriptors(score_filepath)
-
-    # Ignore the score if required.
-    if _ExcludeScore(
-        config_name,
-        capture_name,
-        render_name,
-        echo_simulator_name,
-        test_data_gen_name,
-        score_name,
-        args):
-      logging.info(
-          'ignored score: %s %s %s %s %s %s',
-          config_name,
-          capture_name,
-          render_name,
-          echo_simulator_name,
-          test_data_gen_name,
-          score_name)
-      continue
-
-    # Read metadata and score.
-    metadata = data_access.Metadata.LoadAudioTestDataPaths(
-        os.path.split(score_filepath)[0])
-    score = data_access.ScoreFile.Load(score_filepath)
-
-    # Add a score with its descriptor fields.
-    scores.append((
-        metadata['clean_capture_input_filepath'],
-        metadata['echo_free_capture_filepath'],
-        metadata['echo_filepath'],
-        metadata['render_filepath'],
-        metadata['capture_filepath'],
-        metadata['apm_output_filepath'],
-        metadata['apm_reference_filepath'],
-        config_name,
-        capture_name,
-        render_name,
-        echo_simulator_name,
-        test_data_gen_name,
-        test_data_gen_params,
-        score_name,
-        score,
-    ))
-
-  return pd.DataFrame(
-      data=scores,
-      columns=(
-          'clean_capture_input_filepath',
-          'echo_free_capture_filepath',
-          'echo_filepath',
-          'render_filepath',
-          'capture_filepath',
-          'apm_output_filepath',
-          'apm_reference_filepath',
-          'apm_config',
-          'capture',
-          'render',
-          'echo_simulator',
-          'test_data_gen',
-          'test_data_gen_params',
-          'eval_score_name',
-          'score',
-      ))
-
-
 def main():
   # Init.
   logging.basicConfig(level=logging.DEBUG)  # TODO(alessio): INFO once debugged.
-  parser = _InstanceArgumentsParser()
+  parser = collect_data.InstanceArgumentsParser()
+  parser.description = ('Exports pre-computed APM module quality assessment '
+                        'results into HTML tables')
   args = parser.parse_args()
 
   # Get the scores.
-  src_path = os.path.join(
-      args.output_dir,
-      sim.ApmModuleSimulator.GetPrefixApmConfig() + '*',
-      sim.ApmModuleSimulator.GetPrefixCapture() + '*',
-      sim.ApmModuleSimulator.GetPrefixRender() + '*',
-      sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*',
-      sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*',
-      sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*',
-      sim.ApmModuleSimulator.GetPrefixScore() + '*')
+  src_path = collect_data.ConstructSrcPath(args)
   logging.debug(src_path)
-  scores_data_frame = _FindScores(src_path, args)
+  scores_data_frame = collect_data.FindScores(src_path, args)
 
   # Export.
   output_filepath = os.path.join(args.output_dir, _BuildOutputFilename(
@@ -0,0 +1,179 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Finds the APM configuration that minimizes a provided metric by
+parsing the output generated by apm_quality_assessment.py.
+"""
+
+from __future__ import division
+
+import collections
+import logging
+import os
+
+import quality_assessment.data_access as data_access
+import quality_assessment.collect_data as collect_data
+
+
+def _InstanceArgumentsParser():
+  """Arguments parser factory. Extends the arguments from 'collect_data'
+  with a few extra for selecting what parameters to optimize for.
+  """
+  parser = collect_data.InstanceArgumentsParser()
+  parser.description = (
+      'Rudimentary optimization of a function over different parameter '
+      'combinations.')
+
+  parser.add_argument('-n', '--config_dir', required=False,
+                      help=('path to the folder with the configuration files'),
+                      default='apm_configs')
+
+  parser.add_argument('-p', '--params', required=True, nargs='+',
+                      help=('parameters to parse from the config files in '
+                            'config_dir'))
+
+  parser.add_argument('-z', '--params_not_to_optimize', required=False,
+                      nargs='+', default=[],
+                      help=('parameters from `params` not to be optimized for'))
+
+  return parser
+
+
+def _ConfigurationAndScores(data_frame, params,
+                            params_not_to_optimize, config_dir):
+  """Returns a dictionary of all configurations and scores.
+
+  Args:
+    data_frame: A pandas data frame with the scores and config name
+                returned by collect_data.FindScores.
+    params: The parameter names to parse from the configs in the config
+            directory.
+    params_not_to_optimize: The parameter names which shouldn't affect
+                            the optimal parameter selection. E.g., fixed
+                            settings and not tunable parameters.
+    config_dir: Path to folder with config files.
+
+  Returns:
+    Dictionary of the form
+    {param_combination: [{params: {param1: value1, ...},
+                          scores: {score1: value1, ...}}]}.
+
+    The key `param_combination` runs over all parameter combinations
+    of the parameters in `params` and not in `params_not_to_optimize`.
+    A corresponding value is a list of all param combinations for params
+    in `params_not_to_optimize` and their scores.
+  """
+  results = collections.defaultdict(list)
+  config_names = data_frame['apm_config'].drop_duplicates().values.tolist()
+  score_names = data_frame['eval_score_name'].drop_duplicates().values.tolist()
+
+  # Normalize the scores.
+  normalization_constants = {}
+  for score_name in score_names:
+    scores = data_frame[data_frame.eval_score_name == score_name].score
+    normalization_constants[score_name] = max(scores)
+
+  params_to_optimize = [p for p in params if p not in params_not_to_optimize]
+  param_combination = collections.namedtuple("ParamCombination",
+                                             params_to_optimize)
+
+  for config_name in config_names:
+    config_json = data_access.AudioProcConfigFile.Load(
+        os.path.join(config_dir, config_name + ".json"))
+    scores = {}
+    data_cell = data_frame[data_frame.apm_config == config_name]
+    for score_name in score_names:
+      data_cell_scores = data_cell[data_cell.eval_score_name ==
+                                   score_name].score
+      scores[score_name] = sum(data_cell_scores) / len(data_cell_scores)
+      scores[score_name] /= normalization_constants[score_name]
+
+    result = {'scores': scores, 'params': {}}
+    config_optimize_params = {}
+    for param in params:
+      if param in params_to_optimize:
+        config_optimize_params[param] = config_json['-' + param]
+      else:
+        result['params'][param] = config_json['-' + param]
+
+    current_param_combination = param_combination(  # pylint: disable=star-args
+        **config_optimize_params)
+    results[current_param_combination].append(result)
+  return results
+
+
+def _FindOptimalParameter(configs_and_scores, score_weighting):
+  """Finds the config producing the minimal weighted score.
+
+  Args:
+    configs_and_scores: structure of the form returned by
+                        _ConfigurationAndScores.
+    score_weighting: a function to weight together all score values of
+                     the form [{params: {param1: value1, ...}, scores:
+                     {score1: value1, ...}}] into a numeric value.
+
+  Returns:
+    The config that has the smallest value of |score_weighting| applied
+    to its scores.
+  """
+  min_score = float('+inf')
+  best_params = None
+  for config in configs_and_scores:
+    scores_and_params = configs_and_scores[config]
+    current_score = score_weighting(scores_and_params)
+    if current_score < min_score:
+      min_score = current_score
+      best_params = config
+    logging.debug("Score: %f", current_score)
+    logging.debug("Config: %s", str(config))
+  return best_params
+
+
+def _ExampleWeighting(scores_and_configs):
+  """Example argument to `_FindOptimalParameter`.
+
+  Args:
+    scores_and_configs: a list of configs and scores, in the form
+                        described in _FindOptimalParameter.
+
+  Returns:
+    Numeric value, the sum of all scores.
+  """
+  res = 0
+  for score_config in scores_and_configs:
+    res += sum(score_config['scores'].values())
+  return res
+
+
+def main():
+  # Init.
+  # TODO(alessiob): INFO once debugged.
+  logging.basicConfig(level=logging.DEBUG)
+  parser = _InstanceArgumentsParser()
+  args = parser.parse_args()
+
+  # Get the scores.
+  src_path = collect_data.ConstructSrcPath(args)
+  logging.debug('Src path <%s>', src_path)
+  scores_data_frame = collect_data.FindScores(src_path, args)
+  all_scores = _ConfigurationAndScores(scores_data_frame,
+                                       args.params,
+                                       args.params_not_to_optimize,
+                                       args.config_dir)
+
+  opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting)
+
+  logging.info('Optimal parameter combination: <%s>', opt_param)
+  logging.info('Its score values: <%s>', all_scores[opt_param])
+
+
+if __name__ == "__main__":
+  main()
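To make the grouping above concrete, the following illustrative-only snippet shows the shape of the dictionary `_ConfigurationAndScores()` returns when one parameter is optimized while another is held fixed; all parameter names, score names and values below are made up and are not part of the change.

```
# Illustrative only; parameter/score names and values are invented.
# Hypothetical invocation (flags as defined above):
#   ./apm_quality_assessment_optimize.py -o output/ \
#       -p agc_gain noise_suppression -z noise_suppression
import collections

ParamCombination = collections.namedtuple('ParamCombination', ['agc_gain'])

example_results = {
    ParamCombination(agc_gain='5'): [
        # One entry per config sharing agc_gain='5' but differing in the
        # parameters listed under --params_not_to_optimize.
        {'params': {'noise_suppression': 'on'},
         'scores': {'polqa': 0.93, 'audio_level': 0.88}},
        {'params': {'noise_suppression': 'off'},
         'scores': {'polqa': 0.85, 'audio_level': 0.91}},
    ],
}

# _ExampleWeighting() reduces each list to the sum of all score values;
# _FindOptimalParameter() then keeps the key with the smallest result.
```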
@@ -0,0 +1,244 @@
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Imports a filtered subset of the scores and configurations computed
+by apm_quality_assessment.py into a pandas data frame.
+"""
+
+import argparse
+import glob
+import logging
+import os
+import re
+import sys
+
+try:
+  import pandas as pd
+except ImportError:
+  logging.critical('Cannot import the third-party Python package pandas')
+  sys.exit(1)
+
+from . import data_access as data_access
+from . import simulation as sim
+
+# Compiled regular expressions used to extract score descriptors.
+RE_CONFIG_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixApmConfig() + r'(.+)')
+RE_CAPTURE_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixCapture() + r'(.+)')
+RE_RENDER_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixRender() + r'(.+)')
+RE_ECHO_SIM_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixEchoSimulator() + r'(.+)')
+RE_TEST_DATA_GEN_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + r'(.+)')
+RE_TEST_DATA_GEN_PARAMS = re.compile(
+    sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + r'(.+)')
+RE_SCORE_NAME = re.compile(
+    sim.ApmModuleSimulator.GetPrefixScore() + r'(.+)(\..+)')
+
+
+def InstanceArgumentsParser():
+  """Arguments parser factory.
+  """
+  parser = argparse.ArgumentParser(description=(
+      'Override this description in a user script by changing'
+      ' `parser.description` of the returned parser.'))
+
+  parser.add_argument('-o', '--output_dir', required=True,
+                      help=('the same base path used with the '
+                            'apm_quality_assessment tool'))
+
+  parser.add_argument('-f', '--filename_suffix',
+                      help=('suffix of the exported file'))
+
+  parser.add_argument('-c', '--config_names', type=re.compile,
+                      help=('regular expression to filter the APM configuration'
+                            ' names'))
+
+  parser.add_argument('-i', '--capture_names', type=re.compile,
+                      help=('regular expression to filter the capture signal '
+                            'names'))
+
+  parser.add_argument('-r', '--render_names', type=re.compile,
+                      help=('regular expression to filter the render signal '
+                            'names'))
+
+  parser.add_argument('-e', '--echo_simulator_names', type=re.compile,
+                      help=('regular expression to filter the echo simulator '
+                            'names'))
+
+  parser.add_argument('-t', '--test_data_generators', type=re.compile,
+                      help=('regular expression to filter the test data '
+                            'generator names'))
+
+  parser.add_argument('-s', '--eval_scores', type=re.compile,
+                      help=('regular expression to filter the evaluation score '
+                            'names'))
+
+  return parser
+
+
+def _GetScoreDescriptors(score_filepath):
+  """Extracts a score descriptor from the given score file path.
+
+  Args:
+    score_filepath: path to the score file.
+
+  Returns:
+    A tuple of strings (APM configuration name, capture audio track name,
+    render audio track name, echo simulator name, test data generator name,
+    test data generator parameters as string, evaluation score name).
+  """
+  fields = score_filepath.split(os.sep)[-7:]
+  extract_name = lambda index, reg_expr: (
+      reg_expr.match(fields[index]).groups(0)[0])
+  return (
+      extract_name(0, RE_CONFIG_NAME),
+      extract_name(1, RE_CAPTURE_NAME),
+      extract_name(2, RE_RENDER_NAME),
+      extract_name(3, RE_ECHO_SIM_NAME),
+      extract_name(4, RE_TEST_DATA_GEN_NAME),
+      extract_name(5, RE_TEST_DATA_GEN_PARAMS),
+      extract_name(6, RE_SCORE_NAME),
+  )
+
+
+def _ExcludeScore(config_name, capture_name, render_name, echo_simulator_name,
+                  test_data_gen_name, score_name, args):
+  """Decides whether excluding a score.
+
+  A set of optional regular expressions in args is used to determine if the
+  score should be excluded (depending on its |*_name| descriptors).
+
+  Args:
+    config_name: APM configuration name.
+    capture_name: capture audio track name.
+    render_name: render audio track name.
+    echo_simulator_name: echo simulator name.
+    test_data_gen_name: test data generator name.
+    score_name: evaluation score name.
+    args: parsed arguments.
+
+  Returns:
+    A boolean.
+  """
+  value_regexpr_pairs = [
+      (config_name, args.config_names),
+      (capture_name, args.capture_names),
+      (render_name, args.render_names),
+      (echo_simulator_name, args.echo_simulator_names),
+      (test_data_gen_name, args.test_data_generators),
+      (score_name, args.eval_scores),
+  ]
+
+  # Score accepted if each value matches the corresponding regular expression.
+  for value, regexpr in value_regexpr_pairs:
+    if regexpr is None:
+      continue
+    if not regexpr.match(value):
+      return True
+
+  return False
+
+
+def FindScores(src_path, args):
+  """Given a search path, find scores and return a DataFrame object.
+
+  Args:
+    src_path: Search path pattern.
+    args: parsed arguments.
+
+  Returns:
+    A DataFrame object.
+  """
+  # Get scores.
+  scores = []
+  for score_filepath in glob.iglob(src_path):
+    # Extract score descriptor fields from the path.
+    (config_name,
+     capture_name,
+     render_name,
+     echo_simulator_name,
+     test_data_gen_name,
+     test_data_gen_params,
+     score_name) = _GetScoreDescriptors(score_filepath)
+
+    # Ignore the score if required.
+    if _ExcludeScore(
+        config_name,
+        capture_name,
+        render_name,
+        echo_simulator_name,
+        test_data_gen_name,
+        score_name,
+        args):
+      logging.info(
+          'ignored score: %s %s %s %s %s %s',
+          config_name,
+          capture_name,
+          render_name,
+          echo_simulator_name,
+          test_data_gen_name,
+          score_name)
+      continue
+
+    # Read metadata and score.
+    metadata = data_access.Metadata.LoadAudioTestDataPaths(
+        os.path.split(score_filepath)[0])
+    score = data_access.ScoreFile.Load(score_filepath)
+
+    # Add a score with its descriptor fields.
+    scores.append((
+        metadata['clean_capture_input_filepath'],
+        metadata['echo_free_capture_filepath'],
+        metadata['echo_filepath'],
+        metadata['render_filepath'],
+        metadata['capture_filepath'],
+        metadata['apm_output_filepath'],
+        metadata['apm_reference_filepath'],
+        config_name,
+        capture_name,
+        render_name,
+        echo_simulator_name,
+        test_data_gen_name,
+        test_data_gen_params,
+        score_name,
+        score,
+    ))
+
+  return pd.DataFrame(
+      data=scores,
+      columns=(
+          'clean_capture_input_filepath',
+          'echo_free_capture_filepath',
+          'echo_filepath',
+          'render_filepath',
+          'capture_filepath',
+          'apm_output_filepath',
+          'apm_reference_filepath',
+          'apm_config',
+          'capture',
+          'render',
+          'echo_simulator',
+          'test_data_gen',
+          'test_data_gen_params',
+          'eval_score_name',
+          'score',
+      ))
+
+
+def ConstructSrcPath(args):
+  return os.path.join(
+      args.output_dir,
+      sim.ApmModuleSimulator.GetPrefixApmConfig() + '*',
+      sim.ApmModuleSimulator.GetPrefixCapture() + '*',
+      sim.ApmModuleSimulator.GetPrefixRender() + '*',
+      sim.ApmModuleSimulator.GetPrefixEchoSimulator() + '*',
+      sim.ApmModuleSimulator.GetPrefixTestDataGenerator() + '*',
+      sim.ApmModuleSimulator.GetPrefixTestDataGeneratorParameters() + '*',
+      sim.ApmModuleSimulator.GetPrefixScore() + '*')
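The module above is meant to be shared by the user-facing scripts. The sketch below is not part of the change; it only illustrates the intended usage pattern, mirroring what apm_quality_assessment_export.py and apm_quality_assessment_optimize.py do with the functions defined above.

```
# Sketch of a user script built on quality_assessment.collect_data; the
# printed columns exist in the DataFrame returned by FindScores().
import logging

import quality_assessment.collect_data as collect_data


def main():
  parser = collect_data.InstanceArgumentsParser()
  parser.description = 'Loads filtered APM-QA scores into a pandas DataFrame.'
  args = parser.parse_args()

  src_path = collect_data.ConstructSrcPath(args)
  logging.debug('Src path <%s>', src_path)

  # One row per score file that passes the regex filters in args.
  scores_data_frame = collect_data.FindScores(src_path, args)
  print(scores_data_frame[['apm_config', 'eval_score_name', 'score']])


if __name__ == '__main__':
  main()
```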
@@ -6,6 +6,7 @@
 # in the file PATENTS. All contributing project authors may
 # be found in the AUTHORS file in the root of the source tree.
 
+import functools
 import hashlib
 import os
 import re
@@ -79,7 +80,8 @@ class HtmlExport(object):
 
   def _BuildBody(self):
     """Builds the content of the <body> section."""
-    score_names = self._scores_data_frame.eval_score_name.unique().tolist()
+    score_names = self._scores_data_frame['eval_score_name'].drop_duplicates(
+        ).values.tolist()
 
     html = [
         ('<div class="mdl-layout mdl-js-layout mdl-layout--fixed-header '
@@ -178,7 +180,7 @@ class HtmlExport(object):
         score_name + test_data_gen + test_data_gen_params + apm_config)
     if stats['count'] == 1:
       # Show the only available score.
-      item_id = hashlib.md5(items_id_prefix).hexdigest()
+      item_id = hashlib.md5(items_id_prefix.encode('utf-8')).hexdigest()
       html.append('<div id="single-value-{0}">{1:f}</div>'.format(
           item_id, scores['score'].mean()))
       html.append('<div class="mdl-tooltip" data-mdl-for="single-value-{}">{}'
@@ -186,7 +188,8 @@ class HtmlExport(object):
     else:
       # Show stats.
       for stat_name in ['min', 'max', 'mean', 'std dev']:
-        item_id = hashlib.md5(items_id_prefix + stat_name).hexdigest()
+        item_id = hashlib.md5(
+            (items_id_prefix + stat_name).encode('utf-8')).hexdigest()
         html.append('<div id="stats-{0}">{1:f}</div>'.format(
             item_id, stats[stat_name]))
         html.append('<div class="mdl-tooltip" data-mdl-for="stats-{}">{}'
@@ -289,7 +292,7 @@ class HtmlExport(object):
     masks.append(self._scores_data_frame.test_data_gen == test_data_gen)
     masks.append(
         self._scores_data_frame.test_data_gen_params == test_data_gen_params)
-    mask = reduce((lambda i1, i2: i1 & i2), masks)
+    mask = functools.reduce((lambda i1, i2: i1 & i2), masks)
     del masks
     return self._scores_data_frame[mask]
 
@@ -302,7 +305,7 @@ class HtmlExport(object):
     masks.append(scores.capture == capture)
     masks.append(scores.render == render)
     masks.append(scores.echo_simulator == echo_simulator)
-    mask = reduce((lambda i1, i2: i1 & i2), masks)
+    mask = functools.reduce((lambda i1, i2: i1 & i2), masks)
    del masks
 
     sliced_data = scores[mask]
@@ -333,7 +336,7 @@ class HtmlExport(object):
     return 'score-stats-dialog-' + hashlib.md5(
         'score-stats-inspector-{}-{}-{}-{}'.format(
             score_name, apm_config, test_data_gen,
-            test_data_gen_params)).hexdigest()
+            test_data_gen_params).encode('utf-8')).hexdigest()
 
   @classmethod
   def _Save(cls, output_filepath, html):
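The export.py edits above are Python 3 compatibility fixes. The short snippet below is not part of the change; it only illustrates the two behaviors they account for: reduce() now lives in functools, and hashlib.md5() accepts only bytes.

```
# Not part of the change; shows the Python 3 behaviors addressed above.
import functools
import hashlib

# reduce() is no longer a builtin in Python 3; it lives in functools.
mask = functools.reduce(lambda i1, i2: i1 & i2, [0b111, 0b110, 0b011])
assert mask == 0b010

# hashlib.md5() rejects str in Python 3, so the id strings are encoded first.
item_id = hashlib.md5('single-value-example'.encode('utf-8')).hexdigest()
assert len(item_id) == 32
```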