Rewrote perform_action_on_all_files to be parallel.

This speeds up decode for a 720p workload from 25s to 5s on my machine.
I did some benchmarking and it appears we spend the vast majority of
the time on zxing decoding, so now we will bring all processors on
the machine to bear.
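
The core of the change is to fan the per-file action out to a
multiprocessing.Pool and collect the results afterwards. Roughly (a
minimal sketch of the pattern, not the actual code; decode_one_frame and
the file names below are made up):

  import multiprocessing

  def decode_one_frame(file_name):
    # Stand-in for the expensive per-file work (the zxing decode).
    return True

  pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
  pending = [pool.apply_async(decode_one_frame, args=(name,))
             for name in ('frame_0001.yuv', 'frame_0002.yuv')]
  all_ok = all(result.get() for result in pending)
  pool.close()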

BUG=webrtc:4986

Review URL: https://codereview.webrtc.org/1428433002

Cr-Commit-Position: refs/heads/master@{#10443}
Author: phoglund
Date: 2015-10-29 02:22:47 -07:00
Committed by: Commit bot
Parent: 32df5efc6d
Commit: be81fa538d


@@ -7,6 +7,7 @@
 # in the file PATENTS. All contributing project authors may
 # be found in the AUTHORS file in the root of the source tree.
+import multiprocessing
 import os
 import subprocess
 import sys
@@ -64,8 +65,7 @@ def perform_action_on_all_files(directory, file_pattern, file_extension,
   """Function that performs a given action on all files matching a pattern.
   It is assumed that the files are named file_patternxxxx.file_extension, where
-  xxxx are digits. The file names start from
-  file_patern0..start_number>.file_extension.
+  xxxx are digits starting from start_number.
   Args:
     directory(string): The directory where the files live.
@@ -73,24 +73,33 @@
     file_extension(string): The files' extension.
     start_number(int): From where to start to count frames.
     action(function): The action to be performed over the files. Must return
-        False if the action failed, True otherwise.
+        False if the action failed, True otherwise. It should take a file name
+        as the first argument and **kwargs as arguments. The function must be
+        possible to pickle, so it cannot be a bound function (for instance).
   Return:
     (bool): Whether performing the action over all files was successful or not.
   """
   file_prefix = os.path.join(directory, file_pattern)
-  file_exists = True
   file_number = start_number
-  errors = False
-  while file_exists:
+  process_pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
+  results = []
+  while True:
     zero_padded_file_number = zero_pad(file_number)
     file_name = file_prefix + zero_padded_file_number + '.' + file_extension
-    if os.path.isfile(file_name):
-      if not action(file_name=file_name, **kwargs):
-        errors = True
-        break
-      file_number += 1
-    else:
-      file_exists = False
-  return not errors
+    if not os.path.isfile(file_name):
+      break
+    future = process_pool.apply_async(action, args=(file_name,), kwds=kwargs)
+    results.append(future)
+    file_number += 1
+  successful = True
+  for result in results:
+    if not result.get():
+      print "At least one action %s failed for files %sxxxx.%s." % (
+          action, file_pattern, file_extension)
+      successful = False
+  process_pool.close()
+  return successful
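
As the new docstring notes, the action has to be picklable so it can be
handed to the worker processes, which rules out bound methods (and
lambdas). A rough usage sketch under that constraint, assuming the module
is importable as helper_functions; the _frame_is_nonempty action, the
'frames' directory and the frame names are made-up examples:

  import os

  import helper_functions

  def _frame_is_nonempty(file_name, min_size=1):
    # Module-level function: multiprocessing can pickle it by name, unlike
    # a bound method or a lambda. Returns True on success, False otherwise,
    # as perform_action_on_all_files expects.
    return os.path.getsize(file_name) >= min_size

  ok = helper_functions.perform_action_on_all_files(
      directory='frames', file_pattern='frame_', file_extension='yuv',
      start_number=1, action=_frame_is_nonempty, min_size=1)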