diff options
| author | Mike Frysinger <vapier@google.com> | 2021-02-16 01:45:39 -0500 | 
|---|---|---|
| committer | Mike Frysinger <vapier@google.com> | 2021-02-22 22:51:34 +0000 | 
| commit | 7c871163c8803e812998e5b2296e3bbb30b1367f (patch) | |
| tree | bbd137a70f4237e1c7837f3daeb9e0844326b051 | |
| parent | 6a2400a4d097b6e510dc9b8ec06283517b9ca3ad (diff) | |
| download | git-repo-7c871163c8803e812998e5b2296e3bbb30b1367f.tar.gz | |
status: improve parallel execution stability
The status command runs a bunch of jobs in parallel, and each one
is responsible for writing to stdout directly.  When running many
noisy jobs in parallel, output can get intermingled.  Pass down a
StringIO buffer for writing to so we can return the entire output
as a string so the main job can handle displaying it.  This fixes
interleaved output as well as making the output stable: we always
display results in the same project order now.  By switching from
map to imap, this ends up not really adding any overhead.
Bug: https://crbug.com/gerrit/12231
Change-Id: Ic18b07c8074c046ff36e306eb8d392fb34fb6eca
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297242
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Chris Mcdonald <cjmcdonald@google.com>
| -rw-r--r-- | command.py | 9 | ||||
| -rw-r--r-- | subcmds/branches.py | 10 | ||||
| -rw-r--r-- | subcmds/status.py | 16 | 
3 files changed, 22 insertions, 13 deletions
| @@ -23,6 +23,15 @@ from error import NoSuchProjectError | |||
| 23 | from error import InvalidProjectGroupsError | 23 | from error import InvalidProjectGroupsError | 
| 24 | 24 | ||
| 25 | 25 | ||
| 26 | # Number of projects to submit to a single worker process at a time. | ||
| 27 | # This number represents a tradeoff between the overhead of IPC and finer | ||
| 28 | # grained opportunity for parallelism. This particular value was chosen by | ||
| 29 | # iterating through powers of two until the overall performance no longer | ||
| 30 | # improved. The performance of this batch size is not a function of the | ||
| 31 | # number of cores on the system. | ||
| 32 | WORKER_BATCH_SIZE = 32 | ||
| 33 | |||
| 34 | |||
| 26 | # How many jobs to run in parallel by default? This assumes the jobs are | 35 | # How many jobs to run in parallel by default? This assumes the jobs are | 
| 27 | # largely I/O bound and do not hit the network. | 36 | # largely I/O bound and do not hit the network. | 
| 28 | DEFAULT_LOCAL_JOBS = min(os.cpu_count(), 8) | 37 | DEFAULT_LOCAL_JOBS = min(os.cpu_count(), 8) | 
| diff --git a/subcmds/branches.py b/subcmds/branches.py index 9665e85f..d5ea580c 100644 --- a/subcmds/branches.py +++ b/subcmds/branches.py | |||
| @@ -16,15 +16,7 @@ import itertools | |||
| 16 | import multiprocessing | 16 | import multiprocessing | 
| 17 | import sys | 17 | import sys | 
| 18 | from color import Coloring | 18 | from color import Coloring | 
| 19 | from command import Command, DEFAULT_LOCAL_JOBS | 19 | from command import Command, DEFAULT_LOCAL_JOBS, WORKER_BATCH_SIZE | 
| 20 | |||
| 21 | # Number of projects to submit to a single worker process at a time. | ||
| 22 | # This number represents a tradeoff between the overhead of IPC and finer | ||
| 23 | # grained opportunity for parallelism. This particular value was chosen by | ||
| 24 | # iterating through powers of two until the overall performance no longer | ||
| 25 | # improved. The performance of this batch size is not a function of the | ||
| 26 | # number of cores on the system. | ||
| 27 | WORKER_BATCH_SIZE = 32 | ||
| 28 | 20 | ||
| 29 | 21 | ||
| 30 | class BranchColoring(Coloring): | 22 | class BranchColoring(Coloring): | 
| diff --git a/subcmds/status.py b/subcmds/status.py index f0f2e034..6c8e22e5 100644 --- a/subcmds/status.py +++ b/subcmds/status.py | |||
| @@ -14,10 +14,11 @@ | |||
| 14 | 14 | ||
| 15 | import functools | 15 | import functools | 
| 16 | import glob | 16 | import glob | 
| 17 | import io | ||
| 17 | import multiprocessing | 18 | import multiprocessing | 
| 18 | import os | 19 | import os | 
| 19 | 20 | ||
| 20 | from command import DEFAULT_LOCAL_JOBS, PagedCommand | 21 | from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE | 
| 21 | 22 | ||
| 22 | from color import Coloring | 23 | from color import Coloring | 
| 23 | import platform_utils | 24 | import platform_utils | 
| @@ -99,7 +100,9 @@ the following meanings: | |||
| 99 | Returns: | 100 | Returns: | 
| 100 | The status of the project. | 101 | The status of the project. | 
| 101 | """ | 102 | """ | 
| 102 | return project.PrintWorkTreeStatus(quiet=quiet) | 103 | buf = io.StringIO() | 
| 104 | ret = project.PrintWorkTreeStatus(quiet=quiet, output_redir=buf) | ||
| 105 | return (ret, buf.getvalue()) | ||
| 103 | 106 | ||
| 104 | def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring): | 107 | def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring): | 
| 105 | """find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'""" | 108 | """find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'""" | 
| @@ -128,8 +131,13 @@ the following meanings: | |||
| 128 | counter += 1 | 131 | counter += 1 | 
| 129 | else: | 132 | else: | 
| 130 | with multiprocessing.Pool(opt.jobs) as pool: | 133 | with multiprocessing.Pool(opt.jobs) as pool: | 
| 131 | states = pool.map(functools.partial(self._StatusHelper, opt.quiet), all_projects) | 134 | states = pool.imap(functools.partial(self._StatusHelper, opt.quiet), | 
| 132 | counter += states.count('CLEAN') | 135 | all_projects, chunksize=WORKER_BATCH_SIZE) | 
| 136 | for (state, output) in states: | ||
| 137 | if output: | ||
| 138 | print(output, end='') | ||
| 139 | if state == 'CLEAN': | ||
| 140 | counter += 1 | ||
| 133 | if not opt.quiet and len(all_projects) == counter: | 141 | if not opt.quiet and len(all_projects) == counter: | 
| 134 | print('nothing to commit (working directory clean)') | 142 | print('nothing to commit (working directory clean)') | 
| 135 | 143 | ||
