summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Frysinger <vapier@google.com>2021-02-16 01:45:39 -0500
committerMike Frysinger <vapier@google.com>2021-02-22 22:51:34 +0000
commit7c871163c8803e812998e5b2296e3bbb30b1367f (patch)
treebbd137a70f4237e1c7837f3daeb9e0844326b051
parent6a2400a4d097b6e510dc9b8ec06283517b9ca3ad (diff)
downloadgit-repo-7c871163c8803e812998e5b2296e3bbb30b1367f.tar.gz
status: improve parallel execution stability
The status command runs a bunch of jobs in parallel, and each one is responsible for writing to stdout directly. When running many noisy jobs in parallel, output can get intermingled. Pass down a StringIO buffer for writing to so we can return the entire output as a string so the main job can handle displaying it. This fixes interleaved output as well as making the output stable: we always display results in the same project order now. By switching from map to imap, this ends up not really adding any overhead. Bug: https://crbug.com/gerrit/12231 Change-Id: Ic18b07c8074c046ff36e306eb8d392fb34fb6eca Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297242 Tested-by: Mike Frysinger <vapier@google.com> Reviewed-by: Chris Mcdonald <cjmcdonald@google.com>
-rw-r--r--command.py9
-rw-r--r--subcmds/branches.py10
-rw-r--r--subcmds/status.py16
3 files changed, 22 insertions, 13 deletions
diff --git a/command.py b/command.py
index 7737ec71..90bd0021 100644
--- a/command.py
+++ b/command.py
@@ -23,6 +23,15 @@ from error import NoSuchProjectError
23from error import InvalidProjectGroupsError 23from error import InvalidProjectGroupsError
24 24
25 25
26# Number of projects to submit to a single worker process at a time.
27# This number represents a tradeoff between the overhead of IPC and finer
28# grained opportunity for parallelism. This particular value was chosen by
29# iterating through powers of two until the overall performance no longer
30# improved. The performance of this batch size is not a function of the
31# number of cores on the system.
32WORKER_BATCH_SIZE = 32
33
34
26# How many jobs to run in parallel by default? This assumes the jobs are 35# How many jobs to run in parallel by default? This assumes the jobs are
27# largely I/O bound and do not hit the network. 36# largely I/O bound and do not hit the network.
28DEFAULT_LOCAL_JOBS = min(os.cpu_count(), 8) 37DEFAULT_LOCAL_JOBS = min(os.cpu_count(), 8)
diff --git a/subcmds/branches.py b/subcmds/branches.py
index 9665e85f..d5ea580c 100644
--- a/subcmds/branches.py
+++ b/subcmds/branches.py
@@ -16,15 +16,7 @@ import itertools
16import multiprocessing 16import multiprocessing
17import sys 17import sys
18from color import Coloring 18from color import Coloring
19from command import Command, DEFAULT_LOCAL_JOBS 19from command import Command, DEFAULT_LOCAL_JOBS, WORKER_BATCH_SIZE
20
21# Number of projects to submit to a single worker process at a time.
22# This number represents a tradeoff between the overhead of IPC and finer
23# grained opportunity for parallelism. This particular value was chosen by
24# iterating through powers of two until the overall performance no longer
25# improved. The performance of this batch size is not a function of the
26# number of cores on the system.
27WORKER_BATCH_SIZE = 32
28 20
29 21
30class BranchColoring(Coloring): 22class BranchColoring(Coloring):
diff --git a/subcmds/status.py b/subcmds/status.py
index f0f2e034..6c8e22e5 100644
--- a/subcmds/status.py
+++ b/subcmds/status.py
@@ -14,10 +14,11 @@
14 14
15import functools 15import functools
16import glob 16import glob
17import io
17import multiprocessing 18import multiprocessing
18import os 19import os
19 20
20from command import DEFAULT_LOCAL_JOBS, PagedCommand 21from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE
21 22
22from color import Coloring 23from color import Coloring
23import platform_utils 24import platform_utils
@@ -99,7 +100,9 @@ the following meanings:
99 Returns: 100 Returns:
100 The status of the project. 101 The status of the project.
101 """ 102 """
102 return project.PrintWorkTreeStatus(quiet=quiet) 103 buf = io.StringIO()
104 ret = project.PrintWorkTreeStatus(quiet=quiet, output_redir=buf)
105 return (ret, buf.getvalue())
103 106
104 def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring): 107 def _FindOrphans(self, dirs, proj_dirs, proj_dirs_parents, outstring):
105 """find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'""" 108 """find 'dirs' that are present in 'proj_dirs_parents' but not in 'proj_dirs'"""
@@ -128,8 +131,13 @@ the following meanings:
128 counter += 1 131 counter += 1
129 else: 132 else:
130 with multiprocessing.Pool(opt.jobs) as pool: 133 with multiprocessing.Pool(opt.jobs) as pool:
131 states = pool.map(functools.partial(self._StatusHelper, opt.quiet), all_projects) 134 states = pool.imap(functools.partial(self._StatusHelper, opt.quiet),
132 counter += states.count('CLEAN') 135 all_projects, chunksize=WORKER_BATCH_SIZE)
136 for (state, output) in states:
137 if output:
138 print(output, end='')
139 if state == 'CLEAN':
140 counter += 1
133 if not opt.quiet and len(all_projects) == counter: 141 if not opt.quiet and len(all_projects) == counter:
134 print('nothing to commit (working directory clean)') 142 print('nothing to commit (working directory clean)')
135 143