diff options
| author | Chris McDonald <cjmcdonald@google.com> | 2020-12-09 14:27:59 -0700 |
|---|---|---|
| committer | Mike Frysinger <vapier@google.com> | 2020-12-14 23:35:12 +0000 |
| commit | 8add62325dbe4df60cde1af6b093d99e79685140 (patch) | |
| tree | f9433863c843d5c36f1fe3c26c9f20e94c415ce5 | |
| parent | 974774761c5d11378b987d6f195bd057b81dba47 (diff) | |
| download | git-repo-8add62325dbe4df60cde1af6b093d99e79685140.tar.gz | |
Add parallelism to 'branches' command
Spread the operation of querying which local branches exist across a
pool of processes and build the name map of projects -> branches as
these tasks finish rather than blocking on the entire query. The search
operations are submitted in batches to reduce the overhead of interprocess
communication. The value of the `chunksize` argument, which controls this
batch size, was selected by stepping through powers of two until a larger
value no longer made the command faster.
Change-Id: Ie3d7f799ee8e83e5058536caf53e2979175408b7
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/291342
Tested-by: Chris Mcdonald <cjmcdonald@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | subcmds/branches.py | 46 |
2 files changed, 43 insertions, 4 deletions
| @@ -7,6 +7,7 @@ __pycache__ | |||
| 7 | .repopickle_* | 7 | .repopickle_* |
| 8 | /repoc | 8 | /repoc |
| 9 | /.tox | 9 | /.tox |
| 10 | /.venv | ||
| 10 | 11 | ||
| 11 | # PyCharm related | 12 | # PyCharm related |
| 12 | /.idea/ | 13 | /.idea/ |
diff --git a/subcmds/branches.py b/subcmds/branches.py index 9709f7f0..2b1f8075 100644 --- a/subcmds/branches.py +++ b/subcmds/branches.py | |||
| @@ -15,10 +15,20 @@ | |||
| 15 | # limitations under the License. | 15 | # limitations under the License. |
| 16 | 16 | ||
| 17 | from __future__ import print_function | 17 | from __future__ import print_function |
| 18 | import itertools | ||
| 19 | import multiprocessing | ||
| 18 | import sys | 20 | import sys |
| 19 | from color import Coloring | 21 | from color import Coloring |
| 20 | from command import Command | 22 | from command import Command |
| 21 | 23 | ||
| 24 | # Number of projects to submit to a single worker process at a time. | ||
| 25 | # This number represents a tradeoff between the overhead of IPC and finer | ||
| 26 | # grained opportunity for parallelism. This particular value was chosen by | ||
| 27 | # iterating through powers of two until the overall performance no longer | ||
| 28 | # improved. The performance of this batch size is not a function of the | ||
| 29 | # number of cores on the system. | ||
| 30 | WORKER_BATCH_SIZE = 32 | ||
| 31 | |||
| 22 | 32 | ||
| 23 | class BranchColoring(Coloring): | 33 | class BranchColoring(Coloring): |
| 24 | def __init__(self, config): | 34 | def __init__(self, config): |
| @@ -97,20 +107,32 @@ is shown, then the branch appears in all projects. | |||
| 97 | 107 | ||
| 98 | """ | 108 | """ |
| 99 | 109 | ||
| 110 | def _Options(self, p): | ||
| 111 | """Add flags to CLI parser for this subcommand.""" | ||
| 112 | default_jobs = min(multiprocessing.cpu_count(), 8) | ||
| 113 | p.add_option( | ||
| 114 | '-j', | ||
| 115 | '--jobs', | ||
| 116 | type=int, | ||
| 117 | default=default_jobs, | ||
| 118 | help='Number of worker processes to spawn ' | ||
| 119 | '(default: %s)' % default_jobs) | ||
| 120 | |||
| 100 | def Execute(self, opt, args): | 121 | def Execute(self, opt, args): |
| 101 | projects = self.GetProjects(args) | 122 | projects = self.GetProjects(args) |
| 102 | out = BranchColoring(self.manifest.manifestProject.config) | 123 | out = BranchColoring(self.manifest.manifestProject.config) |
| 103 | all_branches = {} | 124 | all_branches = {} |
| 104 | project_cnt = len(projects) | 125 | project_cnt = len(projects) |
| 126 | with multiprocessing.Pool(processes=opt.jobs) as pool: | ||
| 127 | project_branches = pool.imap_unordered( | ||
| 128 | expand_project_to_branches, projects, chunksize=WORKER_BATCH_SIZE) | ||
| 105 | 129 | ||
| 106 | for project in projects: | 130 | for name, b in itertools.chain.from_iterable(project_branches): |
| 107 | for name, b in project.GetBranches().items(): | ||
| 108 | b.project = project | ||
| 109 | if name not in all_branches: | 131 | if name not in all_branches: |
| 110 | all_branches[name] = BranchInfo(name) | 132 | all_branches[name] = BranchInfo(name) |
| 111 | all_branches[name].add(b) | 133 | all_branches[name].add(b) |
| 112 | 134 | ||
| 113 | names = list(sorted(all_branches)) | 135 | names = sorted(all_branches) |
| 114 | 136 | ||
| 115 | if not names: | 137 | if not names: |
| 116 | print(' (no branches)', file=sys.stderr) | 138 | print(' (no branches)', file=sys.stderr) |
| @@ -180,3 +202,19 @@ is shown, then the branch appears in all projects. | |||
| 180 | else: | 202 | else: |
| 181 | out.write(' in all projects') | 203 | out.write(' in all projects') |
| 182 | out.nl() | 204 | out.nl() |
| 205 | |||
| 206 | |||
| 207 | def expand_project_to_branches(project): | ||
| 208 | """Expands a project into a list of branch names & associated information. | ||
| 209 | |||
| 210 | Args: | ||
| 211 | project: project.Project | ||
| 212 | |||
| 213 | Returns: | ||
| 214 | List[Tuple[str, git_config.Branch]] | ||
| 215 | """ | ||
| 216 | branches = [] | ||
| 217 | for name, b in project.GetBranches().items(): | ||
| 218 | b.project = project | ||
| 219 | branches.append((name, b)) | ||
| 220 | return branches | ||
