diff options
author | Mike Frysinger <vapier@google.com> | 2021-02-24 12:50:30 -0500 |
---|---|---|
committer | Mike Frysinger <vapier@google.com> | 2021-04-01 14:43:19 +0000 |
commit | d246d1fee7f42f2526a20a96597c8f01eda31433 (patch) | |
tree | c61356504d2c6011346183171e3ed87dc4d04529 | |
parent | bec4fe8aa39cdf9d1a67bfba8a31b3826f9ff197 (diff) | |
download | git-repo-d246d1fee7f42f2526a20a96597c8f01eda31433.tar.gz |
grep: add --jobs support
Use multiprocessing to run in parallel. When operating on multiple
projects, this can greatly speed things up. Across 1000 repos, it
goes from ~40sec to ~16sec with the default -j8.
The output processing does not appear to be a significant
bottleneck -- it accounts for <1sec out of the ~16sec runtime. Thus we
leave it in the main thread to simplify the code.
Change-Id: I750b72c7711b0c5d26e65d480738fbaac3a69971
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/297984
Reviewed-by: Chris Mcdonald <cjmcdonald@google.com>
Tested-by: Mike Frysinger <vapier@google.com>
-rw-r--r-- | subcmds/grep.py | 118 |
1 files changed, 72 insertions, 46 deletions
diff --git a/subcmds/grep.py b/subcmds/grep.py index c16d4185..49feaf6b 100644 --- a/subcmds/grep.py +++ b/subcmds/grep.py | |||
@@ -12,10 +12,12 @@ | |||
12 | # See the License for the specific language governing permissions and | 12 | # See the License for the specific language governing permissions and |
13 | # limitations under the License. | 13 | # limitations under the License. |
14 | 14 | ||
15 | import functools | ||
16 | import multiprocessing | ||
15 | import sys | 17 | import sys |
16 | 18 | ||
17 | from color import Coloring | 19 | from color import Coloring |
18 | from command import PagedCommand | 20 | from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE |
19 | from error import GitError | 21 | from error import GitError |
20 | from git_command import GitCommand | 22 | from git_command import GitCommand |
21 | 23 | ||
@@ -61,6 +63,7 @@ contain a line that matches both expressions: | |||
61 | repo grep --all-match -e NODE -e Unexpected | 63 | repo grep --all-match -e NODE -e Unexpected |
62 | 64 | ||
63 | """ | 65 | """ |
66 | PARALLEL_JOBS = DEFAULT_LOCAL_JOBS | ||
64 | 67 | ||
65 | @staticmethod | 68 | @staticmethod |
66 | def _carry_option(_option, opt_str, value, parser): | 69 | def _carry_option(_option, opt_str, value, parser): |
@@ -80,6 +83,7 @@ contain a line that matches both expressions: | |||
80 | pt.append(value) | 83 | pt.append(value) |
81 | 84 | ||
82 | def _Options(self, p): | 85 | def _Options(self, p): |
86 | super()._Options(p) | ||
83 | g = p.add_option_group('Sources') | 87 | g = p.add_option_group('Sources') |
84 | g.add_option('--cached', | 88 | g.add_option('--cached', |
85 | action='callback', callback=self._carry_option, | 89 | action='callback', callback=self._carry_option, |
@@ -152,73 +156,49 @@ contain a line that matches both expressions: | |||
152 | action='callback', callback=self._carry_option, | 156 | action='callback', callback=self._carry_option, |
153 | help='Show only file names not containing matching lines') | 157 | help='Show only file names not containing matching lines') |
154 | 158 | ||
155 | def Execute(self, opt, args): | 159 | def _ExecuteOne(self, cmd_argv, project): |
156 | out = GrepColoring(self.manifest.manifestProject.config) | 160 | """Process one project.""" |
157 | 161 | try: | |
158 | cmd_argv = ['grep'] | 162 | p = GitCommand(project, |
159 | if out.is_on: | 163 | cmd_argv, |
160 | cmd_argv.append('--color') | 164 | bare=False, |
161 | cmd_argv.extend(getattr(opt, 'cmd_argv', [])) | 165 | capture_stdout=True, |
162 | 166 | capture_stderr=True) | |
163 | if '-e' not in cmd_argv: | 167 | except GitError as e: |
164 | if not args: | 168 | return (project, -1, None, str(e)) |
165 | self.Usage() | ||
166 | cmd_argv.append('-e') | ||
167 | cmd_argv.append(args[0]) | ||
168 | args = args[1:] | ||
169 | |||
170 | projects = self.GetProjects(args) | ||
171 | 169 | ||
172 | full_name = False | 170 | return (project, p.Wait(), p.stdout, p.stderr) |
173 | if len(projects) > 1: | ||
174 | cmd_argv.append('--full-name') | ||
175 | full_name = True | ||
176 | |||
177 | have_rev = False | ||
178 | if opt.revision: | ||
179 | if '--cached' in cmd_argv: | ||
180 | print('fatal: cannot combine --cached and --revision', file=sys.stderr) | ||
181 | sys.exit(1) | ||
182 | have_rev = True | ||
183 | cmd_argv.extend(opt.revision) | ||
184 | cmd_argv.append('--') | ||
185 | 171 | ||
172 | @staticmethod | ||
173 | def _ProcessResults(out, full_name, have_rev, results): | ||
186 | git_failed = False | 174 | git_failed = False |
187 | bad_rev = False | 175 | bad_rev = False |
188 | have_match = False | 176 | have_match = False |
189 | 177 | ||
190 | for project in projects: | 178 | for project, rc, stdout, stderr in results: |
191 | try: | 179 | if rc < 0: |
192 | p = GitCommand(project, | ||
193 | cmd_argv, | ||
194 | bare=False, | ||
195 | capture_stdout=True, | ||
196 | capture_stderr=True) | ||
197 | except GitError as e: | ||
198 | git_failed = True | 180 | git_failed = True |
199 | out.project('--- project %s ---' % project.relpath) | 181 | out.project('--- project %s ---' % project.relpath) |
200 | out.nl() | 182 | out.nl() |
201 | out.fail('%s', str(e)) | 183 | out.fail('%s', stderr) |
202 | out.nl() | 184 | out.nl() |
203 | continue | 185 | continue |
204 | 186 | ||
205 | if p.Wait() != 0: | 187 | if rc: |
206 | # no results | 188 | # no results |
207 | # | 189 | if stderr: |
208 | if p.stderr: | 190 | if have_rev and 'fatal: ambiguous argument' in stderr: |
209 | if have_rev and 'fatal: ambiguous argument' in p.stderr: | ||
210 | bad_rev = True | 191 | bad_rev = True |
211 | else: | 192 | else: |
212 | out.project('--- project %s ---' % project.relpath) | 193 | out.project('--- project %s ---' % project.relpath) |
213 | out.nl() | 194 | out.nl() |
214 | out.fail('%s', p.stderr.strip()) | 195 | out.fail('%s', stderr.strip()) |
215 | out.nl() | 196 | out.nl() |
216 | continue | 197 | continue |
217 | have_match = True | 198 | have_match = True |
218 | 199 | ||
219 | # We cut the last element, to avoid a blank line. | 200 | # We cut the last element, to avoid a blank line. |
220 | # | 201 | r = stdout.split('\n') |
221 | r = p.stdout.split('\n') | ||
222 | r = r[0:-1] | 202 | r = r[0:-1] |
223 | 203 | ||
224 | if have_rev and full_name: | 204 | if have_rev and full_name: |
@@ -240,6 +220,52 @@ contain a line that matches both expressions: | |||
240 | for line in r: | 220 | for line in r: |
241 | print(line) | 221 | print(line) |
242 | 222 | ||
223 | return (git_failed, bad_rev, have_match) | ||
224 | |||
225 | def Execute(self, opt, args): | ||
226 | out = GrepColoring(self.manifest.manifestProject.config) | ||
227 | |||
228 | cmd_argv = ['grep'] | ||
229 | if out.is_on: | ||
230 | cmd_argv.append('--color') | ||
231 | cmd_argv.extend(getattr(opt, 'cmd_argv', [])) | ||
232 | |||
233 | if '-e' not in cmd_argv: | ||
234 | if not args: | ||
235 | self.Usage() | ||
236 | cmd_argv.append('-e') | ||
237 | cmd_argv.append(args[0]) | ||
238 | args = args[1:] | ||
239 | |||
240 | projects = self.GetProjects(args) | ||
241 | |||
242 | full_name = False | ||
243 | if len(projects) > 1: | ||
244 | cmd_argv.append('--full-name') | ||
245 | full_name = True | ||
246 | |||
247 | have_rev = False | ||
248 | if opt.revision: | ||
249 | if '--cached' in cmd_argv: | ||
250 | print('fatal: cannot combine --cached and --revision', file=sys.stderr) | ||
251 | sys.exit(1) | ||
252 | have_rev = True | ||
253 | cmd_argv.extend(opt.revision) | ||
254 | cmd_argv.append('--') | ||
255 | |||
256 | process_results = functools.partial( | ||
257 | self._ProcessResults, out, full_name, have_rev) | ||
258 | # NB: Multiprocessing is heavy, so don't spin it up for one job. | ||
259 | if len(projects) == 1 or opt.jobs == 1: | ||
260 | git_failed, bad_rev, have_match = process_results( | ||
261 | self._ExecuteOne(cmd_argv, x) for x in projects) | ||
262 | else: | ||
263 | with multiprocessing.Pool(opt.jobs) as pool: | ||
264 | results = pool.imap( | ||
265 | functools.partial(self._ExecuteOne, cmd_argv), projects, | ||
266 | chunksize=WORKER_BATCH_SIZE) | ||
267 | git_failed, bad_rev, have_match = process_results(results) | ||
268 | |||
243 | if git_failed: | 269 | if git_failed: |
244 | sys.exit(1) | 270 | sys.exit(1) |
245 | elif have_match: | 271 | elif have_match: |