summaryrefslogtreecommitdiffstats
path: root/subcmds/grep.py
diff options
context:
space:
mode:
author Kuang-che Wu <kcwu@google.com> 2024-10-22 21:04:41 +0800
committer LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com> 2024-10-23 23:34:34 +0000
commit 8da4861b3860c505e39341b4135c21f67569e4d8 (patch)
tree 6f300266c91322df0e61953b84381e1f403074a5 /subcmds/grep.py
parent 39ffd9977e2f6cb1ca1757e59173fc93e0eab72c (diff)
download git-repo-8da4861b3860c505e39341b4135c21f67569e4d8.tar.gz
subcmds: reduce multiprocessing serialization overhead
Follow the same approach as 39ffd9977e to reduce serialization overhead.

Below benchmarks are tested with 2.7k projects on my workstation
(warm cache). git tracing is disabled for benchmark.

(seconds)              | v2.48 | v2.48 | this CL | this CL
                       |       | -j32  |         | -j32
-----------------------------------------------------------
with clean tree state:
branches (none)        |   5.6 |   5.9 |     1.0 |     0.9
status (clean)         |  21.3 |   9.4 |    19.4 |     4.7
diff (none)            |   7.6 |   7.2 |     5.7 |     2.2
prune (none)           |   5.7 |   6.1 |     1.3 |     1.2
abandon (none)         |  19.4 |  18.6 |     0.9 |     0.8
upload (none)          |  19.7 |  18.7 |     0.9 |     0.8
forall -c true         |   7.5 |   7.6 |     0.6 |     0.6
forall -c "git log -1" |  11.3 |  11.1 |     0.6 |     0.6

with branches:
start BRANCH --all     |  21.9 |  20.3 |    13.6 |     2.6
checkout BRANCH        |  29.1 |  27.8 |     1.1 |     1.0
branches (2)           |  28.0 |  28.6 |     1.5 |     1.3
abandon BRANCH         |  29.2 |  27.5 |     9.7 |     2.2

Bug: b/371638995
Change-Id: I53989a3d1e43063587b3f52f852b1c2c56b49412
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/440221
Reviewed-by: Josip Sokcevic <sokcevic@google.com>
Tested-by: Kuang-che Wu <kcwu@google.com>
Commit-Queue: Kuang-che Wu <kcwu@google.com>
Diffstat (limited to 'subcmds/grep.py')
-rw-r--r-- subcmds/grep.py | 49
1 files changed, 27 insertions, 22 deletions
diff --git a/subcmds/grep.py b/subcmds/grep.py
index b677b6bd..918651d9 100644
--- a/subcmds/grep.py
+++ b/subcmds/grep.py
@@ -23,7 +23,6 @@ from error import GitError
23from error import InvalidArgumentsError 23from error import InvalidArgumentsError
24from error import SilentRepoExitError 24from error import SilentRepoExitError
25from git_command import GitCommand 25from git_command import GitCommand
26from project import Project
27from repo_logging import RepoLogger 26from repo_logging import RepoLogger
28 27
29 28
@@ -40,7 +39,7 @@ class GrepColoring(Coloring):
40class ExecuteOneResult(NamedTuple): 39class ExecuteOneResult(NamedTuple):
41 """Result from an execute instance.""" 40 """Result from an execute instance."""
42 41
43 project: Project 42 project_idx: int
44 rc: int 43 rc: int
45 stdout: str 44 stdout: str
46 stderr: str 45 stderr: str
@@ -262,8 +261,10 @@ contain a line that matches both expressions:
262 help="Show only file names not containing matching lines", 261 help="Show only file names not containing matching lines",
263 ) 262 )
264 263
265 def _ExecuteOne(self, cmd_argv, project): 264 @classmethod
265 def _ExecuteOne(cls, cmd_argv, project_idx):
266 """Process one project.""" 266 """Process one project."""
267 project = cls.get_parallel_context()["projects"][project_idx]
267 try: 268 try:
268 p = GitCommand( 269 p = GitCommand(
269 project, 270 project,
@@ -274,7 +275,7 @@ contain a line that matches both expressions:
274 verify_command=True, 275 verify_command=True,
275 ) 276 )
276 except GitError as e: 277 except GitError as e:
277 return ExecuteOneResult(project, -1, None, str(e), e) 278 return ExecuteOneResult(project_idx, -1, None, str(e), e)
278 279
279 try: 280 try:
280 error = None 281 error = None
@@ -282,10 +283,12 @@ contain a line that matches both expressions:
282 except GitError as e: 283 except GitError as e:
283 rc = 1 284 rc = 1
284 error = e 285 error = e
285 return ExecuteOneResult(project, rc, p.stdout, p.stderr, error) 286 return ExecuteOneResult(project_idx, rc, p.stdout, p.stderr, error)
286 287
287 @staticmethod 288 @staticmethod
288 def _ProcessResults(full_name, have_rev, opt, _pool, out, results): 289 def _ProcessResults(
290 full_name, have_rev, opt, projects, _pool, out, results
291 ):
289 git_failed = False 292 git_failed = False
290 bad_rev = False 293 bad_rev = False
291 have_match = False 294 have_match = False
@@ -293,9 +296,10 @@ contain a line that matches both expressions:
293 errors = [] 296 errors = []
294 297
295 for result in results: 298 for result in results:
299 project = projects[result.project_idx]
296 if result.rc < 0: 300 if result.rc < 0:
297 git_failed = True 301 git_failed = True
298 out.project("--- project %s ---" % _RelPath(result.project)) 302 out.project("--- project %s ---" % _RelPath(project))
299 out.nl() 303 out.nl()
300 out.fail("%s", result.stderr) 304 out.fail("%s", result.stderr)
301 out.nl() 305 out.nl()
@@ -311,9 +315,7 @@ contain a line that matches both expressions:
311 ): 315 ):
312 bad_rev = True 316 bad_rev = True
313 else: 317 else:
314 out.project( 318 out.project("--- project %s ---" % _RelPath(project))
315 "--- project %s ---" % _RelPath(result.project)
316 )
317 out.nl() 319 out.nl()
318 out.fail("%s", result.stderr.strip()) 320 out.fail("%s", result.stderr.strip())
319 out.nl() 321 out.nl()
@@ -331,13 +333,13 @@ contain a line that matches both expressions:
331 rev, line = line.split(":", 1) 333 rev, line = line.split(":", 1)
332 out.write("%s", rev) 334 out.write("%s", rev)
333 out.write(":") 335 out.write(":")
334 out.project(_RelPath(result.project)) 336 out.project(_RelPath(project))
335 out.write("/") 337 out.write("/")
336 out.write("%s", line) 338 out.write("%s", line)
337 out.nl() 339 out.nl()
338 elif full_name: 340 elif full_name:
339 for line in r: 341 for line in r:
340 out.project(_RelPath(result.project)) 342 out.project(_RelPath(project))
341 out.write("/") 343 out.write("/")
342 out.write("%s", line) 344 out.write("%s", line)
343 out.nl() 345 out.nl()
@@ -381,16 +383,19 @@ contain a line that matches both expressions:
381 cmd_argv.extend(opt.revision) 383 cmd_argv.extend(opt.revision)
382 cmd_argv.append("--") 384 cmd_argv.append("--")
383 385
384 git_failed, bad_rev, have_match, errors = self.ExecuteInParallel( 386 with self.ParallelContext():
385 opt.jobs, 387 self.get_parallel_context()["projects"] = projects
386 functools.partial(self._ExecuteOne, cmd_argv), 388 git_failed, bad_rev, have_match, errors = self.ExecuteInParallel(
387 projects, 389 opt.jobs,
388 callback=functools.partial( 390 functools.partial(self._ExecuteOne, cmd_argv),
389 self._ProcessResults, full_name, have_rev, opt 391 range(len(projects)),
390 ), 392 callback=functools.partial(
391 output=out, 393 self._ProcessResults, full_name, have_rev, opt, projects
392 ordered=True, 394 ),
393 ) 395 output=out,
396 ordered=True,
397 chunksize=1,
398 )
394 399
395 if git_failed: 400 if git_failed:
396 raise GrepCommandError( 401 raise GrepCommandError(