From 8da4861b3860c505e39341b4135c21f67569e4d8 Mon Sep 17 00:00:00 2001
From: Kuang-che Wu
Date: Tue, 22 Oct 2024 21:04:41 +0800
Subject: subcmds: reduce multiprocessing serialization overhead

Follow the same approach as 39ffd9977e to reduce serialization overhead.

The benchmarks below were run with 2.7k projects on my workstation
(warm cache). Git tracing was disabled for the benchmarks.

(seconds)              | v2.48 | v2.48 | this CL | this CL
                       |       |  -j32 |         |    -j32
-----------------------------------------------------------
with clean tree state:
branches (none)        |   5.6 |   5.9 |     1.0 |     0.9
status (clean)         |  21.3 |   9.4 |    19.4 |     4.7
diff (none)            |   7.6 |   7.2 |     5.7 |     2.2
prune (none)           |   5.7 |   6.1 |     1.3 |     1.2
abandon (none)         |  19.4 |  18.6 |     0.9 |     0.8
upload (none)          |  19.7 |  18.7 |     0.9 |     0.8
forall -c true         |   7.5 |   7.6 |     0.6 |     0.6
forall -c "git log -1" |  11.3 |  11.1 |     0.6 |     0.6

with branches:
start BRANCH --all     |  21.9 |  20.3 |    13.6 |     2.6
checkout BRANCH        |  29.1 |  27.8 |     1.1 |     1.0
branches (2)           |  28.0 |  28.6 |     1.5 |     1.3
abandon BRANCH         |  29.2 |  27.5 |     9.7 |     2.2

Bug: b/371638995
Change-Id: I53989a3d1e43063587b3f52f852b1c2c56b49412
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/440221
Reviewed-by: Josip Sokcevic
Tested-by: Kuang-che Wu
Commit-Queue: Kuang-che Wu
---
subcmds/grep.py | 49 +++++++++++++++++++++++++++----------------------
1 file changed, 27 insertions(+), 22 deletions(-)
(limited to 'subcmds/grep.py')
diff --git a/subcmds/grep.py b/subcmds/grep.py index b677b6bd..918651d9 100644 --- a/subcmds/grep.py +++ b/subcmds/grep.py @@ -23,7 +23,6 @@ from error import GitError from error import InvalidArgumentsError from error import SilentRepoExitError from git_command import GitCommand -from project import Project from repo_logging import RepoLogger @@ -40,7 +39,7 @@ class GrepColoring(Coloring): class ExecuteOneResult(NamedTuple): """Result from an execute instance.""" - project: Project + project_idx: int rc: int stdout: str stderr: str @@ -262,8 +261,10 @@ contain a line that 
matches both expressions: help="Show only file names not containing matching lines", ) - def _ExecuteOne(self, cmd_argv, project): + @classmethod + def _ExecuteOne(cls, cmd_argv, project_idx): """Process one project.""" + project = cls.get_parallel_context()["projects"][project_idx] try: p = GitCommand( project, @@ -274,7 +275,7 @@ contain a line that matches both expressions: verify_command=True, ) except GitError as e: - return ExecuteOneResult(project, -1, None, str(e), e) + return ExecuteOneResult(project_idx, -1, None, str(e), e) try: error = None @@ -282,10 +283,12 @@ contain a line that matches both expressions: except GitError as e: rc = 1 error = e - return ExecuteOneResult(project, rc, p.stdout, p.stderr, error) + return ExecuteOneResult(project_idx, rc, p.stdout, p.stderr, error) @staticmethod - def _ProcessResults(full_name, have_rev, opt, _pool, out, results): + def _ProcessResults( + full_name, have_rev, opt, projects, _pool, out, results + ): git_failed = False bad_rev = False have_match = False @@ -293,9 +296,10 @@ contain a line that matches both expressions: errors = [] for result in results: + project = projects[result.project_idx] if result.rc < 0: git_failed = True - out.project("--- project %s ---" % _RelPath(result.project)) + out.project("--- project %s ---" % _RelPath(project)) out.nl() out.fail("%s", result.stderr) out.nl() @@ -311,9 +315,7 @@ contain a line that matches both expressions: ): bad_rev = True else: - out.project( - "--- project %s ---" % _RelPath(result.project) - ) + out.project("--- project %s ---" % _RelPath(project)) out.nl() out.fail("%s", result.stderr.strip()) out.nl() @@ -331,13 +333,13 @@ contain a line that matches both expressions: rev, line = line.split(":", 1) out.write("%s", rev) out.write(":") - out.project(_RelPath(result.project)) + out.project(_RelPath(project)) out.write("/") out.write("%s", line) out.nl() elif full_name: for line in r: - out.project(_RelPath(result.project)) + 
out.project(_RelPath(project)) out.write("/") out.write("%s", line) out.nl() @@ -381,16 +383,19 @@ contain a line that matches both expressions: cmd_argv.extend(opt.revision) cmd_argv.append("--") - git_failed, bad_rev, have_match, errors = self.ExecuteInParallel( - opt.jobs, - functools.partial(self._ExecuteOne, cmd_argv), - projects, - callback=functools.partial( - self._ProcessResults, full_name, have_rev, opt - ), - output=out, - ordered=True, - ) + with self.ParallelContext(): + self.get_parallel_context()["projects"] = projects + git_failed, bad_rev, have_match, errors = self.ExecuteInParallel( + opt.jobs, + functools.partial(self._ExecuteOne, cmd_argv), + range(len(projects)), + callback=functools.partial( + self._ProcessResults, full_name, have_rev, opt, projects + ), + output=out, + ordered=True, + chunksize=1, + ) if git_failed: raise GrepCommandError( -- cgit v1.2.3-54-g00ecf