prune: add --jobs support

Use multiprocessing to run in parallel. When operating on multiple projects, this can greatly speed things up. Across 1000 repos, it goes from ~10sec to ~4sec with the default -j8.

This only does a simple conversion over to get an easy speedup. It is currently written to collect all results before displaying them. If we refactored this module more, we could have it display results as they came in.

Change-Id: I5caf4ca51df0b7f078f0db104ae5232268482c1c
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/298643
Reviewed-by: Chris Mcdonald <cjmcdonald@google.com>
Tested-by: Mike Frysinger <vapier@google.com>
author: Mike Frysinger <vapier@google.com> 2021-02-27 15:31:58 -0500
committer: Mike Frysinger <vapier@google.com> 2021-03-31 16:28:24 +0000
commit: bec4fe8aa39cdf9d1a67bfba8a31b3826f9ff197 (patch)
tree: 4a4f803bb1e978457e4a25e4de1e4c641fa1b7de
parent: ddab0604eee41e26572f0cf9f3fd5ff7a0637594 (diff)
download: git-repo-bec4fe8aa39cdf9d1a67bfba8a31b3826f9ff197.tar.gz
1 files changed, 25 insertions, 4 deletions
diff --git a/subcmds/prune.py b/subcmds/prune.py
index 8cad8122..4084c8b6 100644
--- a/subcmds/prune.py
+++ b/subcmds/prune.py
@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import itertools
+import multiprocessing
 from color import Coloring
-from command import PagedCommand
+from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE
 class Prune(PagedCommand):
@@ -22,11 +25,29 @@ class Prune(PagedCommand):
  helpUsage = """
 %prog [<project>...]
 """
+  PARALLEL_JOBS = DEFAULT_LOCAL_JOBS
+  def _ExecuteOne(self, project):
+    """Process one project."""
+    return project.PruneHeads()
  def Execute(self, opt, args):
-    all_branches = []
+    projects = self.GetProjects(args)
-    for project in self.GetProjects(args):
-      all_branches.extend(project.PruneHeads())
+    # NB: Should be able to refactor this module to display summary as results
+    # come back from children.
+    def _ProcessResults(results):
+      return list(itertools.chain.from_iterable(results))
+    # NB: Multiprocessing is heavy, so don't spin it up for one job.
+    if len(projects) == 1 or opt.jobs == 1:
+      all_branches = _ProcessResults(self._ExecuteOne(x) for x in projects)
+    else:
+      with multiprocessing.Pool(opt.jobs) as pool:
+        results = pool.imap(
+            self._ExecuteOne, projects,
+            chunksize=WORKER_BATCH_SIZE)
+        all_branches = _ProcessResults(results)
    if not all_branches:
      return
author	Mike Frysinger <vapier@google.com>	2021-02-27 15:31:58 -0500
committer	Mike Frysinger <vapier@google.com>	2021-03-31 16:28:24 +0000
commit	bec4fe8aa39cdf9d1a67bfba8a31b3826f9ff197 (patch)
tree	4a4f803bb1e978457e4a25e4de1e4c641fa1b7de
parent	ddab0604eee41e26572f0cf9f3fd5ff7a0637594 (diff)
download	git-repo-bec4fe8aa39cdf9d1a67bfba8a31b3826f9ff197.tar.gz

diff --git a/subcmds/prune.py b/subcmds/prune.py
index 8cad8122..4084c8b6 100644
--- a/subcmds/prune.py
+++ b/subcmds/prune.py
@@ -12,8 +12,11 @@
12	# See the License for the specific language governing permissions and	12	# See the License for the specific language governing permissions and
13	# limitations under the License.	13	# limitations under the License.
14		14
		15	import itertools
		16	import multiprocessing
		17
15	from color import Coloring	18	from color import Coloring
16	from command import PagedCommand	19	from command import DEFAULT_LOCAL_JOBS, PagedCommand, WORKER_BATCH_SIZE
17		20
18		21
19	class Prune(PagedCommand):	22	class Prune(PagedCommand):
@@ -22,11 +25,29 @@ class Prune(PagedCommand):
22	helpUsage = """	25	helpUsage = """
23	%prog [<project>...]	26	%prog [<project>...]
24	"""	27	"""
		28	PARALLEL_JOBS = DEFAULT_LOCAL_JOBS
		29
		30	def _ExecuteOne(self, project):
		31	"""Process one project."""
		32	return project.PruneHeads()
25		33
26	def Execute(self, opt, args):	34	def Execute(self, opt, args):
27	all_branches = []	35	projects = self.GetProjects(args)
28	for project in self.GetProjects(args):	36
29	all_branches.extend(project.PruneHeads())	37	# NB: Should be able to refactor this module to display summary as results
		38	# come back from children.
		39	def _ProcessResults(results):
		40	return list(itertools.chain.from_iterable(results))
		41
		42	# NB: Multiprocessing is heavy, so don't spin it up for one job.
		43	if len(projects) == 1 or opt.jobs == 1:
		44	all_branches = _ProcessResults(self._ExecuteOne(x) for x in projects)
		45	else:
		46	with multiprocessing.Pool(opt.jobs) as pool:
		47	results = pool.imap(
		48	self._ExecuteOne, projects,
		49	chunksize=WORKER_BATCH_SIZE)
		50	all_branches = _ProcessResults(results)
30		51
31	if not all_branches:	52	if not all_branches:
32	return	53	return