author     Kuang-che Wu <kcwu@google.com>  2024-10-18 23:32:08 +0800
committer  LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com>  2024-10-23 02:58:45 +0000
commit     39ffd9977e2f6cb1ca1757e59173fc93e0eab72c (patch)
tree       4b2f5f39ce1a20ef06c336cfb87a4269cedfd8d7 /subcmds
parent     584863fb5e3e17ab364de40f80b10ac030b47788 (diff)
download   git-repo-39ffd9977e2f6cb1ca1757e59173fc93e0eab72c.tar.gz
sync: reduce multiprocessing serialization overhead
Background:
- The Manifest object is large (for projects like Android) in terms of
  serialization cost and size (more than 1MB).
- Many Project objects usually share only a few Manifest objects.

Before this CL, Project objects were passed to workers via function
parameters. Function parameters are pickled separately (in chunks); in
other words, manifests were serialized again and again. The major
serialization overhead of "repo sync" was
O(manifest_size * projects / chunksize).

This CL uses the following tricks to reduce serialization overhead.
- All projects are pickled in one invocation. Because Project objects
  share manifests, the pickle library remembers which objects it has
  already seen and avoids the repeated serialization cost.
- Pass the Project objects to workers at worker initialization time,
  and pass a project index as the function parameter instead. The
  number of workers is much smaller than the number of projects.
- Worker init state is shared on Linux (fork based), so it requires
  zero serialization for Project objects.

On Linux (fork based), the serialization overhead is O(projects) ---
one int per project. On Windows (spawn based), the serialization
overhead is O(manifest_size * min(workers, projects)).

Moreover, use chunksize=1 to avoid the chance that some workers are
idle while other workers still have more than one job in their chunk
queue.

Using 2.7k projects as the baseline, a no-op "repo sync" originally
took 31s for fetch and 25s for checkout on my Linux workstation. With
this CL, it takes 12s for fetch and 1s for checkout.

Bug: b/371638995
Change-Id: Ifa22072ea54eacb4a5c525c050d84de371e87caa
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/439921
Tested-by: Kuang-che Wu <kcwu@google.com>
Reviewed-by: Josip Sokcevic <sokcevic@google.com>
Commit-Queue: Kuang-che Wu <kcwu@google.com>
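For readers who want the shape of the trick without repo's own helpers
(ParallelContext/ExecuteInParallel), here is a minimal self-contained
sketch of the pattern the commit message describes: pay the pickling
cost for the big shared objects once per worker (at pool init), send
only small integer indices through the task queue, and use chunksize=1.
The names _init_worker and _fetch_one and the dict-based stand-in
objects are illustrative only, not repo's API.

    import multiprocessing

    # Hypothetical stand-ins for repo's Project/Manifest objects: many
    # small "projects" sharing one large "manifest".
    _worker_projects = None  # populated once per worker process


    def _init_worker(projects):
        # Pool initializer: the big shared list crosses the process
        # boundary once per worker. Under fork (Linux) it is inherited
        # with zero serialization; under spawn (Windows) it is pickled
        # once per worker, and pickle's memo table means the shared
        # manifest inside it is serialized only once.
        global _worker_projects
        _worker_projects = projects


    def _fetch_one(project_idx):
        # Task function: only a small int is pickled per task.
        project = _worker_projects[project_idx]
        return project_idx, len(project["manifest"]["xml"])


    if __name__ == "__main__":
        shared_manifest = {"xml": "x" * 1_000_000}  # ~1MB, shared by all
        projects = [
            {"name": f"project{i}", "manifest": shared_manifest}
            for i in range(2700)
        ]
        with multiprocessing.Pool(
            4, initializer=_init_worker, initargs=(projects,)
        ) as pool:
            # chunksize=1: a slow task never strands queued jobs behind
            # a busy worker.
            for idx, size in pool.imap_unordered(
                _fetch_one, range(len(projects)), chunksize=1
            ):
                pass

Under spawn, each worker deserializes the projects list exactly once,
giving the O(manifest_size * min(workers, projects)) cost quoted above;
passing the objects as task arguments would instead re-pickle the
manifest for every chunk.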
Diffstat (limited to 'subcmds')
-rw-r--r--  subcmds/sync.py  171
1 file changed, 88 insertions, 83 deletions
diff --git a/subcmds/sync.py b/subcmds/sync.py
index bebe18b9..00fee776 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -141,7 +141,7 @@ class _FetchOneResult(NamedTuple):
 
     Attributes:
         success (bool): True if successful.
-        project (Project): The fetched project.
+        project_idx (int): The fetched project index.
         start (float): The starting time.time().
         finish (float): The ending time.time().
         remote_fetched (bool): True if the remote was actually queried.
@@ -149,7 +149,7 @@ class _FetchOneResult(NamedTuple):
 
     success: bool
     errors: List[Exception]
-    project: Project
+    project_idx: int
     start: float
     finish: float
     remote_fetched: bool
@@ -182,14 +182,14 @@ class _CheckoutOneResult(NamedTuple):
 
     Attributes:
         success (bool): True if successful.
-        project (Project): The project.
+        project_idx (int): The project index.
         start (float): The starting time.time().
         finish (float): The ending time.time().
     """
 
     success: bool
     errors: List[Exception]
-    project: Project
+    project_idx: int
     start: float
     finish: float
 
@@ -592,7 +592,8 @@ later is required to fix a server side protocol bug.
             branch = branch[len(R_HEADS) :]
         return branch
 
-    def _GetCurrentBranchOnly(self, opt, manifest):
+    @classmethod
+    def _GetCurrentBranchOnly(cls, opt, manifest):
         """Returns whether current-branch or use-superproject options are
         enabled.
 
@@ -710,7 +711,8 @@ later is required to fix a server side protocol bug.
         if need_unload:
             m.outer_client.manifest.Unload()
 
-    def _FetchProjectList(self, opt, projects):
+    @classmethod
+    def _FetchProjectList(cls, opt, projects):
         """Main function of the fetch worker.
 
         The projects we're given share the same underlying git object store, so
@@ -722,21 +724,23 @@ later is required to fix a server side protocol bug.
             opt: Program options returned from optparse. See _Options().
             projects: Projects to fetch.
         """
-        return [self._FetchOne(opt, x) for x in projects]
+        return [cls._FetchOne(opt, x) for x in projects]
 
-    def _FetchOne(self, opt, project):
+    @classmethod
+    def _FetchOne(cls, opt, project_idx):
         """Fetch git objects for a single project.
 
         Args:
             opt: Program options returned from optparse. See _Options().
-            project: Project object for the project to fetch.
+            project_idx: Project index for the project to fetch.
 
         Returns:
             Whether the fetch was successful.
         """
+        project = cls.get_parallel_context()["projects"][project_idx]
         start = time.time()
         k = f"{project.name} @ {project.relpath}"
-        self._sync_dict[k] = start
+        cls.get_parallel_context()["sync_dict"][k] = start
         success = False
         remote_fetched = False
         errors = []
@@ -746,7 +750,7 @@ later is required to fix a server side protocol bug.
                 quiet=opt.quiet,
                 verbose=opt.verbose,
                 output_redir=buf,
-                current_branch_only=self._GetCurrentBranchOnly(
+                current_branch_only=cls._GetCurrentBranchOnly(
                     opt, project.manifest
                 ),
                 force_sync=opt.force_sync,
@@ -756,7 +760,7 @@ later is required to fix a server side protocol bug.
                 optimized_fetch=opt.optimized_fetch,
                 retry_fetches=opt.retry_fetches,
                 prune=opt.prune,
-                ssh_proxy=self.ssh_proxy,
+                ssh_proxy=cls.get_parallel_context()["ssh_proxy"],
                 clone_filter=project.manifest.CloneFilter,
                 partial_clone_exclude=project.manifest.PartialCloneExclude,
                 clone_filter_for_depth=project.manifest.CloneFilterForDepth,
@@ -788,24 +792,20 @@ later is required to fix a server side protocol bug.
                     type(e).__name__,
                     e,
                 )
-                del self._sync_dict[k]
                 errors.append(e)
                 raise
+            finally:
+                del cls.get_parallel_context()["sync_dict"][k]
 
         finish = time.time()
-        del self._sync_dict[k]
         return _FetchOneResult(
-            success, errors, project, start, finish, remote_fetched
+            success, errors, project_idx, start, finish, remote_fetched
         )
 
-    @classmethod
-    def _FetchInitChild(cls, ssh_proxy):
-        cls.ssh_proxy = ssh_proxy
-
     def _GetSyncProgressMessage(self):
         earliest_time = float("inf")
         earliest_proj = None
-        items = self._sync_dict.items()
+        items = self.get_parallel_context()["sync_dict"].items()
         for project, t in items:
             if t < earliest_time:
                 earliest_time = t
@@ -813,7 +813,7 @@ later is required to fix a server side protocol bug.
 
         if not earliest_proj:
             # This function is called when sync is still running but in some
-            # cases (by chance), _sync_dict can contain no entries. Return some
+            # cases (by chance), sync_dict can contain no entries. Return some
             # text to indicate that sync is still working.
             return "..working.."
 
@@ -835,7 +835,6 @@ later is required to fix a server side protocol bug.
             elide=True,
         )
 
-        self._sync_dict = multiprocessing.Manager().dict()
         sync_event = _threading.Event()
 
         def _MonitorSyncLoop():
@@ -846,21 +845,13 @@ later is required to fix a server side protocol bug.
 
         sync_progress_thread = _threading.Thread(target=_MonitorSyncLoop)
         sync_progress_thread.daemon = True
-        sync_progress_thread.start()
-
-        objdir_project_map = dict()
-        for project in projects:
-            objdir_project_map.setdefault(project.objdir, []).append(project)
-        projects_list = list(objdir_project_map.values())
 
-        jobs = min(opt.jobs_network, len(projects_list))
-
-        def _ProcessResults(results_sets):
+        def _ProcessResults(pool, pm, results_sets):
             ret = True
             for results in results_sets:
                 for result in results:
                     success = result.success
-                    project = result.project
+                    project = projects[result.project_idx]
                     start = result.start
                     finish = result.finish
                     self._fetch_times.Set(project, finish - start)
@@ -884,45 +875,49 @@ later is required to fix a server side protocol bug.
                     fetched.add(project.gitdir)
                     pm.update()
                 if not ret and opt.fail_fast:
+                    if pool:
+                        pool.close()
                     break
             return ret
 
-        # We pass the ssh proxy settings via the class. This allows
-        # multiprocessing to pickle it up when spawning children. We can't pass
-        # it as an argument to _FetchProjectList below as multiprocessing is
-        # unable to pickle those.
-        Sync.ssh_proxy = None
-
-        # NB: Multiprocessing is heavy, so don't spin it up for one job.
-        if jobs == 1:
-            self._FetchInitChild(ssh_proxy)
-            if not _ProcessResults(
-                self._FetchProjectList(opt, x) for x in projects_list
-            ):
-                ret = False
-        else:
+        with self.ParallelContext():
+            self.get_parallel_context()["projects"] = projects
+            self.get_parallel_context()[
+                "sync_dict"
+            ] = multiprocessing.Manager().dict()
+
+            objdir_project_map = dict()
+            for index, project in enumerate(projects):
+                objdir_project_map.setdefault(project.objdir, []).append(index)
+            projects_list = list(objdir_project_map.values())
+
+            jobs = min(opt.jobs_network, len(projects_list))
+
+            # We pass the ssh proxy settings via the class. This allows
+            # multiprocessing to pickle it up when spawning children. We can't
+            # pass it as an argument to _FetchProjectList below as
+            # multiprocessing is unable to pickle those.
+            self.get_parallel_context()["ssh_proxy"] = ssh_proxy
+
+            sync_progress_thread.start()
             if not opt.quiet:
                 pm.update(inc=0, msg="warming up")
-            with multiprocessing.Pool(
-                jobs, initializer=self._FetchInitChild, initargs=(ssh_proxy,)
-            ) as pool:
-                results = pool.imap_unordered(
+            try:
+                ret = self.ExecuteInParallel(
+                    jobs,
                     functools.partial(self._FetchProjectList, opt),
                     projects_list,
-                    chunksize=_chunksize(len(projects_list), jobs),
+                    callback=_ProcessResults,
+                    output=pm,
+                    # Use chunksize=1 to avoid the chance that some workers are
+                    # idle while other workers still have more than one job in
+                    # their chunk queue.
+                    chunksize=1,
                 )
-                if not _ProcessResults(results):
-                    ret = False
-                pool.close()
-
-        # Cleanup the reference now that we're done with it, and we're going to
-        # release any resources it points to. If we don't, later
-        # multiprocessing usage (e.g. checkouts) will try to pickle and then
-        # crash.
-        del Sync.ssh_proxy
+            finally:
+                sync_event.set()
+                sync_progress_thread.join()
 
-        sync_event.set()
-        pm.end()
         self._fetch_times.Save()
         self._local_sync_state.Save()
 
@@ -1008,14 +1003,15 @@ later is required to fix a server side protocol bug.
 
         return _FetchMainResult(all_projects)
 
+    @classmethod
     def _CheckoutOne(
-        self,
+        cls,
         detach_head,
         force_sync,
         force_checkout,
         force_rebase,
         verbose,
-        project,
+        project_idx,
     ):
         """Checkout work tree for one project
 
1021 1017
@@ -1027,11 +1023,12 @@ later is required to fix a server side protocol bug.
1027 force_checkout: Force checking out of the repo content. 1023 force_checkout: Force checking out of the repo content.
1028 force_rebase: Force rebase. 1024 force_rebase: Force rebase.
1029 verbose: Whether to show verbose messages. 1025 verbose: Whether to show verbose messages.
1030 project: Project object for the project to checkout. 1026 project_idx: Project index for the project to checkout.
1031 1027
1032 Returns: 1028 Returns:
1033 Whether the fetch was successful. 1029 Whether the fetch was successful.
1034 """ 1030 """
1031 project = cls.get_parallel_context()["projects"][project_idx]
1035 start = time.time() 1032 start = time.time()
1036 syncbuf = SyncBuffer( 1033 syncbuf = SyncBuffer(
1037 project.manifest.manifestProject.config, detach_head=detach_head 1034 project.manifest.manifestProject.config, detach_head=detach_head
@@ -1065,7 +1062,7 @@ later is required to fix a server side protocol bug.
         if not success:
             logger.error("error: Cannot checkout %s", project.name)
         finish = time.time()
-        return _CheckoutOneResult(success, errors, project, start, finish)
+        return _CheckoutOneResult(success, errors, project_idx, start, finish)
 
     def _Checkout(self, all_projects, opt, err_results, checkout_errors):
         """Checkout projects listed in all_projects
@@ -1083,7 +1080,9 @@ later is required to fix a server side protocol bug.
             ret = True
             for result in results:
                 success = result.success
-                project = result.project
+                project = self.get_parallel_context()["projects"][
+                    result.project_idx
+                ]
                 start = result.start
                 finish = result.finish
                 self.event_log.AddSync(
@@ -1110,22 +1109,28 @@ later is required to fix a server side protocol bug.
             return ret
 
         for projects in _SafeCheckoutOrder(all_projects):
-            proc_res = self.ExecuteInParallel(
-                opt.jobs_checkout,
-                functools.partial(
-                    self._CheckoutOne,
-                    opt.detach_head,
-                    opt.force_sync,
-                    opt.force_checkout,
-                    opt.rebase,
-                    opt.verbose,
-                ),
-                projects,
-                callback=_ProcessResults,
-                output=Progress(
-                    "Checking out", len(all_projects), quiet=opt.quiet
-                ),
-            )
+            with self.ParallelContext():
+                self.get_parallel_context()["projects"] = projects
+                proc_res = self.ExecuteInParallel(
+                    opt.jobs_checkout,
+                    functools.partial(
+                        self._CheckoutOne,
+                        opt.detach_head,
+                        opt.force_sync,
+                        opt.force_checkout,
+                        opt.rebase,
+                        opt.verbose,
+                    ),
+                    range(len(projects)),
+                    callback=_ProcessResults,
+                    output=Progress(
+                        "Checking out", len(all_projects), quiet=opt.quiet
+                    ),
+                    # Use chunksize=1 to avoid the chance that some workers
+                    # are idle while other workers still have more than one
+                    # job in their chunk queue.
+                    chunksize=1,
+                )
 
         self._local_sync_state.Save()
         return proc_res and not err_results