diff options
author | Gavin Mak <gavinmak@google.com> | 2025-06-11 00:13:52 +0000 |
---|---|---|
committer | LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2025-06-11 16:31:35 -0700 |
commit | 85352825ff3903fe17bad05476284930dbf12fd8 (patch) | |
tree | 2970349ed8189906e4f62abde19df57c7b61114b /subcmds | |
parent | b262d0e4619c406a2708856ed312091d21c5bf39 (diff) | |
download | git-repo-85352825ff3903fe17bad05476284930dbf12fd8.tar.gz |
sync: Add scaffolding for interleaved sync
Prepare for an interleaved fetch and checkout mode for `repo sync`. The
goal of the new mode is to significantly speed up syncs by running fetch
and checkout operations in parallel for different projects, rather than
waiting for all fetches to complete before starting any checkouts.
Bug: 421935613
Change-Id: I8c66d1e790c7bba6280e409b95238c5e4e61a9c8
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/482821
Reviewed-by: Scott Lee <ddoman@google.com>
Commit-Queue: Gavin Mak <gavinmak@google.com>
Tested-by: Gavin Mak <gavinmak@google.com>
Diffstat (limited to 'subcmds')
-rw-r--r-- | subcmds/sync.py | 109 |
1 files changed, 87 insertions, 22 deletions
diff --git a/subcmds/sync.py b/subcmds/sync.py index 3a4151df..6e369a10 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py | |||
@@ -424,6 +424,11 @@ later is required to fix a server side protocol bug. | |||
424 | "(do not update to the latest revision)", | 424 | "(do not update to the latest revision)", |
425 | ) | 425 | ) |
426 | p.add_option( | 426 | p.add_option( |
427 | "--interleaved", | ||
428 | action="store_true", | ||
429 | help="fetch and checkout projects in parallel (experimental)", | ||
430 | ) | ||
431 | p.add_option( | ||
427 | "-n", | 432 | "-n", |
428 | "--network-only", | 433 | "--network-only", |
429 | action="store_true", | 434 | action="store_true", |
@@ -1772,8 +1777,6 @@ later is required to fix a server side protocol bug. | |||
1772 | e, | 1777 | e, |
1773 | ) | 1778 | ) |
1774 | 1779 | ||
1775 | err_event = multiprocessing.Event() | ||
1776 | |||
1777 | rp = manifest.repoProject | 1780 | rp = manifest.repoProject |
1778 | rp.PreSync() | 1781 | rp.PreSync() |
1779 | cb = rp.CurrentBranch | 1782 | cb = rp.CurrentBranch |
@@ -1825,6 +1828,64 @@ later is required to fix a server side protocol bug. | |||
1825 | all_manifests=not opt.this_manifest_only, | 1828 | all_manifests=not opt.this_manifest_only, |
1826 | ) | 1829 | ) |
1827 | 1830 | ||
1831 | if opt.interleaved: | ||
1832 | sync_method = self._SyncInterleaved | ||
1833 | else: | ||
1834 | sync_method = self._SyncPhased | ||
1835 | |||
1836 | sync_method( | ||
1837 | opt, | ||
1838 | args, | ||
1839 | errors, | ||
1840 | manifest, | ||
1841 | mp, | ||
1842 | all_projects, | ||
1843 | superproject_logging_data, | ||
1844 | ) | ||
1845 | |||
1846 | # Log the previous sync analysis state from the config. | ||
1847 | self.git_event_log.LogDataConfigEvents( | ||
1848 | mp.config.GetSyncAnalysisStateData(), "previous_sync_state" | ||
1849 | ) | ||
1850 | |||
1851 | # Update and log with the new sync analysis state. | ||
1852 | mp.config.UpdateSyncAnalysisState(opt, superproject_logging_data) | ||
1853 | self.git_event_log.LogDataConfigEvents( | ||
1854 | mp.config.GetSyncAnalysisStateData(), "current_sync_state" | ||
1855 | ) | ||
1856 | |||
1857 | self._local_sync_state.PruneRemovedProjects() | ||
1858 | if self._local_sync_state.IsPartiallySynced(): | ||
1859 | logger.warning( | ||
1860 | "warning: Partial syncs are not supported. For the best " | ||
1861 | "experience, sync the entire tree." | ||
1862 | ) | ||
1863 | |||
1864 | if not opt.quiet: | ||
1865 | print("repo sync has finished successfully.") | ||
1866 | |||
1867 | def _SyncPhased( | ||
1868 | self, | ||
1869 | opt, | ||
1870 | args, | ||
1871 | errors, | ||
1872 | manifest, | ||
1873 | mp, | ||
1874 | all_projects, | ||
1875 | superproject_logging_data, | ||
1876 | ): | ||
1877 | """Sync projects by separating network and local operations. | ||
1878 | |||
1879 | This method performs sync in two distinct, sequential phases: | ||
1880 | 1. Network Phase: Fetches updates for all projects from their remotes. | ||
1881 | 2. Local Phase: Checks out the updated revisions into the local | ||
1882 | worktrees for all projects. | ||
1883 | |||
1884 | This approach ensures that the local work-tree is not modified until | ||
1885 | all network operations are complete, providing a transactional-like | ||
1886 | safety net for the checkout state. | ||
1887 | """ | ||
1888 | err_event = multiprocessing.Event() | ||
1828 | err_network_sync = False | 1889 | err_network_sync = False |
1829 | err_update_projects = False | 1890 | err_update_projects = False |
1830 | err_update_linkfiles = False | 1891 | err_update_linkfiles = False |
@@ -1942,26 +2003,30 @@ later is required to fix a server side protocol bug. | |||
1942 | ) | 2003 | ) |
1943 | raise SyncError(aggregate_errors=errors) | 2004 | raise SyncError(aggregate_errors=errors) |
1944 | 2005 | ||
1945 | # Log the previous sync analysis state from the config. | 2006 | def _SyncInterleaved( |
1946 | self.git_event_log.LogDataConfigEvents( | 2007 | self, |
1947 | mp.config.GetSyncAnalysisStateData(), "previous_sync_state" | 2008 | opt, |
1948 | ) | 2009 | args, |
1949 | 2010 | errors, | |
1950 | # Update and log with the new sync analysis state. | 2011 | manifest, |
1951 | mp.config.UpdateSyncAnalysisState(opt, superproject_logging_data) | 2012 | mp, |
1952 | self.git_event_log.LogDataConfigEvents( | 2013 | all_projects, |
1953 | mp.config.GetSyncAnalysisStateData(), "current_sync_state" | 2014 | superproject_logging_data, |
1954 | ) | 2015 | ): |
1955 | 2016 | """Sync projects by performing network and local operations in parallel. | |
1956 | self._local_sync_state.PruneRemovedProjects() | 2017 | |
1957 | if self._local_sync_state.IsPartiallySynced(): | 2018 | This method processes each project (or groups of projects that share git |
1958 | logger.warning( | 2019 | objects) independently. For each project, it performs the fetch and |
1959 | "warning: Partial syncs are not supported. For the best " | 2020 | checkout operations back-to-back. These independent tasks are run in |
1960 | "experience, sync the entire tree." | 2021 | parallel. |
1961 | ) | 2022 | |
1962 | 2023 | It respects two constraints for correctness: | |
1963 | if not opt.quiet: | 2024 | 1. Projects in nested directories (e.g. 'foo' and 'foo/bar') are |
1964 | print("repo sync has finished successfully.") | 2025 | processed in hierarchical order. |
2026 | 2. Projects that share git objects are processed serially to prevent | ||
2027 | race conditions. | ||
2028 | """ | ||
2029 | raise NotImplementedError("Interleaved sync is not implemented yet.") | ||
1965 | 2030 | ||
1966 | 2031 | ||
1967 | def _PostRepoUpgrade(manifest, quiet=False): | 2032 | def _PostRepoUpgrade(manifest, quiet=False): |