diff options
author | Raman Tenneti <rtenneti@google.com> | 2021-02-09 00:26:31 -0800 |
---|---|---|
committer | Raman Tenneti <rtenneti@google.com> | 2021-02-11 18:59:29 +0000 |
commit | 21dce3d8b351538d0fe8c05e6106c8b281580dda (patch) | |
tree | 771c7d005adc27acfbfdd7e6e85339766efdb810 /git_superproject.py | |
parent | e3315bb49a782bcf62ba9df4fc1c2690b046763f (diff) | |
download | git-repo-21dce3d8b351538d0fe8c05e6106c8b281580dda.tar.gz |
init: added --use-superproject option to clone superproject. (tag: v2.12.2)
Added --no-use-superproject to repo and init.py to disable use of
manifest superprojects.
Replaced the term "sha" with "commit id".
Added _GetBranch method to Superproject object.
Moved shared code between init and sync into SyncSuperproject function.
This function does either a git clone or a git fetch. If git fetch fails,
it falls back to git clone.
Changed Superproject constructor to accept manifest, repodir and branch
to avoid passing them to multiple functions as argument.
Changed functions that were raising exceptions to return either True
or False.
Saved the --use-superproject option in config as repo.superproject.
Updated internal-fs-layout.md document.
Updated the tests to work with the new API changes in Superproject.
Performance of the first-time sync has improved from 20 minutes to
around 15 minutes.
Tested the code with the following commands.
$ ./run_tests -v
Tested the sync code by using repo_dev alias and pointing to this CL.
Note: repo init took around 20 seconds longer because of cloning of superproject.
$ time repo_dev init -u sso://android.git.corp.google.com/platform/manifest -b master --partial-clone --clone-filter=blob:limit=10M --repo-rev=main --use-superproject
...
real 0m35.919s
user 0m21.947s
sys 0m8.977s
First run
$ time repo sync --use-superproject
...
real 16m41.982s
user 100m6.916s
sys 19m18.753s
No difference in repo sync time after the first run.
Bug: [google internal] b/179090734
Bug: https://crbug.com/gerrit/13709
Bug: https://crbug.com/gerrit/13707
Change-Id: I12df92112f46e001dfbc6f12cd633c3a15cf924b
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/296382
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Raman Tenneti <rtenneti@google.com>
Diffstat (limited to 'git_superproject.py')
-rw-r--r-- | git_superproject.py | 154 |
1 files changed, 89 insertions, 65 deletions
diff --git a/git_superproject.py b/git_superproject.py index 378ede25..471dadc4 100644 --- a/git_superproject.py +++ b/git_superproject.py | |||
@@ -12,21 +12,22 @@ | |||
12 | # See the License for the specific language governing permissions and | 12 | # See the License for the specific language governing permissions and |
13 | # limitations under the License. | 13 | # limitations under the License. |
14 | 14 | ||
15 | """Provide functionality to get all projects and their SHAs from Superproject. | 15 | """Provide functionality to get all projects and their commit ids from Superproject. |
16 | 16 | ||
17 | For more information on superproject, check out: | 17 | For more information on superproject, check out: |
18 | https://en.wikibooks.org/wiki/Git/Submodules_and_Superprojects | 18 | https://en.wikibooks.org/wiki/Git/Submodules_and_Superprojects |
19 | 19 | ||
20 | Examples: | 20 | Examples: |
21 | superproject = Superproject() | 21 | superproject = Superproject() |
22 | project_shas = superproject.GetAllProjectsSHAs() | 22 | project_commit_ids = superproject.UpdateProjectsRevisionId(projects) |
23 | """ | 23 | """ |
24 | 24 | ||
25 | import os | 25 | import os |
26 | import sys | 26 | import sys |
27 | 27 | ||
28 | from error import BUG_REPORT_URL, GitError | 28 | from error import BUG_REPORT_URL |
29 | from git_command import GitCommand | 29 | from git_command import GitCommand |
30 | from git_refs import R_HEADS | ||
30 | import platform_utils | 31 | import platform_utils |
31 | 32 | ||
32 | _SUPERPROJECT_GIT_NAME = 'superproject.git' | 33 | _SUPERPROJECT_GIT_NAME = 'superproject.git' |
@@ -34,19 +35,24 @@ _SUPERPROJECT_MANIFEST_NAME = 'superproject_override.xml' | |||
34 | 35 | ||
35 | 36 | ||
36 | class Superproject(object): | 37 | class Superproject(object): |
37 | """Get SHAs from superproject. | 38 | """Get commit ids from superproject. |
38 | 39 | ||
39 | It does a 'git clone' of superproject and 'git ls-tree' to get list of SHAs for all projects. | 40 | It does a 'git clone' of superproject and 'git ls-tree' to get list of commit ids |
40 | It contains project_shas which is a dictionary with project/sha entries. | 41 | for all projects. It contains project_commit_ids which is a dictionary with |
42 | project/commit id entries. | ||
41 | """ | 43 | """ |
42 | def __init__(self, repodir, superproject_dir='exp-superproject'): | 44 | def __init__(self, manifest, repodir, superproject_dir='exp-superproject'): |
43 | """Initializes superproject. | 45 | """Initializes superproject. |
44 | 46 | ||
45 | Args: | 47 | Args: |
48 | manifest: A Manifest object that is to be written to a file. | ||
46 | repodir: Path to the .repo/ dir for holding all internal checkout state. | 49 | repodir: Path to the .repo/ dir for holding all internal checkout state. |
50 | It must be in the top directory of the repo client checkout. | ||
47 | superproject_dir: Relative path under |repodir| to checkout superproject. | 51 | superproject_dir: Relative path under |repodir| to checkout superproject. |
48 | """ | 52 | """ |
49 | self._project_shas = None | 53 | self._project_commit_ids = None |
54 | self._manifest = manifest | ||
55 | self._branch = self._GetBranch() | ||
50 | self._repodir = os.path.abspath(repodir) | 56 | self._repodir = os.path.abspath(repodir) |
51 | self._superproject_dir = superproject_dir | 57 | self._superproject_dir = superproject_dir |
52 | self._superproject_path = os.path.join(self._repodir, superproject_dir) | 58 | self._superproject_path = os.path.join(self._repodir, superproject_dir) |
@@ -56,25 +62,35 @@ class Superproject(object): | |||
56 | _SUPERPROJECT_GIT_NAME) | 62 | _SUPERPROJECT_GIT_NAME) |
57 | 63 | ||
58 | @property | 64 | @property |
59 | def project_shas(self): | 65 | def project_commit_ids(self): |
60 | """Returns a dictionary of projects and their SHAs.""" | 66 | """Returns a dictionary of projects and their commit ids.""" |
61 | return self._project_shas | 67 | return self._project_commit_ids |
68 | |||
69 | def _GetBranch(self): | ||
70 | """Returns the branch name for getting the approved manifest.""" | ||
71 | p = self._manifest.manifestProject | ||
72 | b = p.GetBranch(p.CurrentBranch) | ||
73 | if not b: | ||
74 | return None | ||
75 | branch = b.merge | ||
76 | if branch and branch.startswith(R_HEADS): | ||
77 | branch = branch[len(R_HEADS):] | ||
78 | return branch | ||
62 | 79 | ||
63 | def _Clone(self, url, branch=None): | 80 | def _Clone(self, url): |
64 | """Do a 'git clone' for the given url and branch. | 81 | """Do a 'git clone' for the given url. |
65 | 82 | ||
66 | Args: | 83 | Args: |
67 | url: superproject's url to be passed to git clone. | 84 | url: superproject's url to be passed to git clone. |
68 | branch: The branchname to be passed as argument to git clone. | ||
69 | 85 | ||
70 | Returns: | 86 | Returns: |
71 | True if 'git clone <url> <branch>' is successful, or False. | 87 | True if git clone is successful, or False. |
72 | """ | 88 | """ |
73 | if not os.path.exists(self._superproject_path): | 89 | if not os.path.exists(self._superproject_path): |
74 | os.mkdir(self._superproject_path) | 90 | os.mkdir(self._superproject_path) |
75 | cmd = ['clone', url, '--filter', 'blob:none', '--bare'] | 91 | cmd = ['clone', url, '--filter', 'blob:none', '--bare'] |
76 | if branch: | 92 | if self._branch: |
77 | cmd += ['--branch', branch] | 93 | cmd += ['--branch', self._branch] |
78 | p = GitCommand(None, | 94 | p = GitCommand(None, |
79 | cmd, | 95 | cmd, |
80 | cwd=self._superproject_path, | 96 | cwd=self._superproject_path, |
@@ -112,22 +128,20 @@ class Superproject(object): | |||
112 | return False | 128 | return False |
113 | return True | 129 | return True |
114 | 130 | ||
115 | def _LsTree(self, branch='HEAD'): | 131 | def _LsTree(self): |
116 | """Returns the data from 'git ls-tree -r <branch>'. | 132 | """Returns the data from 'git ls-tree ...'. |
117 | 133 | ||
118 | Works only in git repositories. | 134 | Works only in git repositories. |
119 | 135 | ||
120 | Args: | ||
121 | branch: The branchname to be passed as argument to git ls-tree. | ||
122 | |||
123 | Returns: | 136 | Returns: |
124 | data: data returned from 'git ls-tree -r HEAD' instead of None. | 137 | data: data returned from 'git ls-tree ...' instead of None. |
125 | """ | 138 | """ |
126 | if not os.path.exists(self._work_git): | 139 | if not os.path.exists(self._work_git): |
127 | print('git ls-tree missing drectory: %s' % self._work_git, | 140 | print('git ls-tree missing drectory: %s' % self._work_git, |
128 | file=sys.stderr) | 141 | file=sys.stderr) |
129 | return None | 142 | return None |
130 | data = None | 143 | data = None |
144 | branch = 'HEAD' if not self._branch else self._branch | ||
131 | cmd = ['ls-tree', '-z', '-r', branch] | 145 | cmd = ['ls-tree', '-z', '-r', branch] |
132 | 146 | ||
133 | p = GitCommand(None, | 147 | p = GitCommand(None, |
@@ -145,18 +159,25 @@ class Superproject(object): | |||
145 | retval, p.stderr), file=sys.stderr) | 159 | retval, p.stderr), file=sys.stderr) |
146 | return data | 160 | return data |
147 | 161 | ||
148 | def _GetAllProjectsSHAs(self, url, branch=None): | 162 | def Sync(self): |
149 | """Get SHAs for all projects from superproject and save them in _project_shas. | 163 | """Sync superproject either by git clone/fetch. |
150 | |||
151 | Args: | ||
152 | url: superproject's url to be passed to git clone or fetch. | ||
153 | branch: The branchname to be passed as argument to git clone or fetch. | ||
154 | 164 | ||
155 | Returns: | 165 | Returns: |
156 | A dictionary with the projects/SHAs instead of None. | 166 | True if sync of superproject is successful, or False. |
157 | """ | 167 | """ |
168 | print('WARNING: --use-superproject is experimental and not ' | ||
169 | 'for general use', file=sys.stderr) | ||
170 | |||
171 | if not self._manifest.superproject: | ||
172 | print('error: superproject tag is not defined in manifest', | ||
173 | file=sys.stderr) | ||
174 | return False | ||
175 | |||
176 | url = self._manifest.superproject['remote'].url | ||
158 | if not url: | 177 | if not url: |
159 | raise ValueError('url argument is not supplied.') | 178 | print('error: superproject URL is not defined in manifest', |
179 | file=sys.stderr) | ||
180 | return False | ||
160 | 181 | ||
161 | do_clone = True | 182 | do_clone = True |
162 | if os.path.exists(self._superproject_path): | 183 | if os.path.exists(self._superproject_path): |
@@ -166,35 +187,44 @@ class Superproject(object): | |||
166 | else: | 187 | else: |
167 | do_clone = False | 188 | do_clone = False |
168 | if do_clone: | 189 | if do_clone: |
169 | if not self._Clone(url, branch): | 190 | if not self._Clone(url): |
170 | raise GitError('git clone failed for url: %s' % url) | 191 | print('error: git clone failed for url: %s' % url, file=sys.stderr) |
192 | return False | ||
193 | return True | ||
171 | 194 | ||
172 | data = self._LsTree(branch) | 195 | def _GetAllProjectsCommitIds(self): |
196 | """Get commit ids for all projects from superproject and save them in _project_commit_ids. | ||
197 | |||
198 | Returns: | ||
199 | A dictionary with the projects/commit ids on success, otherwise None. | ||
200 | """ | ||
201 | if not self.Sync(): | ||
202 | return None | ||
203 | |||
204 | data = self._LsTree() | ||
173 | if not data: | 205 | if not data: |
174 | raise GitError('git ls-tree failed for url: %s' % url) | 206 | print('error: git ls-tree failed for superproject', file=sys.stderr) |
207 | return None | ||
175 | 208 | ||
176 | # Parse lines like the following to select lines starting with '160000' and | 209 | # Parse lines like the following to select lines starting with '160000' and |
177 | # build a dictionary with project path (last element) and its SHA (3rd element). | 210 | # build a dictionary with project path (last element) and its commit id (3rd element). |
178 | # | 211 | # |
179 | # 160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00 | 212 | # 160000 commit 2c2724cb36cd5a9cec6c852c681efc3b7c6b86ea\tart\x00 |
180 | # 120000 blob acc2cbdf438f9d2141f0ae424cec1d8fc4b5d97f\tbootstrap.bash\x00 | 213 | # 120000 blob acc2cbdf438f9d2141f0ae424cec1d8fc4b5d97f\tbootstrap.bash\x00 |
181 | shas = {} | 214 | commit_ids = {} |
182 | for line in data.split('\x00'): | 215 | for line in data.split('\x00'): |
183 | ls_data = line.split(None, 3) | 216 | ls_data = line.split(None, 3) |
184 | if not ls_data: | 217 | if not ls_data: |
185 | break | 218 | break |
186 | if ls_data[0] == '160000': | 219 | if ls_data[0] == '160000': |
187 | shas[ls_data[3]] = ls_data[2] | 220 | commit_ids[ls_data[3]] = ls_data[2] |
188 | 221 | ||
189 | self._project_shas = shas | 222 | self._project_commit_ids = commit_ids |
190 | return shas | 223 | return commit_ids |
191 | 224 | ||
192 | def _WriteManfiestFile(self, manifest): | 225 | def _WriteManfiestFile(self): |
193 | """Writes manifest to a file. | 226 | """Writes manifest to a file. |
194 | 227 | ||
195 | Args: | ||
196 | manifest: A Manifest object that is to be written to a file. | ||
197 | |||
198 | Returns: | 228 | Returns: |
199 | manifest_path: Path name of the file into which manifest is written instead of None. | 229 | manifest_path: Path name of the file into which manifest is written instead of None. |
200 | """ | 230 | """ |
@@ -203,7 +233,7 @@ class Superproject(object): | |||
203 | self._superproject_path, | 233 | self._superproject_path, |
204 | file=sys.stderr) | 234 | file=sys.stderr) |
205 | return None | 235 | return None |
206 | manifest_str = manifest.ToXml().toxml() | 236 | manifest_str = self._manifest.ToXml().toxml() |
207 | manifest_path = self._manifest_path | 237 | manifest_path = self._manifest_path |
208 | try: | 238 | try: |
209 | with open(manifest_path, 'w', encoding='utf-8') as fp: | 239 | with open(manifest_path, 'w', encoding='utf-8') as fp: |
@@ -215,40 +245,34 @@ class Superproject(object): | |||
215 | return None | 245 | return None |
216 | return manifest_path | 246 | return manifest_path |
217 | 247 | ||
218 | def UpdateProjectsRevisionId(self, manifest, projects, url, branch=None): | 248 | def UpdateProjectsRevisionId(self, projects): |
219 | """Update revisionId of every project in projects with the SHA. | 249 | """Update revisionId of every project in projects with the commit id. |
220 | 250 | ||
221 | Args: | 251 | Args: |
222 | manifest: A Manifest object that is to be written to a file. | ||
223 | projects: List of projects whose revisionId needs to be updated. | 252 | projects: List of projects whose revisionId needs to be updated. |
224 | url: superproject's url to be passed to git clone or fetch. | ||
225 | branch: The branchname to be passed as argument to git clone or fetch. | ||
226 | 253 | ||
227 | Returns: | 254 | Returns: |
228 | manifest_path: Path name of the overriding manfiest file instead of None. | 255 | manifest_path: Path name of the overriding manfiest file instead of None. |
229 | """ | 256 | """ |
230 | try: | 257 | commit_ids = self._GetAllProjectsCommitIds() |
231 | shas = self._GetAllProjectsSHAs(url=url, branch=branch) | 258 | if not commit_ids: |
232 | except Exception as e: | 259 | print('error: Cannot get project commit ids from manifest', file=sys.stderr) |
233 | print('error: Cannot get project SHAs for %s: %s: %s' % | ||
234 | (url, type(e).__name__, str(e)), | ||
235 | file=sys.stderr) | ||
236 | return None | 260 | return None |
237 | 261 | ||
238 | projects_missing_shas = [] | 262 | projects_missing_commit_ids = [] |
239 | for project in projects: | 263 | for project in projects: |
240 | path = project.relpath | 264 | path = project.relpath |
241 | if not path: | 265 | if not path: |
242 | continue | 266 | continue |
243 | sha = shas.get(path) | 267 | commit_id = commit_ids.get(path) |
244 | if sha: | 268 | if commit_id: |
245 | project.SetRevisionId(sha) | 269 | project.SetRevisionId(commit_id) |
246 | else: | 270 | else: |
247 | projects_missing_shas.append(path) | 271 | projects_missing_commit_ids.append(path) |
248 | if projects_missing_shas: | 272 | if projects_missing_commit_ids: |
249 | print('error: please file a bug using %s to report missing shas for: %s' % | 273 | print('error: please file a bug using %s to report missing commit_ids for: %s' % |
250 | (BUG_REPORT_URL, projects_missing_shas), file=sys.stderr) | 274 | (BUG_REPORT_URL, projects_missing_commit_ids), file=sys.stderr) |
251 | return None | 275 | return None |
252 | 276 | ||
253 | manifest_path = self._WriteManfiestFile(manifest) | 277 | manifest_path = self._WriteManfiestFile() |
254 | return manifest_path | 278 | return manifest_path |