diff options
author | Josip Sokcevic <sokcevic@chromium.org> | 2025-01-14 19:20:21 +0000 |
---|---|---|
committer | LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2025-01-14 15:17:34 -0800 |
commit | 3405446a4eb382467ef539764f6a31869fd1ce43 (patch) | |
tree | 86499dc07d72afc4bea0eea51cc063e2bcdaa700 | |
parent | 41a27eb854b011f1506cbf984645df5a0f67ad00 (diff) | |
download | git-repo-3405446a4eb382467ef539764f6a31869fd1ce43.tar.gz |
gc: Add repack option
When a repository is partially cloned, blobs that are no longer needed are
never removed. To reclaim some disk space, allow the user to pass --repack,
which affects only repositories that use filter=blob:none and whose
projects are not shared.
Change-Id: I0608172c9eff82fb8a6b6ef703eb109fedb7a6cc
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/447722
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
Tested-by: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Scott Lee <ddoman@google.com>
-rw-r--r-- | subcmds/gc.py | 162 |
1 files changed, 160 insertions, 2 deletions
diff --git a/subcmds/gc.py b/subcmds/gc.py index 14d9675c..0831dc3c 100644 --- a/subcmds/gc.py +++ b/subcmds/gc.py | |||
@@ -16,6 +16,7 @@ import os | |||
16 | from typing import List, Set | 16 | from typing import List, Set |
17 | 17 | ||
18 | from command import Command | 18 | from command import Command |
19 | from git_command import GitCommand | ||
19 | import platform_utils | 20 | import platform_utils |
20 | from progress import Progress | 21 | from progress import Progress |
21 | from project import Project | 22 | from project import Project |
@@ -23,7 +24,7 @@ from project import Project | |||
23 | 24 | ||
24 | class Gc(Command): | 25 | class Gc(Command): |
25 | COMMON = True | 26 | COMMON = True |
26 | helpSummary = "Cleaning up internal repo state." | 27 | helpSummary = "Cleaning up internal repo and Git state." |
27 | helpUsage = """ | 28 | helpUsage = """ |
28 | %prog | 29 | %prog |
29 | """ | 30 | """ |
@@ -44,6 +45,13 @@ class Gc(Command): | |||
44 | action="store_true", | 45 | action="store_true", |
45 | help="answer yes to all safe prompts", | 46 | help="answer yes to all safe prompts", |
46 | ) | 47 | ) |
48 | p.add_option( | ||
49 | "--repack", | ||
50 | default=False, | ||
51 | action="store_true", | ||
52 | help="repack all projects that use partial clone with " | ||
53 | "filter=blob:none", | ||
54 | ) | ||
47 | 55 | ||
48 | def _find_git_to_delete( | 56 | def _find_git_to_delete( |
49 | self, to_keep: Set[str], start_dir: str | 57 | self, to_keep: Set[str], start_dir: str |
@@ -126,9 +134,159 @@ class Gc(Command): | |||
126 | 134 | ||
127 | return 0 | 135 | return 0 |
128 | 136 | ||
137 | def _generate_promisor_files(self, pack_dir: str): | ||
138 | """Generates promisor files for all pack files in the given directory. | ||
139 | |||
140 | Promisor files are empty files with the same name as the corresponding | ||
141 | pack file but with the ".promisor" extension. They are used by Git. | ||
142 | """ | ||
143 | for root, _, files in platform_utils.walk(pack_dir): | ||
144 | for file in files: | ||
145 | if not file.endswith(".pack"): | ||
146 | continue | ||
147 | with open(os.path.join(root, f"{file[:-4]}promisor"), "w"): | ||
148 | pass | ||
149 | |||
150 | def repack_projects(self, projects: List[Project], opt): | ||
151 | repack_projects = [] | ||
152 | # Find all projects eligible for repacking: | ||
153 | # - can't be shared | ||
154 | # - have a specific fetch filter | ||
155 | for project in projects: | ||
156 | if project.config.GetBoolean("extensions.preciousObjects"): | ||
157 | continue | ||
158 | if not project.clone_depth: | ||
159 | continue | ||
160 | if project.manifest.CloneFilterForDepth != "blob:none": | ||
161 | continue | ||
162 | |||
163 | repack_projects.append(project) | ||
164 | |||
165 | if opt.dryrun: | ||
166 | print(f"Would have repacked {len(repack_projects)} projects.") | ||
167 | return 0 | ||
168 | |||
169 | pm = Progress( | ||
170 | "Repacking (this will take a while)", | ||
171 | len(repack_projects), | ||
172 | delay=False, | ||
173 | quiet=opt.quiet, | ||
174 | show_elapsed=True, | ||
175 | elide=True, | ||
176 | ) | ||
177 | |||
178 | for project in repack_projects: | ||
179 | pm.update(msg=f"{project.name}") | ||
180 | |||
181 | pack_dir = os.path.join(project.gitdir, "tmp_repo_repack") | ||
182 | if os.path.isdir(pack_dir): | ||
183 | platform_utils.rmtree(pack_dir) | ||
184 | os.mkdir(pack_dir) | ||
185 | |||
186 | # Prepare workspace for repacking - remove all unreachable refs and | ||
187 | # their objects. | ||
188 | GitCommand( | ||
189 | project, | ||
190 | ["reflog", "expire", "--expire-unreachable=all"], | ||
191 | verify_command=True, | ||
192 | ).Wait() | ||
193 | pm.update(msg=f"{project.name} | gc", inc=0) | ||
194 | GitCommand( | ||
195 | project, | ||
196 | ["gc"], | ||
197 | verify_command=True, | ||
198 | ).Wait() | ||
199 | |||
200 | # Get all objects that are reachable from the remote, and pack them. | ||
201 | pm.update(msg=f"{project.name} | generating list of objects", inc=0) | ||
202 | remote_objects_cmd = GitCommand( | ||
203 | project, | ||
204 | [ | ||
205 | "rev-list", | ||
206 | "--objects", | ||
207 | f"--remotes={project.remote.name}", | ||
208 | "--filter=blob:none", | ||
209 | ], | ||
210 | capture_stdout=True, | ||
211 | verify_command=True, | ||
212 | ) | ||
213 | |||
214 | # Get all local objects and pack them. | ||
215 | local_head_objects_cmd = GitCommand( | ||
216 | project, | ||
217 | ["rev-list", "--objects", "HEAD^{tree}"], | ||
218 | capture_stdout=True, | ||
219 | verify_command=True, | ||
220 | ) | ||
221 | local_objects_cmd = GitCommand( | ||
222 | project, | ||
223 | [ | ||
224 | "rev-list", | ||
225 | "--objects", | ||
226 | "--all", | ||
227 | "--reflog", | ||
228 | "--indexed-objects", | ||
229 | "--not", | ||
230 | f"--remotes={project.remote.name}", | ||
231 | ], | ||
232 | capture_stdout=True, | ||
233 | verify_command=True, | ||
234 | ) | ||
235 | |||
236 | remote_objects_cmd.Wait() | ||
237 | |||
238 | pm.update(msg=f"{project.name} | remote repack", inc=0) | ||
239 | GitCommand( | ||
240 | project, | ||
241 | ["pack-objects", os.path.join(pack_dir, "pack")], | ||
242 | input=remote_objects_cmd.stdout, | ||
243 | capture_stderr=True, | ||
244 | capture_stdout=True, | ||
245 | verify_command=True, | ||
246 | ).Wait() | ||
247 | |||
248 | # create promisor file for each pack file | ||
249 | self._generate_promisor_files(pack_dir) | ||
250 | |||
251 | local_head_objects_cmd.Wait() | ||
252 | local_objects_cmd.Wait() | ||
253 | |||
254 | pm.update(msg=f"{project.name} | local repack", inc=0) | ||
255 | GitCommand( | ||
256 | project, | ||
257 | ["pack-objects", os.path.join(pack_dir, "pack")], | ||
258 | input=local_head_objects_cmd.stdout + local_objects_cmd.stdout, | ||
259 | capture_stderr=True, | ||
260 | capture_stdout=True, | ||
261 | verify_command=True, | ||
262 | ).Wait() | ||
263 | |||
264 | # Swap the old pack directory with the new one. | ||
265 | platform_utils.rename( | ||
266 | os.path.join(project.objdir, "objects", "pack"), | ||
267 | os.path.join(project.objdir, "objects", "pack_old"), | ||
268 | ) | ||
269 | platform_utils.rename( | ||
270 | pack_dir, | ||
271 | os.path.join(project.objdir, "objects", "pack"), | ||
272 | ) | ||
273 | platform_utils.rmtree( | ||
274 | os.path.join(project.objdir, "objects", "pack_old") | ||
275 | ) | ||
276 | |||
277 | pm.end() | ||
278 | return 0 | ||
279 | |||
129 | def Execute(self, opt, args): | 280 | def Execute(self, opt, args): |
130 | projects: List[Project] = self.GetProjects( | 281 | projects: List[Project] = self.GetProjects( |
131 | args, all_manifests=not opt.this_manifest_only | 282 | args, all_manifests=not opt.this_manifest_only |
132 | ) | 283 | ) |
133 | 284 | ||
134 | return self.delete_unused_projects(projects, opt) | 285 | ret = self.delete_unused_projects(projects, opt) |
286 | if ret != 0: | ||
287 | return ret | ||
288 | |||
289 | if not opt.repack: | ||
290 | return | ||
291 | |||
292 | return self.repack_projects(projects, opt) | ||