summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJosip Sokcevic <sokcevic@chromium.org>2025-01-14 19:20:21 +0000
committerLUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com>2025-01-14 15:17:34 -0800
commit3405446a4eb382467ef539764f6a31869fd1ce43 (patch)
tree86499dc07d72afc4bea0eea51cc063e2bcdaa700
parent41a27eb854b011f1506cbf984645df5a0f67ad00 (diff)
downloadgit-repo-3405446a4eb382467ef539764f6a31869fd1ce43.tar.gz
gc: Add repack option
When a repository is partially cloned, blobs that are no longer needed are never removed. To reclaim some disk space, allow the user to pass --repack, which affects only repositories that use filter=blob:none and whose projects are not shared.

Change-Id: I0608172c9eff82fb8a6b6ef703eb109fedb7a6cc
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/447722
Commit-Queue: Josip Sokcevic <sokcevic@chromium.org>
Tested-by: Josip Sokcevic <sokcevic@chromium.org>
Reviewed-by: Scott Lee <ddoman@google.com>
-rw-r--r--subcmds/gc.py162
1 files changed, 160 insertions, 2 deletions
diff --git a/subcmds/gc.py b/subcmds/gc.py
index 14d9675c..0831dc3c 100644
--- a/subcmds/gc.py
+++ b/subcmds/gc.py
@@ -16,6 +16,7 @@ import os
16from typing import List, Set 16from typing import List, Set
17 17
18from command import Command 18from command import Command
19from git_command import GitCommand
19import platform_utils 20import platform_utils
20from progress import Progress 21from progress import Progress
21from project import Project 22from project import Project
@@ -23,7 +24,7 @@ from project import Project
23 24
24class Gc(Command): 25class Gc(Command):
25 COMMON = True 26 COMMON = True
26 helpSummary = "Cleaning up internal repo state." 27 helpSummary = "Cleaning up internal repo and Git state."
27 helpUsage = """ 28 helpUsage = """
28%prog 29%prog
29""" 30"""
@@ -44,6 +45,13 @@ class Gc(Command):
44 action="store_true", 45 action="store_true",
45 help="answer yes to all safe prompts", 46 help="answer yes to all safe prompts",
46 ) 47 )
48 p.add_option(
49 "--repack",
50 default=False,
51 action="store_true",
52 help="repack all projects that use partial clone with "
53 "filter=blob:none",
54 )
47 55
48 def _find_git_to_delete( 56 def _find_git_to_delete(
49 self, to_keep: Set[str], start_dir: str 57 self, to_keep: Set[str], start_dir: str
@@ -126,9 +134,159 @@ class Gc(Command):
126 134
127 return 0 135 return 0
128 136
def _generate_promisor_files(self, pack_dir: str):
    """Create an empty ".promisor" file beside each pack under pack_dir.

    The directory tree is traversed with platform_utils.walk. For every
    file whose name ends in ".pack", a file with the same base name and a
    ".promisor" extension is opened for writing (and therefore created
    empty, or truncated if it already exists) in the same directory.
    Git uses these files to mark the matching pack as a promisor pack.

    Args:
        pack_dir: Directory to scan for pack files.
    """
    pack_suffix = ".pack"
    for dirpath, _, filenames in platform_utils.walk(pack_dir):
        pack_names = (n for n in filenames if n.endswith(pack_suffix))
        for pack_name in pack_names:
            # Drop ".pack" and attach ".promisor" to the remaining stem.
            stem = pack_name[: -len(pack_suffix)]
            marker = os.path.join(dirpath, f"{stem}.promisor")
            # The marker carries no content; opening it is sufficient.
            with open(marker, "w"):
                pass
149
def repack_projects(self, projects: List[Project], opt):
    """Repack eligible partial-clone projects to drop unneeded objects.

    A project is repacked only when all of the following hold:
      - "extensions.preciousObjects" is not set in its config (projects
        that share objects must not be repacked),
      - it has a truthy clone_depth,
      - its manifest's CloneFilterForDepth is "blob:none".

    For each eligible project the reflog is expired, "git gc" is run, and
    two packs are written into a temporary directory under the project's
    gitdir: one for objects reachable from the remote (marked with a
    ".promisor" file) and one for local objects. The temporary directory
    then replaces "objects/pack" under the project's objdir.

    Args:
        projects: Candidate projects to consider.
        opt: Parsed command options; opt.dryrun and opt.quiet are read.

    Returns:
        0 once all eligible projects were processed, or immediately after
        reporting the count when opt.dryrun is set.
    """
    # Find all projects eligible for repacking:
    # - can't be shared
    # - have a specific fetch filter
    eligible = [
        project
        for project in projects
        if not project.config.GetBoolean("extensions.preciousObjects")
        and project.clone_depth
        and project.manifest.CloneFilterForDepth == "blob:none"
    ]

    if opt.dryrun:
        print(f"Would have repacked {len(eligible)} projects.")
        return 0

    pm = Progress(
        "Repacking (this will take a while)",
        len(eligible),
        delay=False,
        quiet=opt.quiet,
        show_elapsed=True,
        elide=True,
    )

    # Make sure the progress display is finished even when a git command
    # or a filesystem operation raises part-way through.
    try:
        for project in eligible:
            pm.update(msg=f"{project.name}")

            # Start from an empty scratch directory; a leftover from an
            # earlier interrupted run is discarded.
            pack_dir = os.path.join(project.gitdir, "tmp_repo_repack")
            if os.path.isdir(pack_dir):
                platform_utils.rmtree(pack_dir)
            os.mkdir(pack_dir)

            # Prepare workspace for repacking - remove all unreachable
            # refs and their objects.
            GitCommand(
                project,
                ["reflog", "expire", "--expire-unreachable=all"],
                verify_command=True,
            ).Wait()
            pm.update(msg=f"{project.name} | gc", inc=0)
            GitCommand(
                project,
                ["gc"],
                verify_command=True,
            ).Wait()

            # Get all objects that are reachable from the remote, and
            # pack them.
            pm.update(
                msg=f"{project.name} | generating list of objects", inc=0
            )
            remote_objects_cmd = GitCommand(
                project,
                [
                    "rev-list",
                    "--objects",
                    f"--remotes={project.remote.name}",
                    "--filter=blob:none",
                ],
                capture_stdout=True,
                verify_command=True,
            )

            # Get all local objects and pack them.
            local_head_objects_cmd = GitCommand(
                project,
                ["rev-list", "--objects", "HEAD^{tree}"],
                capture_stdout=True,
                verify_command=True,
            )
            local_objects_cmd = GitCommand(
                project,
                [
                    "rev-list",
                    "--objects",
                    "--all",
                    "--reflog",
                    "--indexed-objects",
                    "--not",
                    f"--remotes={project.remote.name}",
                ],
                capture_stdout=True,
                verify_command=True,
            )

            remote_objects_cmd.Wait()

            pm.update(msg=f"{project.name} | remote repack", inc=0)
            GitCommand(
                project,
                ["pack-objects", os.path.join(pack_dir, "pack")],
                input=remote_objects_cmd.stdout,
                capture_stderr=True,
                capture_stdout=True,
                verify_command=True,
            ).Wait()

            # Create a promisor file for each pack file. This runs before
            # the local pack is written, so only the remote pack is
            # marked as a promisor pack.
            self._generate_promisor_files(pack_dir)

            local_head_objects_cmd.Wait()
            local_objects_cmd.Wait()

            pm.update(msg=f"{project.name} | local repack", inc=0)
            GitCommand(
                project,
                ["pack-objects", os.path.join(pack_dir, "pack")],
                input=local_head_objects_cmd.stdout
                + local_objects_cmd.stdout,
                capture_stderr=True,
                capture_stdout=True,
                verify_command=True,
            ).Wait()

            # Swap the old pack directory with the new one.
            live_pack_dir = os.path.join(project.objdir, "objects", "pack")
            old_pack_dir = os.path.join(
                project.objdir, "objects", "pack_old"
            )
            platform_utils.rename(live_pack_dir, old_pack_dir)
            try:
                platform_utils.rename(pack_dir, live_pack_dir)
            except OSError:
                # Restore the original packs so the repository is not
                # left without an objects/pack directory.
                platform_utils.rename(old_pack_dir, live_pack_dir)
                raise
            platform_utils.rmtree(old_pack_dir)
    finally:
        pm.end()

    return 0
279
def Execute(self, opt, args):
    """Delete unused projects and, when requested, repack the rest.

    Args:
        opt: Parsed command options; opt.this_manifest_only and
            opt.repack are read here.
        args: Positional arguments forwarded to GetProjects.

    Returns:
        The status from delete_unused_projects when it is non-zero;
        otherwise 0 if --repack was not given, or the result of
        repack_projects.
    """
    projects: List[Project] = self.GetProjects(
        args, all_manifests=not opt.this_manifest_only
    )

    ret = self.delete_unused_projects(projects, opt)
    if ret != 0:
        return ret

    if not opt.repack:
        # Report success explicitly so every path returns an integer
        # status rather than None.
        return 0

    return self.repack_projects(projects, opt)