diff options
author | Dave Borowitz <dborowitz@google.com> | 2012-10-23 17:02:59 -0700 |
---|---|---|
committer | Dave Borowitz <dborowitz@google.com> | 2012-10-25 08:12:48 -0700 |
commit | 188572170e8cdf28df55a5ca90ed70d14b72b804 (patch) | |
tree | d64ad4724552898c9c49ba19f715c74c5caa504c | |
parent | 091f893625269fd12adadf7d1f60c21b4b83e779 (diff) | |
download | git-repo-188572170e8cdf28df55a5ca90ed70d14b72b804.tar.gz |
sync: Run gc --auto in parallel
We can't just let this run wild with a high (or even low) -j, since
each git gc process may itself start multiple pack threads, and that
would overload the system. Instead, limit the total number of threads
across all git gc subprocesses to the number of CPUs reported by the
multiprocessing module (available in Python 2.6 and above).
Change-Id: Icca0161a1e6116ffa5f7cfc6f5faecda510a7fb9
-rw-r--r-- | subcmds/sync.py | 55 |
1 file changed, 53 insertions, 2 deletions
diff --git a/subcmds/sync.py b/subcmds/sync.py index b83f2d4a..9e4a9754 100644 --- a/subcmds/sync.py +++ b/subcmds/sync.py | |||
@@ -39,6 +39,11 @@ except ImportError: | |||
39 | def _rlimit_nofile(): | 39 | def _rlimit_nofile(): |
40 | return (256, 256) | 40 | return (256, 256) |
41 | 41 | ||
42 | try: | ||
43 | import multiprocessing | ||
44 | except ImportError: | ||
45 | multiprocessing = None | ||
46 | |||
42 | from git_command import GIT | 47 | from git_command import GIT |
43 | from git_refs import R_HEADS, HEAD | 48 | from git_refs import R_HEADS, HEAD |
44 | from project import Project | 49 | from project import Project |
@@ -299,10 +304,56 @@ later is required to fix a server side protocol bug. | |||
299 | 304 | ||
300 | pm.end() | 305 | pm.end() |
301 | self._fetch_times.Save() | 306 | self._fetch_times.Save() |
302 | for project in projects: | 307 | |
303 | project.bare_git.gc('--auto') | 308 | self._GCProjects(projects) |
304 | return fetched | 309 | return fetched |
305 | 310 | ||
311 | def _GCProjects(self, projects): | ||
312 | if multiprocessing: | ||
313 | cpu_count = multiprocessing.cpu_count() | ||
314 | else: | ||
315 | cpu_count = 1 | ||
316 | jobs = min(self.jobs, cpu_count) | ||
317 | |||
318 | if jobs < 2: | ||
319 | for project in projects: | ||
320 | project.bare_git.gc('--auto') | ||
321 | return | ||
322 | |||
323 | config = {'pack.threads': cpu_count / jobs if cpu_count > jobs else 1} | ||
324 | |||
325 | threads = set() | ||
326 | sem = _threading.Semaphore(jobs) | ||
327 | err_event = _threading.Event() | ||
328 | |||
329 | def GC(project): | ||
330 | try: | ||
331 | try: | ||
332 | project.bare_git.gc('--auto', config=config) | ||
333 | except GitError: | ||
334 | err_event.set() | ||
335 | except: | ||
336 | err_event.set() | ||
337 | raise | ||
338 | finally: | ||
339 | sem.release() | ||
340 | |||
341 | for project in projects: | ||
342 | if err_event.isSet(): | ||
343 | break | ||
344 | sem.acquire() | ||
345 | t = _threading.Thread(target=GC, args=(project,)) | ||
346 | t.daemon = True | ||
347 | threads.add(t) | ||
348 | t.start() | ||
349 | |||
350 | for t in threads: | ||
351 | t.join() | ||
352 | |||
353 | if err_event.isSet(): | ||
354 | print >>sys.stderr, '\nerror: Exited sync due to gc errors' | ||
355 | sys.exit(1) | ||
356 | |||
306 | def UpdateProjectList(self): | 357 | def UpdateProjectList(self): |
307 | new_project_paths = [] | 358 | new_project_paths = [] |
308 | for project in self.GetProjects(None, missing_ok=True): | 359 | for project in self.GetProjects(None, missing_ok=True): |