summaryrefslogtreecommitdiffstats
path: root/subcmds/sync.py
diff options
context:
space:
mode:
authorXin Li <delphij@google.com>2019-06-03 11:24:30 -0700
committerDavid Pursehouse <dpursehouse@collab.net>2019-07-16 00:23:16 +0000
commit745be2ede1e67421275afc00c04d996d9d6908ee (patch)
tree6b7bfbd187a33eeeb14108518ece5440ce9456c0 /subcmds/sync.py
parent87fb5a1894354ec0e34ccec427a9803e24157847 (diff)
downloadgit-repo-745be2ede1e67421275afc00c04d996d9d6908ee.tar.gz
Add support for partial clone.v1.13.4
A new option, --partial-clone is added to 'repo init' which tells repo to utilize git's partial clone functionality, which reduces disk and bandwidth usage when downloading by omitting blob downloads initially. Different from restricting clone-depth, the user will have full access to change history, etc., as the objects are downloaded on demand. Change-Id: I60326744875eac16521a007bd7d5481112a98749 Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/229532 Reviewed-by: Mike Frysinger <vapier@google.com> Tested-by: Xin Li <delphij@google.com>
Diffstat (limited to 'subcmds/sync.py')
-rw-r--r--subcmds/sync.py176
1 files changed, 156 insertions, 20 deletions
diff --git a/subcmds/sync.py b/subcmds/sync.py
index 02cd3879..b752cfbe 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -85,6 +85,9 @@ class _FetchError(Exception):
85 """Internal error thrown in _FetchHelper() when we don't want stack trace.""" 85 """Internal error thrown in _FetchHelper() when we don't want stack trace."""
86 pass 86 pass
87 87
88class _CheckoutError(Exception):
89 """Internal error thrown in _CheckoutOne() when we don't want stack trace."""
90
88class Sync(Command, MirrorSafeCommand): 91class Sync(Command, MirrorSafeCommand):
89 jobs = 1 92 jobs = 1
90 common = True 93 common = True
@@ -266,7 +269,7 @@ later is required to fix a server side protocol bug.
266 help=SUPPRESS_HELP) 269 help=SUPPRESS_HELP)
267 270
268 def _FetchProjectList(self, opt, projects, sem, *args, **kwargs): 271 def _FetchProjectList(self, opt, projects, sem, *args, **kwargs):
269 """Main function of the fetch threads when jobs are > 1. 272 """Main function of the fetch threads.
270 273
271 Delegates most of the work to _FetchHelper. 274 Delegates most of the work to _FetchHelper.
272 275
@@ -286,7 +289,8 @@ later is required to fix a server side protocol bug.
286 finally: 289 finally:
287 sem.release() 290 sem.release()
288 291
289 def _FetchHelper(self, opt, project, lock, fetched, pm, err_event): 292 def _FetchHelper(self, opt, project, lock, fetched, pm, err_event,
293 clone_filter):
290 """Fetch git objects for a single project. 294 """Fetch git objects for a single project.
291 295
292 Args: 296 Args:
@@ -300,6 +304,7 @@ later is required to fix a server side protocol bug.
300 lock held). 304 lock held).
301 err_event: We'll set this event in the case of an error (after printing 305 err_event: We'll set this event in the case of an error (after printing
302 out info about the error). 306 out info about the error).
307 clone_filter: Filter for use in a partial clone.
303 308
304 Returns: 309 Returns:
305 Whether the fetch was successful. 310 Whether the fetch was successful.
@@ -312,7 +317,6 @@ later is required to fix a server side protocol bug.
312 317
313 # Encapsulate everything in a try/except/finally so that: 318 # Encapsulate everything in a try/except/finally so that:
314 # - We always set err_event in the case of an exception. 319 # - We always set err_event in the case of an exception.
315 # - We always make sure we call sem.release().
316 # - We always make sure we unlock the lock if we locked it. 320 # - We always make sure we unlock the lock if we locked it.
317 start = time.time() 321 start = time.time()
318 success = False 322 success = False
@@ -325,7 +329,8 @@ later is required to fix a server side protocol bug.
325 clone_bundle=not opt.no_clone_bundle, 329 clone_bundle=not opt.no_clone_bundle,
326 no_tags=opt.no_tags, archive=self.manifest.IsArchive, 330 no_tags=opt.no_tags, archive=self.manifest.IsArchive,
327 optimized_fetch=opt.optimized_fetch, 331 optimized_fetch=opt.optimized_fetch,
328 prune=opt.prune) 332 prune=opt.prune,
333 clone_filter=clone_filter)
329 self._fetch_times.Set(project, time.time() - start) 334 self._fetch_times.Set(project, time.time() - start)
330 335
331 # Lock around all the rest of the code, since printing, updating a set 336 # Lock around all the rest of the code, since printing, updating a set
@@ -389,7 +394,8 @@ later is required to fix a server side protocol bug.
389 lock=lock, 394 lock=lock,
390 fetched=fetched, 395 fetched=fetched,
391 pm=pm, 396 pm=pm,
392 err_event=err_event) 397 err_event=err_event,
398 clone_filter=self.manifest.CloneFilter)
393 if self.jobs > 1: 399 if self.jobs > 1:
394 t = _threading.Thread(target = self._FetchProjectList, 400 t = _threading.Thread(target = self._FetchProjectList,
395 kwargs = kwargs) 401 kwargs = kwargs)
@@ -416,6 +422,148 @@ later is required to fix a server side protocol bug.
416 422
417 return fetched 423 return fetched
418 424
425 def _CheckoutWorker(self, opt, sem, project, *args, **kwargs):
426 """Main function of the fetch threads.
427
428 Delegates most of the work to _CheckoutOne.
429
430 Args:
431 opt: Program options returned from optparse. See _Options().
432 projects: Projects to fetch.
433 sem: We'll release() this semaphore when we exit so that another thread
434 can be started up.
435 *args, **kwargs: Remaining arguments to pass to _CheckoutOne. See the
436 _CheckoutOne docstring for details.
437 """
438 try:
439 success = self._CheckoutOne(opt, project, *args, **kwargs)
440 if not success:
441 sys.exit(1)
442 finally:
443 sem.release()
444
445 def _CheckoutOne(self, opt, project, lock, pm, err_event):
446 """Checkout work tree for one project
447
448 Args:
449 opt: Program options returned from optparse. See _Options().
450 project: Project object for the project to checkout.
451 lock: Lock for accessing objects that are shared amongst multiple
452 _CheckoutWorker() threads.
453 pm: Instance of a Project object. We will call pm.update() (with our
454 lock held).
455 err_event: We'll set this event in the case of an error (after printing
456 out info about the error).
457
458 Returns:
459 Whether the fetch was successful.
460 """
461 # We'll set to true once we've locked the lock.
462 did_lock = False
463
464 if not opt.quiet:
465 print('Checking out project %s' % project.name)
466
467 # Encapsulate everything in a try/except/finally so that:
468 # - We always set err_event in the case of an exception.
469 # - We always make sure we unlock the lock if we locked it.
470 start = time.time()
471 syncbuf = SyncBuffer(self.manifest.manifestProject.config,
472 detach_head=opt.detach_head)
473 success = False
474 try:
475 try:
476 project.Sync_LocalHalf(syncbuf, force_sync=opt.force_sync)
477 success = syncbuf.Finish()
478
479 # Lock around all the rest of the code, since printing, updating a set
480 # and Progress.update() are not thread safe.
481 lock.acquire()
482 did_lock = True
483
484 if not success:
485 err_event.set()
486 print('error: Cannot checkout %s' % (project.name),
487 file=sys.stderr)
488 raise _CheckoutError()
489
490 pm.update()
491 except _CheckoutError:
492 pass
493 except Exception as e:
494 print('error: Cannot checkout %s: %s: %s' %
495 (project.name, type(e).__name__, str(e)),
496 file=sys.stderr)
497 err_event.set()
498 raise
499 finally:
500 if did_lock:
501 lock.release()
502 finish = time.time()
503 self.event_log.AddSync(project, event_log.TASK_SYNC_LOCAL,
504 start, finish, success)
505
506 return success
507
508 def _Checkout(self, all_projects, opt):
509 """Checkout projects listed in all_projects
510
511 Args:
512 all_projects: List of all projects that should be checked out.
513 opt: Program options returned from optparse. See _Options().
514 """
515
516 # Perform checkouts in multiple threads when we are using partial clone.
517 # Without partial clone, all needed git objects are already downloaded,
518 # in this situation it's better to use only one process because the checkout
519 # would be mostly disk I/O; with partial clone, the objects are only
520 # downloaded when demanded (at checkout time), which is similar to the
521 # Sync_NetworkHalf case and parallelism would be helpful.
522 if self.manifest.CloneFilter:
523 syncjobs = self.jobs
524 else:
525 syncjobs = 1
526
527 lock = _threading.Lock()
528 pm = Progress('Syncing work tree', len(all_projects))
529
530 threads = set()
531 sem = _threading.Semaphore(syncjobs)
532 err_event = _threading.Event()
533
534 for project in all_projects:
535 # Check for any errors before running any more tasks.
536 # ...we'll let existing threads finish, though.
537 if err_event.isSet() and not opt.force_broken:
538 break
539
540 sem.acquire()
541 if project.worktree:
542 kwargs = dict(opt=opt,
543 sem=sem,
544 project=project,
545 lock=lock,
546 pm=pm,
547 err_event=err_event)
548 if syncjobs > 1:
549 t = _threading.Thread(target=self._CheckoutWorker,
550 kwargs=kwargs)
551 # Ensure that Ctrl-C will not freeze the repo process.
552 t.daemon = True
553 threads.add(t)
554 t.start()
555 else:
556 self._CheckoutWorker(**kwargs)
557
558 for t in threads:
559 t.join()
560
561 pm.end()
562 # If we saw an error, exit with code 1 so that other scripts can check.
563 if err_event.isSet():
564 print('\nerror: Exited sync due to checkout errors', file=sys.stderr)
565 sys.exit(1)
566
419 def _GCProjects(self, projects): 567 def _GCProjects(self, projects):
420 gc_gitdirs = {} 568 gc_gitdirs = {}
421 for project in projects: 569 for project in projects:
@@ -746,7 +894,8 @@ later is required to fix a server side protocol bug.
746 current_branch_only=opt.current_branch_only, 894 current_branch_only=opt.current_branch_only,
747 no_tags=opt.no_tags, 895 no_tags=opt.no_tags,
748 optimized_fetch=opt.optimized_fetch, 896 optimized_fetch=opt.optimized_fetch,
749 submodules=self.manifest.HasSubmodules) 897 submodules=self.manifest.HasSubmodules,
898 clone_filter=self.manifest.CloneFilter)
750 finish = time.time() 899 finish = time.time()
751 self.event_log.AddSync(mp, event_log.TASK_SYNC_NETWORK, 900 self.event_log.AddSync(mp, event_log.TASK_SYNC_NETWORK,
752 start, finish, success) 901 start, finish, success)
@@ -846,20 +995,7 @@ later is required to fix a server side protocol bug.
846 if self.UpdateProjectList(opt): 995 if self.UpdateProjectList(opt):
847 sys.exit(1) 996 sys.exit(1)
848 997
849 syncbuf = SyncBuffer(mp.config, 998 self._Checkout(all_projects, opt)
850 detach_head = opt.detach_head)
851 pm = Progress('Syncing work tree', len(all_projects))
852 for project in all_projects:
853 pm.update()
854 if project.worktree:
855 start = time.time()
856 project.Sync_LocalHalf(syncbuf, force_sync=opt.force_sync)
857 self.event_log.AddSync(project, event_log.TASK_SYNC_LOCAL,
858 start, time.time(), syncbuf.Recently())
859 pm.end()
860 print(file=sys.stderr)
861 if not syncbuf.Finish():
862 sys.exit(1)
863 999
864 # If there's a notice that's supposed to print at the end of the sync, print 1000 # If there's a notice that's supposed to print at the end of the sync, print
865 # it now... 1001 # it now...