summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Xin Li <delphij@google.com> 2019-06-03 11:24:30 -0700
committer: David Pursehouse <dpursehouse@collab.net> 2019-07-16 00:23:16 +0000
commit: 745be2ede1e67421275afc00c04d996d9d6908ee (patch)
tree: 6b7bfbd187a33eeeb14108518ece5440ce9456c0
parent: 87fb5a1894354ec0e34ccec427a9803e24157847 (diff)
download: git-repo-745be2ede1e67421275afc00c04d996d9d6908ee.tar.gz
Add support for partial clone. (tag: v1.13.4)
A new option, --partial-clone, is added to 'repo init'. It tells repo to use git's partial clone functionality, which reduces disk and bandwidth usage when downloading by omitting blob downloads initially. Unlike restricting clone-depth, this still gives the user full access to change history, etc., because the objects are downloaded on demand.

Change-Id: I60326744875eac16521a007bd7d5481112a98749
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/229532
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Xin Li <delphij@google.com>
-rw-r--r-- git_command.py 6
-rw-r--r-- manifest_xml.py 6
-rwxr-xr-x project.py 18
-rwxr-xr-x repo 7
-rw-r--r-- subcmds/init.py 21
-rw-r--r-- subcmds/sync.py 176
6 files changed, 206 insertions, 28 deletions
diff --git a/git_command.py b/git_command.py
index 54db4d16..f5352ea0 100644
--- a/git_command.py
+++ b/git_command.py
@@ -98,13 +98,15 @@ class _GitCall(object):
98 return fun 98 return fun
99git = _GitCall() 99git = _GitCall()
100 100
101def git_require(min_version, fail=False): 101def git_require(min_version, fail=False, msg=''):
102 git_version = git.version_tuple() 102 git_version = git.version_tuple()
103 if min_version <= git_version: 103 if min_version <= git_version:
104 return True 104 return True
105 if fail: 105 if fail:
106 need = '.'.join(map(str, min_version)) 106 need = '.'.join(map(str, min_version))
107 print('fatal: git %s or later required' % need, file=sys.stderr) 107 if msg:
108 msg = ' for ' + msg
109 print('fatal: git %s or later required%s' % (need, msg), file=sys.stderr)
108 sys.exit(1) 110 sys.exit(1)
109 return False 111 return False
110 112
diff --git a/manifest_xml.py b/manifest_xml.py
index 94d22b3c..3814a25a 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -414,6 +414,12 @@ class XmlManifest(object):
414 return self._manifest_server 414 return self._manifest_server
415 415
416 @property 416 @property
417 def CloneFilter(self):
418 if self.manifestProject.config.GetBoolean('repo.partialclone'):
419 return self.manifestProject.config.GetString('repo.clonefilter')
420 return None
421
422 @property
417 def IsMirror(self): 423 def IsMirror(self):
418 return self.manifestProject.config.GetBoolean('repo.mirror') 424 return self.manifestProject.config.GetBoolean('repo.mirror')
419 425
diff --git a/project.py b/project.py
index 67d3bb20..9702e9da 100755
--- a/project.py
+++ b/project.py
@@ -1226,7 +1226,8 @@ class Project(object):
1226 archive=False, 1226 archive=False,
1227 optimized_fetch=False, 1227 optimized_fetch=False,
1228 prune=False, 1228 prune=False,
1229 submodules=False): 1229 submodules=False,
1230 clone_filter=None):
1230 """Perform only the network IO portion of the sync process. 1231 """Perform only the network IO portion of the sync process.
1231 Local working directory/branch state is not affected. 1232 Local working directory/branch state is not affected.
1232 """ 1233 """
@@ -1309,7 +1310,8 @@ class Project(object):
1309 not self._RemoteFetch(initial=is_new, quiet=quiet, alt_dir=alt_dir, 1310 not self._RemoteFetch(initial=is_new, quiet=quiet, alt_dir=alt_dir,
1310 current_branch_only=current_branch_only, 1311 current_branch_only=current_branch_only,
1311 no_tags=no_tags, prune=prune, depth=depth, 1312 no_tags=no_tags, prune=prune, depth=depth,
1312 submodules=submodules, force_sync=force_sync)): 1313 submodules=submodules, force_sync=force_sync,
1314 clone_filter=clone_filter)):
1313 return False 1315 return False
1314 1316
1315 mp = self.manifest.manifestProject 1317 mp = self.manifest.manifestProject
@@ -1959,7 +1961,8 @@ class Project(object):
1959 prune=False, 1961 prune=False,
1960 depth=None, 1962 depth=None,
1961 submodules=False, 1963 submodules=False,
1962 force_sync=False): 1964 force_sync=False,
1965 clone_filter=None):
1963 1966
1964 is_sha1 = False 1967 is_sha1 = False
1965 tag_name = None 1968 tag_name = None
@@ -2050,6 +2053,11 @@ class Project(object):
2050 2053
2051 cmd = ['fetch'] 2054 cmd = ['fetch']
2052 2055
2056 if clone_filter:
2057 git_require((2, 19, 0), fail=True, msg='partial clones')
2058 cmd.append('--filter=%s' % clone_filter)
2059 self.config.SetString('extensions.partialclone', self.remote.name)
2060
2053 if depth: 2061 if depth:
2054 cmd.append('--depth=%s' % depth) 2062 cmd.append('--depth=%s' % depth)
2055 else: 2063 else:
@@ -2150,12 +2158,12 @@ class Project(object):
2150 return self._RemoteFetch(name=name, 2158 return self._RemoteFetch(name=name,
2151 current_branch_only=current_branch_only, 2159 current_branch_only=current_branch_only,
2152 initial=False, quiet=quiet, alt_dir=alt_dir, 2160 initial=False, quiet=quiet, alt_dir=alt_dir,
2153 depth=None) 2161 depth=None, clone_filter=clone_filter)
2154 else: 2162 else:
2155 # Avoid infinite recursion: sync all branches with depth set to None 2163 # Avoid infinite recursion: sync all branches with depth set to None
2156 return self._RemoteFetch(name=name, current_branch_only=False, 2164 return self._RemoteFetch(name=name, current_branch_only=False,
2157 initial=False, quiet=quiet, alt_dir=alt_dir, 2165 initial=False, quiet=quiet, alt_dir=alt_dir,
2158 depth=None) 2166 depth=None, clone_filter=clone_filter)
2159 2167
2160 return ok 2168 return ok
2161 2169
diff --git a/repo b/repo
index 8ed147df..649c4e48 100755
--- a/repo
+++ b/repo
@@ -199,6 +199,13 @@ group.add_option('--dissociate',
199group.add_option('--depth', type='int', default=None, 199group.add_option('--depth', type='int', default=None,
200 dest='depth', 200 dest='depth',
201 help='create a shallow clone with given depth; see git clone') 201 help='create a shallow clone with given depth; see git clone')
202group.add_option('--partial-clone', action='store_true',
203 dest='partial_clone',
204 help='perform partial clone (https://git-scm.com/'
205 'docs/gitrepository-layout#_code_partialclone_code)')
206group.add_option('--clone-filter', action='store', default='blob:none',
207 dest='clone_filter',
208 help='filter for use with --partial-clone [default: %default]')
202group.add_option('--archive', 209group.add_option('--archive',
203 dest='archive', action='store_true', 210 dest='archive', action='store_true',
204 help='checkout an archive instead of a git repository for ' 211 help='checkout an archive instead of a git repository for '
diff --git a/subcmds/init.py b/subcmds/init.py
index 1c809ab4..eaa6da50 100644
--- a/subcmds/init.py
+++ b/subcmds/init.py
@@ -115,6 +115,13 @@ to update the working directory files.
115 g.add_option('--depth', type='int', default=None, 115 g.add_option('--depth', type='int', default=None,
116 dest='depth', 116 dest='depth',
117 help='create a shallow clone with given depth; see git clone') 117 help='create a shallow clone with given depth; see git clone')
118 g.add_option('--partial-clone', action='store_true',
119 dest='partial_clone',
120 help='perform partial clone (https://git-scm.com/'
121 'docs/gitrepository-layout#_code_partialclone_code)')
122 g.add_option('--clone-filter', action='store', default='blob:none',
123 dest='clone_filter',
124 help='filter for use with --partial-clone [default: %default]')
118 g.add_option('--archive', 125 g.add_option('--archive',
119 dest='archive', action='store_true', 126 dest='archive', action='store_true',
120 help='checkout an archive instead of a git repository for ' 127 help='checkout an archive instead of a git repository for '
@@ -253,13 +260,25 @@ to update the working directory files.
253 'in another location.', file=sys.stderr) 260 'in another location.', file=sys.stderr)
254 sys.exit(1) 261 sys.exit(1)
255 262
263 if opt.partial_clone:
264 if opt.mirror:
265 print('fatal: --mirror and --partial-clone are mutually exclusive',
266 file=sys.stderr)
267 sys.exit(1)
268 m.config.SetString('repo.partialclone', 'true')
269 if opt.clone_filter:
270 m.config.SetString('repo.clonefilter', opt.clone_filter)
271 else:
272 opt.clone_filter = None
273
256 if opt.submodules: 274 if opt.submodules:
257 m.config.SetString('repo.submodules', 'true') 275 m.config.SetString('repo.submodules', 'true')
258 276
259 if not m.Sync_NetworkHalf(is_new=is_new, quiet=opt.quiet, 277 if not m.Sync_NetworkHalf(is_new=is_new, quiet=opt.quiet,
260 clone_bundle=not opt.no_clone_bundle, 278 clone_bundle=not opt.no_clone_bundle,
261 current_branch_only=opt.current_branch_only, 279 current_branch_only=opt.current_branch_only,
262 no_tags=opt.no_tags, submodules=opt.submodules): 280 no_tags=opt.no_tags, submodules=opt.submodules,
281 clone_filter=opt.clone_filter):
263 r = m.GetRemote(m.remote.name) 282 r = m.GetRemote(m.remote.name)
264 print('fatal: cannot obtain manifest %s' % r.url, file=sys.stderr) 283 print('fatal: cannot obtain manifest %s' % r.url, file=sys.stderr)
265 284
diff --git a/subcmds/sync.py b/subcmds/sync.py
index 02cd3879..b752cfbe 100644
--- a/subcmds/sync.py
+++ b/subcmds/sync.py
@@ -85,6 +85,9 @@ class _FetchError(Exception):
85 """Internal error thrown in _FetchHelper() when we don't want stack trace.""" 85 """Internal error thrown in _FetchHelper() when we don't want stack trace."""
86 pass 86 pass
87 87
88class _CheckoutError(Exception):
89 """Internal error thrown in _CheckoutOne() when we don't want stack trace."""
90
88class Sync(Command, MirrorSafeCommand): 91class Sync(Command, MirrorSafeCommand):
89 jobs = 1 92 jobs = 1
90 common = True 93 common = True
@@ -266,7 +269,7 @@ later is required to fix a server side protocol bug.
266 help=SUPPRESS_HELP) 269 help=SUPPRESS_HELP)
267 270
268 def _FetchProjectList(self, opt, projects, sem, *args, **kwargs): 271 def _FetchProjectList(self, opt, projects, sem, *args, **kwargs):
269 """Main function of the fetch threads when jobs are > 1. 272 """Main function of the fetch threads.
270 273
271 Delegates most of the work to _FetchHelper. 274 Delegates most of the work to _FetchHelper.
272 275
@@ -286,7 +289,8 @@ later is required to fix a server side protocol bug.
286 finally: 289 finally:
287 sem.release() 290 sem.release()
288 291
289 def _FetchHelper(self, opt, project, lock, fetched, pm, err_event): 292 def _FetchHelper(self, opt, project, lock, fetched, pm, err_event,
293 clone_filter):
290 """Fetch git objects for a single project. 294 """Fetch git objects for a single project.
291 295
292 Args: 296 Args:
@@ -300,6 +304,7 @@ later is required to fix a server side protocol bug.
300 lock held). 304 lock held).
301 err_event: We'll set this event in the case of an error (after printing 305 err_event: We'll set this event in the case of an error (after printing
302 out info about the error). 306 out info about the error).
307 clone_filter: Filter for use in a partial clone.
303 308
304 Returns: 309 Returns:
305 Whether the fetch was successful. 310 Whether the fetch was successful.
@@ -312,7 +317,6 @@ later is required to fix a server side protocol bug.
312 317
313 # Encapsulate everything in a try/except/finally so that: 318 # Encapsulate everything in a try/except/finally so that:
314 # - We always set err_event in the case of an exception. 319 # - We always set err_event in the case of an exception.
315 # - We always make sure we call sem.release().
316 # - We always make sure we unlock the lock if we locked it. 320 # - We always make sure we unlock the lock if we locked it.
317 start = time.time() 321 start = time.time()
318 success = False 322 success = False
@@ -325,7 +329,8 @@ later is required to fix a server side protocol bug.
325 clone_bundle=not opt.no_clone_bundle, 329 clone_bundle=not opt.no_clone_bundle,
326 no_tags=opt.no_tags, archive=self.manifest.IsArchive, 330 no_tags=opt.no_tags, archive=self.manifest.IsArchive,
327 optimized_fetch=opt.optimized_fetch, 331 optimized_fetch=opt.optimized_fetch,
328 prune=opt.prune) 332 prune=opt.prune,
333 clone_filter=clone_filter)
329 self._fetch_times.Set(project, time.time() - start) 334 self._fetch_times.Set(project, time.time() - start)
330 335
331 # Lock around all the rest of the code, since printing, updating a set 336 # Lock around all the rest of the code, since printing, updating a set
@@ -389,7 +394,8 @@ later is required to fix a server side protocol bug.
389 lock=lock, 394 lock=lock,
390 fetched=fetched, 395 fetched=fetched,
391 pm=pm, 396 pm=pm,
392 err_event=err_event) 397 err_event=err_event,
398 clone_filter=self.manifest.CloneFilter)
393 if self.jobs > 1: 399 if self.jobs > 1:
394 t = _threading.Thread(target = self._FetchProjectList, 400 t = _threading.Thread(target = self._FetchProjectList,
395 kwargs = kwargs) 401 kwargs = kwargs)
@@ -416,6 +422,148 @@ later is required to fix a server side protocol bug.
416 422
417 return fetched 423 return fetched
418 424
425 def _CheckoutWorker(self, opt, sem, project, *args, **kwargs):
426 """Main function of the fetch threads.
427
428 Delegates most of the work to _CheckoutOne.
429
430 Args:
431 opt: Program options returned from optparse. See _Options().
432 projects: Projects to fetch.
433 sem: We'll release() this semaphore when we exit so that another thread
434 can be started up.
435 *args, **kwargs: Remaining arguments to pass to _CheckoutOne. See the
436 _CheckoutOne docstring for details.
437 """
438 try:
439 success = self._CheckoutOne(opt, project, *args, **kwargs)
440 if not success:
441 sys.exit(1)
442 finally:
443 sem.release()
444
445 def _CheckoutOne(self, opt, project, lock, pm, err_event):
446 """Checkout work tree for one project
447
448 Args:
449 opt: Program options returned from optparse. See _Options().
450 project: Project object for the project to checkout.
451 lock: Lock for accessing objects that are shared amongst multiple
452 _CheckoutWorker() threads.
453 pm: Instance of a Project object. We will call pm.update() (with our
454 lock held).
455 err_event: We'll set this event in the case of an error (after printing
456 out info about the error).
457
458 Returns:
459 Whether the fetch was successful.
460 """
461 # We'll set to true once we've locked the lock.
462 did_lock = False
463
464 if not opt.quiet:
465 print('Checking out project %s' % project.name)
466
467 # Encapsulate everything in a try/except/finally so that:
468 # - We always set err_event in the case of an exception.
469 # - We always make sure we unlock the lock if we locked it.
470 start = time.time()
471 syncbuf = SyncBuffer(self.manifest.manifestProject.config,
472 detach_head=opt.detach_head)
473 success = False
474 try:
475 try:
476 project.Sync_LocalHalf(syncbuf, force_sync=opt.force_sync)
477 success = syncbuf.Finish()
478
479 # Lock around all the rest of the code, since printing, updating a set
480 # and Progress.update() are not thread safe.
481 lock.acquire()
482 did_lock = True
483
484 if not success:
485 err_event.set()
486 print('error: Cannot checkout %s' % (project.name),
487 file=sys.stderr)
488 raise _CheckoutError()
489
490 pm.update()
491 except _CheckoutError:
492 pass
493 except Exception as e:
494 print('error: Cannot checkout %s: %s: %s' %
495 (project.name, type(e).__name__, str(e)),
496 file=sys.stderr)
497 err_event.set()
498 raise
499 finally:
500 if did_lock:
501 lock.release()
502 finish = time.time()
503 self.event_log.AddSync(project, event_log.TASK_SYNC_LOCAL,
504 start, finish, success)
505
506 return success
507
508 def _Checkout(self, all_projects, opt):
509 """Checkout projects listed in all_projects
510
511 Args:
512 all_projects: List of all projects that should be checked out.
513 opt: Program options returned from optparse. See _Options().
514 """
515
516 # Perform checkouts in multiple threads when we are using partial clone.
517 # Without partial clone, all needed git objects are already downloaded,
518 # in this situation it's better to use only one process because the checkout
519 # would be mostly disk I/O; with partial clone, the objects are only
520 # downloaded when demanded (at checkout time), which is similar to the
521 # Sync_NetworkHalf case and parallelism would be helpful.
522 if self.manifest.CloneFilter:
523 syncjobs = self.jobs
524 else:
525 syncjobs = 1
526
527 lock = _threading.Lock()
528 pm = Progress('Syncing work tree', len(all_projects))
529
530 threads = set()
531 sem = _threading.Semaphore(syncjobs)
532 err_event = _threading.Event()
533
534 for project in all_projects:
535 # Check for any errors before running any more tasks.
536 # ...we'll let existing threads finish, though.
537 if err_event.isSet() and not opt.force_broken:
538 break
539
540 sem.acquire()
541 if project.worktree:
542 kwargs = dict(opt=opt,
543 sem=sem,
544 project=project,
545 lock=lock,
546 pm=pm,
547 err_event=err_event)
548 if syncjobs > 1:
549 t = _threading.Thread(target=self._CheckoutWorker,
550 kwargs=kwargs)
551 # Ensure that Ctrl-C will not freeze the repo process.
552 t.daemon = True
553 threads.add(t)
554 t.start()
555 else:
556 self._CheckoutWorker(**kwargs)
557
558 for t in threads:
559 t.join()
560
561 pm.end()
562 # If we saw an error, exit with code 1 so that other scripts can check.
563 if err_event.isSet():
564 print('\nerror: Exited sync due to checkout errors', file=sys.stderr)
565 sys.exit(1)
566
419 def _GCProjects(self, projects): 567 def _GCProjects(self, projects):
420 gc_gitdirs = {} 568 gc_gitdirs = {}
421 for project in projects: 569 for project in projects:
@@ -746,7 +894,8 @@ later is required to fix a server side protocol bug.
746 current_branch_only=opt.current_branch_only, 894 current_branch_only=opt.current_branch_only,
747 no_tags=opt.no_tags, 895 no_tags=opt.no_tags,
748 optimized_fetch=opt.optimized_fetch, 896 optimized_fetch=opt.optimized_fetch,
749 submodules=self.manifest.HasSubmodules) 897 submodules=self.manifest.HasSubmodules,
898 clone_filter=self.manifest.CloneFilter)
750 finish = time.time() 899 finish = time.time()
751 self.event_log.AddSync(mp, event_log.TASK_SYNC_NETWORK, 900 self.event_log.AddSync(mp, event_log.TASK_SYNC_NETWORK,
752 start, finish, success) 901 start, finish, success)
@@ -846,20 +995,7 @@ later is required to fix a server side protocol bug.
846 if self.UpdateProjectList(opt): 995 if self.UpdateProjectList(opt):
847 sys.exit(1) 996 sys.exit(1)
848 997
849 syncbuf = SyncBuffer(mp.config, 998 self._Checkout(all_projects, opt)
850 detach_head = opt.detach_head)
851 pm = Progress('Syncing work tree', len(all_projects))
852 for project in all_projects:
853 pm.update()
854 if project.worktree:
855 start = time.time()
856 project.Sync_LocalHalf(syncbuf, force_sync=opt.force_sync)
857 self.event_log.AddSync(project, event_log.TASK_SYNC_LOCAL,
858 start, time.time(), syncbuf.Recently())
859 pm.end()
860 print(file=sys.stderr)
861 if not syncbuf.Finish():
862 sys.exit(1)
863 999
864 # If there's a notice that's supposed to print at the end of the sync, print 1000 # If there's a notice that's supposed to print at the end of the sync, print
865 # it now... 1001 # it now...