diff options
author | Mike Frysinger <vapier@google.com> | 2019-07-31 23:32:58 -0400 |
---|---|---|
committer | Mike Frysinger <vapier@google.com> | 2020-02-04 20:34:01 +0000 |
commit | 04122b7261319dae3abcaf0eb63af7ed937dc463 (patch) | |
tree | 4e8092cae702cd7b667b4cd95f1cfc5dbba221f3 | |
parent | f5525fb310f0aae2783d9ccf647cac967efb2600 (diff) | |
download | git-repo-04122b7261319dae3abcaf0eb63af7ed937dc463.tar.gz |
manifest: add basic path checks for <copyfile> & <linkfile>
Reject paths in <copyfile> & <linkfile> that point outside of their
respective scopes. This validates paths while parsing the manifest
as this should be quick & cheap: we don't access the filesystem as
this code runs before we've synced.
Bug: https://crbug.com/gerrit/11218
Change-Id: I8e17bb91f3f5b905a9d76391b29fbab4cb77aa58
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/232932
Tested-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Mike Frysinger <vapier@google.com>
Reviewed-by: Michael Mortensen <mmortensen@google.com>
-rw-r--r-- | docs/manifest-format.md | 2 | ||||
-rw-r--r-- | error.py | 4 | ||||
-rw-r--r-- | manifest_xml.py | 85 | ||||
-rw-r--r-- | tests/test_manifest_xml.py | 83 |
4 files changed, 170 insertions, 4 deletions
diff --git a/docs/manifest-format.md b/docs/manifest-format.md index 93d9b960..a39f97e8 100644 --- a/docs/manifest-format.md +++ b/docs/manifest-format.md | |||
@@ -338,7 +338,7 @@ It's just like copyfile and runs at the same time as copyfile but | |||
338 | instead of copying it creates a symlink. | 338 | instead of copying it creates a symlink. |
339 | 339 | ||
340 | The symlink is created at "dest" (relative to the top of the tree) and | 340 | The symlink is created at "dest" (relative to the top of the tree) and |
341 | points to the path specified by "src". | 341 | points to the path specified by "src" which is a path in the project. |
342 | 342 | ||
343 | Parent directories of "dest" will be automatically created if missing. | 343 | Parent directories of "dest" will be automatically created if missing. |
344 | 344 | ||
@@ -22,6 +22,10 @@ class ManifestInvalidRevisionError(Exception): | |||
22 | """The revision value in a project is incorrect. | 22 | """The revision value in a project is incorrect. |
23 | """ | 23 | """ |
24 | 24 | ||
25 | class ManifestInvalidPathError(Exception): | ||
26 | """A path used in <copyfile> or <linkfile> is incorrect. | ||
27 | """ | ||
28 | |||
25 | class NoManifestException(Exception): | 29 | class NoManifestException(Exception): |
26 | """The required manifest does not exist. | 30 | """The required manifest does not exist. |
27 | """ | 31 | """ |
diff --git a/manifest_xml.py b/manifest_xml.py index 3814a25a..69105c9e 100644 --- a/manifest_xml.py +++ b/manifest_xml.py | |||
@@ -35,7 +35,8 @@ from git_config import GitConfig | |||
35 | from git_refs import R_HEADS, HEAD | 35 | from git_refs import R_HEADS, HEAD |
36 | import platform_utils | 36 | import platform_utils |
37 | from project import RemoteSpec, Project, MetaProject | 37 | from project import RemoteSpec, Project, MetaProject |
38 | from error import ManifestParseError, ManifestInvalidRevisionError | 38 | from error import (ManifestParseError, ManifestInvalidPathError, |
39 | ManifestInvalidRevisionError) | ||
39 | 40 | ||
40 | MANIFEST_FILE_NAME = 'manifest.xml' | 41 | MANIFEST_FILE_NAME = 'manifest.xml' |
41 | LOCAL_MANIFEST_NAME = 'local_manifest.xml' | 42 | LOCAL_MANIFEST_NAME = 'local_manifest.xml' |
@@ -943,12 +944,88 @@ class XmlManifest(object): | |||
943 | worktree = os.path.join(parent.worktree, path).replace('\\', '/') | 944 | worktree = os.path.join(parent.worktree, path).replace('\\', '/') |
944 | return relpath, worktree, gitdir, objdir | 945 | return relpath, worktree, gitdir, objdir |
945 | 946 | ||
947 | @staticmethod | ||
948 | def _CheckLocalPath(path, symlink=False): | ||
949 | """Verify |path| is reasonable for use in <copyfile> & <linkfile>.""" | ||
950 | if '~' in path: | ||
951 | return '~ not allowed (due to 8.3 filenames on Windows filesystems)' | ||
952 | |||
953 | # Some filesystems (like Apple's HFS+) try to normalize Unicode codepoints | ||
954 | # which means there are alternative names for ".git". Reject paths with | ||
955 | # these in it as there shouldn't be any reasonable need for them here. | ||
956 | # The set of codepoints here was cribbed from jgit's implementation: | ||
957 | # https://eclipse.googlesource.com/jgit/jgit/+/9110037e3e9461ff4dac22fee84ef3694ed57648/org.eclipse.jgit/src/org/eclipse/jgit/lib/ObjectChecker.java#884 | ||
958 | BAD_CODEPOINTS = { | ||
959 | u'\u200C', # ZERO WIDTH NON-JOINER | ||
960 | u'\u200D', # ZERO WIDTH JOINER | ||
961 | u'\u200E', # LEFT-TO-RIGHT MARK | ||
962 | u'\u200F', # RIGHT-TO-LEFT MARK | ||
963 | u'\u202A', # LEFT-TO-RIGHT EMBEDDING | ||
964 | u'\u202B', # RIGHT-TO-LEFT EMBEDDING | ||
965 | u'\u202C', # POP DIRECTIONAL FORMATTING | ||
966 | u'\u202D', # LEFT-TO-RIGHT OVERRIDE | ||
967 | u'\u202E', # RIGHT-TO-LEFT OVERRIDE | ||
968 | u'\u206A', # INHIBIT SYMMETRIC SWAPPING | ||
969 | u'\u206B', # ACTIVATE SYMMETRIC SWAPPING | ||
970 | u'\u206C', # INHIBIT ARABIC FORM SHAPING | ||
971 | u'\u206D', # ACTIVATE ARABIC FORM SHAPING | ||
972 | u'\u206E', # NATIONAL DIGIT SHAPES | ||
973 | u'\u206F', # NOMINAL DIGIT SHAPES | ||
974 | u'\uFEFF', # ZERO WIDTH NO-BREAK SPACE | ||
975 | } | ||
976 | if BAD_CODEPOINTS & set(path): | ||
977 | # This message is more expansive than reality, but should be fine. | ||
978 | return 'Unicode combining characters not allowed' | ||
979 | |||
980 | # Assume paths might be used on case-insensitive filesystems. | ||
981 | path = path.lower() | ||
982 | |||
983 | # We don't really need to reject '.' here, but there shouldn't really be a | ||
984 | # need to ever use it, so no need to accept it either. | ||
985 | for part in set(path.split(os.path.sep)): | ||
986 | if part in {'.', '..', '.git'} or part.startswith('.repo'): | ||
987 | return 'bad component: %s' % (part,) | ||
988 | |||
989 | if not symlink and path.endswith(os.path.sep): | ||
990 | return 'dirs not allowed' | ||
991 | |||
992 | norm = os.path.normpath(path) | ||
993 | if norm == '..' or norm.startswith('../') or norm.startswith(os.path.sep): | ||
994 | return 'path cannot be outside' | ||
995 | |||
996 | @classmethod | ||
997 | def _ValidateFilePaths(cls, element, src, dest): | ||
998 | """Verify |src| & |dest| are reasonable for <copyfile> & <linkfile>. | ||
999 | |||
1000 | We verify the path independent of any filesystem state as we won't have a | ||
1001 | checkout available to compare to. i.e. This is for parsing validation | ||
1002 | purposes only. | ||
1003 | |||
1004 | We'll do full/live sanity checking before we do the actual filesystem | ||
1005 | modifications in _CopyFile/_LinkFile/etc... | ||
1006 | """ | ||
1007 | # |dest| is the file we write to or symlink we create. | ||
1008 | # It is relative to the top of the repo client checkout. | ||
1009 | msg = cls._CheckLocalPath(dest) | ||
1010 | if msg: | ||
1011 | raise ManifestInvalidPathError( | ||
1012 | '<%s> invalid "dest": %s: %s' % (element, dest, msg)) | ||
1013 | |||
1014 | # |src| is the file we read from or path we point to for symlinks. | ||
1015 | # It is relative to the top of the git project checkout. | ||
1016 | msg = cls._CheckLocalPath(src, symlink=element == 'linkfile') | ||
1017 | if msg: | ||
1018 | raise ManifestInvalidPathError( | ||
1019 | '<%s> invalid "src": %s: %s' % (element, src, msg)) | ||
1020 | |||
946 | def _ParseCopyFile(self, project, node): | 1021 | def _ParseCopyFile(self, project, node): |
947 | src = self._reqatt(node, 'src') | 1022 | src = self._reqatt(node, 'src') |
948 | dest = self._reqatt(node, 'dest') | 1023 | dest = self._reqatt(node, 'dest') |
949 | if not self.IsMirror: | 1024 | if not self.IsMirror: |
950 | # src is project relative; | 1025 | # src is project relative; |
951 | # dest is relative to the top of the tree | 1026 | # dest is relative to the top of the tree. |
1027 | # We only validate paths if we actually plan to process them. | ||
1028 | self._ValidateFilePaths('copyfile', src, dest) | ||
952 | project.AddCopyFile(src, dest, os.path.join(self.topdir, dest)) | 1029 | project.AddCopyFile(src, dest, os.path.join(self.topdir, dest)) |
953 | 1030 | ||
954 | def _ParseLinkFile(self, project, node): | 1031 | def _ParseLinkFile(self, project, node): |
@@ -956,7 +1033,9 @@ class XmlManifest(object): | |||
956 | dest = self._reqatt(node, 'dest') | 1033 | dest = self._reqatt(node, 'dest') |
957 | if not self.IsMirror: | 1034 | if not self.IsMirror: |
958 | # src is project relative; | 1035 | # src is project relative; |
959 | # dest is relative to the top of the tree | 1036 | # dest is relative to the top of the tree. |
1037 | # We only validate paths if we actually plan to process them. | ||
1038 | self._ValidateFilePaths('linkfile', src, dest) | ||
960 | project.AddLinkFile(src, dest, os.path.join(self.topdir, dest)) | 1039 | project.AddLinkFile(src, dest, os.path.join(self.topdir, dest)) |
961 | 1040 | ||
962 | def _ParseAnnotation(self, project, node): | 1041 | def _ParseAnnotation(self, project, node): |
diff --git a/tests/test_manifest_xml.py b/tests/test_manifest_xml.py new file mode 100644 index 00000000..ecc84ad7 --- /dev/null +++ b/tests/test_manifest_xml.py | |||
@@ -0,0 +1,83 @@ | |||
1 | # -*- coding:utf-8 -*- | ||
2 | # | ||
3 | # Copyright (C) 2019 The Android Open Source Project | ||
4 | # | ||
5 | # Licensed under the Apache License, Version 2.0 (the "License"); | ||
6 | # you may not use this file except in compliance with the License. | ||
7 | # You may obtain a copy of the License at | ||
8 | # | ||
9 | # http://www.apache.org/licenses/LICENSE-2.0 | ||
10 | # | ||
11 | # Unless required by applicable law or agreed to in writing, software | ||
12 | # distributed under the License is distributed on an "AS IS" BASIS, | ||
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
14 | # See the License for the specific language governing permissions and | ||
15 | # limitations under the License. | ||
16 | |||
17 | """Unittests for the manifest_xml.py module.""" | ||
18 | |||
19 | from __future__ import print_function | ||
20 | |||
21 | import unittest | ||
22 | |||
23 | import error | ||
24 | import manifest_xml | ||
25 | |||
26 | |||
27 | class ManifestValidateFilePaths(unittest.TestCase): | ||
28 | """Check _ValidateFilePaths helper. | ||
29 | |||
30 | This doesn't access a real filesystem. | ||
31 | """ | ||
32 | |||
33 | def check_both(self, *args): | ||
34 | manifest_xml.XmlManifest._ValidateFilePaths('copyfile', *args) | ||
35 | manifest_xml.XmlManifest._ValidateFilePaths('linkfile', *args) | ||
36 | |||
37 | def test_normal_path(self): | ||
38 | """Make sure good paths are accepted.""" | ||
39 | self.check_both('foo', 'bar') | ||
40 | self.check_both('foo/bar', 'bar') | ||
41 | self.check_both('foo', 'bar/bar') | ||
42 | self.check_both('foo/bar', 'bar/bar') | ||
43 | |||
44 | def test_symlink_targets(self): | ||
45 | """Some extra checks for symlinks.""" | ||
46 | def check(*args): | ||
47 | manifest_xml.XmlManifest._ValidateFilePaths('linkfile', *args) | ||
48 | |||
49 | # We allow symlinks to end in a slash since we allow them to point to dirs | ||
50 | # in general. Technically the slash isn't necessary. | ||
51 | check('foo/', 'bar') | ||
52 | |||
53 | def test_bad_paths(self): | ||
54 | """Make sure bad paths (src & dest) are rejected.""" | ||
55 | PATHS = ( | ||
56 | '..', | ||
57 | '../', | ||
58 | './', | ||
59 | 'foo/', | ||
60 | './foo', | ||
61 | '../foo', | ||
62 | 'foo/./bar', | ||
63 | 'foo/../../bar', | ||
64 | '/foo', | ||
65 | './../foo', | ||
66 | '.git/foo', | ||
67 | # Check case folding. | ||
68 | '.GIT/foo', | ||
69 | 'blah/.git/foo', | ||
70 | '.repo/foo', | ||
71 | '.repoconfig', | ||
72 | # Block ~ due to 8.3 filenames on Windows filesystems. | ||
73 | '~', | ||
74 | 'foo~', | ||
75 | 'blah/foo~', | ||
76 | # Block Unicode characters that get normalized out by filesystems. | ||
77 | u'foo\u200Cbar', | ||
78 | ) | ||
79 | for path in PATHS: | ||
80 | self.assertRaises( | ||
81 | error.ManifestInvalidPathError, self.check_both, path, 'a') | ||
82 | self.assertRaises( | ||
83 | error.ManifestInvalidPathError, self.check_both, 'a', path) | ||