diff options
author | Michael Kelly <mkelly@arista.com> | 2023-09-19 09:51:03 -0700 |
---|---|---|
committer | LUCI <gerrit-scoped@luci-project-accounts.iam.gserviceaccount.com> | 2023-11-08 05:03:20 +0000 |
commit | 3652b497bbbd6227b2cb84bb61a0fe8d21ba20d6 (patch) | |
tree | 437ade2bdcf975c2f82d5656b3d4c6425f7234d1 | |
parent | 89f761cfefc406aabc5658e6c0a5cd25812d0525 (diff) | |
download | git-repo-3652b497bbbd6227b2cb84bb61a0fe8d21ba20d6.tar.gz |
Correctly handle schema-less URIs for remote fetch URL
Currently we don't deal with schema-less URIs like
`git@github.com:foo` at all resulting in a scenario where we append
them to the manifest repo URL.
In order to deal with this, we munge both the manifest URL and the
fetch URL into a format we like and proceed with that.
Bug: https://g-issues.gerritcodereview.com/issues/40010331
Change-Id: I7b79fc4ed276630fdbeb235b94e327b172f0879b
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/386954
Reviewed-by: Mike Frysinger <vapier@google.com>
Tested-by: Michael Kelly <mkelly@arista.com>
Commit-Queue: Mike Frysinger <vapier@google.com>
-rw-r--r-- | manifest_xml.py | 58 | ||||
-rw-r--r-- | tests/test_manifest_xml.py | 29 |
2 files changed, 74 insertions, 13 deletions
diff --git a/manifest_xml.py b/manifest_xml.py index 61b130cf..b27bf805 100644 --- a/manifest_xml.py +++ b/manifest_xml.py | |||
@@ -117,6 +117,36 @@ def XmlInt(node, attr, default=None): | |||
117 | raise ManifestParseError(f'manifest: invalid {attr}="{value}" integer') | 117 | raise ManifestParseError(f'manifest: invalid {attr}="{value}" integer') |
118 | 118 | ||
119 | 119 | ||
120 | def normalize_url(url: str) -> str: | ||
121 | """Mutate input 'url' into normalized form: | ||
122 | |||
123 | * remove trailing slashes | ||
124 | * convert SCP-like syntax to SSH URL | ||
125 | |||
126 | Args: | ||
127 | url: URL to modify | ||
128 | |||
129 | Returns: | ||
130 | The normalized URL. | ||
131 | """ | ||
132 | |||
133 | url = url.rstrip("/") | ||
134 | parsed_url = urllib.parse.urlparse(url) | ||
135 | |||
136 | # This matches patterns like "git@github.com:foo/bar". | ||
137 | scp_like_url_re = r"^[^:]+@[^:]+:[^/]+/" | ||
138 | |||
139 | # If our URL is missing a schema and matches git's | ||
140 | # SCP-like syntax we should convert it to a proper | ||
141 | # SSH URL instead to make urljoin() happier. | ||
142 | # | ||
143 | # See: https://git-scm.com/docs/git-clone#URLS | ||
144 | if not parsed_url.scheme and re.match(scp_like_url_re, url): | ||
145 | return "ssh://" + url.replace(":", "/", 1) | ||
146 | |||
147 | return url | ||
148 | |||
149 | |||
120 | class _Default: | 150 | class _Default: |
121 | """Project defaults within the manifest.""" | 151 | """Project defaults within the manifest.""" |
122 | 152 | ||
@@ -180,20 +210,22 @@ class _XmlRemote: | |||
180 | def _resolveFetchUrl(self): | 210 | def _resolveFetchUrl(self): |
181 | if self.fetchUrl is None: | 211 | if self.fetchUrl is None: |
182 | return "" | 212 | return "" |
183 | url = self.fetchUrl.rstrip("/") | 213 | |
184 | manifestUrl = self.manifestUrl.rstrip("/") | 214 | fetch_url = normalize_url(self.fetchUrl) |
185 | # urljoin will gets confused over quite a few things. The ones we care | 215 | manifest_url = normalize_url(self.manifestUrl) |
186 | # about here are: | 216 | |
187 | # * no scheme in the base url, like <hostname:port> | 217 | # urljoin doesn't like URLs with no scheme in the base URL |
188 | # We handle no scheme by replacing it with an obscure protocol, gopher | 218 | # such as file paths. We handle this by prefixing it with |
189 | # and then replacing it with the original when we are done. | 219 | # an obscure protocol, gopher, and replacing it with the |
190 | 220 | # original after urljoin | |
191 | if manifestUrl.find(":") != manifestUrl.find("/") - 1: | 221 | if manifest_url.find(":") != manifest_url.find("/") - 1: |
192 | url = urllib.parse.urljoin("gopher://" + manifestUrl, url) | 222 | fetch_url = urllib.parse.urljoin( |
193 | url = re.sub(r"^gopher://", "", url) | 223 | "gopher://" + manifest_url, fetch_url |
224 | ) | ||
225 | fetch_url = re.sub(r"^gopher://", "", fetch_url) | ||
194 | else: | 226 | else: |
195 | url = urllib.parse.urljoin(manifestUrl, url) | 227 | fetch_url = urllib.parse.urljoin(manifest_url, fetch_url) |
196 | return url | 228 | return fetch_url |
197 | 229 | ||
198 | def ToRemoteSpec(self, projectName): | 230 | def ToRemoteSpec(self, projectName): |
199 | fetchUrl = self.resolvedFetchUrl.rstrip("/") | 231 | fetchUrl = self.resolvedFetchUrl.rstrip("/") |
diff --git a/tests/test_manifest_xml.py b/tests/test_manifest_xml.py index 3fcf09fa..11c0c15e 100644 --- a/tests/test_manifest_xml.py +++ b/tests/test_manifest_xml.py | |||
@@ -1128,3 +1128,32 @@ class ExtendProjectElementTests(ManifestParseTestCase): | |||
1128 | ) | 1128 | ) |
1129 | self.assertEqual(len(manifest.projects), 1) | 1129 | self.assertEqual(len(manifest.projects), 1) |
1130 | self.assertEqual(manifest.projects[0].upstream, "bar") | 1130 | self.assertEqual(manifest.projects[0].upstream, "bar") |
1131 | |||
1132 | |||
1133 | class NormalizeUrlTests(ManifestParseTestCase): | ||
1134 | """Tests for normalize_url() in manifest_xml.py""" | ||
1135 | |||
1136 | def test_has_trailing_slash(self): | ||
1137 | url = "http://foo.com/bar/baz/" | ||
1138 | self.assertEqual( | ||
1139 | "http://foo.com/bar/baz", manifest_xml.normalize_url(url) | ||
1140 | ) | ||
1141 | |||
1142 | def test_has_no_scheme(self): | ||
1143 | """Deal with cases where we have no scheme, but we also | ||
1144 | aren't dealing with the git SCP-like syntax | ||
1145 | """ | ||
1146 | url = "foo.com/baf/bat" | ||
1147 | self.assertEqual(url, manifest_xml.normalize_url(url)) | ||
1148 | |||
1149 | url = "git@foo.com/baf/bat" | ||
1150 | self.assertEqual(url, manifest_xml.normalize_url(url)) | ||
1151 | |||
1152 | url = "/file/path/here" | ||
1153 | self.assertEqual(url, manifest_xml.normalize_url(url)) | ||
1154 | |||
1155 | def test_has_no_scheme_matches_scp_like_syntax(self): | ||
1156 | url = "git@foo.com:bar/baf" | ||
1157 | self.assertEqual( | ||
1158 | "ssh://git@foo.com/bar/baf", manifest_xml.normalize_url(url) | ||
1159 | ) | ||