summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- manifest_xml.py 58
-rw-r--r-- tests/test_manifest_xml.py 29
2 files changed, 74 insertions, 13 deletions
diff --git a/manifest_xml.py b/manifest_xml.py
index 61b130cf..b27bf805 100644
--- a/manifest_xml.py
+++ b/manifest_xml.py
@@ -117,6 +117,36 @@ def XmlInt(node, attr, default=None):
117 raise ManifestParseError(f'manifest: invalid {attr}="{value}" integer') 117 raise ManifestParseError(f'manifest: invalid {attr}="{value}" integer')
118 118
119 119
def normalize_url(url: str) -> str:
    """Return 'url' in normalized form.

    * remove trailing slashes
    * convert SCP-like syntax to SSH URL

    Args:
        url: URL to normalize.

    Returns:
        The normalized URL.
    """

    url = url.rstrip("/")
    parsed_url = urllib.parse.urlparse(url)

    # This matches patterns like "git@github.com:foo" and
    # "git@github.com:foo/bar".
    #
    # Slashes are excluded from the user and host parts because git only
    # treats a string as SCP-like when no slash appears before the first
    # colon; this keeps local paths such as "/tmp/a@b:c/d" untouched.
    # No slash is required after the first path component: a single
    # component path is valid, and any trailing slash was stripped above.
    scp_like_url_re = r"^[^/:]+@[^/:]+:[^/]+"

    # If our URL is missing a schema and matches git's
    # SCP-like syntax we should convert it to a proper
    # SSH URL instead to make urljoin() happier.
    #
    # See: https://git-scm.com/docs/git-clone#URLS
    if not parsed_url.scheme and re.match(scp_like_url_re, url):
        # Only the first colon separates host from path.
        return "ssh://" + url.replace(":", "/", 1)

    return url
148
149
120class _Default: 150class _Default:
121 """Project defaults within the manifest.""" 151 """Project defaults within the manifest."""
122 152
@@ -180,20 +210,22 @@ class _XmlRemote:
def _resolveFetchUrl(self):
    """Resolve this remote's fetch URL relative to the manifest URL.

    Returns:
        An empty string when no fetch URL is set; otherwise the normalized
        fetch URL joined onto the normalized manifest URL.
    """
    if self.fetchUrl is None:
        return ""

    target = normalize_url(self.fetchUrl)
    base = normalize_url(self.manifestUrl)

    # When the first ":" sits directly before the first "/" in the base,
    # hand it to urljoin as-is.
    if base.find(":") == base.find("/") - 1:
        return urllib.parse.urljoin(base, target)

    # urljoin doesn't like URLs with no scheme in the base URL, such as
    # file paths. Prefix the base with an obscure protocol, gopher, and
    # strip that prefix from the result once the join is done.
    joined = urllib.parse.urljoin("gopher://" + base, target)
    return re.sub(r"^gopher://", "", joined)
197 229
198 def ToRemoteSpec(self, projectName): 230 def ToRemoteSpec(self, projectName):
199 fetchUrl = self.resolvedFetchUrl.rstrip("/") 231 fetchUrl = self.resolvedFetchUrl.rstrip("/")
diff --git a/tests/test_manifest_xml.py b/tests/test_manifest_xml.py
index 3fcf09fa..11c0c15e 100644
--- a/tests/test_manifest_xml.py
+++ b/tests/test_manifest_xml.py
@@ -1128,3 +1128,32 @@ class ExtendProjectElementTests(ManifestParseTestCase):
1128 ) 1128 )
1129 self.assertEqual(len(manifest.projects), 1) 1129 self.assertEqual(len(manifest.projects), 1)
1130 self.assertEqual(manifest.projects[0].upstream, "bar") 1130 self.assertEqual(manifest.projects[0].upstream, "bar")
1131
1132
class NormalizeUrlTests(ManifestParseTestCase):
    """Exercise manifest_xml.normalize_url()."""

    def test_has_trailing_slash(self):
        """A trailing slash is removed from a URL that has a scheme."""
        normalized = manifest_xml.normalize_url("http://foo.com/bar/baz/")
        self.assertEqual("http://foo.com/bar/baz", normalized)

    def test_has_no_scheme(self):
        """Scheme-less inputs that are not git's SCP-like syntax
        come back unchanged.
        """
        unchanged_urls = (
            "foo.com/baf/bat",
            "git@foo.com/baf/bat",
            "/file/path/here",
        )
        for url in unchanged_urls:
            self.assertEqual(url, manifest_xml.normalize_url(url))

    def test_has_no_scheme_matches_scp_like_syntax(self):
        """SCP-like input is rewritten as an ssh:// URL."""
        normalized = manifest_xml.normalize_url("git@foo.com:bar/baf")
        self.assertEqual("ssh://git@foo.com/bar/baf", normalized)