From 4c4dec318f21e2eae9c69cd5b83cb54b4f9bb746 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Wed, 9 Jul 2025 19:55:22 +0100 Subject: [PATCH] Add trailer parsing logic (#11269) (#11287) (cherry picked from commit 7dd4b5535e6bf9c2d2f05fde638517bff065ba74) CVE: CVE-2025-53643 Upstream-Status: Backport [https://github.com/aio-libs/aiohttp/commit/e8d774f635dc6d1cd3174d0e38891da5de0e2b6a] Signed-off-by: Jiaying Song --- aiohttp/http_parser.py | 70 ++++++++++++++++++++++-------------------- aiohttp/multipart.py | 2 +- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 1ee126940..175eb7f68 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -147,8 +147,8 @@ class HeadersParser: headers: CIMultiDict[str] = CIMultiDict() raw_headers = [] - lines_idx = 1 - line = lines[1] + lines_idx = 0 + line = lines[lines_idx] line_count = len(lines) while line: @@ -386,6 +386,7 @@ class HttpParser(abc.ABC, Generic[_MsgT]): response_with_body=self.response_with_body, auto_decompress=self._auto_decompress, lax=self.lax, + headers_parser=self._headers_parser, ) if not payload_parser.done: self._payload_parser = payload_parser @@ -405,6 +406,7 @@ class HttpParser(abc.ABC, Generic[_MsgT]): readall=True, auto_decompress=self._auto_decompress, lax=self.lax, + headers_parser=self._headers_parser, ) else: if ( @@ -429,6 +431,7 @@ class HttpParser(abc.ABC, Generic[_MsgT]): response_with_body=self.response_with_body, auto_decompress=self._auto_decompress, lax=self.lax, + headers_parser=self._headers_parser, ) if not payload_parser.done: self._payload_parser = payload_parser @@ -462,6 +465,10 @@ class HttpParser(abc.ABC, Generic[_MsgT]): eof = True data = b"" + if isinstance( + underlying_exc, (InvalidHeader, TransferEncodingError) + ): + raise if eof: start_pos = 0 @@ -617,7 +624,7 @@ class HttpRequestParser(HttpParser[RawRequestMessage]): compression, upgrade, chunked, - ) = self.parse_headers(lines) + ) = self.parse_headers(lines[1:]) if close is None: # then the headers weren't set in the request if version_o <= HttpVersion10: # HTTP 1.0 must asks to not close @@ -696,7 +703,7 @@ class HttpResponseParser(HttpParser[RawResponseMessage]): compression, upgrade, chunked, - ) = self.parse_headers(lines) + ) = self.parse_headers(lines[1:]) if close is None: close = version_o <= HttpVersion10 @@ -727,6 +734,8 @@ class HttpPayloadParser: response_with_body: bool = True, auto_decompress: bool = True, lax: bool = False, + *, + headers_parser: HeadersParser, ) -> None: self._length = 0 self._type = ParseState.PARSE_NONE @@ -735,6 +744,8 @@ class HttpPayloadParser: self._chunk_tail = b"" self._auto_decompress = auto_decompress self._lax = lax + self._headers_parser = headers_parser + self._trailer_lines: list[bytes] = [] self.done = False # payload decompression wrapper @@ -822,7 +833,7 @@ class HttpPayloadParser: size_b = chunk[:i] # strip chunk-extensions # Verify no LF in the chunk-extension if b"\n" in (ext := chunk[i:pos]): - exc = BadHttpMessage( + exc = TransferEncodingError( f"Unexpected LF in chunk-extension: {ext!r}" ) set_exception(self.payload, exc) @@ -843,7 +854,7 @@ class HttpPayloadParser: chunk = chunk[pos + len(SEP) :] if size == 0: # eof marker - self._chunk = ChunkState.PARSE_MAYBE_TRAILERS + self._chunk = ChunkState.PARSE_TRAILERS if self._lax and chunk.startswith(b"\r"): chunk = chunk[1:] else: @@ -881,38 +892,31 @@ class HttpPayloadParser: self._chunk_tail = chunk return False, b"" - # if stream does not contain trailer, after 0\r\n - # we should get another \r\n otherwise - # trailers needs to be skiped until \r\n\r\n - if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS: - head = chunk[: len(SEP)] - if head == SEP: - # end of stream - self.payload.feed_eof() - return True, chunk[len(SEP) :] - # Both CR and LF, or only LF may not be received yet. It is - # expected that CRLF or LF will be shown at the very first - # byte next time, otherwise trailers should come. The last - # CRLF which marks the end of response might not be - # contained in the same TCP segment which delivered the - # size indicator. - if not head: - return False, b"" - if head == SEP[:1]: - self._chunk_tail = head - return False, b"" - self._chunk = ChunkState.PARSE_TRAILERS - - # read and discard trailer up to the CRLF terminator if self._chunk == ChunkState.PARSE_TRAILERS: pos = chunk.find(SEP) - if pos >= 0: - chunk = chunk[pos + len(SEP) :] - self._chunk = ChunkState.PARSE_MAYBE_TRAILERS - else: + if pos < 0: # No line found self._chunk_tail = chunk return False, b"" + line = chunk[:pos] + chunk = chunk[pos + len(SEP) :] + if SEP == b"\n": # For lax response parsing + line = line.rstrip(b"\r") + self._trailer_lines.append(line) + + # \r\n\r\n found, end of stream + if self._trailer_lines[-1] == b"": + # Headers and trailers are defined the same way, + # so we reuse the HeadersParser here. + try: + trailers, raw_trailers = self._headers_parser.parse_headers( + self._trailer_lines + ) + finally: + self._trailer_lines.clear() + self.payload.feed_eof() + return True, chunk + # Read all bytes until eof elif self._type == ParseState.PARSE_UNTIL_EOF: self.payload.feed_data(chunk, len(chunk)) diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py index 9cd49bb5d..2457f80ec 100644 --- a/aiohttp/multipart.py +++ b/aiohttp/multipart.py @@ -718,7 +718,7 @@ class MultipartReader: raise ValueError(f"Invalid boundary {chunk!r}, expected {self._boundary!r}") async def _read_headers(self) -> "CIMultiDictProxy[str]": - lines = [b""] + lines = [] while True: chunk = await self._content.readline() chunk = chunk.strip() -- 2.34.1