From c7e0151fdf33e1b11d488b6f67b94fdf3a30614a Mon Sep 17 00:00:00 2001
From: Nick Pope <nick@nickpope.me.uk>
Date: Wed, 25 Jan 2023 12:21:48 +0100
Subject: [PATCH] [3.2.x] Fixed CVE-2023-23969 -- Prevented DoS with
 pathological values for Accept-Language.

The parsed values of Accept-Language headers are cached in order to
avoid repetitive parsing. This leads to a potential denial-of-service
vector via excessive memory usage if the raw value of Accept-Language
headers is very large.

Accept-Language headers are now limited to a maximum length in order
to avoid this issue.

CVE: CVE-2023-23969

Upstream-Status: Backport [https://github.com/django/django/commit/c7e0151fdf33e1b11d488b6f67b94fdf3a30614a]

Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
---
 django/utils/translation/trans_real.py | 30 +++++++++++++++++++++++++-
 tests/i18n/tests.py                    | 12 +++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/django/utils/translation/trans_real.py b/django/utils/translation/trans_real.py
index 486b2b2..7f658cf 100644
--- a/django/utils/translation/trans_real.py
+++ b/django/utils/translation/trans_real.py
@@ -29,6 +29,10 @@ _default = None
 # magic gettext number to separate context from message
 CONTEXT_SEPARATOR = "\x04"

+# Maximum number of characters that will be parsed from the Accept-Language
+# header to prevent possible denial of service or memory exhaustion attacks.
+ACCEPT_LANGUAGE_HEADER_MAX_LENGTH = 500
+
 # Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9
 # and RFC 3066, section 2.1
 accept_language_re = re.compile(r'''
@@ -560,7 +564,7 @@ def get_language_from_request(request, check_path=False):


 @functools.lru_cache(maxsize=1000)
-def parse_accept_lang_header(lang_string):
+def _parse_accept_lang_header(lang_string):
     """
     Parse the lang_string, which is the body of an HTTP Accept-Language
     header, and return a tuple of (lang, q-value), ordered by 'q' values.
@@ -582,3 +586,27 @@ def parse_accept_lang_header(lang_string):
         result.append((lang, priority))
     result.sort(key=lambda k: k[1], reverse=True)
     return tuple(result)
+
+
+def parse_accept_lang_header(lang_string):
+    """
+    Parse the value of the Accept-Language header up to a maximum length.
+
+    The value of the header is truncated to a maximum length to avoid potential
+    denial of service and memory exhaustion attacks. Excessive memory could be
+    used if the raw value is very large as it would be cached due to the use of
+    `functools.lru_cache()` to avoid repetitive parsing of common header values.
+    """
+    # If the header value doesn't exceed the maximum allowed length, parse it.
+    if len(lang_string) <= ACCEPT_LANGUAGE_HEADER_MAX_LENGTH:
+        return _parse_accept_lang_header(lang_string)
+
+    # If there is at least one comma within the maximum length, parse up to the
+    # last such comma, skipping any truncated parts at the end of the value.
+    index = lang_string.rfind(",", 0, ACCEPT_LANGUAGE_HEADER_MAX_LENGTH)
+    if index > 0:
+        return _parse_accept_lang_header(lang_string[:index])
+
+    # Don't attempt to parse if there is only one language-range value which is
+    # longer than the maximum allowed length and so truncated.
+    return ()
diff --git a/tests/i18n/tests.py b/tests/i18n/tests.py
index 7381cb9..6efc3a5 100644
--- a/tests/i18n/tests.py
+++ b/tests/i18n/tests.py
@@ -1282,6 +1282,14 @@ class MiscTests(SimpleTestCase):
             ('de;q=0.', [('de', 0.0)]),
             ('en; q=1,', [('en', 1.0)]),
             ('en; q=1.0, * ; q=0.5', [('en', 1.0), ('*', 0.5)]),
+            (
+                'en' + '-x' * 20,
+                [('en-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x', 1.0)],
+            ),
+            (
+                ', '.join(['en; q=1.0'] * 20),
+                [('en', 1.0)] * 20,
+            ),
             # Bad headers
             ('en-gb;q=1.0000', []),
             ('en;q=0.1234', []),
@@ -1297,6 +1305,10 @@ class MiscTests(SimpleTestCase):
             ('12-345', []),
             ('', []),
             ('en;q=1e0', []),
+            # Invalid as language-range value too long.
+            ('xxxxxxxx' + '-xxxxxxxx' * 500, []),
+            # Header value too long, only parse up to limit.
+            (', '.join(['en; q=1.0'] * 500), [('en', 1.0)] * 45),
         ]
         for value, expected in tests:
             with self.subTest(value=value):
--
2.40.0