From 79f368764295df109a37192f6182fb6f361d85b5 Mon Sep 17 00:00:00 2001
From: Adam Johnson
Date: Mon, 24 Jun 2024 15:30:59 +0200
Subject: [PATCH] [4.2.x] Fixed CVE-2024-38875 -- Mitigated potential DoS in urlize and urlizetrunc template filters.

Thank you to Elias Myllymäki for the report.

Co-authored-by: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>

CVE: CVE-2024-38875

Upstream-Status: Backport [https://github.com/django/django/commit/79f368764295df109a37192f6182fb6f361d85b5]

Review corrections carried in this patch (both are in-place edits of added
lines, so hunk sizes are unchanged):
 - CountsDict.__init__ forwards keyword arguments with **kwargs; the
   previous *kwargs unpacked the dict's keys as positional arguments.
 - In trim_punctuation, the wrapping-punctuation branch prepends to
   `trail` instead of overwriting it, so punctuation trimmed earlier
   (e.g. the "." in "http://example.com/).") is no longer dropped.

Signed-off-by: Soumya Sambu
---
 django/utils/html.py           | 90 +++++++++++++++++++++++++---------
 tests/utils_tests/test_html.py |  7 +++
 2 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/django/utils/html.py b/django/utils/html.py
index 7a33d5f..f1b74ab 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -234,6 +234,15 @@ def smart_urlquote(url):
 
     return urlunsplit((scheme, netloc, path, query, fragment))
 
+class CountsDict(dict):
+    def __init__(self, *args, word, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.word = word
+
+    def __missing__(self, key):
+        self[key] = self.word.count(key)
+        return self[key]
+
 
 @keep_lazy_text
 def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
@@ -268,36 +277,69 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
         return text.replace('&amp;', '&').replace('&lt;', '<').replace(
             '&gt;', '>').replace('&quot;', '"').replace('&#39;', "'")
 
-    def trim_punctuation(lead, middle, trail):
+    def wrapping_punctuation_openings():
+        return "".join(dict(WRAPPING_PUNCTUATION).keys())
+
+    def trailing_punctuation_chars_no_semicolon():
+        return TRAILING_PUNCTUATION_CHARS.replace(";", "")
+
+    def trailing_punctuation_chars_has_semicolon():
+        return ";" in TRAILING_PUNCTUATION_CHARS
+
+    def trim_punctuation(word):
         """
         Trim trailing and wrapping punctuation from `middle`. Return the items
         of the new state.
         """
+        # Strip all opening wrapping punctuation.
+        middle = word.lstrip(wrapping_punctuation_openings())
+        lead = word[: len(word) - len(middle)]
+        trail = ""
+
         # Continue trimming until middle remains unchanged.
         trimmed_something = True
-        while trimmed_something:
+        counts = CountsDict(word=middle)
+        while trimmed_something and middle:
             trimmed_something = False
             # Trim wrapping punctuation.
             for opening, closing in WRAPPING_PUNCTUATION:
-                if middle.startswith(opening):
-                    middle = middle[len(opening):]
-                    lead += opening
-                    trimmed_something = True
-                # Keep parentheses at the end only if they're balanced.
-                if (middle.endswith(closing) and
-                        middle.count(closing) == middle.count(opening) + 1):
-                    middle = middle[:-len(closing)]
-                    trail = closing + trail
-                    trimmed_something = True
-            # Trim trailing punctuation (after trimming wrapping punctuation,
-            # as encoded entities contain ';'). Unescape entites to avoid
-            # breaking them by removing ';'.
-            middle_unescaped = unescape(middle)
-            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
-            if middle_unescaped != stripped:
-                trail = middle[len(stripped):] + trail
-                middle = middle[:len(stripped) - len(middle_unescaped)]
+                if counts[opening] < counts[closing]:
+                    rstripped = middle.rstrip(closing)
+                    if rstripped != middle:
+                        strip = counts[closing] - counts[opening]
+                        trail = middle[-strip:] + trail
+                        middle = middle[:-strip]
+                        trimmed_something = True
+                        counts[closing] -= strip
+
+            rstripped = middle.rstrip(trailing_punctuation_chars_no_semicolon())
+            if rstripped != middle:
+                trail = middle[len(rstripped) :] + trail
+                middle = rstripped
                 trimmed_something = True
+
+            if trailing_punctuation_chars_has_semicolon() and middle.endswith(";"):
+                # Only strip if not part of an HTML entity.
+                amp = middle.rfind("&")
+                if amp == -1:
+                    can_strip = True
+                else:
+                    potential_entity = middle[amp:]
+                    escaped = unescape(potential_entity)
+                    can_strip = (escaped == potential_entity) or escaped.endswith(";")
+
+                if can_strip:
+                    rstripped = middle.rstrip(";")
+                    amount_stripped = len(middle) - len(rstripped)
+                    if amp > -1 and amount_stripped > 1:
+                        # Leave a trailing semicolon as might be an entity.
+                        trail = middle[len(rstripped) + 1 :] + trail
+                        middle = rstripped + ";"
+                    else:
+                        trail = middle[len(rstripped) :] + trail
+                        middle = rstripped
+                    trimmed_something = True
+
         return lead, middle, trail
 
     def is_email_simple(value):
@@ -321,9 +363,7 @@ def urlize(text, trim_url_limit=None, no
             # lead: Current punctuation trimmed from the beginning of the word.
             # middle: Current state of the word.
             # trail: Current punctuation trimmed from the end of the word.
-            lead, middle, trail = '', word, ''
-            # Deal with punctuation.
-            lead, middle, trail = trim_punctuation(lead, middle, trail)
+            lead, middle, trail = trim_punctuation(word)
 
             # Make URL we want to point to.
             url = None
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index 5cc2d9b..715c1c6 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -267,6 +267,13 @@ class TestUtilsHtml(SimpleTestCase):
             'foo@.example.com',
             'foo@localhost',
             'foo@localhost.',
+            # trim_punctuation catastrophic tests
+            "(" * 100_000 + ":" + ")" * 100_000,
+            "(" * 100_000 + "&:" + ")" * 100_000,
+            "([" * 100_000 + ":" + "])" * 100_000,
+            "[(" * 100_000 + ":" + ")]" * 100_000,
+            "([[" * 100_000 + ":" + "]])" * 100_000,
+            "&:" + ";" * 100_000,
         )
         for value in tests:
             with self.subTest(value=value):
-- 
2.40.0