From d147a8ebbdf28c17cafbbe2884f0bc57e2bf82e2 Mon Sep 17 00:00:00 2001 From: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com> Date: Mon, 12 Aug 2024 15:17:57 +0200 Subject: [PATCH] [4.2.x] Fixed CVE-2024-45230 -- Mitigated potential DoS in urlize and urlizetrunc template filters. Thanks MProgrammer (https://hackerone.com/mprogrammer) for the report. CVE: CVE-2024-45230 Upstream-Status: Backport [https://github.com/django/django/commit/d147a8ebbdf28c17cafbbe2884f0bc57e2bf82e2] Signed-off-by: Soumya Sambu --- django/utils/html.py | 22 +++++++++++-------- docs/ref/templates/builtins.txt | 11 ++++++++++ .../filter_tests/test_urlize.py | 22 +++++++++++++++++++ tests/utils_tests/test_html.py | 1 + 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/django/utils/html.py b/django/utils/html.py index 79f06bd..d129334 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -1,5 +1,6 @@ """HTML utilities suitable for global use.""" +import html import json import re from html.parser import HTMLParser @@ -327,16 +328,19 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): if trailing_punctuation_chars_has_semicolon() and middle.endswith(";"): # Only strip if not part of an HTML entity. potential_entity = middle[amp:] - escaped = unescape(potential_entity) + escaped = html.unescape(potential_entity) if escaped == potential_entity or escaped.endswith(";"): - rstripped = middle.rstrip(";") - amount_stripped = len(middle) - len(rstripped) - if amp > -1 and amount_stripped > 1: - # Leave a trailing semicolon as might be an entity. - trail = middle[len(rstripped) + 1 :] + trail - middle = rstripped + ";" + rstripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS) + trail_start = len(rstripped) + amount_trailing_semicolons = len(middle) - len(middle.rstrip(";")) + if amp > -1 and amount_trailing_semicolons > 1: + # Leave up to most recent semicolon as might be an entity. + recent_semicolon = middle[trail_start:].index(";") + middle_semicolon_index = recent_semicolon + trail_start + 1 + trail = middle[middle_semicolon_index:] + trail + middle = rstripped + middle[trail_start:middle_semicolon_index] else: - trail = middle[len(rstripped) :] + trail + trail = middle[trail_start:] + trail middle = rstripped trimmed_something = True @@ -373,7 +377,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): url = None nofollow_attr = ' rel="nofollow"' if nofollow else '' if len(middle) <= MAX_URL_LENGTH and simple_url_re.match(middle): - url = smart_urlquote(unescape(middle)) + url = smart_urlquote(html.unescape(middle)) elif len(middle) <= MAX_URL_LENGTH and simple_url_2_re.match(middle): url = smart_urlquote('http://%s' % unescape(middle)) elif ':' not in middle and is_email_simple(middle): diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt index 4faab38..1990ed0 100644 --- a/docs/ref/templates/builtins.txt +++ b/docs/ref/templates/builtins.txt @@ -2483,6 +2483,17 @@ Django's built-in :tfilter:`escape` filter. The default value for email addresses that contain single quotes (``'``), things won't work as expected. Apply this filter only to plain text. +.. warning:: + + Using ``urlize`` or ``urlizetrunc`` can incur a performance penalty, which + can become severe when applied to user controlled values such as content + stored in a :class:`~django.db.models.TextField`. You can use + :tfilter:`truncatechars` to add a limit to such inputs: + + .. code-block:: html+django + + {{ value|truncatechars:500|urlize }} + .. templatefilter:: urlizetrunc ``urlizetrunc`` diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py index 649a965..1991301 100644 --- a/tests/template_tests/filter_tests/test_urlize.py +++ b/tests/template_tests/filter_tests/test_urlize.py @@ -260,6 +260,28 @@ class FunctionTests(SimpleTestCase): 'A test http://testing.com/example.,:;)"!' ) + def test_trailing_semicolon(self): + self.assertEqual( + urlize("http://example.com?x=&", autoescape=False), + '' + "http://example.com?x=&", + ) + self.assertEqual( + urlize("http://example.com?x=&;", autoescape=False), + '' + "http://example.com?x=&;", + ) + self.assertEqual( + urlize("http://example.com?x=&;;", autoescape=False), + '' + "http://example.com?x=&;;", + ) + self.assertEqual( + urlize("http://example.com?x=&.;...;", autoescape=False), + '' + "http://example.com?x=&.;...;", + ) + def test_brackets(self): """ #19070 - Check urlize handles brackets properly diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py index 1a5c963..b382843 100644 --- a/tests/utils_tests/test_html.py +++ b/tests/utils_tests/test_html.py @@ -289,6 +289,7 @@ class TestUtilsHtml(SimpleTestCase): "&:" + ";" * 100_000, "&.;" * 100_000, ".;" * 100_000, + "&" + ";:" * 100_000, ) for value in tests: with self.subTest(value=value): -- 2.40.0