From d147a8ebbdf28c17cafbbe2884f0bc57e2bf82e2 Mon Sep 17 00:00:00 2001
From: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>
Date: Mon, 12 Aug 2024 15:17:57 +0200
Subject: [PATCH] [4.2.x] Fixed CVE-2024-45230 -- Mitigated potential DoS in
 urlize and urlizetrunc template filters.

Thanks MProgrammer (https://hackerone.com/mprogrammer) for the report.

CVE: CVE-2024-45230

Upstream-Status: Backport [https://github.com/django/django/commit/d147a8ebbdf28c17cafbbe2884f0bc57e2bf82e2]

Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
---
 django/utils/html.py                          | 22 +++++++++++--------
 docs/ref/templates/builtins.txt               | 11 ++++++++++
 .../filter_tests/test_urlize.py               | 22 +++++++++++++++++++
 tests/utils_tests/test_html.py                |  1 +
 4 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/django/utils/html.py b/django/utils/html.py
index 79f06bd..d129334 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -1,5 +1,6 @@
 """HTML utilities suitable for global use."""

+import html
 import json
 import re
 from html.parser import HTMLParser
@@ -327,16 +328,19 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
             if trailing_punctuation_chars_has_semicolon() and middle.endswith(";"):
                 # Only strip if not part of an HTML entity.
                 potential_entity = middle[amp:]
-                escaped = unescape(potential_entity)
+                escaped = html.unescape(potential_entity)
                 if escaped == potential_entity or escaped.endswith(";"):
-                    rstripped = middle.rstrip(";")
-                    amount_stripped = len(middle) - len(rstripped)
-                    if amp > -1 and amount_stripped > 1:
-                        # Leave a trailing semicolon as might be an entity.
-                        trail = middle[len(rstripped) + 1 :] + trail
-                        middle = rstripped + ";"
+                    rstripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
+                    trail_start = len(rstripped)
+                    amount_trailing_semicolons = len(middle) - len(middle.rstrip(";"))
+                    if amp > -1 and amount_trailing_semicolons > 1:
+                        # Leave up to most recent semicolon as might be an entity.
+                        recent_semicolon = middle[trail_start:].index(";")
+                        middle_semicolon_index = recent_semicolon + trail_start + 1
+                        trail = middle[middle_semicolon_index:] + trail
+                        middle = rstripped + middle[trail_start:middle_semicolon_index]
                     else:
-                        trail = middle[len(rstripped) :] + trail
+                        trail = middle[trail_start:] + trail
                         middle = rstripped
                     trimmed_something = True

@@ -373,7 +377,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
             url = None
             nofollow_attr = ' rel="nofollow"' if nofollow else ''
             if len(middle) <= MAX_URL_LENGTH and simple_url_re.match(middle):
-                url = smart_urlquote(unescape(middle))
+                url = smart_urlquote(html.unescape(middle))
             elif len(middle) <= MAX_URL_LENGTH and simple_url_2_re.match(middle):
                 url = smart_urlquote('http://%s' % unescape(middle))
             elif ':' not in middle and is_email_simple(middle):
diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt
index 4faab38..1990ed0 100644
--- a/docs/ref/templates/builtins.txt
+++ b/docs/ref/templates/builtins.txt
@@ -2483,6 +2483,17 @@ Django's built-in :tfilter:`escape` filter. The default value for
     email addresses that contain single quotes (``'``), things won't work as
     expected. Apply this filter only to plain text.

+.. warning::
+
+    Using ``urlize`` or ``urlizetrunc`` can incur a performance penalty, which
+    can become severe when applied to user-controlled values such as content
+    stored in a :class:`~django.db.models.TextField`. You can use
+    :tfilter:`truncatechars` to add a limit to such inputs:
+
+    .. code-block:: html+django
+
+        {{ value|truncatechars:500|urlize }}
+
 .. templatefilter:: urlizetrunc

 ``urlizetrunc``
diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py
index 649a965..1991301 100644
--- a/tests/template_tests/filter_tests/test_urlize.py
+++ b/tests/template_tests/filter_tests/test_urlize.py
@@ -260,6 +260,28 @@ class FunctionTests(SimpleTestCase):
             'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>.,:;)&quot;!'
         )

+    def test_trailing_semicolon(self):
+        self.assertEqual(
+            urlize("http://example.com?x=&amp;", autoescape=False),
+            '<a href="http://example.com?x=" rel="nofollow">'
+            "http://example.com?x=&amp;</a>",
+        )
+        self.assertEqual(
+            urlize("http://example.com?x=&amp;;", autoescape=False),
+            '<a href="http://example.com?x=" rel="nofollow">'
+            "http://example.com?x=&amp;</a>;",
+        )
+        self.assertEqual(
+            urlize("http://example.com?x=&amp;;;", autoescape=False),
+            '<a href="http://example.com?x=" rel="nofollow">'
+            "http://example.com?x=&amp;</a>;;",
+        )
+        self.assertEqual(
+            urlize("http://example.com?x=&amp.;...;", autoescape=False),
+            '<a href="http://example.com?x=" rel="nofollow">'
+            "http://example.com?x=&amp</a>.;...;",
+        )
+
     def test_brackets(self):
         """
         #19070 - Check urlize handles brackets properly
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index 1a5c963..b382843 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -289,6 +289,7 @@ class TestUtilsHtml(SimpleTestCase):
             "&:" + ";" * 100_000,
             "&.;" * 100_000,
             ".;" * 100_000,
+            "&" + ";:" * 100_000,
         )
         for value in tests:
             with self.subTest(value=value):
--
2.40.0