1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
From d147a8ebbdf28c17cafbbe2884f0bc57e2bf82e2 Mon Sep 17 00:00:00 2001
From: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>
Date: Mon, 12 Aug 2024 15:17:57 +0200
Subject: [PATCH] [4.2.x] Fixed CVE-2024-45230 -- Mitigated potential DoS in
 urlize and urlizetrunc template filters.

Thanks MProgrammer (https://hackerone.com/mprogrammer) for the report.
CVE: CVE-2024-45230
Upstream-Status: Backport [https://github.com/django/django/commit/d147a8ebbdf28c17cafbbe2884f0bc57e2bf82e2]
Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
---
django/utils/html.py | 22 +++++++++++--------
docs/ref/templates/builtins.txt | 11 ++++++++++
.../filter_tests/test_urlize.py | 22 +++++++++++++++++++
tests/utils_tests/test_html.py | 1 +
4 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/django/utils/html.py b/django/utils/html.py
index 79f06bd..d129334 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -1,5 +1,6 @@
"""HTML utilities suitable for global use."""
+import html
import json
import re
from html.parser import HTMLParser
@@ -327,16 +328,19 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
if trailing_punctuation_chars_has_semicolon() and middle.endswith(";"):
# Only strip if not part of an HTML entity.
potential_entity = middle[amp:]
- escaped = unescape(potential_entity)
+ escaped = html.unescape(potential_entity)
if escaped == potential_entity or escaped.endswith(";"):
- rstripped = middle.rstrip(";")
- amount_stripped = len(middle) - len(rstripped)
- if amp > -1 and amount_stripped > 1:
- # Leave a trailing semicolon as might be an entity.
- trail = middle[len(rstripped) + 1 :] + trail
- middle = rstripped + ";"
+ rstripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
+ trail_start = len(rstripped)
+ amount_trailing_semicolons = len(middle) - len(middle.rstrip(";"))
+ if amp > -1 and amount_trailing_semicolons > 1:
+ # Leave up to most recent semicolon as might be an entity.
+ recent_semicolon = middle[trail_start:].index(";")
+ middle_semicolon_index = recent_semicolon + trail_start + 1
+ trail = middle[middle_semicolon_index:] + trail
+ middle = rstripped + middle[trail_start:middle_semicolon_index]
else:
- trail = middle[len(rstripped) :] + trail
+ trail = middle[trail_start:] + trail
middle = rstripped
trimmed_something = True
@@ -373,7 +377,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
url = None
nofollow_attr = ' rel="nofollow"' if nofollow else ''
if len(middle) <= MAX_URL_LENGTH and simple_url_re.match(middle):
- url = smart_urlquote(unescape(middle))
+ url = smart_urlquote(html.unescape(middle))
elif len(middle) <= MAX_URL_LENGTH and simple_url_2_re.match(middle):
url = smart_urlquote('http://%s' % unescape(middle))
elif ':' not in middle and is_email_simple(middle):
diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt
index 4faab38..1990ed0 100644
--- a/docs/ref/templates/builtins.txt
+++ b/docs/ref/templates/builtins.txt
@@ -2483,6 +2483,17 @@ Django's built-in :tfilter:`escape` filter. The default value for
email addresses that contain single quotes (``'``), things won't work as
expected. Apply this filter only to plain text.
+.. warning::
+
+ Using ``urlize`` or ``urlizetrunc`` can incur a performance penalty, which
+ can become severe when applied to user controlled values such as content
+ stored in a :class:`~django.db.models.TextField`. You can use
+ :tfilter:`truncatechars` to add a limit to such inputs:
+
+ .. code-block:: html+django
+
+ {{ value|truncatechars:500|urlize }}
+
.. templatefilter:: urlizetrunc
``urlizetrunc``
diff --git a/tests/template_tests/filter_tests/test_urlize.py b/tests/template_tests/filter_tests/test_urlize.py
index 649a965..1991301 100644
--- a/tests/template_tests/filter_tests/test_urlize.py
+++ b/tests/template_tests/filter_tests/test_urlize.py
@@ -260,6 +260,28 @@ class FunctionTests(SimpleTestCase):
'A test <a href="http://testing.com/example" rel="nofollow">http://testing.com/example</a>.,:;)&quot;!'
)
+ def test_trailing_semicolon(self):
+ self.assertEqual(
+ urlize("http://example.com?x=&amp;", autoescape=False),
+ '<a href="http://example.com?x=" rel="nofollow">'
+ "http://example.com?x=&amp;</a>",
+ )
+ self.assertEqual(
+ urlize("http://example.com?x=&amp;;", autoescape=False),
+ '<a href="http://example.com?x=" rel="nofollow">'
+ "http://example.com?x=&amp;</a>;",
+ )
+ self.assertEqual(
+ urlize("http://example.com?x=&amp;;;", autoescape=False),
+ '<a href="http://example.com?x=" rel="nofollow">'
+ "http://example.com?x=&amp;</a>;;",
+ )
+ self.assertEqual(
+ urlize("http://example.com?x=&amp.;...;", autoescape=False),
+ '<a href="http://example.com?x=" rel="nofollow">'
+ "http://example.com?x=&amp</a>.;...;",
+ )
+
def test_brackets(self):
"""
#19070 - Check urlize handles brackets properly
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index 1a5c963..b382843 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -289,6 +289,7 @@ class TestUtilsHtml(SimpleTestCase):
"&:" + ";" * 100_000,
"&.;" * 100_000,
".;" * 100_000,
+ "&" + ";:" * 100_000,
)
for value in tests:
with self.subTest(value=value):
--
2.40.0
|