meta-python/recipes-devtools/python/python3-django/CVE-2024-38875.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161

From 79f368764295df109a37192f6182fb6f361d85b5 Mon Sep 17 00:00:00 2001
From: Adam Johnson <me@adamj.eu>
Date: Mon, 24 Jun 2024 15:30:59 +0200
Subject: [PATCH] [4.2.x] Fixed CVE-2024-38875 -- Mitigated potential DoS in
 urlize and urlizetrunc template filters.

Thank you to Elias Myllymäki for the report.

Co-authored-by: Sarah Boyce <42296566+sarahboyce@users.noreply.github.com>

CVE: CVE-2024-38875

Upstream-Status: Backport [https://github.com/django/django/commit/79f368764295df109a37192f6182fb6f361d85b5]

Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
---
 django/utils/html.py           | 90 +++++++++++++++++++++++++---------
 tests/utils_tests/test_html.py |  7 +++
 2 files changed, 73 insertions(+), 21 deletions(-)

diff --git a/django/utils/html.py b/django/utils/html.py
index 7a33d5f..f1b74ab 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -234,6 +234,15 @@ def smart_urlquote(url):

     return urlunsplit((scheme, netloc, path, query, fragment))

+class CountsDict(dict):
+    def __init__(self, *args, word, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.word = word
+
+    def __missing__(self, key):
+        self[key] = self.word.count(key)
+        return self[key]
+

 @keep_lazy_text
 def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
@@ -268,36 +277,69 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
         return text.replace('&amp;', '&').replace('&lt;', '<').replace(
             '&gt;', '>').replace('&quot;', '"').replace('&#39;', "'")

-    def trim_punctuation(lead, middle, trail):
+    def wrapping_punctuation_openings():
+        return "".join(dict(WRAPPING_PUNCTUATION).keys())
+
+    def trailing_punctuation_chars_no_semicolon():
+        return TRAILING_PUNCTUATION_CHARS.replace(";", "")
+
+    def trailing_punctuation_chars_has_semicolon():
+        return ";" in TRAILING_PUNCTUATION_CHARS
+
+    def trim_punctuation(word):
         """
         Trim trailing and wrapping punctuation from `middle`. Return the items
         of the new state.
         """
+        # Strip all opening wrapping punctuation.
+        middle = word.lstrip(wrapping_punctuation_openings())
+        lead = word[: len(word) - len(middle)]
+        trail = ""
+
         # Continue trimming until middle remains unchanged.
         trimmed_something = True
-        while trimmed_something:
+        counts = CountsDict(word=middle)
+        while trimmed_something and middle:
             trimmed_something = False
             # Trim wrapping punctuation.
             for opening, closing in WRAPPING_PUNCTUATION:
-                if middle.startswith(opening):
-                    middle = middle[len(opening):]
-                    lead += opening
-                    trimmed_something = True
-                # Keep parentheses at the end only if they're balanced.
-                if (middle.endswith(closing) and
-                        middle.count(closing) == middle.count(opening) + 1):
-                    middle = middle[:-len(closing)]
-                    trail = closing + trail
-                    trimmed_something = True
-            # Trim trailing punctuation (after trimming wrapping punctuation,
-            # as encoded entities contain ';'). Unescape entites to avoid
-            # breaking them by removing ';'.
-            middle_unescaped = unescape(middle)
-            stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
-            if middle_unescaped != stripped:
-                trail = middle[len(stripped):] + trail
-                middle = middle[:len(stripped) - len(middle_unescaped)]
+                if counts[opening] < counts[closing]:
+                    rstripped = middle.rstrip(closing)
+                    if rstripped != middle:
+                        strip = counts[closing] - counts[opening]
+                        trail = middle[-strip:] + trail
+                        middle = middle[:-strip]
+                        trimmed_something = True
+                        counts[closing] -= strip
+
+            rstripped = middle.rstrip(trailing_punctuation_chars_no_semicolon())
+            if rstripped != middle:
+                trail = middle[len(rstripped) :] + trail
+                middle = rstripped
                 trimmed_something = True
+
+            if trailing_punctuation_chars_has_semicolon() and middle.endswith(";"):
+                # Only strip if not part of an HTML entity.
+                amp = middle.rfind("&")
+                if amp == -1:
+                    can_strip = True
+                else:
+                    potential_entity = middle[amp:]
+                    escaped = unescape(potential_entity)
+                    can_strip = (escaped == potential_entity) or escaped.endswith(";")
+
+                if can_strip:
+                    rstripped = middle.rstrip(";")
+                    amount_stripped = len(middle) - len(rstripped)
+                    if amp > -1 and amount_stripped > 1:
+                        # Leave a trailing semicolon as might be an entity.
+                        trail = middle[len(rstripped) + 1 :] + trail
+                        middle = rstripped + ";"
+                    else:
+                        trail = middle[len(rstripped) :] + trail
+                        middle = rstripped
+                    trimmed_something = True
+
         return lead, middle, trail

     def is_email_simple(value):
@@ -321,9 +363,7 @@ def urlize(text, trim_url_limit=None, no
             # lead: Current punctuation trimmed from the beginning of the word.
             # middle: Current state of the word.
             # trail: Current punctuation trimmed from the end of the word.
-            lead, middle, trail = '', word, ''
-            # Deal with punctuation.
-            lead, middle, trail = trim_punctuation(lead, middle, trail)
+            lead, middle, trail = trim_punctuation(word)

             # Make URL we want to point to.
             url = None
diff --git a/tests/utils_tests/test_html.py b/tests/utils_tests/test_html.py
index 5cc2d9b..715c1c6 100644
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -267,6 +267,13 @@ class TestUtilsHtml(SimpleTestCase):
             'foo@.example.com',
             'foo@localhost',
             'foo@localhost.',
+            # trim_punctuation catastrophic tests
+            "(" * 100_000 + ":" + ")" * 100_000,
+            "(" * 100_000 + "&:" + ")" * 100_000,
+            "([" * 100_000 + ":" + "])" * 100_000,
+            "[(" * 100_000 + ":" + ")]" * 100_000,
+            "([[" * 100_000 + ":" + "]])" * 100_000,
+            "&:" + ";" * 100_000,
         )
         for value in tests:
             with self.subTest(value=value):
--
2.40.0