From b269a0063e9b10a6c88c92b24d1b92c7421950de Mon Sep 17 00:00:00 2001 From: Natalia <124304+nessita@users.noreply.github.com> Date: Wed, 29 Nov 2023 12:20:01 +0000 Subject: [PATCH 1/2] Fixed CVE-2023-43665 -- Mitigated potential DoS in django.utils.text.Truncator when truncating HTML text. Thanks Wenchao Li of Alibaba Group for the report. CVE: CVE-2023-43665 Upstream-Status: Backport [https://github.com/django/django/commit/ccdade1a0262537868d7ca64374de3d957ca50c5] Signed-off-by: Narpat Mali --- django/utils/text.py | 18 ++++++++++++++++- docs/ref/templates/builtins.txt | 20 +++++++++++++++++++ docs/releases/2.2.28.txt | 20 +++++++++++++++++++ tests/utils_tests/test_text.py | 35 ++++++++++++++++++++++++--------- 4 files changed, 83 insertions(+), 10 deletions(-) diff --git a/django/utils/text.py b/django/utils/text.py index 1fae7b2..06a377b 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -57,7 +57,14 @@ def wrap(text, width): class Truncator(SimpleLazyObject): """ An object used to truncate text, either by characters or words. + + When truncating HTML text (either chars or words), input will be limited to + at most `MAX_LENGTH_HTML` characters. """ + + # 5 million characters are approximately 4000 text pages or 3 web pages. 
+ MAX_LENGTH_HTML = 5_000_000 + def __init__(self, text): super().__init__(lambda: str(text)) @@ -154,6 +161,11 @@ class Truncator(SimpleLazyObject): if words and length <= 0: return '' + size_limited = False + if len(text) > self.MAX_LENGTH_HTML: + text = text[: self.MAX_LENGTH_HTML] + size_limited = True + html4_singlets = ( 'br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input' @@ -203,10 +215,14 @@ class Truncator(SimpleLazyObject): # Add it to the start of the open tags list open_tags.insert(0, tagname) + truncate_text = self.add_truncation_text("", truncate) + if current_len <= length: + if size_limited and truncate_text: + text += truncate_text return text + out = text[:end_text_pos] - truncate_text = self.add_truncation_text('', truncate) if truncate_text: out += truncate_text # Close any tags still open diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt index c4b0fa3..4faab38 100644 --- a/docs/ref/templates/builtins.txt +++ b/docs/ref/templates/builtins.txt @@ -2318,6 +2318,16 @@ If ``value`` is ``"

<p>Joel is a slug</p>

"``, the output will be Newlines in the HTML content will be preserved. +.. admonition:: Size of input string + + Processing large, potentially malformed HTML strings can be + resource-intensive and impact service performance. ``truncatechars_html`` + limits input to the first five million characters. + +.. versionchanged:: 2.2.28 + + In older versions, strings over five million characters were processed. + .. templatefilter:: truncatewords ``truncatewords`` @@ -2356,6 +2366,16 @@ If ``value`` is ``"

<p>Joel is a slug</p>

"``, the output will be Newlines in the HTML content will be preserved. +.. admonition:: Size of input string + + Processing large, potentially malformed HTML strings can be + resource-intensive and impact service performance. ``truncatewords_html`` + limits input to the first five million characters. + +.. versionchanged:: 2.2.28 + + In older versions, strings over five million characters were processed. + .. templatefilter:: unordered_list ``unordered_list`` diff --git a/docs/releases/2.2.28.txt b/docs/releases/2.2.28.txt index 40eb230..6a38e9c 100644 --- a/docs/releases/2.2.28.txt +++ b/docs/releases/2.2.28.txt @@ -56,3 +56,23 @@ CVE-2023-41164: Potential denial of service vulnerability in ``django.utils.enco ``django.utils.encoding.uri_to_iri()`` was subject to potential denial of service attack via certain inputs with a very large number of Unicode characters. + +Backporting the CVE-2023-43665 fix on Django 2.2.28. + +CVE-2023-43665: Denial-of-service possibility in ``django.utils.text.Truncator`` +================================================================================ + +Following the fix for :cve:`2019-14232`, the regular expressions used in the +implementation of ``django.utils.text.Truncator``'s ``chars()`` and ``words()`` +methods (with ``html=True``) were revised and improved. However, these regular +expressions still exhibited linear backtracking complexity, so when given a +very long, potentially malformed HTML input, the evaluation would still be +slow, leading to a potential denial of service vulnerability. + +The ``chars()`` and ``words()`` methods are used to implement the +:tfilter:`truncatechars_html` and :tfilter:`truncatewords_html` template +filters, which were thus also vulnerable. + +The input processed by ``Truncator``, when operating in HTML mode, has been +limited to the first five million characters in order to avoid potential +performance and memory issues. 
diff --git a/tests/utils_tests/test_text.py b/tests/utils_tests/test_text.py index 27e440b..cb3063d 100644 --- a/tests/utils_tests/test_text.py +++ b/tests/utils_tests/test_text.py @@ -1,5 +1,6 @@ import json import sys +from unittest.mock import patch from django.core.exceptions import SuspiciousFileOperation from django.test import SimpleTestCase @@ -87,11 +88,17 @@ class TestUtilsText(SimpleTestCase): # lazy strings are handled correctly self.assertEqual(text.Truncator(lazystr('The quick brown fox')).chars(10), 'The quick…') - def test_truncate_chars_html(self): + @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000) + def test_truncate_chars_html_size_limit(self): + max_len = text.Truncator.MAX_LENGTH_HTML + bigger_len = text.Truncator.MAX_LENGTH_HTML + 1 + valid_html = "

<p>Joel is a slug</p>

" # 14 chars perf_test_values = [ - (('', None), - ('&' * 50000, '&' * 9 + '…'), - ('_X<<<<<<<<<<<>', None), + ("", None), + ("", "", None), + (valid_html * bigger_len, "

<p>Joel is a…</p>

"), # 10 chars ] for value, expected in perf_test_values: with self.subTest(value=value): @@ -149,15 +156,25 @@ class TestUtilsText(SimpleTestCase): truncator = text.Truncator('

<p>I &lt;3 python, what about you?</p>

') self.assertEqual('

<p>I &lt;3 python,…</p>

', truncator.words(3, html=True)) + @patch("django.utils.text.Truncator.MAX_LENGTH_HTML", 10_000) + def test_truncate_words_html_size_limit(self): + max_len = text.Truncator.MAX_LENGTH_HTML + bigger_len = text.Truncator.MAX_LENGTH_HTML + 1 + valid_html = "

<p>Joel is a slug</p>

" # 4 words perf_test_values = [ - ('', - '&' * 50000, - '_X<<<<<<<<<<<>', + ("", None), + ("", "", None), + (valid_html * bigger_len, valid_html * 12 + "

<p>Joel is…</p>

"), # 50 words ] - for value in perf_test_values: + for value, expected in perf_test_values: with self.subTest(value=value): truncator = text.Truncator(value) - self.assertEqual(value, truncator.words(50, html=True)) + self.assertEqual( + expected if expected else value, truncator.words(50, html=True) + ) def test_wrap(self): digits = '1234 67 9' -- 2.40.0