XSS prevention added (with tests)

2025-11-27 15:43:41 +01:00
parent fd729b3019
commit f933b7d99a
7 changed files with 65 additions and 12 deletions
--- a/abschnitte/tests.py
+++ b/abschnitte/tests.py
@@ -467,6 +467,32 @@ A -> B
        typ, html = result[0]
        self.assertEqual(typ, "text")

+    def test_render_textabschnitte_xss_prevention(self):
+        """Test that malicious HTML is sanitized in rendered content"""
+        from dokumente.models import VorgabeLangtext
+
+        # Store a section whose content combines a <script> tag, an <img> with an onerror handler, and plain text
+        malicious_abschnitt = VorgabeLangtext.objects.create(
+            abschnitt=self.vorgabe,
+            abschnitttyp=self.typ_text,
+            inhalt='<script>alert("xss")</script><img src=x onerror=alert(1)>Normal text',
+            order=1
+        )
+
+        result = render_textabschnitte(VorgabeLangtext.objects.filter(pk=malicious_abschnitt.pk))
+
+        self.assertEqual(len(result), 1)
+        typ, html = result[0]
+        self.assertEqual(typ, "text")
+
+        # The rendered HTML must not contain the payload as live markup
+        self.assertNotIn('<script>', html)  # a literal '<script>' tag must not survive (an escaped form is acceptable)
+        self.assertNotIn('onerror', html)   # the event-handler attribute must not appear anywhere in the output
+        # Note: 'alert' is intentionally not asserted on; it may remain as inert text inside an escaped script tag
+
+        # The harmless text that followed the payload must still be rendered
+        self.assertIn('Normal text', html)
+

 class MdTableToHtmlTest(TestCase):
    """Test cases for md_table_to_html function"""
--- a/abschnitte/utils.py
+++ b/abschnitte/utils.py
@@ -4,12 +4,34 @@ import zlib
 import re
 from textwrap import dedent
 from django.conf import settings
+import bleach

 # Import the caching function
 from diagramm_proxy.diagram_cache import get_cached_diagram

 DIAGRAMMSERVER="/diagramm"

+# Tag allowlist passed as tags= to bleach.clean() in render_textabschnitte; unlisted tags such as <script> are not let through as markup
+ALLOWED_TAGS = [
+    'p', 'br', 'strong', 'em', 'u', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+    'ul', 'ol', 'li', 'blockquote', 'code', 'pre', 'hr',
+    'table', 'thead', 'tbody', 'tr', 'th', 'td',
+    'img', 'a', 'sup', 'sub', 'span', 'div'
+]
+
+ALLOWED_ATTRIBUTES = {  # per-tag attribute allowlist passed as attributes= to bleach.clean(); tags without an entry keep no attributes
+    'img': ['src', 'alt', 'width', 'height'],  # event handlers such as onerror are not listed and therefore dropped
+    'a': ['href', 'title'],
+    'span': ['class'],
+    'div': ['class'],
+    'p': ['class'],
+    'table': ['class'],
+    'th': ['colspan', 'rowspan', 'class'],
+    'td': ['colspan', 'rowspan', 'class'],
+    'pre': ['class'],
+    'code': ['class'],
+}  # NOTE(review): no tag may keep 'id', but the markdown 'footnotes' extension used in render_textabschnitte links via ids on <sup>/<li> - confirm footnote anchors still work
+
 def render_textabschnitte(queryset):
    """
    Converts a queryset of Textabschnitt-like models into a list of (typ, html) tuples.
@@ -52,6 +74,8 @@ def render_textabschnitte(queryset):
            html += "</code></pre>"
        else:
            html = markdown(inhalt, extensions=['tables', 'attr_list','footnotes'])
+        # Sanitize HTML to prevent XSS
+        html = bleach.clean(html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
        output.append((typ, html))
    return output