diff --git a/backend/models/postgis/project_info.py b/backend/models/postgis/project_info.py index eca6bed91e..aeb4dbda19 100644 --- a/backend/models/postgis/project_info.py +++ b/backend/models/postgis/project_info.py @@ -1,5 +1,6 @@ from typing import List +from backend.models.postgis.utils import sanitize_markdown from databases import Database from sqlalchemy import ( Column, @@ -57,11 +58,11 @@ async def create_from_dto(cls, dto: ProjectInfoDTO, project_id: int, db: Databas self.project_id = project_id self.project_id_str = str(project_id) # Allows project_id to be searched - # Note project info not bleached on basis that admins are trusted users and shouldn't be doing anything bad - self.short_description = dto.short_description - self.description = dto.description - self.instructions = dto.instructions - self.per_task_instructions = dto.per_task_instructions + self.short_description = sanitize_markdown(dto.short_description) + self.description = sanitize_markdown(dto.description) + self.instructions = sanitize_markdown(dto.instructions) + self.per_task_instructions = sanitize_markdown(dto.per_task_instructions) + columns = { c.key: getattr(self, c.key) for c in inspect(self).mapper.column_attrs } @@ -75,11 +76,11 @@ async def update_from_dto(self, dto: ProjectInfoDTO, db: Database): self.name = dto.name self.project_id_str = str(self.project_id) # Allows project_id to be searched - # Note project info not bleached on basis that admins are trusted users and shouldn't be doing anything bad - self.short_description = dto.short_description - self.description = dto.description - self.instructions = dto.instructions - self.per_task_instructions = dto.per_task_instructions + self.short_description = sanitize_markdown(dto.short_description) + self.description = sanitize_markdown(dto.description) + self.instructions = sanitize_markdown(dto.instructions) + self.per_task_instructions = sanitize_markdown(dto.per_task_instructions) + columns = { c.key: getattr(self, c.key) for c in inspect(self).mapper.column_attrs } diff --git a/backend/models/postgis/utils.py b/backend/models/postgis/utils.py index 0238626188..da74fdddcd 100644 --- a/backend/models/postgis/utils.py +++ b/backend/models/postgis/utils.py @@ -1,9 +1,11 @@ import datetime import json -import re from geoalchemy2 import Geometry from geoalchemy2.functions import GenericFunction from loguru import logger +import re +from markdown import markdown +import bleach class NotFound(Exception): @@ -156,6 +158,100 @@ def parse_duration(time_str): return datetime.timedelta(**time_params) +def sanitize_markdown(text: str | None) -> str | None: + """Convert markdown to sanitized HTML. Returns None for empty input.""" + if not text: + return None + + # Extended allowed tags to support markdown features + allowed_tags = [ + "a", + "abbr", + "acronym", + "b", + "blockquote", + "br", + "code", + "em", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "img", + "i", + "li", + "ol", + "p", + "pre", + "strong", + "ul", + "div", + "table", + "thead", + "tbody", + "tfoot", + "tr", + "td", + "th", + "iframe", + "input", + "hr", + "del", + "s", + "strike", + "span", + "caption", + "col", + "colgroup", + ] + + allowed_attributes = { + "a": ["href", "rel", "target", "title"], + "img": ["src", "alt", "title", "width", "height"], + "iframe": [ + "width", + "height", + "src", + "title", + "frameborder", + "allow", + "referrerpolicy", + "allowfullscreen", + ], + "input": ["type", "checked", "disabled"], + "th": ["align", "scope"], + "td": ["align", "colspan", "rowspan"], + "table": ["class"], + "code": ["class"], + "pre": ["class"], + } + + # Support markdown ~~strike~~ -> + text = re.sub(r"~~(.*?)~~", r"\1", text) + + html_content = markdown( + text, + extensions=[ + "markdown.extensions.tables", + "markdown.extensions.fenced_code", + "markdown.extensions.nl2br", + "markdown.extensions.sane_lists", + "markdown.extensions.codehilite", + ], + ) + + clean_message = bleach.clean( + html_content, tags=allowed_tags, attributes=allowed_attributes, strip=False + ) + + # Turn URLs into links and parse emails + clean_message = bleach.linkify(clean_message, parse_email=True) + + return clean_message + + class DateTimeEncoder(json.JSONEncoder): """ Custom JSON Encoder that handles Python date/times