"""SVG sanitization helpers built on lxml."""

from __future__ import annotations

from lxml import etree

from app.core.config import settings

DANGEROUS_TAGS = {"script"}
SVG_NS = "http://www.w3.org/2000/svg"
XLINK_NS = "http://www.w3.org/1999/xlink"
XLINK_HREF = f"{{{XLINK_NS}}}href"

# Prefixes that mark a reference as pointing outside the document or as
# carrying an executable/inline payload.
_EXTERNAL_REF_PREFIXES = (
    "http://",
    "https://",
    "file:",
    "javascript:",
    "data:",
    "//",
)

# URL parsers drop ASCII tab/LF/CR anywhere in a URL before reading the
# scheme, so "java\tscript:..." is still a javascript: URL. These characters
# can reach an attribute value through character references such as "&#9;".
_URL_IGNORED_CHARS = {ord(ch): None for ch in "\t\n\r"}


def _local_name(tag: str) -> str:
    """Return *tag* without its leading ``{namespace}`` part, if any."""
    _, brace, local = tag.partition("}")
    return local if brace else tag


def _is_external_ref(value: str) -> bool:
    """Return True if *value* starts with one of the blocked reference prefixes.

    The comparison is case-insensitive and ignores surrounding whitespace as
    well as embedded tab/newline characters, which URL parsers discard.
    Same-document fragment references (``#id``) never match.
    """
    normalized = value.translate(_URL_IGNORED_CHARS).strip().lower()
    return normalized.startswith(_EXTERNAL_REF_PREFIXES)


def _detach(node: etree._Element) -> bool:
    """Remove *node* from its parent while keeping the text that follows it.

    Returns False (and does nothing) when *node* has no parent, which is the
    case for the root element.
    """
    parent = node.getparent()
    if parent is None:
        return False

    # lxml stores the text after an element in ``node.tail`` and drops it
    # together with the element, so move it to where it will survive.
    tail = node.tail
    if tail:
        previous = node.getprevious()
        if previous is not None:
            previous.tail = (previous.tail or "") + tail
        else:
            parent.text = (parent.text or "") + tail

    parent.remove(node)
    return True


def _should_drop_element(node: etree._Element, tag_name: str) -> bool:
    """Decide whether the element must be removed from the tree entirely."""
    if tag_name in DANGEROUS_TAGS:
        return True
    if settings.svg_forbid_foreign_object_v1 and tag_name == "foreignObject":
        return True
    if settings.svg_forbid_image_v1 and tag_name == "image":
        # Only images whose target is external are dropped.
        href = node.attrib.get("href") or node.attrib.get(XLINK_HREF)
        return bool(href and _is_external_ref(href))
    return False


def _strip_unsafe_attributes(node: etree._Element) -> int:
    """Delete event-handler and external ``href`` attributes from *node*.

    Returns the number of attributes that were deleted.
    """
    removed = 0
    # Iterate over a copy of the keys because attributes are deleted in place.
    for attr_name in list(node.attrib.keys()):
        local_attr = _local_name(attr_name).lower()
        if local_attr.startswith("on"):
            # Event handlers such as onload / onclick.
            del node.attrib[attr_name]
            removed += 1
        elif local_attr == "href":
            # Covers both plain ``href`` and namespaced ``xlink:href``.
            value = node.attrib.get(attr_name) or ""
            if _is_external_ref(value):
                del node.attrib[attr_name]
                removed += 1
    return removed


def sanitize_svg_bytes(content: bytes) -> tuple[bytes, int, int]:
    """Parse an SVG document and strip active or externally-referencing content.

    Removed are: elements listed in ``DANGEROUS_TAGS``; ``foreignObject``
    elements when ``settings.svg_forbid_foreign_object_v1`` is set; ``image``
    elements with an external reference when ``settings.svg_forbid_image_v1``
    is set; every attribute whose local name starts with ``on``; and every
    ``href`` attribute holding an external reference.

    Args:
        content: Raw bytes of the SVG (XML) document.

    Returns:
        A tuple ``(sanitized_bytes, removed_elements_count,
        removed_attributes_count)``. The bytes are the UTF-8 serialization of
        the root element, including an XML declaration.

    Raises:
        lxml.etree.XMLSyntaxError: If *content* is not well-formed XML; the
            parser is created with ``recover=False``.
    """
    parser = etree.XMLParser(
        resolve_entities=False,
        remove_blank_text=False,
        remove_comments=False,
        no_network=True,
        recover=False,
        # NOTE(review): huge_tree=True lifts libxml2's built-in size/depth
        # limits. Confirm this is intended for untrusted uploads.
        huge_tree=True,
    )
    root = etree.fromstring(content, parser=parser)

    removed_elements_count = 0
    removed_attributes_count = 0

    # Snapshot the traversal because the tree is mutated while walking it.
    # Nodes inside an already-removed subtree are still in the snapshot and
    # are therefore still visited and counted.
    for node in list(root.iter()):
        # Comments, processing instructions and entity references are kept in
        # the tree; their ``tag`` is a callable, not a string, so they cannot
        # be inspected like elements.
        if not isinstance(node.tag, str):
            continue

        tag_name = _local_name(node.tag)
        if _should_drop_element(node, tag_name):
            if _detach(node):
                removed_elements_count += 1
            continue

        removed_attributes_count += _strip_unsafe_attributes(node)

    sanitized = etree.tostring(
        root,
        encoding="utf-8",
        xml_declaration=True,
        pretty_print=False,
    )
    return sanitized, removed_elements_count, removed_attributes_count