100 lines
2.9 KiB
Python
100 lines
2.9 KiB
Python
from __future__ import annotations
|
|
|
|
from lxml import etree
|
|
|
|
from app.core.config import settings
|
|
|
|
|
|
# XML namespaces used when matching SVG elements and attributes.
SVG_NS = "http://www.w3.org/2000/svg"
XLINK_NS = "http://www.w3.org/1999/xlink"

# ``xlink:href`` spelled in Clark notation ("{namespace}local"), which is how
# lxml keys namespaced attributes.
XLINK_HREF = "{" + XLINK_NS + "}href"

# Local (namespace-stripped) element names that are always removed.
DANGEROUS_TAGS = {"script"}
|
|
|
|
|
|
def _local_name(tag: str) -> str:
|
|
if "}" in tag:
|
|
return tag.split("}", 1)[1]
|
|
return tag
|
|
|
|
|
|
def _is_external_ref(value: str) -> bool:
|
|
low = value.strip().lower()
|
|
return (
|
|
low.startswith("http://")
|
|
or low.startswith("https://")
|
|
or low.startswith("file:")
|
|
or low.startswith("javascript:")
|
|
or low.startswith("data:")
|
|
or low.startswith("//")
|
|
)
|
|
|
|
|
|
def _detach_element(node) -> bool:
    """Remove *node* from its parent while keeping the text that follows it.

    lxml stores the text found after an element's closing tag on the element
    itself (``tail``), so a bare ``parent.remove(node)`` would drop that text
    together with the element. The tail is re-attached to the previous
    sibling, or to the parent's text when there is no previous sibling.

    Returns True when the node was removed, False when it has no parent
    (for example the root element) and was therefore left in place.
    """
    parent = node.getparent()
    if parent is None:
        return False

    tail = node.tail
    if tail:
        previous = node.getprevious()
        if previous is not None:
            previous.tail = (previous.tail or "") + tail
        else:
            parent.text = (parent.text or "") + tail

    parent.remove(node)
    return True


def sanitize_svg_bytes(content: bytes) -> tuple[bytes, int, int]:
    """Strip active content from an SVG document.

    The document is parsed with lxml and then walked once:

    * elements whose local name is in ``DANGEROUS_TAGS`` are removed;
    * ``foreignObject`` elements are removed when
      ``settings.svg_forbid_foreign_object_v1`` is truthy;
    * ``image`` elements whose ``href`` / ``xlink:href`` is an external
      reference are removed when ``settings.svg_forbid_image_v1`` is truthy;
    * on every remaining element, attributes whose local name starts with
      ``on`` are deleted, as are ``href`` attributes (in any namespace,
      including ``xlink:href``) that hold an external reference.

    Args:
        content: Raw SVG/XML bytes.

    Returns:
        A tuple ``(sanitized, removed_elements_count,
        removed_attributes_count)`` where ``sanitized`` is the document
        re-serialised as UTF-8 with an XML declaration.

    Raises:
        lxml.etree.XMLSyntaxError: if *content* is not well-formed XML
            (the parser runs with ``recover=False``).
    """
    parser = etree.XMLParser(
        resolve_entities=False,
        remove_blank_text=False,
        remove_comments=False,
        no_network=True,
        recover=False,
        # NOTE(review): huge_tree=True disables libxml2's built-in depth/size
        # limits. If ``content`` is untrusted, confirm the caller enforces an
        # upload size limit, or consider turning this off.
        huge_tree=True,
    )
    root = etree.fromstring(content, parser=parser)

    # Read the feature flags once instead of once per node.
    forbid_foreign_object = settings.svg_forbid_foreign_object_v1
    forbid_image = settings.svg_forbid_image_v1

    removed_elements_count = 0
    removed_attributes_count = 0

    # Iterate over a snapshot because the tree is mutated while walking it.
    # NOTE: the snapshot still contains descendants of elements removed
    # earlier in the loop, so those descendants are processed (and counted)
    # even though they are no longer part of the output.
    for node in list(root.iter()):
        # Comments, processing instructions and entity references are also
        # yielded by iter(); their ``tag`` is a factory function rather than
        # a string, which would make the name handling below raise TypeError.
        if not isinstance(node.tag, str):
            continue

        tag_name = _local_name(node.tag)

        always_forbidden = tag_name in DANGEROUS_TAGS
        foreign_object_forbidden = (
            forbid_foreign_object and tag_name == "foreignObject"
        )
        if always_forbidden or foreign_object_forbidden:
            if _detach_element(node):
                removed_elements_count += 1
            continue

        if forbid_image and tag_name == "image":
            href = node.attrib.get("href") or node.attrib.get(XLINK_HREF)
            if href and _is_external_ref(href):
                if _detach_element(node):
                    removed_elements_count += 1
                continue

        for attr_name in list(node.attrib.keys()):
            local_attr = _local_name(attr_name).lower()
            value = node.attrib.get(attr_name) or ""

            # Event-handler attributes (onload, onclick, ...).
            if local_attr.startswith("on"):
                del node.attrib[attr_name]
                removed_attributes_count += 1
                continue

            # Matching on the local name covers plain ``href`` as well as
            # ``xlink:href``, so no separate XLINK_HREF check is needed.
            if (
                local_attr == "href"
                and value
                and not value.startswith("#")
                and _is_external_ref(value)
            ):
                del node.attrib[attr_name]
                removed_attributes_count += 1

    sanitized = etree.tostring(
        root,
        encoding="utf-8",
        xml_declaration=True,
        pretty_print=False,
    )
    return sanitized, removed_elements_count, removed_attributes_count
|