Initial commit: svg backend
This commit is contained in:
99
backend/app/services/svg_sanitizer.py
Normal file
99
backend/app/services/svg_sanitizer.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from app.core.config import settings
|
||||
|
||||
|
||||
# Element local names (namespace stripped) that are removed unconditionally.
DANGEROUS_TAGS = {"script"}

# Namespace URIs relevant to SVG documents.
SVG_NS = "http://www.w3.org/2000/svg"
XLINK_NS = "http://www.w3.org/1999/xlink"

# ``xlink:href`` spelled in the ``{namespace}local`` form used as an
# attribute key when looking values up on parsed elements.
XLINK_HREF = "{" + XLINK_NS + "}href"
|
||||
|
||||
|
||||
def _local_name(tag: str) -> str:
|
||||
if "}" in tag:
|
||||
return tag.split("}", 1)[1]
|
||||
return tag
|
||||
|
||||
|
||||
def _is_external_ref(value: str) -> bool:
|
||||
low = value.strip().lower()
|
||||
return (
|
||||
low.startswith("http://")
|
||||
or low.startswith("https://")
|
||||
or low.startswith("file:")
|
||||
or low.startswith("javascript:")
|
||||
or low.startswith("data:")
|
||||
or low.startswith("//")
|
||||
)
|
||||
|
||||
|
||||
def _remove_keeping_tail(node: "etree._Element") -> bool:
    """Detach *node* from its parent without losing the text that follows it.

    In lxml the text after an element's end tag (its ``tail``) belongs to the
    element, so a plain ``parent.remove(node)`` silently drops it.  The tail
    is first moved onto the previous sibling (or the parent's text).

    Returns False when *node* has no parent and therefore cannot be removed.
    """
    parent = node.getparent()
    if parent is None:
        return False

    tail = node.tail
    if tail:
        previous = node.getprevious()
        if previous is not None:
            previous.tail = (previous.tail or "") + tail
        else:
            parent.text = (parent.text or "") + tail

    parent.remove(node)
    return True


def sanitize_svg_bytes(content: bytes) -> tuple[bytes, int, int]:
    """Strip active or externally-referencing content from an SVG document.

    The document is parsed strictly (``recover=False``), so malformed XML
    makes lxml raise instead of being repaired.  The following are removed:

    * every element whose local name is in ``DANGEROUS_TAGS``;
    * ``foreignObject`` elements when ``settings.svg_forbid_foreign_object_v1``
      is set;
    * ``image`` elements whose ``href`` / ``xlink:href`` is an external
      reference, when ``settings.svg_forbid_image_v1`` is set;
    * every attribute whose local name starts with ``on``;
    * every ``href`` attribute (in any namespace, including ``xlink:href``)
      whose value is an external reference.

    An element without a parent (the document root) is never removed.
    Elements nested inside an already-removed subtree are still visited, so
    they contribute to the returned counts as well.

    Returns:
        ``(sanitized, removed_elements_count, removed_attributes_count)``
        where ``sanitized`` is the UTF-8 serialisation of the cleaned tree,
        including an XML declaration.
    """
    # NOTE(review): huge_tree=True switches off libxml2's size/depth limits.
    # For untrusted uploads that is a potential DoS vector; confirm it is
    # intentional (e.g. input size is capped by the caller).
    parser = etree.XMLParser(
        resolve_entities=False,
        remove_blank_text=False,
        remove_comments=False,
        no_network=True,
        recover=False,
        huge_tree=True,
    )
    root = etree.fromstring(content, parser=parser)

    removed_elements_count = 0
    removed_attributes_count = 0

    # Read the feature flags once instead of on every node.
    forbid_foreign_object = settings.svg_forbid_foreign_object_v1
    forbid_image = settings.svg_forbid_image_v1

    # Snapshot the traversal: the tree is mutated while we walk it.
    for node in list(root.iter()):
        # Comments and processing instructions are kept by the parser and
        # expose a callable ``tag``; they have no name or attributes to check.
        if not isinstance(node.tag, str):
            continue

        tag_name = _local_name(node.tag)

        if tag_name in DANGEROUS_TAGS or (
            forbid_foreign_object and tag_name == "foreignObject"
        ):
            # NOTE(review): a parentless (root) element cannot be removed and
            # is left untouched, matching the previous behaviour.
            if _remove_keeping_tail(node):
                removed_elements_count += 1
            continue

        if forbid_image and tag_name == "image":
            href = node.attrib.get("href") or node.attrib.get(XLINK_HREF)
            if href and _is_external_ref(href):
                if _remove_keeping_tail(node):
                    removed_elements_count += 1
                continue

        # Copy the keys first because attributes are deleted in the loop.
        for attr_name in list(node.attrib):
            local_attr = _local_name(attr_name).lower()

            # Event-handler attributes (onload, onclick, ...).
            if local_attr.startswith("on"):
                del node.attrib[attr_name]
                removed_attributes_count += 1
                continue

            # Matching on the local name covers both plain ``href`` and
            # ``xlink:href``.  Empty values and "#fragment" references never
            # match a blocked prefix, so no separate guard is needed.
            if local_attr == "href" and _is_external_ref(node.attrib[attr_name]):
                del node.attrib[attr_name]
                removed_attributes_count += 1

    sanitized = etree.tostring(
        root,
        encoding="utf-8",
        xml_declaration=True,
        pretty_print=False,
    )
    return sanitized, removed_elements_count, removed_attributes_count
|
||||
Reference in New Issue
Block a user