Files
svg-backend/backend/app/services/svg_normalizer.py
2026-03-19 13:39:32 +03:00

201 lines
6.0 KiB
Python

import json
import math
import re
from typing import Any
from xml.etree import ElementTree as StdET
# Namespace-stripped tag names, grouped by the fallback kind assigned to an
# element when neither its id nor its classes contain a recognised keyword.
SHAPE_TAGS = {
    "rect",
    "circle",
    "ellipse",
    "path",
    "polygon",
    "polyline",
    "line",
}
CONTAINER_TAGS = {
    "g",
}
TEXT_TAGS = {
    "text",
    "tspan",
}
def _local_name(tag: str) -> str:
if "}" in tag:
return tag.split("}", 1)[1]
return tag
def _parse_classes(value: str | None) -> list[str]:
if not value:
return []
return [item for item in value.strip().split() if item]
def _to_float(value: str | None) -> float | None:
if value is None or value == "":
return None
try:
return float(value)
except ValueError:
return None
def _infer_kind(element_id: str | None, classes: list[str], tag: str) -> str:
haystack = " ".join([element_id or ""] + classes).lower()
if "seat" in haystack or "place" in haystack:
return "seat"
if "sector" in haystack or "zone" in haystack:
return "sector"
if "group" in haystack:
return "group"
if tag in SHAPE_TAGS:
return "shape"
if tag in CONTAINER_TAGS:
return "container"
if tag in TEXT_TAGS:
return "text"
return "other"
def _extract_prefixed_id(value: str | None, prefix: str) -> str | None:
if not value:
return None
low = value.lower()
pref = f"{prefix}-"
if low.startswith(pref):
return value[len(pref):]
return None
def _extract_seat_parts_from_id(value: str | None) -> tuple[str | None, str | None]:
if not value:
return None, None
patterns = [
r"^seat[-_]?([a-zA-Z]+)[-_]?(\d+)$",
r"^place[-_]?([a-zA-Z]+)[-_]?(\d+)$",
r"^([a-zA-Z]+)[-_]?(\d+)$",
]
for pattern in patterns:
match = re.match(pattern, value)
if match:
return match.group(1), match.group(2)
return None, None
def _build_parent_map(root: StdET.Element) -> dict[int, dict[str, str | None]]:
    """Compute the sector/group context in effect at every element under *root*.

    The tree is visited in document order.  A node whose inferred kind is
    ``"sector"`` replaces the current sector id for itself and its
    descendants with, in order of preference, its ``data-sector-id``, the
    remainder of a ``sector-`` prefixed id, its plain id, or the inherited
    value.  ``"group"`` nodes do the same for the group id.

    Args:
        root: Root element of the parsed tree.

    Returns:
        A dict keyed by ``id(node)`` whose values are
        ``{"sector_id": ..., "group_id": ...}``.  Because the keys are object
        identities, the map is only meaningful while the tree is alive.
    """
    # NOTE(review): a data-sector-id / data-group-id on a node whose inferred
    # kind is not "sector" / "group" does not propagate to its descendants --
    # confirm that this is intended.
    parent_map: dict[int, dict[str, str | None]] = {}

    # Explicit stack rather than recursion: element nesting depth is decided
    # by the input document, and a recursive walk raises RecursionError once
    # it exceeds the interpreter's recursion limit.
    pending: list[tuple[Any, str | None, str | None]] = [(root, None, None)]
    while pending:
        node, sector_id, group_id = pending.pop()
        node_id = node.attrib.get("id")
        classes = _parse_classes(node.attrib.get("class"))
        kind = _infer_kind(node_id, classes, _local_name(node.tag))

        if kind == "sector":
            explicit_sector = node.attrib.get("data-sector-id") or _extract_prefixed_id(node_id, "sector")
            sector_id = explicit_sector or node_id or sector_id
        elif kind == "group":
            explicit_group = node.attrib.get("data-group-id") or _extract_prefixed_id(node_id, "group")
            group_id = explicit_group or node_id or group_id

        parent_map[id(node)] = {
            "sector_id": sector_id,
            "group_id": group_id,
        }
        # Push children right-to-left so they are popped left-to-right,
        # keeping the same pre-order as a recursive traversal.
        pending.extend((child, sector_id, group_id) for child in reversed(list(node)))
    return parent_map
def normalize_svg_bytes(content: bytes) -> dict[str, Any]:
    """Parse SVG bytes and describe every element except ``svg`` as a flat record.

    Each record carries the element's id, tag, inferred kind, classes, basic
    geometry attributes, href, and the seat fields.  Seat fields are resolved
    with ``data-*`` attributes first, then the sector/group inherited from
    ancestors, then values derived from the element id.

    Args:
        content: Raw SVG document.  It is parsed with ``StdET.fromstring``;
            parsing errors are not caught here.

    Returns:
        A dict with ``summary`` (counts), ``contract`` (seat field names and
        resolution priority), ``elements`` (all records) and ``seats`` /
        ``groups`` / ``sectors`` (the records of that kind; the same dict
        objects that appear in ``elements``).
    """
    # NOTE(review): StdET is the standard-library XML parser; if `content`
    # can come from untrusted uploads, confirm the deployment's exposure to
    # entity-expansion attacks.
    tree_root = StdET.fromstring(content)
    ancestry = _build_parent_map(tree_root)

    elements: list[dict[str, Any]] = []
    # Records are additionally filed under their kind when it is one of these.
    by_kind: dict[str, list[dict[str, Any]]] = {"seat": [], "group": [], "sector": []}

    for node in tree_root.iter():
        tag = _local_name(node.tag)
        if tag == "svg":
            continue

        attr = node.attrib.get
        element_id = attr("id")
        classes = _parse_classes(attr("class"))
        kind = _infer_kind(element_id=element_id, classes=classes, tag=tag)
        inherited = ancestry.get(id(node), {})
        id_row, id_seat_number = _extract_seat_parts_from_id(element_id)

        # A data-seat-id wins; otherwise only seat elements fall back to their id.
        seat_id = attr("data-seat-id")
        if not seat_id:
            seat_id = element_id if kind == "seat" else None

        record = {
            "id": element_id,
            "tag": tag,
            "kind": kind,
            "classes": classes,
            "x": _to_float(attr("x")),
            "y": _to_float(attr("y")),
            "cx": _to_float(attr("cx")),
            "cy": _to_float(attr("cy")),
            "width": _to_float(attr("width")),
            "height": _to_float(attr("height")),
            "href": attr("href") or attr("{http://www.w3.org/1999/xlink}href"),
            "seat_id": seat_id,
            "sector_id": attr("data-sector-id") or inherited.get("sector_id"),
            "group_id": attr("data-group-id") or inherited.get("group_id"),
            "row": attr("data-row") or id_row,
            "seat_number": attr("data-seat-number") or id_seat_number,
        }
        elements.append(record)

        bucket = by_kind.get(kind)
        if bucket is not None:
            bucket.append(record)

    seats = by_kind["seat"]
    groups = by_kind["group"]
    sectors = by_kind["sector"]
    summary = {
        "elements_count": len(elements),
        "seats_count": len(seats),
        "groups_count": len(groups),
        "sectors_count": len(sectors),
    }
    contract = {
        "seat_fields": ["seat_id", "sector_id", "group_id", "row", "seat_number"],
        "priority": [
            "data-* attributes",
            "inherited parent sector/group",
            "fallback to element id",
        ],
    }
    return {
        "summary": summary,
        "contract": contract,
        "elements": elements,
        "seats": seats,
        "groups": groups,
        "sectors": sectors,
    }
def normalize_svg_bytes_to_json(content: bytes) -> tuple[str, dict[str, Any]]:
    """Normalise *content* and return it both as JSON text and as a dict.

    Returns:
        ``(json_text, payload)`` where ``payload`` is the result of
        ``normalize_svg_bytes(content)`` and ``json_text`` is that payload
        serialised with two-space indentation and non-ASCII characters left
        unescaped.
    """
    normalized = normalize_svg_bytes(content)
    rendered = json.dumps(normalized, indent=2, ensure_ascii=False)
    return rendered, normalized