import json import re from typing import Any from xml.etree import ElementTree as StdET SHAPE_TAGS = {"rect", "circle", "ellipse", "path", "polygon", "polyline", "line"} CONTAINER_TAGS = {"g"} TEXT_TAGS = {"text", "tspan"} def _local_name(tag: str) -> str: if "}" in tag: return tag.split("}", 1)[1] return tag def _parse_classes(value: str | None) -> list[str]: if not value: return [] return [item for item in value.strip().split() if item] def _to_float(value: str | None) -> float | None: if value is None or value == "": return None try: return float(value) except ValueError: return None def _infer_kind(element_id: str | None, classes: list[str], tag: str) -> str: haystack = " ".join([element_id or ""] + classes).lower() if "seat" in haystack or "place" in haystack: return "seat" if "sector" in haystack or "zone" in haystack: return "sector" if "group" in haystack: return "group" if tag in SHAPE_TAGS: return "shape" if tag in CONTAINER_TAGS: return "container" if tag in TEXT_TAGS: return "text" return "other" def _extract_prefixed_id(value: str | None, prefix: str) -> str | None: if not value: return None low = value.lower() pref = f"{prefix}-" if low.startswith(pref): return value[len(pref):] return None def _extract_seat_parts_from_id(value: str | None) -> tuple[str | None, str | None]: if not value: return None, None patterns = [ r"^seat[-_]?([a-zA-Z]+)[-_]?(\d+)$", r"^place[-_]?([a-zA-Z]+)[-_]?(\d+)$", r"^([a-zA-Z]+)[-_]?(\d+)$", ] for pattern in patterns: match = re.match(pattern, value) if match: return match.group(1), match.group(2) return None, None def _build_parent_map(root) -> dict[int, dict[str, str | None]]: parent_map: dict[int, dict[str, str | None]] = {} def walk(node, current_sector_id: str | None, current_group_id: str | None): node_id = node.attrib.get("id") classes = _parse_classes(node.attrib.get("class")) kind = _infer_kind(node_id, classes, _local_name(node.tag)) sector_id = current_sector_id group_id = current_group_id explicit_sector = node.attrib.get("data-sector-id") or _extract_prefixed_id(node_id, "sector") explicit_group = node.attrib.get("data-group-id") or _extract_prefixed_id(node_id, "group") if kind == "sector": sector_id = explicit_sector or node_id or sector_id if kind == "group": group_id = explicit_group or node_id or group_id parent_map[id(node)] = { "sector_id": sector_id, "group_id": group_id, } for child in list(node): walk(child, sector_id, group_id) walk(root, None, None) return parent_map def normalize_svg_bytes(content: bytes) -> dict[str, Any]: root = StdET.fromstring(content) parent_map = _build_parent_map(root) elements: list[dict[str, Any]] = [] seats: list[dict[str, Any]] = [] groups: list[dict[str, Any]] = [] sectors: list[dict[str, Any]] = [] for node in root.iter(): tag = _local_name(node.tag) if tag == "svg": continue element_id = node.attrib.get("id") classes = _parse_classes(node.attrib.get("class")) kind = _infer_kind(element_id=element_id, classes=classes, tag=tag) inherited = parent_map.get(id(node), {}) inherited_sector_id = inherited.get("sector_id") inherited_group_id = inherited.get("group_id") explicit_sector_id = node.attrib.get("data-sector-id") explicit_group_id = node.attrib.get("data-group-id") explicit_seat_id = node.attrib.get("data-seat-id") explicit_row = node.attrib.get("data-row") explicit_seat_number = node.attrib.get("data-seat-number") row_from_id, seat_number_from_id = _extract_seat_parts_from_id(element_id) seat_id = explicit_seat_id or (element_id if kind == "seat" else None) sector_id = explicit_sector_id or inherited_sector_id group_id = explicit_group_id or inherited_group_id row = explicit_row or row_from_id seat_number = explicit_seat_number or seat_number_from_id item = { "id": element_id, "tag": tag, "kind": kind, "classes": classes, "x": _to_float(node.attrib.get("x")), "y": _to_float(node.attrib.get("y")), "cx": _to_float(node.attrib.get("cx")), "cy": _to_float(node.attrib.get("cy")), "width": _to_float(node.attrib.get("width")), "height": _to_float(node.attrib.get("height")), "href": node.attrib.get("href") or node.attrib.get("{http://www.w3.org/1999/xlink}href"), "seat_id": seat_id, "sector_id": sector_id, "group_id": group_id, "row": row, "seat_number": seat_number, } elements.append(item) if kind == "seat": seats.append(item) elif kind == "group": groups.append(item) elif kind == "sector": sectors.append(item) return { "summary": { "elements_count": len(elements), "seats_count": len(seats), "groups_count": len(groups), "sectors_count": len(sectors), }, "contract": { "seat_fields": ["seat_id", "sector_id", "group_id", "row", "seat_number"], "priority": [ "data-* attributes", "inherited parent sector/group", "fallback to element id", ], }, "elements": elements, "seats": seats, "groups": groups, "sectors": sectors, } def normalize_svg_bytes_to_json(content: bytes) -> tuple[str, dict[str, Any]]: payload = normalize_svg_bytes(content) return json.dumps(payload, ensure_ascii=False, indent=2), payload