diff --git a/backend/app/api/routes/admin.py b/backend/app/api/routes/admin.py
index 08f23d9..056a5d5 100644
--- a/backend/app/api/routes/admin.py
+++ b/backend/app/api/routes/admin.py
@@ -6,6 +6,10 @@
 from app.repositories.scheme_versions import get_current_scheme_version
 from app.repositories.schemes import get_scheme_record_by_scheme_id, list_scheme_records
 from app.repositories.uploads import get_upload_record_by_upload_id
 from app.security.auth import require_api_key
+from app.services.artifact_maintenance import (
+    cleanup_publish_preview_storage,
+    inspect_publish_preview_storage,
+)
 from app.services.display_regenerator import regenerate_display_artifact
 from app.services.scheme_validation import build_scheme_validation_report
@@ -160,3 +164,18 @@ async def bulk_backfill_display_artifacts(
         "skipped": skipped,
         "failed": failed,
     }
+
+
+@router.get(f"{settings.api_v1_prefix}/admin/artifacts/publish-preview/audit")
+async def audit_publish_preview_storage(
+    role: str = Depends(require_api_key),
+):
+    return await inspect_publish_preview_storage()
+
+
+@router.post(f"{settings.api_v1_prefix}/admin/artifacts/publish-preview/cleanup")
+async def cleanup_publish_preview_artifacts_endpoint(
+    dry_run: bool = Query(default=True),
+    role: str = Depends(require_api_key),
+):
+    return await cleanup_publish_preview_storage(dry_run=dry_run)
diff --git a/backend/app/repositories/scheme_artifacts.py b/backend/app/repositories/scheme_artifacts.py
index 1ad55c1..4504ccb 100644
--- a/backend/app/repositories/scheme_artifacts.py
+++ b/backend/app/repositories/scheme_artifacts.py
@@ -59,6 +59,28 @@ async def list_scheme_artifacts(
     return list(result.scalars().all())
 
 
+async def list_artifacts_by_type(
+    *,
+    artifact_type: str,
+    artifact_variant: str | None = None,
+) -> list[SchemeArtifactRecord]:
+    async with AsyncSessionLocal() as session:
+        stmt = select(SchemeArtifactRecord).where(
+            SchemeArtifactRecord.artifact_type == artifact_type
+        )
+
+        if artifact_variant is not None:
+            stmt = stmt.where(SchemeArtifactRecord.artifact_variant == artifact_variant)
+
+        stmt = stmt.order_by(
+            asc(SchemeArtifactRecord.created_at),
+            asc(SchemeArtifactRecord.id),
+        )
+
+        result = await session.execute(stmt)
+        return list(result.scalars().all())
+
+
 async def artifact_exists(
     *,
     scheme_version_id: str,
diff --git a/backend/app/services/artifact_maintenance.py b/backend/app/services/artifact_maintenance.py
new file mode 100644
index 0000000..ce9440f
--- /dev/null
+++ b/backend/app/services/artifact_maintenance.py
@@ -0,0 +1,102 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from app.core.config import settings
+from app.repositories.scheme_artifacts import (
+    delete_scheme_artifacts_by_artifact_ids,
+    list_artifacts_by_type,
+)
+
+PUBLISH_PREVIEW_ARTIFACT_TYPE = "publish_preview"
+
+
+def _preview_root() -> Path:
+    path = Path(settings.storage_preview_dir)
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def _cleanup_file_and_empty_parent(path: Path) -> bool:
+    removed = False
+
+    try:
+        if path.exists() and path.is_file():
+            path.unlink()
+            removed = True
+    except FileNotFoundError:
+        pass
+
+    try:
+        parent = path.parent
+        root = _preview_root()
+        if parent != root and parent.exists():
+            parent.rmdir()
+    except OSError:
+        pass
+
+    return removed
+
+
+async def inspect_publish_preview_storage() -> dict:
+    rows = await list_artifacts_by_type(artifact_type=PUBLISH_PREVIEW_ARTIFACT_TYPE)
+    root = _preview_root()
+
+    db_by_path: dict[str, object] = {row.storage_path: row for row in rows}
+    disk_files = sorted(str(path) for path in root.glob("*/*.json"))
+
+    orphan_files = [path for path in disk_files if path not in db_by_path]
+    missing_files_for_db_rows = [
+        {
+            "artifact_id": row.artifact_id,
+            "scheme_id": row.scheme_id,
+            "scheme_version_id": row.scheme_version_id,
+            "artifact_variant": row.artifact_variant,
+            "storage_path": row.storage_path,
+        }
+        for row in rows
+        if not Path(row.storage_path).exists()
+    ]
+
+    return {
+        "artifact_type": PUBLISH_PREVIEW_ARTIFACT_TYPE,
+        "db_rows_count": len(rows),
+        "disk_files_count": len(disk_files),
+        "orphan_files_count": len(orphan_files),
+        "missing_files_for_db_rows_count": len(missing_files_for_db_rows),
+        "orphan_files": orphan_files,
+        "missing_files_for_db_rows": missing_files_for_db_rows,
+    }
+
+
+async def cleanup_publish_preview_storage(*, dry_run: bool = True) -> dict:
+    audit = await inspect_publish_preview_storage()
+
+    orphan_files = [Path(path) for path in audit["orphan_files"]]
+    missing_rows = audit["missing_files_for_db_rows"]
+
+    deleted_files: list[str] = []
+    deleted_db_artifact_ids: list[str] = []
+
+    if not dry_run:
+        for path in orphan_files:
+            if _cleanup_file_and_empty_parent(path):
+                deleted_files.append(str(path))
+
+        deleted_db_artifact_ids = [
+            item["artifact_id"] for item in missing_rows
+        ]
+        await delete_scheme_artifacts_by_artifact_ids(deleted_db_artifact_ids)
+
+    return {
+        "artifact_type": PUBLISH_PREVIEW_ARTIFACT_TYPE,
+        "dry_run": dry_run,
+        "orphan_files_count": len(orphan_files),
+        "missing_files_for_db_rows_count": len(missing_rows),
+        "orphan_files": [str(path) for path in orphan_files],
+        "missing_files_for_db_rows": missing_rows,
+        "deleted_files_count": len(deleted_files),
+        "deleted_files": deleted_files,
+        "deleted_db_rows_count": len(deleted_db_artifact_ids),
+        "deleted_db_artifact_ids": deleted_db_artifact_ids,
+    }