Initial MVP skeleton with auth, chat persistence, UI and text LLM integration
This commit is contained in:
39
backend/app/core/config.py
Normal file
39
backend/app/core/config.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration loaded from the environment via pydantic-settings.

    Fields declared without a default (``database_url``, ``llm_manager_base_url``,
    ``llm_manager_api_key``, ``searxng_base_url``) are required: instantiation
    fails if the corresponding environment variable is missing.
    """

    # Ignore unknown environment variables instead of raising on them.
    model_config = SettingsConfigDict(extra="ignore")

    # --- Application server ---
    app_env: str = "dev"
    app_host: str = "0.0.0.0"
    app_port: int = 8000

    # --- Persistence (required, no default) ---
    database_url: str

    # --- Initial admin account bootstrap ---
    # NOTE(review): placeholder credentials — deployments are expected to
    # override these via the environment.
    admin_bootstrap_login: str = "admin"
    admin_bootstrap_password: str = "change_me_later"

    # --- External services (required, no defaults) ---
    llm_manager_base_url: str
    llm_manager_api_key: str
    searxng_base_url: str

    # --- Storage roots ---
    upload_root: str = "/data/uploads"
    temp_root: str = "/data/temp"
    log_root: str = "/data/logs"

    # --- Upload / input limits ---
    max_image_mb: int = 10            # megabytes
    max_audio_mb: int = 25            # megabytes
    max_audio_duration_sec: int = 300
    max_message_chars: int = 16000

    # --- Retention / cleanup windows (hours) ---
    tts_ttl_hours: int = 4
    temp_audio_ttl_hours: int = 24
    orphan_file_grace_hours: int = 24

    # --- Chat summarization (presumably: summarize once a chat exceeds the
    # trigger count, keeping the most recent messages verbatim — confirm
    # against the summarization service) ---
    summary_trigger_message_count: int = 30
    summary_keep_recent_messages: int = 16
    summary_max_chars: int = 8000
    summary_model_alias: str = "qwen3.5-4b"


# Module-level singleton; reads the environment once at import time.
settings = Settings()
|
||||
98
backend/app/core/llm_client.py
Normal file
98
backend/app/core/llm_client.py
Normal file
@@ -0,0 +1,98 @@
|
||||
import asyncio
import logging
import time
from typing import Optional

import httpx
from fastapi import HTTPException

from app.core.config import settings
|
||||
|
||||
logger = logging.getLogger(__name__)

# Global lock to prevent concurrent switches and generation requests.
# This is safe for a single-worker MVP (uvicorn without --workers).
inference_lock = asyncio.Lock()


class LLMClient:
    """Async HTTP client for the external llm-manager service.

    Wraps status checks, model switching, readiness polling and chat
    completions.  Every transport/HTTP failure is translated into an
    ``HTTPException(502)`` so FastAPI routes can surface upstream errors
    without extra handling.
    """

    def __init__(self):
        # Strip any trailing slash so the URL joins below never produce "//".
        self.base_url = settings.llm_manager_base_url.rstrip("/")
        self.api_key = settings.llm_manager_api_key
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

    async def get_status(self):
        """Fetch the current global state of llm-manager.

        Returns:
            The decoded JSON status payload.

        Raises:
            HTTPException: 502 when the request fails or returns an error status.
        """
        async with httpx.AsyncClient() as client:
            try:
                response = await client.get(
                    f"{self.base_url}/status",
                    headers=self.headers,
                    timeout=10.0
                )
                response.raise_for_status()
                return response.json()
            except httpx.HTTPError as e:
                # Lazy %-args: message is only built if the record is emitted.
                logger.error("Failed to fetch llm-manager status: %s", e)
                raise HTTPException(status_code=502, detail="llm-manager status check failed") from e

    async def switch_model(self, model_name: str):
        """Request llm-manager to switch its active model.

        Raises:
            HTTPException: 502 when the switch request fails.
        """
        async with httpx.AsyncClient() as client:
            try:
                logger.info("Requesting llm-manager switch to model: %s", model_name)
                response = await client.post(
                    f"{self.base_url}/switch/{model_name}",
                    headers=self.headers,
                    timeout=60.0  # Switching can take a while via LLM manager
                )
                response.raise_for_status()
                return response.json()
            except httpx.HTTPError as e:
                logger.error("Failed to switch model to %s: %s", model_name, e)
                raise HTTPException(status_code=502, detail=f"Failed to switch model to {model_name}") from e

    async def wait_for_model_ready(self, model_name: str, timeout: float = 60.0, poll_interval: float = 2.0):
        """Poll llm-manager until *model_name* is active and not loading/unloading.

        Returns:
            ``(ready, iterations, status)`` — ``status`` is the last status
            payload when ready, ``None`` on timeout.
        """
        # monotonic() is immune to wall-clock adjustments (NTP, DST); the
        # original time.time() could spuriously expire or extend the timeout
        # if the system clock jumped mid-poll.
        start_time = time.monotonic()
        iterations = 0
        while time.monotonic() - start_time < timeout:
            iterations += 1
            status = await self.get_status()
            current_model = status.get("active_model")
            vram_state = status.get("vram_state", "")

            logger.info("Readiness poll #%d: model=%s, vram_state=%s", iterations, current_model, vram_state)

            if current_model == model_name and vram_state not in ("loading", "unloading"):
                return True, iterations, status

            await asyncio.sleep(poll_interval)

        return False, iterations, None

    async def chat_completion(self, messages: list, max_tokens: Optional[int] = None, temperature: Optional[float] = None):
        """Generate a non-streaming chat completion via llm-manager.

        Args:
            messages: OpenAI-style message dicts.
            max_tokens: Optional generation cap; omitted from the payload when None.
            temperature: Optional sampling temperature; omitted when None.

        Raises:
            HTTPException: 502 when the upstream call fails.
        """
        async with httpx.AsyncClient() as client:
            try:
                payload = {
                    "messages": messages,
                    "stream": False
                }
                # Only forward optional knobs that were explicitly supplied,
                # letting llm-manager apply its own defaults otherwise.
                if max_tokens is not None:
                    payload["max_tokens"] = max_tokens
                if temperature is not None:
                    payload["temperature"] = temperature

                response = await client.post(
                    f"{self.base_url}/v1/chat/completions",
                    headers=self.headers,
                    json=payload,
                    timeout=120.0
                )
                response.raise_for_status()
                return response.json()
            except httpx.HTTPError as e:
                logger.error("Failed to generate chat completion: %s", e)
                raise HTTPException(status_code=502, detail="Chat completion generation failed") from e


# Module-level singleton shared by the API routes.
llm_client = LLMClient()
|
||||
15
backend/app/core/models_catalog.py
Normal file
15
backend/app/core/models_catalog.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
class ModelInfo(BaseModel):
    """Catalog entry describing one selectable LLM."""

    # Machine-readable identifier — presumably what the backend passes to
    # llm-manager when switching models; verify against the switch endpoint.
    alias: str
    # Human-readable display name for the UI.
    name: str
    # Alias of the vision-capable variant, when the model family has one.
    vision_alias: Optional[str] = None


# Curated, hard-coded list of offered models — kept static rather than
# discovered dynamically from the LLM service.
AVAILABLE_MODELS = [
    ModelInfo(alias="qwen3.5-4b", name="Qwen 3.5 4B", vision_alias="qwen3.5-4b-vl"),
    ModelInfo(alias="qwen3.5-9b", name="Qwen 3.5 9B", vision_alias="qwen3.5-9b-vl"),
    ModelInfo(alias="qwen2.5-coder-14b", name="Qwen 2.5 Coder 14B"),
    ModelInfo(alias="a-vibe", name="A-Vibe"),
]
|
||||
13
backend/app/core/security.py
Normal file
13
backend/app/core/security.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from argon2 import PasswordHasher
|
||||
from argon2.exceptions import VerifyMismatchError
|
||||
|
||||
# Shared Argon2 hasher with library-default parameters.
ph = PasswordHasher()


def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Return True when *plain_password* matches the Argon2 *hashed_password*.

    Only a mismatch is mapped to a normal False result; any other Argon2
    error (e.g. a malformed stored hash) propagates to the caller.
    """
    try:
        ph.verify(hashed_password, plain_password)
    except VerifyMismatchError:
        return False
    return True
|
||||
|
||||
def get_password_hash(password: str) -> str:
    """Hash *password* with Argon2 for storage; check later with verify_password()."""
    return ph.hash(password)
|
||||
Reference in New Issue
Block a user