Initial MVP skeleton with auth, chat persistence, UI and text LLM integration
283
backend/app/api/chats.py
Normal file
@@ -0,0 +1,283 @@
from typing import List, Optional
from datetime import datetime, timezone
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, ConfigDict
from sqlalchemy.orm import Session as DBSession
from sqlalchemy import select, desc

from app.db.session import get_db
from app.db.models import User, Chat, Message
from app.api.deps import get_current_user
from app.core.models_catalog import AVAILABLE_MODELS, ModelInfo
import logging
import sys

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
if not logger.handlers:
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(handler)

router = APIRouter()

class ChatCreateRequest(BaseModel):
    title: str = "New Chat"
    model_alias: str

class ChatResponse(BaseModel):
    # from_attributes (Pydantic v2; orm_mode on v1) lets FastAPI build this
    # response model straight from the SQLAlchemy Chat rows returned below.
    model_config = ConfigDict(from_attributes=True)

    id: str
    title: str
    model_alias: str
    created_at: datetime
    updated_at: datetime

class MessageCreateRequest(BaseModel):
    content: str
    role: str = "user"

class MessageResponse(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    id: str
    role: str
    content: str
    created_at: datetime

@router.get("/models", response_model=List[ModelInfo])
def get_models(user: User = Depends(get_current_user)):
    return AVAILABLE_MODELS

@router.post("/chats", response_model=ChatResponse)
def create_chat(
    req: ChatCreateRequest,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    valid_aliases = {m.alias for m in AVAILABLE_MODELS}
    if req.model_alias not in valid_aliases:
        raise HTTPException(status_code=400, detail="Invalid model alias")

    chat = Chat(user_id=user.id, title=req.title, model_alias=req.model_alias)
    db.add(chat)
    db.commit()
    db.refresh(chat)

    return chat
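
# Illustrative request/response for the endpoint above (the "/api" prefix is an
# assumption; the real path depends on how this router is mounted in the app):
#
#   POST /api/chats
#   {"title": "My first chat", "model_alias": "<alias from GET /api/models>"}
#
# -> 200 with the persisted chat: {"id": ..., "title": ..., "model_alias": ...,
#    "created_at": ..., "updated_at": ...}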

@router.get("/chats", response_model=List[ChatResponse])
def list_chats(
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    stmt = select(Chat).where(Chat.user_id == user.id).order_by(desc(Chat.updated_at))
    chats = db.scalars(stmt).all()
    return chats
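
# NOTE: every chat-scoped endpoint below re-checks ownership
# (chat.user_id == user.id) and answers 404 rather than 403, which also avoids
# revealing whether a chat id belonging to another user exists.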

@router.get("/chats/{chat_id}", response_model=ChatResponse)
def get_chat(
    chat_id: str,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    chat = db.get(Chat, chat_id)
    if not chat or chat.user_id != user.id:
        raise HTTPException(status_code=404, detail="Chat not found")
    return chat

@router.delete("/chats/{chat_id}")
def delete_chat(
    chat_id: str,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    chat = db.get(Chat, chat_id)
    if not chat or chat.user_id != user.id:
        raise HTTPException(status_code=404, detail="Chat not found")

    db.delete(chat)
    db.commit()
    return {"status": "ok"}

@router.get("/chats/{chat_id}/messages", response_model=List[MessageResponse])
def list_messages(
    chat_id: str,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    chat = db.get(Chat, chat_id)
    if not chat or chat.user_id != user.id:
        raise HTTPException(status_code=404, detail="Chat not found")

    stmt = select(Message).where(Message.chat_id == chat_id).order_by(Message.created_at)
    messages = db.scalars(stmt).all()
    return messages

from app.core.llm_client import llm_client, inference_lock

def sanitize_llm_text(raw_text: Optional[str]) -> Optional[str]:
    if not raw_text:
        return None
    text = raw_text.strip()
    if not text:
        return None

    cleaned = text.replace("<reasoning>", "").replace("</reasoning>", "").strip()
    if not cleaned:
        return None

    return cleaned

def normalize_llm_response(content: str, reasoning: str) -> Optional[str]:
    c_sanitized = sanitize_llm_text(content)
    if c_sanitized:
        return c_sanitized

    r_sanitized = sanitize_llm_text(reasoning)
    if r_sanitized:
        return r_sanitized

    return None
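
# Worked examples for the two helpers above (illustrative strings only):
#   sanitize_llm_text("  <reasoning></reasoning>  ")               -> None
#   sanitize_llm_text("<reasoning>draft</reasoning> Final text")   -> "draft Final text"
#   normalize_llm_response("", "Answer kept in reasoning_content") -> "Answer kept in reasoning_content"
# i.e. content wins whenever it survives sanitization, reasoning_content is the
# fallback, and None means "nothing usable came back".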

@router.post("/chats/{chat_id}/messages", response_model=List[MessageResponse])
async def add_message(
    chat_id: str,
    req: MessageCreateRequest,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    chat = db.get(Chat, chat_id)
    if not chat or chat.user_id != user.id:
        raise HTTPException(status_code=404, detail="Chat not found")

    # 1. Save user message
    user_msg = Message(chat_id=chat.id, role=req.role, content=req.content)
    db.add(user_msg)

    # updated_at is stored as naive UTC
    chat.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
    db.add(chat)
    db.commit()
    db.refresh(user_msg)
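    # The user turn is committed before inference starts, so a failing LLM call
    # below leaves the chat with the user message saved and no assistant reply.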

    logger.info(f"User message saved for chat {chat.id}. Selected model: {chat.model_alias}")

    # 2. Fetch recent chat history to assemble prompt
    # Get last 20 messages
    stmt = select(Message).where(Message.chat_id == chat_id).order_by(desc(Message.created_at)).limit(20)
    recent_msgs = db.scalars(stmt).all()
    recent_msgs.reverse()

    llm_history = []
    for m in recent_msgs:
        llm_history.append({"role": m.role, "content": m.content})
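
    # The lock below (from app.core.llm_client) serializes the whole
    # switch -> wait-for-ready -> completion sequence. The working assumption,
    # implied by switch_model()/active_model, is that the llm-manager backend
    # keeps a single model loaded at a time, so concurrent requests must not
    # interleave their switches and generations.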

    # 3. Enter Critical Section for LLM Switch and Inference
    ai_response = None
    final_content = None
    async with inference_lock:
        try:
            status_data = await llm_client.get_status()
            current_model = status_data.get("active_model")
            logger.info(f"Current active llm-manager model: {current_model}")

            # Switch if needed
            switched = (current_model != chat.model_alias)
            if switched:
                logger.info(f"Switching model to {chat.model_alias}... (switch requested)")
                await llm_client.switch_model(chat.model_alias)
                logger.info(f"Successfully requested switch to {chat.model_alias}. Waiting for readiness...")

            # Wait for readiness
            is_ready, iterations, final_status = await llm_client.wait_for_model_ready(
                model_name=chat.model_alias,
                timeout=60.0,
                poll_interval=2.0
            )

            if not is_ready:
                logger.error(f"Readiness timeout for {chat.model_alias} after {iterations} iterations. Final status: {final_status}")
                raise HTTPException(status_code=504, detail=f"LLM Manager readiness timeout for {chat.model_alias}")

            logger.info(f"Model {chat.model_alias} is ready after {iterations} iterations. Final status before completion: {final_status}")

            async def do_completion(msgs, max_tok=None, temp=None):
                try:
                    return await llm_client.chat_completion(messages=msgs, max_tokens=max_tok, temperature=temp)
                except HTTPException as e:
                    if e.status_code == 502 or "503" in str(e.detail):
                        logger.warning("Generation failed (possibly 503 unloading). Retrying switch and completion...")
                        await llm_client.switch_model(chat.model_alias)
                        return await llm_client.chat_completion(messages=msgs, max_tokens=max_tok, temperature=temp)
                    raise e

            # Call inference (Attempt 1)
            logger.info("Starting chat completion (Attempt 1)...")
            ai_response = await do_completion(llm_history)

            # Parse Attempt 1
            ai_choice = ai_response.get("choices", [{}])[0].get("message", {})
            ai_content_raw = ai_choice.get("content", "") or ""
            ai_reasoning_raw = ai_choice.get("reasoning_content", "") or ""

            c_san = sanitize_llm_text(ai_content_raw)
            r_san = sanitize_llm_text(ai_reasoning_raw)
            final_content = normalize_llm_response(ai_content_raw, ai_reasoning_raw)

            logger.info(
                f"LLM Stats (Attempt 1) | model: {chat.model_alias} | "
                f"switched: {switched} | "
                f"content_raw_len: {len(ai_content_raw)} | reasoning_raw_len: {len(ai_reasoning_raw)} | "
                f"content_san_len: {len(c_san) if c_san else 0} | reasoning_san_len: {len(r_san) if r_san else 0}"
            )
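
            # The parsing above assumes an OpenAI-style chat completion payload,
            # roughly: {"choices": [{"message": {"content": "...",
            # "reasoning_content": "..."}}]}, where reasoning_content is an
            # optional extra that some "thinking" model backends attach.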

            if not final_content:
                logger.warning("Attempt 1 rejected: invalid response (both sanitized texts are empty). Triggering controlled retry.")

                retry_history = list(llm_history)
                retry_history.append({
                    "role": "user",
                    # Russian retry nudge; in English: "Reply with the final text
                    # right away. Do not output reasoning, chain-of-thought, XML
                    # tags or service markup."
                    "content": "Ответь сразу финальным текстом. Не выводи reasoning, chain-of-thought, XML-теги или служебную разметку."
                })

                logger.info("Starting chat completion (Attempt 2 - Retry) with max_tokens=2048 and temperature=0.1...")
                ai_response_retry = await do_completion(retry_history, max_tok=2048, temp=0.1)

                ai_choice_r = ai_response_retry.get("choices", [{}])[0].get("message", {})
                ai_content_r_raw = ai_choice_r.get("content", "") or ""
                ai_reasoning_r_raw = ai_choice_r.get("reasoning_content", "") or ""

                c_san_r = sanitize_llm_text(ai_content_r_raw)
                r_san_r = sanitize_llm_text(ai_reasoning_r_raw)
                final_content = normalize_llm_response(ai_content_r_raw, ai_reasoning_r_raw)

                logger.info(
                    f"LLM Stats (Attempt 2 - Retry) | model: {chat.model_alias} | "
                    f"content_raw_len: {len(ai_content_r_raw)} | reasoning_raw_len: {len(ai_reasoning_r_raw)} | "
                    f"content_san_len: {len(c_san_r) if c_san_r else 0} | reasoning_san_len: {len(r_san_r) if r_san_r else 0}"
                )

                if not final_content:
                    logger.error("Attempt 2 also failed to produce valid output. Aborting.")
                    raise HTTPException(status_code=500, detail="LLM failed to produce valid output after retry.")
                else:
                    logger.info("Attempt 2 succeeded in producing valid output.")
            else:
                if not ai_content_raw.strip() and final_content:
                    logger.info("Fallback to reasoning_content was chosen because 'content' was empty (Attempt 1).")

        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"Inference pipeline failed: {e}")
            raise HTTPException(status_code=500, detail=str(e))

    # 5. Save AI message
    assistant_msg = Message(chat_id=chat.id, role="assistant", content=final_content)
    db.add(assistant_msg)

    chat.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
    db.add(chat)
    db.commit()
    db.refresh(assistant_msg)
    logger.info("Assistant message saved successfully.")

    return [user_msg, assistant_msg]
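
# Example round-trip for the endpoint above (illustrative; the "/api" prefix is
# an assumption about how the router is mounted):
#
#   POST /api/chats/<chat_id>/messages
#   {"content": "Hello!"}
#
# -> a two-element list: the stored user message followed by the newly generated
#    assistant message, both serialized as MessageResponse.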