Files
chat-frontend/backend/app/api/chats.py

284 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from typing import List, Optional
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy.orm import Session as DBSession
from sqlalchemy import select, desc
from app.db.session import get_db
from app.db.models import User, Chat, Message
from app.api.deps import get_current_user
from app.core.models_catalog import AVAILABLE_MODELS, ModelInfo
import logging
import sys
# Module logger: INFO-level, writing to stdout so container logs pick it up.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
if not logger.handlers:
    # Only attach a handler once; repeated imports/reloads must not duplicate output.
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    )
    logger.addHandler(stream_handler)

# Router collecting all chat-related endpoints in this module.
router = APIRouter()
class ChatCreateRequest(BaseModel):
    """Request body for POST /chats."""
    # Human-readable chat title; callers may omit it.
    title: str = "New Chat"
    # Alias of the model to use; validated against AVAILABLE_MODELS in create_chat.
    model_alias: str
class ChatResponse(BaseModel):
    """Serialized chat returned by the /chats endpoints."""
    id: str
    title: str
    model_alias: str
    created_at: datetime
    updated_at: datetime
class MessageCreateRequest(BaseModel):
    """Request body for POST /chats/{chat_id}/messages."""
    content: str
    # Sender role; defaults to "user" (add_message stores it verbatim).
    role: str = "user"
class MessageResponse(BaseModel):
    """Serialized message returned by the message endpoints."""
    id: str
    role: str
    content: str
    created_at: datetime
@router.get("/models", response_model=List[ModelInfo])
def get_models(user: User = Depends(get_current_user)):
    """Expose the static catalog of selectable LLM models (auth required)."""
    return AVAILABLE_MODELS
@router.post("/chats", response_model=ChatResponse)
def create_chat(
    req: ChatCreateRequest,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    """Create a chat owned by the current user.

    Rejects unknown model aliases with 400; otherwise persists the chat
    and returns the refreshed row.
    """
    known_aliases = {model.alias for model in AVAILABLE_MODELS}
    if req.model_alias not in known_aliases:
        raise HTTPException(status_code=400, detail="Invalid model alias")
    new_chat = Chat(user_id=user.id, title=req.title, model_alias=req.model_alias)
    db.add(new_chat)
    db.commit()
    # Refresh to populate DB-generated fields (id, timestamps) before returning.
    db.refresh(new_chat)
    return new_chat
@router.get("/chats", response_model=List[ChatResponse])
def list_chats(
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    """Return the current user's chats, most recently updated first."""
    query = (
        select(Chat)
        .where(Chat.user_id == user.id)
        .order_by(desc(Chat.updated_at))
    )
    return db.scalars(query).all()
@router.get("/chats/{chat_id}", response_model=ChatResponse)
def get_chat(
    chat_id: str,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    """Fetch one chat; 404 when it is missing or belongs to another user."""
    chat = db.get(Chat, chat_id)
    # A foreign chat is reported as 404 (not 403) so existence is not leaked.
    owned = chat is not None and chat.user_id == user.id
    if not owned:
        raise HTTPException(status_code=404, detail="Chat not found")
    return chat
@router.delete("/chats/{chat_id}")
def delete_chat(
    chat_id: str,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    """Delete a chat owned by the current user; 404 when missing or foreign."""
    target = db.get(Chat, chat_id)
    if target is None or target.user_id != user.id:
        raise HTTPException(status_code=404, detail="Chat not found")
    db.delete(target)
    db.commit()
    return {"status": "ok"}
@router.get("/chats/{chat_id}/messages", response_model=List[MessageResponse])
def list_messages(
    chat_id: str,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    """List a chat's messages in chronological order; 404 for a missing/foreign chat."""
    chat = db.get(Chat, chat_id)
    if chat is None or chat.user_id != user.id:
        raise HTTPException(status_code=404, detail="Chat not found")
    query = (
        select(Message)
        .where(Message.chat_id == chat_id)
        .order_by(Message.created_at)
    )
    return db.scalars(query).all()
from app.core.llm_client import llm_client, inference_lock
def sanitize_llm_text(raw_text: Optional[str]) -> Optional[str]:
    """Strip whitespace and literal <reasoning> tags from LLM output.

    Returns the cleaned text, or None when the input is None/empty or
    contains nothing but whitespace and tags.
    """
    if not raw_text:
        return None
    stripped = raw_text.strip()
    if not stripped:
        return None
    without_tags = stripped.replace("<reasoning>", "").replace("</reasoning>", "").strip()
    # An all-tag/whitespace payload collapses to "" — report it as unusable.
    return without_tags or None
def normalize_llm_response(content: str, reasoning: str) -> Optional[str]:
    """Choose the usable reply text from an LLM response.

    Prefers sanitized `content`; falls back to sanitized `reasoning`
    (some models put the answer there); None when both are empty.
    sanitize_llm_text never returns "", so the or-chain is safe.
    """
    return sanitize_llm_text(content) or sanitize_llm_text(reasoning)
@router.post("/chats/{chat_id}/messages", response_model=List[MessageResponse])
async def add_message(
    chat_id: str,
    req: MessageCreateRequest,
    db: DBSession = Depends(get_db),
    user: User = Depends(get_current_user)
):
    """Append a user message and synchronously generate the assistant reply.

    Flow: verify chat ownership, persist the user message, build the last-20
    messages as LLM history, then — under the global inference lock — switch
    the backing model if needed, wait for readiness, run inference with one
    controlled retry on empty output, and persist the assistant message.

    Returns:
        Both newly created messages: [user_msg, assistant_msg].

    Raises:
        HTTPException: 404 missing/foreign chat, 504 readiness timeout,
        500 inference failure or no valid output after retry.
    """
    chat = db.get(Chat, chat_id)
    if not chat or chat.user_id != user.id:
        # 404 (not 403) so chat existence is not leaked to other users.
        raise HTTPException(status_code=404, detail="Chat not found")
    # 1. Save user message
    user_msg = Message(chat_id=chat.id, role=req.role, content=req.content)
    db.add(user_msg)
    # Local import brings `timezone` into scope (module top imports only datetime).
    from datetime import datetime, timezone
    # Timestamp stored naive but UTC-derived.
    chat.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
    db.add(chat)
    # Commit before inference so the user message survives generation failures.
    db.commit()
    db.refresh(user_msg)
    logger.info(f"User message saved for chat {chat.id}. Selected model: {chat.model_alias}")
    # 2. Fetch recent chat history to assemble prompt
    # Get last 20 messages
    stmt = select(Message).where(Message.chat_id == chat_id).order_by(desc(Message.created_at)).limit(20)
    recent_msgs = db.scalars(stmt).all()
    # Query returned newest-first; restore chronological order for the prompt.
    recent_msgs.reverse()
    llm_history = []
    for m in recent_msgs:
        llm_history.append({"role": m.role, "content": m.content})
    # 3. Enter Critical Section for LLM Switch and Inference
    ai_response = None
    final_content = None
    async with inference_lock:
        try:
            status_data = await llm_client.get_status()
            current_model = status_data.get("active_model")
            logger.info(f"Current active llm-manager model: {current_model}")
            # Switch if needed
            switched = (current_model != chat.model_alias)
            if switched:
                logger.info(f"Switching model to {chat.model_alias}... (switch requested)")
                await llm_client.switch_model(chat.model_alias)
                logger.info(f"Successfully requested switch to {chat.model_alias}. Waiting for readiness...")
            # Wait for readiness (polled even when no switch occurred, so a
            # still-loading model is caught before inference).
            is_ready, iterations, final_status = await llm_client.wait_for_model_ready(
                model_name=chat.model_alias,
                timeout=60.0,
                poll_interval=2.0
            )
            if not is_ready:
                logger.error(f"Readiness timeout for {chat.model_alias} after {iterations} iterations. Final status: {final_status}")
                raise HTTPException(status_code=504, detail=f"LLM Manager readiness timeout for {chat.model_alias}")
            logger.info(f"Model {chat.model_alias} is ready after {iterations} iterations. Final status before completion: {final_status}")

            async def do_completion(msgs, max_tok=None, temp=None):
                # Single completion attempt; on a 502 or 503-in-detail failure
                # the model may have been unloaded, so re-request the switch
                # and retry the completion once.
                try:
                    return await llm_client.chat_completion(messages=msgs, max_tokens=max_tok, temperature=temp)
                except HTTPException as e:
                    if e.status_code == 502 or "503" in str(e.detail):
                        logger.warning("Generation failed (possibly 503 unloading). Retrying switch and completion...")
                        await llm_client.switch_model(chat.model_alias)
                        return await llm_client.chat_completion(messages=msgs, max_tokens=max_tok, temperature=temp)
                    raise e
            # Call inference (Attempt 1)
            logger.info("Starting chat completion (Attempt 1)...")
            ai_response = await do_completion(llm_history)
            # Parse Attempt 1
            ai_choice = ai_response.get("choices", [{}])[0].get("message", {})
            ai_content_raw = ai_choice.get("content", "") or ""
            ai_reasoning_raw = ai_choice.get("reasoning_content", "") or ""
            c_san = sanitize_llm_text(ai_content_raw)
            r_san = sanitize_llm_text(ai_reasoning_raw)
            final_content = normalize_llm_response(ai_content_raw, ai_reasoning_raw)
            logger.info(
                f"LLM Stats (Attempt 1) | model: {chat.model_alias} | "
                f"switched: {switched} | "
                f"content_raw_len: {len(ai_content_raw)} | reasoning_raw_len: {len(ai_reasoning_raw)} | "
                f"content_san_len: {len(c_san) if c_san else 0} | reasoning_san_len: {len(r_san) if r_san else 0}"
            )
            if not final_content:
                # Attempt 1 produced no usable text in either field: retry once
                # with an explicit instruction appended and conservative sampling.
                logger.warning("Attempt 1 rejected: invalid response (both sanitized texts are empty). Triggering controlled retry.")
                retry_history = list(llm_history)
                retry_history.append({
                    "role": "user",
                    "content": "Ответь сразу финальным текстом. Не выводи reasoning, chain-of-thought, XML-теги или служебную разметку."
                })
                logger.info("Starting chat completion (Attempt 2 - Retry) with max_tokens=2048 and temperature=0.1...")
                ai_response_retry = await do_completion(retry_history, max_tok=2048, temp=0.1)
                ai_choice_r = ai_response_retry.get("choices", [{}])[0].get("message", {})
                ai_content_r_raw = ai_choice_r.get("content", "") or ""
                ai_reasoning_r_raw = ai_choice_r.get("reasoning_content", "") or ""
                c_san_r = sanitize_llm_text(ai_content_r_raw)
                r_san_r = sanitize_llm_text(ai_reasoning_r_raw)
                final_content = normalize_llm_response(ai_content_r_raw, ai_reasoning_r_raw)
                logger.info(
                    f"LLM Stats (Attempt 2 - Retry) | model: {chat.model_alias} | "
                    f"content_raw_len: {len(ai_content_r_raw)} | reasoning_raw_len: {len(ai_reasoning_r_raw)} | "
                    f"content_san_len: {len(c_san_r) if c_san_r else 0} | reasoning_san_len: {len(r_san_r) if r_san_r else 0}"
                )
                if not final_content:
                    logger.error("Attempt 2 also failed to produce valid output. Aborting.")
                    raise HTTPException(status_code=500, detail="LLM failed to produce valid output after retry.")
                else:
                    logger.info("Attempt 2 succeeded in producing valid output.")
            else:
                # Attempt 1 succeeded; note when the answer came from the
                # reasoning field rather than the content field.
                if not ai_content_raw.strip() and final_content:
                    logger.info("Fallback to reasoning_content was chosen because 'content' was empty (Attempt 1).")
        except HTTPException:
            # Pass through the 404/504/500 raised above unchanged.
            raise
        except Exception as e:
            logger.error(f"Inference pipeline failed: {e}")
            raise HTTPException(status_code=500, detail=str(e))
    # 5. Save AI message
    assistant_msg = Message(chat_id=chat.id, role="assistant", content=final_content)
    db.add(assistant_msg)
    chat.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
    db.add(chat)
    db.commit()
    db.refresh(assistant_msg)
    logger.info("Assistant message saved successfully.")
    return [user_msg, assistant_msg]